diff --git a/src/cooperbench/agents/openhands_agent_sdk/adapter.py b/src/cooperbench/agents/openhands_agent_sdk/adapter.py
index 16c6f895..adce77ca 100644
--- a/src/cooperbench/agents/openhands_agent_sdk/adapter.py
+++ b/src/cooperbench/agents/openhands_agent_sdk/adapter.py
@@ -17,6 +17,7 @@
 import modal
 
 from cooperbench.agents import AgentResult
+from cooperbench.agents._coop.runtime import rewrite_comm_url_for_container
 from cooperbench.agents.openhands_agent_sdk.utils import git_push_with_retry, wait_for_git_server
 from cooperbench.agents.registry import register
 
@@ -198,39 +199,141 @@ def _retrieve_sent_messages(redis_url: str, agent_id: str) -> list[dict]:
         return []
 
 
-def _extract_patch(workspace: Any, base_commit: str | None) -> str:
-    """Extract git diff patch from workspace.
-    
-    Captures all changes: staged, unstaged, and new untracked files.
-    Uses `git add -A` first to ensure new files are included in the diff.
-    
-    Args:
-        workspace: RemoteWorkspace instance
-        base_commit: Base commit SHA to diff from
-        
-    Returns:
-        Patch content as string, or empty string on failure
+def _submission_instructions(*, is_coop: bool) -> str:
+    """Submission block appended to the user task message.
+
+    Mirrors mini_swe_agent_v2's ``coop.yaml`` submission section so both
+    adapters extract patches the same way: the agent writes a unified diff
+    to ``patch.txt`` and the harness reads it. OpenHands ends via its own
+    ``finish`` flow rather than the mini-swe echo signal, so the wording
+    is adapted accordingly. In coop runs we also remind the agent that
+    overlapping line edits cause both patches to be discarded.
     """
-    if not base_commit or not workspace:
-        return ""
-    
-    try:
-        # Stage all changes (including new files) so they appear in diff
-        workspace.execute_command(
-            "git add -A",
-            cwd="/workspace/repo",
-            timeout=30.0
-        )
-        # Diff from base commit to working tree (includes all staged/unstaged changes)
-        diff_result = workspace.execute_command(
-            f"git diff {base_commit}",
-            cwd="/workspace/repo",
-            timeout=60.0
-        )
-        return diff_result.stdout if diff_result.exit_code == 0 else ""
-    except Exception as e:
-        logger.warning(f"Failed to extract patch: {e}")
-        return ""
+    coop_reminder = (
+        "\nThe goal is for your patch to NOT conflict with your teammate's when our\n"
+        "harness merges them — if any of your edits touch the same lines they\n"
+        "touched, both patches are thrown away. Keep `patch.txt` scoped to the\n"
+        "files and hunks you actually need.\n"
+        if is_coop
+        else ""
+    )
+    return (
+        "\n\n## Submission\n\n"
+        "`patch.txt` is the artifact we evaluate — write whatever unified diff\n"
+        "you want to submit to that file, however it makes sense given how you\n"
+        "worked.\n\n"
+        "Write the patch (one common way — `git diff` of your in-place edits):\n\n"
+        "```bash\n"
+        "git diff -- path/to/file1 path/to/file2 > patch.txt\n"
+        "```\n\n"
+        "Verify it contains what you intend:\n\n"
+        "```bash\n"
+        "cat patch.txt\n"
+        "```\n"
+        f"{coop_reminder}"
+        "\nOnce `patch.txt` contains the diff you want submitted, finish the task.\n\n"
+        "The patch must be a unified diff and contain only source files you\n"
+        "intentionally modified. Exclude:\n\n"
+        "- reproduction or scratch test scripts you wrote\n"
+        "- helper scripts or tools you created\n"
+        "- installation, build, packaging, or configuration files\n"
+        "- binaries or compiled files\n\n"
+        "<CRITICAL>\n"
+        "Do NOT run `rm -rf .git`, `git init`, `git rm -rf .`, or `git reset --hard`\n"
+        "inside `/workspace/repo` — these corrupt `.git/` and your patch will be\n"
+        "unapplyable.\n"
+        "</CRITICAL>\n"
+    )
+
+
+def _collect_sandbox_credentials(
+    coop_info: dict | None,
+    *,
+    rewrite_localhost: bool,
+) -> dict[str, str]:
+    """Collect API keys, credentials, and coop info as env vars for the agent-server.
+
+    Shared by ModalSandboxContext and DockerSandboxContext. When
+    ``rewrite_localhost`` is True, ``REDIS_URL`` is rewritten so that
+    ``localhost`` / ``127.0.0.1`` resolve to the host gateway from
+    inside a docker container (mirrors what every other adapter does).
+    """
+    creds: dict[str, str] = {}
+
+    for key in [
+        "GEMINI_API_KEY",
+        "ANTHROPIC_API_KEY",
+        "ANTHROPIC_BASE_URL",
+        "OPENAI_API_KEY",
+        "OPENAI_BASE_URL",
+        "GOOGLE_CLOUD_PROJECT",
+        "VERTEXAI_PROJECT",
+        "VERTEXAI_LOCATION",
+    ]:
+        if value := os.environ.get(key):
+            creds[key] = value
+
+    gcp_creds_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
+    if not gcp_creds_path:
+        home = os.path.expanduser("~")
+        default_adc_path = os.path.join(home, ".config", "gcloud", "application_default_credentials.json")
+        if os.path.exists(default_adc_path):
+            gcp_creds_path = default_adc_path
+
+    if gcp_creds_path and os.path.exists(gcp_creds_path):
+        with open(gcp_creds_path) as f:
+            creds_content = f.read()
+        creds["GOOGLE_APPLICATION_CREDENTIALS_JSON"] = creds_content
+        if "VERTEXAI_PROJECT" not in creds:
+            try:
+                adc_data = json.loads(creds_content)
+                if project_id := adc_data.get("quota_project_id"):
+                    creds["VERTEXAI_PROJECT"] = project_id
+                    creds["GOOGLE_CLOUD_PROJECT"] = project_id
+            except json.JSONDecodeError:
+                pass
+
+    if coop_info:
+        redis_url = coop_info.get("redis_url")
+        if redis_url:
+            if rewrite_localhost:
+                redis_url = rewrite_comm_url_for_container(redis_url) or redis_url
+            creds["REDIS_URL"] = redis_url
+        if coop_info.get("git_url"):
+            creds["GIT_URL"] = coop_info["git_url"]
+        if coop_info.get("agent_id"):
+            creds["AGENT_ID"] = coop_info["agent_id"]
+        if coop_info.get("agents"):
+            creds["AGENTS"] = ",".join(coop_info["agents"])
+        team_env = coop_info.get("team_env") or {}
+        for k, v in team_env.items():
+            if v:
+                creds[k] = v
+
+    return creds
+
+
+def _wait_for_agent_server(url: str, timeout: int = 120) -> None:
+    """Block until the agent-server's /health endpoint returns 200.
+
+    Shared by ModalSandboxContext and DockerSandboxContext.
+    """
+    import httpx
+
+    start = time.time()
+    last_error: Exception | None = None
+    while time.time() - start < timeout:
+        try:
+            response = httpx.get(f"{url}/health", timeout=10)
+            if response.status_code == 200:
+                return
+        except Exception as e:
+            last_error = e
+        time.sleep(2)
+
+    raise TimeoutError(
+        f"Agent-server did not become ready within {timeout}s. Last error: {last_error}"
+    )
 
 
 @register("openhands_sdk")
@@ -376,44 +479,50 @@ def run(
         status = "Error"
         error = None
         
+        config = config or {}
+        backend = config.get("backend", "docker")
+
         # Determine if this is a coop run
         is_coop = (messaging_enabled or git_enabled) and agents and len(agents) > 1
         redis_url = comm_url
-        # OpenHands adapter manages its own git server - ignore git_server_url from coop.py
-        # This ensures git setup works correctly with RemoteWorkspace
+        # On Modal we self-manage the git server (ignoring git_server_url from
+        # coop.py); on Docker we use the shared DockerGitServer that coop.py
+        # creates and passes through git_server_url.
         git_url = None
         run_id = None
-        owns_redis = False  # Track if we need to release Redis reference
-        owns_git = False  # Track if we need to release Git server reference
-        
+        owns_redis = False  # Track if we need to release Redis reference (Modal only)
+        owns_git = False  # Track if we need to release Git server reference (Modal only)
+
         if is_coop:
             # Extract run_id from config or comm_url namespace
-            config = config or {}
             if comm_url and "#run:" in comm_url:
                 # Extract run_id from namespaced URL: redis://host:port#run:abc123
                 run_id = comm_url.split("#run:")[1]
             else:
                 run_id = config.get("run_id")
-            
+
             # Generate run_id if not provided
             if not run_id:
                 import uuid
                 run_id = uuid.uuid4().hex[:8]
-            
-            # Create Modal Redis if needed (localhost not reachable from Modal)
-            if messaging_enabled and _needs_modal_redis(comm_url):
-                redis_url = _get_or_create_redis(run_id, agents, self.timeout)
-                owns_redis = True
-            
-            # Create Modal Git server if git is enabled
-            # OpenHands adapter always creates its own git server (ignores git_server_url from coop.py)
-            # to ensure git setup works correctly with RemoteWorkspace
-            if git_enabled:
-                git_url = _get_or_create_git_server(run_id, agents, self.timeout)
-                owns_git = True
+
+            if backend == "modal":
+                # Modal can't reach host-side Redis/Git, so self-manage them.
+                if messaging_enabled and _needs_modal_redis(comm_url):
+                    redis_url = _get_or_create_redis(run_id, agents, self.timeout)
+                    owns_redis = True
+                if git_enabled:
+                    git_url = _get_or_create_git_server(run_id, agents, self.timeout)
+                    owns_git = True
+            else:
+                # Docker backend: reuse host Redis (ensure_redis) + shared
+                # DockerGitServer (created by coop.py). REDIS_URL gets
+                # rewritten to host.docker.internal inside the credential
+                # collector when injected into the sandbox.
+                if git_enabled:
+                    git_url = git_server_url
 
         workspace = None
-        base_commit = None
 
         try:
             # Build coop_info for both sandbox env vars AND agent system prompt
@@ -455,7 +564,8 @@ def run(
                 # message gets ignored (oh_team_v2 failure mode).
                 coop_info["team_section"] = team_session.prompt_section(agent_id=agent_id)
             
-            with ModalSandboxContext(oh_image, self.timeout, coop_info=coop_info) as sandbox_url:
+            sandbox_cls = ModalSandboxContext if backend == "modal" else DockerSandboxContext
+            with sandbox_cls(oh_image, self.timeout, coop_info=coop_info) as sandbox_url:
 
                 # Import SDK components
                 from openhands.sdk import LLM
@@ -493,18 +603,6 @@ def run(
                     working_dir="/workspace/repo",
                 )
                 
-                # Capture base commit for patch generation (before any changes)
-                try:
-                    base_result = workspace.execute_command(
-                        "git rev-parse HEAD",
-                        cwd="/workspace/repo",
-                        timeout=10.0
-                    )
-                    base_commit = base_result.stdout.strip() if base_result.exit_code == 0 else None
-                except Exception as e:
-                    logger.warning(f"Failed to get base commit: {e}")
-                    base_commit = None
-                
                 # Set up git remote if git collaboration is enabled
                 if coop_info and coop_info.get("git_enabled") and coop_info.get("git_url"):
                     self._setup_git_remote(
@@ -557,10 +655,12 @@ def event_callback(event):
                     visualizer=None,
                 )
 
-                # Send task and run the conversation
+                # Send task and run the conversation. Append the patch.txt
+                # submission instructions so the agent writes its diff to a
+                # known file before finishing (matches mini_swe_agent's flow).
                 # Message checking for coop mode happens inside the agent loop
                 # (in LocalConversation._check_inbox_messages before each step)
-                conversation.send_message(task)
+                conversation.send_message(task + _submission_instructions(is_coop=is_coop))
                 try:
                     conversation.run(blocking=True, timeout=float(self.timeout))
                     status = "Submitted"
@@ -575,8 +675,22 @@ def event_callback(event):
                         error = error_str
                         status = "Error"
 
-                # Extract patch while sandbox is still alive
-                patch = _extract_patch(workspace, base_commit)
+                # Read patch.txt that the agent wrote during submission.
+                # Mirrors mini_swe_agent_v2's submission flow (see config/coop.yaml):
+                # the agent is prompted to write its diff to patch.txt before
+                # finishing, and we extract that file as-is.
+                patch = ""
+                try:
+                    patch_result = workspace.execute_command(
+                        "cat patch.txt 2>/dev/null",
+                        cwd="/workspace/repo",
+                        timeout=30.0,
+                    )
+                    if patch_result.exit_code == 0:
+                        from cooperbench.agents._coop.runtime import normalize_patch
+                        patch = normalize_patch(patch_result.stdout or "")
+                except Exception as e:
+                    logger.warning(f"Failed to read patch.txt: {e}")
 
                 # Get cost and token usage from conversation stats
                 try:
@@ -669,66 +783,7 @@ def __init__(self, image_name: str, timeout: int, coop_info: dict | None = None)
 
     def _collect_credentials(self) -> dict[str, str]:
         """Collect API keys, credentials, and coop info from environment."""
-        creds = {}
-        
-        # Collect API keys and Vertex AI config (litellm reads VERTEXAI_* env vars)
-        for key in [
-            "GEMINI_API_KEY",
-            "ANTHROPIC_API_KEY",
-            "ANTHROPIC_BASE_URL",
-            "OPENAI_API_KEY",
-            "OPENAI_BASE_URL",
-            "GOOGLE_CLOUD_PROJECT",
-            "VERTEXAI_PROJECT",
-            "VERTEXAI_LOCATION",
-        ]:
-            if value := os.environ.get(key):
-                creds[key] = value
-        
-        # Read Google Cloud credentials JSON if available
-        gcp_creds_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
-        
-        # If not explicitly set, check standard gcloud ADC location
-        if not gcp_creds_path:
-            home = os.path.expanduser("~")
-            default_adc_path = os.path.join(home, ".config", "gcloud", "application_default_credentials.json")
-            if os.path.exists(default_adc_path):
-                gcp_creds_path = default_adc_path
-        
-        if gcp_creds_path and os.path.exists(gcp_creds_path):
-            with open(gcp_creds_path) as f:
-                creds_content = f.read()
-                creds["GOOGLE_APPLICATION_CREDENTIALS_JSON"] = creds_content
-                
-                # Extract project from ADC if not already set
-                if "VERTEXAI_PROJECT" not in creds:
-                    import json
-                    try:
-                        adc_data = json.loads(creds_content)
-                        if project_id := adc_data.get("quota_project_id"):
-                            creds["VERTEXAI_PROJECT"] = project_id
-                            creds["GOOGLE_CLOUD_PROJECT"] = project_id
-                    except json.JSONDecodeError:
-                        pass
-        
-        # Add coop info for collaboration tools
-        if self.coop_info:
-            if self.coop_info.get("redis_url"):
-                creds["REDIS_URL"] = self.coop_info["redis_url"]
-            if self.coop_info.get("git_url"):
-                creds["GIT_URL"] = self.coop_info["git_url"]
-            if self.coop_info.get("agent_id"):
-                creds["AGENT_ID"] = self.coop_info["agent_id"]
-            if self.coop_info.get("agents"):
-                creds["AGENTS"] = ",".join(self.coop_info["agents"])
-            # Team-mode env vars consumed by the in-container
-            # coop-task-* CLI.
-            team_env = self.coop_info.get("team_env") or {}
-            for k, v in team_env.items():
-                if v:
-                    creds[k] = v
-        
-        return creds
+        return _collect_sandbox_credentials(self.coop_info, rewrite_localhost=False)
 
     def __enter__(self) -> str:
         """Start sandbox, run agent-server, and return the tunnel URL."""
@@ -865,21 +920,239 @@ def __exit__(self, exc_type, exc_val, exc_tb):
 
     def _wait_for_server(self, url: str, timeout: int = 120):
         """Wait for the agent-server to be ready."""
-        import httpx
+        _wait_for_agent_server(url, timeout=timeout)
 
-        start = time.time()
-        last_error = None
-        
-        while time.time() - start < timeout:
+
+class DockerSandboxContext:
+    """Context manager for a local Docker sandbox running the agent-server.
+
+    Mirrors ``ModalSandboxContext`` but uses local docker:
+
+    * ``docker run -d --rm --platform linux/amd64 -p 0:8000`` on the
+      ``-oh`` image, with a random host port for concurrency safety.
+    * Joins the shared ``cooperbench`` bridge network when ``git_enabled``
+      so the agent-server can resolve ``cooperbench-git`` by name.
+    * Adds ``--add-host=host.docker.internal:host-gateway`` whenever a
+      ``REDIS_URL`` is configured, so the agent-server can reach the
+      host-side ``cooperbench-redis`` container.
+    * Injects credentials via a temp ``--env-file`` (handles multi-line
+      ``GOOGLE_APPLICATION_CREDENTIALS_JSON``).
+    * Lays in the team-mode coop-task CLI via ``docker cp`` when needed,
+      matching the Modal path's image-layering behavior.
+    """
+
+    AGENT_SERVER_PORT = 8000
+
+    def __init__(self, image_name: str, timeout: int, coop_info: dict | None = None):
+        del timeout  # docker run has no equivalent timeout; lifecycle bounded by context manager.
+        self.image_name = image_name
+        self.coop_info = coop_info
+        self._container_name: str | None = None
+        self._env_file_path: str | None = None
+
+    def __enter__(self) -> str:
+        import secrets as _secrets
+        import shutil
+        import subprocess
+        import tempfile
+
+        if shutil.which("docker") is None:
+            raise RuntimeError("docker CLI not found on PATH — install Docker to use the docker backend")
+
+        self._container_name = f"cooperbench-oh-{_secrets.token_hex(4)}"
+
+        creds = _collect_sandbox_credentials(self.coop_info, rewrite_localhost=True)
+
+        # Write credentials to a temp env-file (handles multi-line ADC JSON).
+        env_file = tempfile.NamedTemporaryFile(
+            mode="w",
+            prefix="cooperbench-oh-env-",
+            suffix=".env",
+            delete=False,
+        )
+        try:
+            for key, value in creds.items():
+                # docker --env-file is line-oriented; multi-line values
+                # (ADC JSON) need to be base64-encoded or passed via -e
+                # in their entirety. We pass single-line values through
+                # the env-file and multi-line values via -e with the
+                # raw value, which docker forwards correctly.
+                if "\n" not in value:
+                    env_file.write(f"{key}={value}\n")
+            env_file.close()
+        except Exception:
+            env_file.close()
+            os.unlink(env_file.name)
+            raise
+        self._env_file_path = env_file.name
+
+        multiline_env = {k: v for k, v in creds.items() if "\n" in v}
+
+        cmd: list[str] = [
+            "docker",
+            "run",
+            "-d",
+            "--rm",
+            "--platform",
+            "linux/amd64",
+            "--name",
+            self._container_name,
+            "-p",
+            f"0:{self.AGENT_SERVER_PORT}",
+            "--env-file",
+            self._env_file_path,
+            "--workdir",
+            "/",
+        ]
+
+        if self.coop_info and self.coop_info.get("git_enabled"):
+            cmd += ["--network", "cooperbench"]
+
+        if creds.get("REDIS_URL"):
+            cmd += ["--add-host", "host.docker.internal:host-gateway"]
+
+        for key, value in multiline_env.items():
+            cmd += ["-e", f"{key}={value}"]
+
+        cmd.append(self.image_name)
+
+        try:
+            subprocess.run(cmd, check=True, capture_output=True, text=True)
+        except subprocess.CalledProcessError as e:
+            self._cleanup_env_file()
+            raise RuntimeError(
+                f"docker run failed for {self.image_name}: {e.stderr or e.stdout}"
+            ) from e
+
+        try:
+            self._install_team_cli_if_needed()
+        except Exception:
+            self.__exit__(None, None, None)
+            raise
+
+        try:
+            host_port = self._discover_host_port()
+        except Exception:
+            self.__exit__(None, None, None)
+            raise
+
+        url = f"http://localhost:{host_port}"
+        try:
+            _wait_for_agent_server(url)
+        except Exception:
+            self.__exit__(None, None, None)
+            raise
+
+        return url
+
+    def _discover_host_port(self) -> int:
+        import subprocess
+
+        result = subprocess.run(
+            ["docker", "port", self._container_name or "", f"{self.AGENT_SERVER_PORT}/tcp"],
+            check=True,
+            capture_output=True,
+            text=True,
+        )
+        # Output looks like ``0.0.0.0:32773\n[::]:32773``; first line is enough.
+        first = result.stdout.strip().splitlines()[0]
+        return int(first.rsplit(":", 1)[1])
+
+    def _install_team_cli_if_needed(self) -> None:
+        """Drop coop-task CLI + tracker override into the running container.
+
+        Equivalent to the Modal path's image-layering block but done via
+        ``docker cp`` against the already-running container.  Triggered
+        on the same condition as the Modal path: ``team_env`` is set and
+        either ``task_list`` or ``protocol`` is enabled.
+        """
+        import subprocess
+        from pathlib import Path as _Path
+
+        team_env = (self.coop_info or {}).get("team_env") if self.coop_info else None
+        team_features = (self.coop_info or {}).get("team_features") if self.coop_info else None
+        install = bool(team_env) and (
+            team_features is None
+            or getattr(team_features, "task_list", True)
+            or getattr(team_features, "protocol", True)
+        )
+        if not install:
+            return
+
+        from cooperbench.team_harness import COOP_TASK_SCRIPT_PATH
+
+        oh_tools_dir = (
+            _Path(__file__).resolve().parent / "openhands-tools" / "openhands" / "tools"
+        )
+        coop_tracker_path = oh_tools_dir / "task_tracker" / "coop_definition.py"
+        init_override_path = oh_tools_dir / "task_tracker" / "_team_init_override.py"
+
+        name = self._container_name or ""
+
+        # Copy the coop-task script + task-tracker overrides into the container.
+        for src, dst in [
+            (COOP_TASK_SCRIPT_PATH, "/usr/local/bin/cb-coop-task.py"),
+            (coop_tracker_path, "/tmp/cb-coop-tracker.py"),
+            (init_override_path, "/tmp/cb-task-tracker-init.py"),
+        ]:
+            subprocess.run(
+                ["docker", "cp", str(src), f"{name}:{dst}"],
+                check=True,
+                capture_output=True,
+                text=True,
+            )
+
+        # Patch the openhands install: drop coop_definition.py + override
+        # __init__, install redis, and create coop-task-* wrapper scripts.
+        patch_script = (
+            "set -e; "
+            "pip install --quiet redis >/dev/null 2>&1 || pip install redis; "
+            'OH_DIR="$(python3 -c \'import openhands.tools.task_tracker as t, os; '
+            "print(os.path.dirname(t.__file__))')\"; "
+            'cp /tmp/cb-coop-tracker.py "$OH_DIR/coop_definition.py"; '
+            'cp /tmp/cb-task-tracker-init.py "$OH_DIR/__init__.py"; '
+            'find "$OH_DIR" -name "*.pyc" -delete; '
+            'find "$OH_DIR" -name __pycache__ -type d -exec rm -rf {} + 2>/dev/null || true; '
+            "for sub in create claim update list request respond pending; do "
+            'printf "#!/bin/bash\\nexec python3 /usr/local/bin/cb-coop-task.py %s \\"\\$@\\"\\n" "$sub" '
+            '> "/usr/local/bin/coop-task-$sub" && chmod +x "/usr/local/bin/coop-task-$sub"; '
+            "done"
+        )
+        subprocess.run(
+            ["docker", "exec", name, "bash", "-c", patch_script],
+            check=True,
+            capture_output=True,
+            text=True,
+        )
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        import subprocess
+
+        if self._container_name:
+            # Skip the graceful `docker stop` step and go straight to
+            # `docker rm -f` (SIGKILL + remove in one call). The graceful
+            # stop was timing out under concurrent load (Docker Desktop on
+            # Apple Silicon emulating dozens of amd64 containers via
+            # Rosetta) and the agent-server doesn't need a clean shutdown —
+            # all artifacts we care about (patch.txt, trajectory) have
+            # already been extracted via the HTTP API before this point.
             try:
-                response = httpx.get(f"{url}/health", timeout=10)
-                if response.status_code == 200:
-                    return
+                subprocess.run(
+                    ["docker", "rm", "-f", self._container_name],
+                    check=False,
+                    capture_output=True,
+                    text=True,
+                    timeout=30,
+                )
             except Exception as e:
-                last_error = e
-            time.sleep(2)
+                logger.warning(f"docker rm -f {self._container_name} failed: {e}")
+            self._container_name = None
+        self._cleanup_env_file()
 
-        raise TimeoutError(
-            f"Agent-server did not become ready within {timeout}s. "
-            f"Last error: {last_error}"
-        )
+    def _cleanup_env_file(self) -> None:
+        if self._env_file_path and os.path.exists(self._env_file_path):
+            try:
+                os.unlink(self._env_file_path)
+            except OSError:
+                pass
+        self._env_file_path = None
diff --git a/src/cooperbench/eval/backends/docker.py b/src/cooperbench/eval/backends/docker.py
index 81f30ca1..59144777 100644
--- a/src/cooperbench/eval/backends/docker.py
+++ b/src/cooperbench/eval/backends/docker.py
@@ -95,16 +95,34 @@ def create_sandbox(
         # which would otherwise consume "sleep infinity" as an argument and
         # exit immediately, matching the handling in the Modal and GCP
         # backends).
-        container = client.containers.run(
+        #
+        # Benchmark images are a mix: most were built natively (so on Apple
+        # Silicon hosts they're arm64-only), a handful were built for Modal
+        # (amd64-only). Try the host's native architecture first; if that
+        # fails with "no matching manifest" / platform-mismatch, fall back
+        # to linux/amd64 (which works for the amd64-only minority via
+        # Rosetta emulation).
+        run_kwargs = dict(
             image=image,
             entrypoint="",
             command="sleep infinity",
             detach=True,
             working_dir=workdir,
             remove=False,
-            # Set timeout via stop_signal behavior (container can be stopped)
             stop_signal="SIGTERM",
         )
+        try:
+            container = client.containers.run(**run_kwargs)
+        except docker.errors.APIError as e:
+            msg = str(e).lower()
+            if (
+                "no matching manifest" in msg
+                or "does not provide the specified platform" in msg
+                or "no match for platform" in msg
+            ):
+                container = client.containers.run(platform="linux/amd64", **run_kwargs)
+            else:
+                raise
 
         sandbox = DockerSandbox(container, workdir, timeout)
 
diff --git a/src/cooperbench/runner/coop.py b/src/cooperbench/runner/coop.py
index 304766f9..738d2190 100644
--- a/src/cooperbench/runner/coop.py
+++ b/src/cooperbench/runner/coop.py
@@ -82,12 +82,14 @@ def execute_coop(
 
     namespaced_redis = f"{redis_url}#run:{run_id}"
 
-    # Create git server if enabled
-    # Note: openhands_sdk manages its own git server internally, so we skip creation here
+    # Create git server if enabled.
+    # openhands_sdk self-manages its git server on the Modal backend (because
+    # Modal sandboxes can't reach a host-side git daemon). On the docker
+    # backend it uses the shared DockerGitServer like every other adapter.
     git_server = None
     git_server_url = None
     git_network = None
-    if git_enabled and agent_name != "openhands_sdk":
+    if git_enabled and not (agent_name == "openhands_sdk" and backend == "modal"):
         if not quiet:
             console.print("  [dim]git[/dim] creating shared server...")
         app = modal.App.lookup("cooperbench", create_if_missing=True) if backend == "modal" else None