diff --git a/assets/templates/browserbase/cua/Dockerfile.runtime b/assets/templates/browserbase/cua/Dockerfile.runtime index b2d862bfd..90a70a021 100644 --- a/assets/templates/browserbase/cua/Dockerfile.runtime +++ b/assets/templates/browserbase/cua/Dockerfile.runtime @@ -16,8 +16,10 @@ FROM --platform=linux/amd64 node:22-slim WORKDIR /app/cua-server -# Install curl for health checks -RUN apt-get update -qq && apt-get install -y -qq curl && rm -rf /var/lib/apt/lists/* +# Install curl for health checks. +# Acquire::Retries=3 mitigates transient apt mirror/CDN sync mismatches (node:22-slim is +# Debian-based; see launchpad bug #1876035 for the archive.ubuntu.com variant). +RUN apt-get -o Acquire::Retries=3 update -qq && apt-get -o Acquire::Retries=3 install -y -qq curl && rm -rf /var/lib/apt/lists/* # Copy pre-built binary COPY dist/sea/cua-server-linux-x64 ./cua-server-linux-x64 diff --git a/assets/templates/browserbase/cua/setup-binary.sh b/assets/templates/browserbase/cua/setup-binary.sh index ba8f73c17..d25c295e2 100644 --- a/assets/templates/browserbase/cua/setup-binary.sh +++ b/assets/templates/browserbase/cua/setup-binary.sh @@ -11,10 +11,12 @@ set -e cd /app/cua-server -# Install curl if not present (needed for health checks) +# Install curl if not present (needed for health checks). +# Acquire::Retries=3 mitigates transient archive.ubuntu.com CDN sync mismatches +# that fail fresh-sandbox apt-get update mid-rollout (launchpad bug #1876035). if ! command -v curl &> /dev/null; then echo "Installing curl..."
- apt-get update -qq && apt-get install -y -qq curl + apt-get -o Acquire::Retries=3 update -qq && apt-get -o Acquire::Retries=3 install -y -qq curl fi # Set server configuration diff --git a/assets/templates/browserbase/cua/setup.sh b/assets/templates/browserbase/cua/setup.sh index 32405f609..b506184a6 100644 --- a/assets/templates/browserbase/cua/setup.sh +++ b/assets/templates/browserbase/cua/setup.sh @@ -12,10 +12,12 @@ set -e cd /app/cua-server -# Install curl if not present (needed for health checks) +# Install curl if not present (needed for health checks). +# Acquire::Retries=3 mitigates transient archive.ubuntu.com CDN sync mismatches +# that fail fresh-sandbox apt-get update mid-rollout (launchpad bug #1876035). if ! command -v curl &> /dev/null; then echo "Installing curl..." - apt-get update -qq && apt-get install -y -qq curl + apt-get -o Acquire::Retries=3 update -qq && apt-get -o Acquire::Retries=3 install -y -qq curl fi # Install pnpm if not present diff --git a/environments/hello_mcp_harbor/hello_mcp_harbor.py b/environments/hello_mcp_harbor/hello_mcp_harbor.py index 6c3ac3dca..9b9364822 100644 --- a/environments/hello_mcp_harbor/hello_mcp_harbor.py +++ b/environments/hello_mcp_harbor/hello_mcp_harbor.py @@ -46,7 +46,9 @@ def _build_run_command(agent_workdir: str) -> str: return f""" set -e -apt-get update && apt-get install -y curl +# Acquire::Retries=3 mitigates transient archive.ubuntu.com CDN sync mismatches +# that fail fresh-sandbox apt-get update mid-rollout (launchpad bug #1876035). +apt-get -o Acquire::Retries=3 update && apt-get -o Acquire::Retries=3 install -y curl curl -fsSL https://opencode.ai/install | bash export PATH="$HOME/.opencode/bin:$PATH" diff --git a/environments/openenv_echo/proj/server/Dockerfile b/environments/openenv_echo/proj/server/Dockerfile index 3b9804056..03e69b44a 100644 --- a/environments/openenv_echo/proj/server/Dockerfile +++ b/environments/openenv_echo/proj/server/Dockerfile @@ -32,8 +32,10 @@ RUN if ! 
command -v uv >/dev/null 2>&1; then \ mv /root/.local/bin/uvx /usr/local/bin/uvx; \ fi -# Install git for building from git repos (build-time only) -RUN apt-get update && apt-get install -y --no-install-recommends \ +# Install git for building from git repos (build-time only). +# Acquire::Retries=3 mitigates transient archive.ubuntu.com CDN sync mismatches +# (launchpad bug #1876035). +RUN apt-get -o Acquire::Retries=3 update && apt-get -o Acquire::Retries=3 install -y --no-install-recommends \ git \ && rm -rf /var/lib/apt/lists/* diff --git a/environments/openenv_textarena/proj/server/Dockerfile b/environments/openenv_textarena/proj/server/Dockerfile index c298c0f32..591ffc7e3 100644 --- a/environments/openenv_textarena/proj/server/Dockerfile +++ b/environments/openenv_textarena/proj/server/Dockerfile @@ -32,8 +32,10 @@ RUN if ! command -v uv >/dev/null 2>&1; then \ mv /root/.local/bin/uvx /usr/local/bin/uvx; \ fi -# Install git for building from git repos (build-time only) -RUN apt-get update && apt-get install -y --no-install-recommends \ +# Install git for building from git repos (build-time only). +# Acquire::Retries=3 mitigates transient archive.ubuntu.com CDN sync mismatches +# (launchpad bug #1876035). +RUN apt-get -o Acquire::Retries=3 update && apt-get -o Acquire::Retries=3 install -y --no-install-recommends \ git \ && rm -rf /var/lib/apt/lists/* @@ -59,8 +61,8 @@ FROM ${BASE_IMAGE} WORKDIR /app -# Install runtime system libraries required by TextArena (cv2 needs libGL, glib) -RUN apt-get update && apt-get install -y --no-install-recommends \ +# Install runtime system libraries required by TextArena (cv2 needs libGL, glib). 
+RUN apt-get -o Acquire::Retries=3 update && apt-get -o Acquire::Retries=3 install -y --no-install-recommends \ libgl1 \ libglib2.0-0 \ && rm -rf /var/lib/apt/lists/* diff --git a/environments/terminus_harbor/terminus_harbor.py b/environments/terminus_harbor/terminus_harbor.py index 3905d961e..749ea185a 100644 --- a/environments/terminus_harbor/terminus_harbor.py +++ b/environments/terminus_harbor/terminus_harbor.py @@ -57,10 +57,13 @@ async def post_sandbox_setup(self, state: vf.State) -> None: sandbox_id = state["sandbox_id"] - # Install curl, git, uv, and Python + # Install curl, git, uv, and Python. + # Acquire::Retries=3 mitigates transient archive.ubuntu.com CDN sync + # mismatches that fail fresh-sandbox apt-get update mid-rollout + # (launchpad bug #1876035). await self.sandbox_client.execute_command( sandbox_id, - "apt-get update && apt-get install -y curl git 2>&1", + "apt-get -o Acquire::Retries=3 update && apt-get -o Acquire::Retries=3 install -y curl git 2>&1", working_dir=None, timeout=120, ) diff --git a/tests/test_opencode_rlm_env.py b/tests/test_opencode_rlm_env.py index fee3d4095..d51e12f80 100644 --- a/tests/test_opencode_rlm_env.py +++ b/tests/test_opencode_rlm_env.py @@ -151,7 +151,10 @@ def test_config_renders_valid_json_after_shell_expansion(self): class TestRunCommand: def test_run_command_installs_jq(self): env = build_env() - assert "apt-get install -y curl git unzip jq" in env.run_command + assert ( + "apt-get -o Acquire::Retries=3 install -y curl git unzip jq" + in env.run_command + ) def test_run_command_installs_bun(self): env = build_env() diff --git a/tests/test_v1_harbor_cli.py b/tests/test_v1_harbor_cli.py index a314a34b5..67c458f75 100644 --- a/tests/test_v1_harbor_cli.py +++ b/tests/test_v1_harbor_cli.py @@ -104,17 +104,23 @@ def test_opencode_config_owns_opencode_harness_fields() -> None: program = cast(dict[str, object], harness.program) command = cast(list[object], program["command"]) mcp_setup = cast(dict[str, object], 
program["tools"])["mcp"] + setup = cast(str, program["setup"]) assert harness.config.agent_workdir == "/workspace" assert harness.config.disabled_tools == ["webfetch"] assert harness.config.system_prompt is None assert harness.config.max_turns == 2 + assert "apt-get -o Acquire::Retries=3 update" in setup + assert "apt-get -o Acquire::Retries=3 install" in setup assert "/workspace" in cast(str, command[2]) assert '"webfetch": false' in cast(str, mcp_setup) assert "/opencode/system.txt" not in cast(dict[str, object], program["files"]) def test_pi_harness_writes_intercepted_model_and_mcp_config() -> None: + harness = vf.Pi() + program = cast(dict[str, object], harness.program) + setup = cast(str, program["setup"]) models = json.loads( pi_models_json( { @@ -127,6 +133,8 @@ def test_pi_harness_writes_intercepted_model_and_mcp_config() -> None: ) mcp = json.loads(pi_mcp_json()) + assert "apt-get -o Acquire::Retries=3 update" in setup + assert "apt-get -o Acquire::Retries=3 install" in setup provider = models["providers"]["verifiers"] assert provider["baseUrl"] == "http://127.0.0.1:1/rollout/key/v1" assert provider["api"] == "openai-completions" diff --git a/tests/test_v1_mini_swe_agent.py b/tests/test_v1_mini_swe_agent.py index 64b67946b..ffff0d049 100644 --- a/tests/test_v1_mini_swe_agent.py +++ b/tests/test_v1_mini_swe_agent.py @@ -40,6 +40,8 @@ def test_mini_swe_agent_builds_sandbox_program(): assert isinstance(harness, vf.CLIHarness) assert program["sandbox"] is not False assert "OPENAI_MODEL" in cast(dict[str, object], program["env"]) + assert "apt-get -o Acquire::Retries=3 update" in cast(str, program["setup"]) + assert "apt-get -o Acquire::Retries=3 install" in cast(str, program["setup"]) assert "/mini-swe-agent/prompt.txt" in cast(dict[str, object], program["files"]) assert "/mini-swe-agent/system.txt" in cast(dict[str, object], program["files"]) assert "mini_swe_agent_log" in cast(dict[str, object], program["artifacts"]) diff --git a/tests/test_v1_rlm_swe.py 
b/tests/test_v1_rlm_swe.py index e69110d17..3382f9b84 100644 --- a/tests/test_v1_rlm_swe.py +++ b/tests/test_v1_rlm_swe.py @@ -20,9 +20,13 @@ def test_rlm_harness_builds_sandbox_program_without_eager_checkout(): program = as_mapping(harness.program) program_env = as_mapping(program["env"]) artifacts = as_mapping(program["artifacts"]) + setup = program["setup"] assert isinstance(harness, vf.CLIHarness) assert program["sandbox"] is not False + assert isinstance(setup, list) + assert "apt-get -o Acquire::Retries=3 update" in setup[0] + assert "apt-get -o Acquire::Retries=3 install" in setup[0] assert "RLM_MODEL" in program_env assert "rlm_metrics" in artifacts diff --git a/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py b/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py index 4a90b2586..c6ab44348 100644 --- a/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py +++ b/verifiers/envs/experimental/composable/harnesses/mini_swe_agent.py @@ -39,11 +39,14 @@ def build_mini_swe_agent_install_script( """Build the shell script that installs mini-SWE-agent.""" install_tools = "" if install_python: + # Acquire::Retries=3 mitigates transient archive.ubuntu.com CDN sync + # mismatches that fail fresh-sandbox apt-get update mid-rollout. See + # launchpad bug #1876035. install_tools = """\ export DEBIAN_FRONTEND=noninteractive if ! command -v python3 >/dev/null 2>&1 || ! 
python3 -m pip --version >/dev/null 2>&1; then - apt-get update -qq - apt-get install -y -qq python3 python3-pip ca-certificates + apt-get -o Acquire::Retries=3 update -qq + apt-get -o Acquire::Retries=3 install -y -qq python3 python3-pip ca-certificates fi """ diff --git a/verifiers/envs/experimental/composable/harnesses/opencode.py b/verifiers/envs/experimental/composable/harnesses/opencode.py index 8f4d884e9..953816642 100644 --- a/verifiers/envs/experimental/composable/harnesses/opencode.py +++ b/verifiers/envs/experimental/composable/harnesses/opencode.py @@ -58,14 +58,17 @@ def build_install_script( ) -> str: """Build the shell script that installs OpenCode in a sandbox.""" rg_install = ( - "apt-get install -y -qq ripgrep > /dev/null 2>&1 || true" + "apt-get -o Acquire::Retries=3 install -y -qq ripgrep > /dev/null 2>&1 || true" if install_ripgrep else "" ) sha256_check = f'echo "{release_sha256} /tmp/opencode.tar.gz" | sha256sum -c -' + # Acquire::Retries=3 mitigates transient archive.ubuntu.com CDN sync mismatches + # (e.g. "File has unexpected size ... Mirror sync in progress?"). See launchpad + # bug #1876035. Older apt defaults to 0 retries, so one bad fetch fails the rollout.
return f"""\ set -e -apt-get update -qq && apt-get install -y -qq curl tar > /dev/null 2>&1 +apt-get -o Acquire::Retries=3 update -qq && apt-get -o Acquire::Retries=3 install -y -qq curl tar > /dev/null 2>&1 {rg_install} OPENCODE_RELEASE_REPO="{release_repo}" diff --git a/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py b/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py index b57c73e0e..c7802a3cd 100644 --- a/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py +++ b/verifiers/envs/experimental/composable/tasksets/swe/multi_swe.py @@ -264,7 +264,9 @@ async def setup(self, state) -> None: commands = [ f"test -d {shlex.quote(repo_path)}", "test -f /home/fix-run.sh", - "command -v patch || (apt-get update && apt-get install -y patch)", + # Acquire::Retries=3: harden against transient archive.ubuntu.com CDN + # mirror-sync mismatches mid-rollout (launchpad bug #1876035). + "command -v patch || (apt-get -o Acquire::Retries=3 update && apt-get -o Acquire::Retries=3 install -y patch)", "rm -f /home/fix.patch /home/test_output.txt /home/create_fix_patch.sh", ] for command in commands: diff --git a/verifiers/envs/experimental/opencode_env.py b/verifiers/envs/experimental/opencode_env.py index 3d8911a14..59861a73c 100644 --- a/verifiers/envs/experimental/opencode_env.py +++ b/verifiers/envs/experimental/opencode_env.py @@ -54,7 +54,9 @@ DEFAULT_RUN_COMMAND_TEMPLATE = """\ set -eo pipefail -apt-get update && apt-get install -y curl +# Acquire::Retries=3 mitigates transient archive.ubuntu.com CDN sync mismatches +# that fail fresh-sandbox apt-get update mid-rollout (launchpad bug #1876035). 
+apt-get -o Acquire::Retries=3 update && apt-get -o Acquire::Retries=3 install -y curl for install_attempt in 1 2 3; do if {install_command}; then diff --git a/verifiers/envs/experimental/opencode_rlm_env.py b/verifiers/envs/experimental/opencode_rlm_env.py index e37667bf3..1c8715246 100644 --- a/verifiers/envs/experimental/opencode_rlm_env.py +++ b/verifiers/envs/experimental/opencode_rlm_env.py @@ -96,7 +96,9 @@ async def metric(state: State) -> float: RLM_RUN_COMMAND_TEMPLATE = """\ set -e -apt-get update && apt-get install -y curl git unzip jq +# Acquire::Retries=3 mitigates transient archive.ubuntu.com CDN sync mismatches +# that fail fresh-sandbox apt-get update mid-rollout (launchpad bug #1876035). +apt-get -o Acquire::Retries=3 update && apt-get -o Acquire::Retries=3 install -y curl git unzip jq # Install bun (TypeScript runtime required by the RLM plugin) curl -fsSL https://bun.sh/install | bash diff --git a/verifiers/v1/packages/harnesses/mini_swe_agent.py b/verifiers/v1/packages/harnesses/mini_swe_agent.py index 284e50cae..66d942c76 100644 --- a/verifiers/v1/packages/harnesses/mini_swe_agent.py +++ b/verifiers/v1/packages/harnesses/mini_swe_agent.py @@ -123,8 +123,8 @@ def build_mini_swe_agent_install_script( install_tools = """\ export DEBIAN_FRONTEND=noninteractive if ! command -v python3 >/dev/null 2>&1 || ! 
python3 -m pip --version >/dev/null 2>&1; then - apt-get update -qq - apt-get install -y -qq python3 python3-pip ca-certificates + apt-get -o Acquire::Retries=3 update -qq + apt-get -o Acquire::Retries=3 install -y -qq python3 python3-pip ca-certificates fi """ diff --git a/verifiers/v1/packages/harnesses/opencode.py b/verifiers/v1/packages/harnesses/opencode.py index 2980bfb74..47f66339e 100644 --- a/verifiers/v1/packages/harnesses/opencode.py +++ b/verifiers/v1/packages/harnesses/opencode.py @@ -153,14 +153,16 @@ def build_install_script( install_ripgrep: bool = True, ) -> str: rg_install = ( - "apt-get install -y -qq ripgrep > /dev/null 2>&1 || true" + "apt-get -o Acquire::Retries=3 install -y -qq ripgrep > /dev/null 2>&1 || true" if install_ripgrep else "" ) sha256_check = f'echo "{release_sha256} /tmp/opencode.tar.gz" | sha256sum -c -' + # Acquire::Retries=3 mitigates transient archive.ubuntu.com CDN sync + # mismatches that fail fresh-sandbox apt-get calls mid-rollout. return f"""\ set -e -apt-get update -qq && apt-get install -y -qq curl tar ca-certificates > /dev/null 2>&1 +apt-get -o Acquire::Retries=3 update -qq && apt-get -o Acquire::Retries=3 install -y -qq curl tar ca-certificates > /dev/null 2>&1 {rg_install} OPENCODE_RELEASE_REPO={shlex.quote(release_repo)} diff --git a/verifiers/v1/packages/harnesses/pi.py b/verifiers/v1/packages/harnesses/pi.py index 0822d61d4..2887d4143 100644 --- a/verifiers/v1/packages/harnesses/pi.py +++ b/verifiers/v1/packages/harnesses/pi.py @@ -87,7 +87,7 @@ def __init__( def build_pi_install_script(package: str = DEFAULT_PI_PACKAGE) -> str: return f"""\ set -e -apt-get update -qq && apt-get install -y -qq curl ca-certificates nodejs npm > /dev/null 2>&1 +apt-get -o Acquire::Retries=3 update -qq && apt-get -o Acquire::Retries=3 install -y -qq curl ca-certificates nodejs npm > /dev/null 2>&1 npm install -g {shlex.quote(package)} """ diff --git a/verifiers/v1/packages/harnesses/rlm.py b/verifiers/v1/packages/harnesses/rlm.py 
index be442a23a..f1c321fb8 100644 --- a/verifiers/v1/packages/harnesses/rlm.py +++ b/verifiers/v1/packages/harnesses/rlm.py @@ -116,7 +116,8 @@ def __init__( }, dirs=dirs, setup=[ - "apt-get update && apt-get install -y --no-install-recommends " + "apt-get -o Acquire::Retries=3 update && " + "apt-get -o Acquire::Retries=3 install -y --no-install-recommends " "ca-certificates curl git && rm -rf /var/lib/apt/lists/*", build_install_command(), ], diff --git a/verifiers/v1/utils/sandbox_utils.py b/verifiers/v1/utils/sandbox_utils.py index cf6c9f3b8..1b1089f49 100644 --- a/verifiers/v1/utils/sandbox_utils.py +++ b/verifiers/v1/utils/sandbox_utils.py @@ -453,11 +453,15 @@ def python_package_install_command(package_args: str) -> str: "if command -v python3 >/dev/null 2>&1; then PYTHON=python3; " "elif command -v python >/dev/null 2>&1; then PYTHON=python; " "elif command -v apt-get >/dev/null 2>&1; then " - "apt-get update && apt-get install -y python3 python3-pip && PYTHON=python3; " + "apt-get -o Acquire::Retries=3 update && " + "apt-get -o Acquire::Retries=3 install -y python3 python3-pip && " + "PYTHON=python3; " "else echo 'python is required to install sandbox packages' >&2; exit 127; fi\n" "$PYTHON -m pip --version >/dev/null 2>&1 || " "$PYTHON -m ensurepip --upgrade || " - "(command -v apt-get >/dev/null 2>&1 && apt-get update && apt-get install -y python3-pip)\n" + "(command -v apt-get >/dev/null 2>&1 && " + "apt-get -o Acquire::Retries=3 update && " + "apt-get -o Acquire::Retries=3 install -y python3-pip)\n" "$PYTHON -m pip install --disable-pip-version-check --break-system-packages " f"{package_args} || " "$PYTHON -m pip install --disable-pip-version-check " @@ -471,7 +475,8 @@ def python_runtime_setup_command() -> str: "if command -v python3 >/dev/null 2>&1; then exit 0; fi\n" "if command -v python >/dev/null 2>&1; then exit 0; fi\n" "if command -v apt-get >/dev/null 2>&1; then " - "apt-get update && apt-get install -y python3; exit 0; fi\n" + "apt-get -o 
Acquire::Retries=3 update && " + "apt-get -o Acquire::Retries=3 install -y python3; exit 0; fi\n" "echo 'python is required for sandbox Python programs' >&2\n" "exit 127" )