Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions tests/test_rlm_composable_env.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def test_rlm_harness_uses_explicit_local_checkout(tmp_path):

assert harness.get_upload_dirs() == {"rlm_checkout": checkout.resolve()}
assert harness.upload_dir_mapping == {"rlm_checkout": "/tmp/rlm-checkout"}
assert harness.metrics_path == "{workdir}/.rlm/sessions/*/meta.json"
assert harness.metrics_path == "/tmp/.rlm/sessions/*/meta.json"
assert harness.metrics_key == "metrics"
assert harness.metrics_prefix == "rlm_"
assert harness.skills_path == "/task/rlm-skills"
Expand Down Expand Up @@ -703,7 +703,7 @@ async def test_rlm_collects_logs_and_metrics(tmp_path):
),
call(
"sbx",
'f=$(ls /testbed/.rlm/sessions/*/meta.json 2>/dev/null | head -1) && cat "$f" || echo "{}"',
'f=$(ls /tmp/.rlm/sessions/*/meta.json 2>/dev/null | head -1) && cat "$f" || echo "{}"',
working_dir=None,
),
]
Expand Down
14 changes: 13 additions & 1 deletion verifiers/envs/experimental/composable/harnesses/rlm.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,11 @@
DEFAULT_APPEND_TO_SYSTEM_PROMPT_PATH = "/task/append_to_system_prompt.txt"
DEFAULT_RLM_CHECKOUT_PATH = "/tmp/rlm-checkout"
DEFAULT_RLM_CHECKOUT_UPLOAD_NAME = "rlm_checkout"
# Pinned outside the agent workdir: the agent has full destructive control
# over its workdir (git clean -fdx, rm -rf, git stash, etc.), and any of those
# can wipe rlm's session dir mid-rollout, making the next meta.json write
# fail with FileNotFoundError.
DEFAULT_RLM_HOME = "/tmp/.rlm"
DEFAULT_RLM_LOCAL_CHECKOUT_CACHE_ROOT = (
Path.home() / ".cache" / "verifiers" / "rlm-checkouts"
)
Expand Down Expand Up @@ -130,6 +135,12 @@ def rlm_harness(
``ComposableEnv(environment_vars=...)`` themselves; pass the kwargs
here and the harness owns the env var plumbing.

The harness also pins ``RLM_HOME`` to ``DEFAULT_RLM_HOME`` (an
absolute path outside any task workdir) so rlm session state cannot
be wiped by the agent's own destructive ops on its workdir.
``metrics_path`` is built from the same constant, so post-rollout
metrics collection reads from the directory rlm actually writes to.

Git access from inside the agent is refused at the rlm tool layer
(bash + ipython): rlm detects shell escapes and AST-walks
``subprocess.run`` / ``os.system`` / ``os.popen`` for a literal
Expand Down Expand Up @@ -175,6 +186,7 @@ def get_upload_dirs() -> dict[str, Traversable | Path]:
"RLM_MAX_TURNS": str(rlm_max_turns),
"RLM_EXEC_TIMEOUT": str(rlm_exec_timeout),
"RLM_MAX_DEPTH": str(rlm_max_depth),
"RLM_HOME": DEFAULT_RLM_HOME,
}

def env_vars_for_rollout(state: State) -> dict[str, str]:
Expand All @@ -197,7 +209,7 @@ def env_vars_for_rollout(state: State) -> dict[str, str]:
skills_path="/task/rlm-skills",
get_upload_dirs=get_upload_dirs,
upload_dir_mapping=upload_dir_mapping,
metrics_path="{workdir}/.rlm/sessions/*/meta.json",
metrics_path=f"{DEFAULT_RLM_HOME}/sessions/*/meta.json",
metrics_key="metrics",
metrics_prefix="rlm_",
tool_names=tool_names,
Expand Down
Loading