opendatahub-io · EmilienM · Jun 10, 2026 · Jun 10, 2026 · coderabbitai · Jun 10, 2026
diff --git a/.claude/skills/test-e2e-openshell/SKILL.md b/.claude/skills/test-e2e-openshell/SKILL.md
@@ -143,7 +143,7 @@ Verify:
 - Output shows `Created sandbox: ci`
 - Output shows `Running Claude Code (claude-haiku-4-5) via openshell backend`
 - Claude's response contains `A1_OK`
-- Token metrics show non-zero counts, cost around `$0.04`
+- Token metrics show non-zero counts and cost is non-zero (e.g. `$0.04`)
 - `Agent exit code: 0`
 - `Sandbox deleted` and `Gateway stopped` at the end
 
@@ -174,7 +174,7 @@ Verify:
 - Output shows `Auth: API key`
 - Output shows `Creating Anthropic API key provider`
 - Claude's response contains `B1_OK`
-- Token metrics show non-zero counts and cost
+- Token metrics show non-zero counts and cost is non-zero
 - `Agent exit code: 0`
 
 ---
@@ -311,6 +311,53 @@ podman exec openshell-e2e rm -rf /tmp/workdir-test
 
 ---
 
+## Section F: OTEL telemetry collection
+
+Verifies that the host-side OTEL collector receives metrics from the
+agent and prints a token/cost summary. Uses Vertex AI auth and Claude Code
+(the only harness that supports OTEL).
+
+For the OpenShell backend, agentic-ci binds the collector on the host to
+`0.0.0.0`, adds `host.openshell.internal:<port>` to the sandbox network
+policy, and points the agent's OTLP exporter at that endpoint. After the
+run, the token/cost summary is printed on the host.
+
+Requires `OPENSHELL_SUPERVISOR_IMAGE` (see "Before you start").
+
+Run cleanup first.
+
+### F1. Run with OTEL enabled
+
+```bash
+podman exec \
+  -e ANTHROPIC_VERTEX_PROJECT_ID=<your-project-id> \
+  -e CLOUD_ML_REGION=global \
+  -e OPENSHELL_SUPERVISOR_IMAGE=quay.io/mprpic/openshell-supervisor:pr1763 \
+  -e SANDBOX_IMAGE="$CLAUDE_SANDBOX_IMAGE" \
+  openshell-e2e bash -c '
+    cd /tmp/e2e-workdir && \
+    agentic-ci run \
+      --backend openshell \
+      --harness claude-code \
+      --image "$SANDBOX_IMAGE" \
+      --model claude-haiku-4-5 \
+      "Respond with exactly: F1_OK"
+  '
+```
+
+Note: `--no-otel` is deliberately omitted so that OTEL collection stays enabled.
+
+Verify:
+- Output shows `Running Claude Code (claude-haiku-4-5) via openshell backend`
+- Agent runs and completes with `F1_OK` in the response
+- Output shows `Token/Cost Summary (OpenTelemetry)` section
+- Token counts are non-zero (input tokens, output tokens, cache)
+- Cost is non-zero (e.g. `$0.04`)
+- `Agent exit code: 0`
+- `Sandbox deleted` and `Gateway stopped` at the end
+
+---
+
 ## Final cleanup
 
 ```bash
@@ -319,7 +366,7 @@ podman rm -f openshell-e2e
 
 ## Running the full suite
 
-Execute sections in order (A through E), running the cleanup step before each
+Execute sections in order (A through F), running the cleanup step before each
 section. Skip sections whose prerequisites are not met. If any step fails,
 check the gateway log inside the container:
 

diff --git a/src/agentic_ci/backend.py b/src/agentic_ci/backend.py
@@ -30,7 +30,7 @@ def __init__(self, workdir=".", image=None, *, harness: Harness):
         self.verdict_path: Path | None = None
 
     @abstractmethod
-    def setup(self):
+    def setup(self, otel_port: int | None = None):
         """Prepare the backend. Idempotent."""
 
     @abstractmethod

diff --git a/src/agentic_ci/backends/openshell/__init__.py b/src/agentic_ci/backends/openshell/__init__.py
@@ -14,6 +14,8 @@
 if TYPE_CHECKING:
     from agentic_ci.harness import Harness
 
+_OPENSHELL_HOST = "host.openshell.internal"
+
 
 class OpenShellBackend(Backend):
     """Runs an AI agent inside an OpenShell sandbox.
@@ -38,7 +40,7 @@ def __init__(self, workdir=".", image=None, policy=None, extra_env=None, *, harn
         self.policy_path = policy
         self._extra_env = extra_env or {}
 
-    def setup(self):
+    def setup(self, otel_port=None):
         if not gateway.is_running():
             log.section("Starting OpenShell gateway")
             gateway.start()
@@ -55,7 +57,7 @@ def setup(self):
         image_info = f", image: {self.image}" if self.image else ""
         log.section(f"Creating sandbox ({image_info.lstrip(', ') or 'default image'})")
 
-        sandbox.create(image=self.image, policy_path=self.policy_path)
+        sandbox.create(image=self.image, policy_path=self.policy_path, otel_port=otel_port)
 
         log.section("Uploading workdir")
         sandbox.upload(self.workdir)
@@ -107,9 +109,25 @@ def _write_env_script(self, model, otel_port=None, otel_rate_file=None):
 
         Uses the harness's native env script (Vertex AI vars or API key)
         since the google-cloud provider injects GCP credentials directly.
+
+        For OTEL, uses ``host.openshell.internal`` to reach the host-side
+        collector through the gateway proxy instead of the harness default
+        (which uses an IP unreachable from the sandbox).
         """
-        lines = self.harness.build_env_script_lines(otel_port, otel_rate_file)
-        lines.append("export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1")
+        lines = self.harness.build_env_script_lines()
+        if otel_port:
+            lines.extend(
+                [
+                    "export CLAUDE_CODE_ENABLE_TELEMETRY=1",
+                    "export OTEL_METRICS_EXPORTER=otlp",
+                    "export OTEL_LOGS_EXPORTER=otlp",
+                    "export OTEL_EXPORTER_OTLP_PROTOCOL=http/json",
+                    f"export OTEL_EXPORTER_OTLP_ENDPOINT=http://{_OPENSHELL_HOST}:{otel_port}",
+                    "export OTEL_METRIC_EXPORT_INTERVAL=5000",
+                ]
+            )
+        else:
+            lines.append("export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1")
 
         for key, val in self._extra_env.items():
             lines.append(f"export {key}={shlex.quote(val)}")

diff --git a/src/agentic_ci/backends/openshell/sandbox.py b/src/agentic_ci/backends/openshell/sandbox.py
@@ -24,7 +24,7 @@ def exists():
     return result.returncode == 0
 
 
-def create(image=None, policy_path=None):
+def create(image=None, policy_path=None, otel_port=None):
     """Create a persistent sandbox with the CI provider attached.
 
     The sandbox is created first, then the network policy is applied
@@ -47,16 +47,18 @@ def create(image=None, policy_path=None):
     args.extend(["--", "true"])
     _run(args, check=True)
 
-    _apply_policy(policy_path)
+    _apply_policy(policy_path, otel_port=otel_port)
 
 
-def _apply_policy(policy_path):
+def _apply_policy(policy_path, otel_port=None):
     """Apply network policy endpoints and wait for activation.
 
     Uses ``openshell policy update --wait`` which blocks until the
     supervisor has compiled and loaded the new policy revision.
     """
     endpoints = resolve_endpoints(policy_path)
+    if otel_port:
+        endpoints.append(f"host.openshell.internal:{otel_port}:read-write")
     if not endpoints:
         return
 

diff --git a/src/agentic_ci/backends/podman.py b/src/agentic_ci/backends/podman.py
@@ -41,7 +41,7 @@ def __init__(
         self._config_dir = None
         self._extra_env = extra_env or {}
 
-    def setup(self):
+    def setup(self, otel_port=None):
         self._resolve_image()
         if self.harness.auth_mode == "vertex":
             self._resolve_credentials()

diff --git a/src/agentic_ci/cli.py b/src/agentic_ci/cli.py
@@ -52,8 +52,6 @@ def cmd_run(args, backend, harness):
                 sys.exit(0)
             log.info(f"{gate.name}: passed")
 
-    backend.setup()
-
     model_env = harness.model_env_var()
     if args.model:
         model = args.model
@@ -73,10 +71,16 @@ def cmd_run(args, backend, harness):
     try:
         if not args.no_otel and harness.supports_otel:
             log.section("Starting OTEL collector")
-            otel_proc, otel_port, otel_log, otel_rate = otel.start_collector(run_dir)
+            bind_addr = "0.0.0.0" if args.backend == "openshell" else "127.0.0.1"
+            otel_proc, otel_port, otel_log, otel_rate = otel.start_collector(
+                run_dir, bind_addr=bind_addr
+            )
+            os.environ["OTEL_RATE_FILE"] = otel_rate
             log.detail("pid", str(otel_proc.pid))
             log.detail("port", str(otel_port))
 
+        backend.setup(otel_port=otel_port)
+
         log.section(f"Running {harness.name} ({model}) via {args.backend} backend")
         rc = backend.run(
             prompt=args.prompt,
@@ -128,6 +132,7 @@ def cmd_run(args, backend, harness):
     finally:
         if otel_proc:
             otel.stop_collector(otel_proc)
+            os.environ.pop("OTEL_RATE_FILE", None)
         if not args.keep:
             backend.stop()
 

diff --git a/src/agentic_ci/harness.py b/src/agentic_ci/harness.py
@@ -10,6 +10,8 @@
 from abc import ABC, abstractmethod
 from typing import Any
 
+_OPENSHELL_GATEWAY_HOST = "10.200.0.1"
+
 
 class Harness(ABC):
     """Base class for agent CLI harnesses."""
@@ -148,12 +150,10 @@ def build_env_script_lines(self, otel_port=None, otel_rate_file=None):
                     "export OTEL_METRICS_EXPORTER=otlp",
                     "export OTEL_LOGS_EXPORTER=otlp",
                     "export OTEL_EXPORTER_OTLP_PROTOCOL=http/json",
-                    f"export OTEL_EXPORTER_OTLP_ENDPOINT=http://10.200.0.1:{otel_port}",
+                    f"export OTEL_EXPORTER_OTLP_ENDPOINT=http://{_OPENSHELL_GATEWAY_HOST}:{otel_port}",
                     "export OTEL_METRIC_EXPORT_INTERVAL=10000",
                 ]
             )
-            if otel_rate_file:
-                lines.append(f"export OTEL_RATE_FILE={shlex.quote(otel_rate_file)}")
         return lines
 
     def build_otel_exec_env(self, otel_port=None):

diff --git a/src/agentic_ci/otel.py b/src/agentic_ci/otel.py
@@ -90,7 +90,7 @@ def _update_token_rate(payload):
     os.replace(tmp, rate_file)
 
 
-def start_collector(run_dir):
+def start_collector(run_dir, bind_addr="127.0.0.1"):
     """Start the OTEL collector as a subprocess. Returns (proc, port)."""
     otel_log = os.path.join(run_dir, "claude-otel.jsonl")
     otel_rate = os.path.join(run_dir, "claude-otel-rate.json")
@@ -108,6 +108,7 @@ def start_collector(run_dir):
         "OTEL_RATE_FILE": otel_rate,
         "OTEL_COLLECTOR_PORT": "0",
         "OTEL_PORT_FILE": port_file,
+        "OTEL_BIND_ADDR": bind_addr,
     }
     proc = subprocess.Popen(
         [sys.executable, "-m", "agentic_ci.otel"],
@@ -258,7 +259,8 @@ def print_summary(log_file):
 def main():
     """Run the OTEL collector server."""
     port = int(os.environ.get("OTEL_COLLECTOR_PORT", "4318"))
-    server = HTTPServer(("127.0.0.1", port), OTLPHandler)
+    bind_addr = os.environ.get("OTEL_BIND_ADDR", "127.0.0.1")
+    server = HTTPServer((bind_addr, port), OTLPHandler)
     actual_port = server.server_address[1]
     port_file = os.environ.get("OTEL_PORT_FILE")
     if port_file:
@@ -267,7 +269,7 @@ def main():
     signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))
     log_file = os.environ.get("OTEL_LOG_FILE", "/tmp/claude-otel.jsonl")
     print(
-        f"OTLP collector listening on 127.0.0.1:{actual_port}, writing to {log_file}",
+        f"OTLP collector listening on {bind_addr}:{actual_port}, writing to {log_file}",
         file=sys.stderr,
     )
     try:

diff --git a/tests/test_harness.py b/tests/test_harness.py
@@ -127,13 +127,13 @@ def test_build_env_script_lines_api_key(self, monkeypatch):
         assert not any("CLAUDE_CODE_USE_VERTEX" in line for line in lines)
         assert not any("GOOGLE_APPLICATION_CREDENTIALS" in line for line in lines)
 
-    def test_build_env_script_lines_with_otel(self, monkeypatch, tmp_path):
+    def test_build_env_script_lines_with_otel(self, monkeypatch):
         monkeypatch.setenv("ANTHROPIC_VERTEX_PROJECT_ID", "proj")
         harness = ClaudeCodeHarness()
-        rate_file = str(tmp_path / "rate.json")
-        lines = harness.build_env_script_lines(otel_port=4318, otel_rate_file=rate_file)
+        lines = harness.build_env_script_lines(otel_port=4318)
         assert any("CLAUDE_CODE_ENABLE_TELEMETRY=1" in line for line in lines)
-        assert any(f"OTEL_RATE_FILE={rate_file}" in line for line in lines)
+        assert any("OTEL_EXPORTER_OTLP_ENDPOINT=http://10.200.0.1:4318" in line for line in lines)
+        assert not any("OTEL_RATE_FILE" in line for line in lines)
 
     def test_credential_mount_target(self):
         assert ClaudeCodeHarness().credential_mount_target() == "/home/agent-ci"