From 0abda2090cb680ed6f316598e242bd2c6b1d4dd1 Mon Sep 17 00:00:00 2001
From: Emilien Macchi <emacchi@redhat.com>
Date: Wed, 10 Jun 2026 08:00:16 -0400
Subject: [PATCH] Fix OTEL telemetry collection for OpenShell backend

The host-side OTEL collector was unreachable from inside the sandbox:
it listened only on 127.0.0.1, and the harness pointed the OTEL endpoint
at 10.200.0.1 (the gateway bridge IP), which doesn't route to the host
network stack. For the OpenShell backend the collector now binds to
0.0.0.0 instead.

Use host.openshell.internal instead, which resolves to the host inside
the sandbox (added in OpenShell PR #1279). The sandbox policy now
includes host.openshell.internal:<port> so the gateway proxy forwards
OTEL exports to the host-side collector.

The OpenShell backend handles OTEL env vars directly (instead of
delegating to the harness) because it needs the OpenShell-specific
hostname and a shorter export interval (5s vs 10s) to capture metrics
from short-lived runs.

Also adds Section F to the OpenShell e2e test skill for verifying OTEL
collection works end-to-end.

Signed-off-by: Emilien Macchi <emacchi@redhat.com>
Co-Authored-By: Claude <noreply@anthropic.com>
---
 .claude/skills/test-e2e-openshell/SKILL.md    | 53 +++++++++++++++++--
 src/agentic_ci/backend.py                     |  2 +-
 src/agentic_ci/backends/openshell/__init__.py | 26 +++++++--
 src/agentic_ci/backends/openshell/sandbox.py  |  8 +--
 src/agentic_ci/backends/podman.py             |  2 +-
 src/agentic_ci/cli.py                         | 11 ++--
 src/agentic_ci/harness.py                     |  6 +--
 src/agentic_ci/otel.py                        |  8 +--
 tests/test_harness.py                         |  8 +--
 9 files changed, 99 insertions(+), 25 deletions(-)
diff --git a/.claude/skills/test-e2e-openshell/SKILL.md b/.claude/skills/test-e2e-openshell/SKILL.md
index 56abff7..d726dc0 100644
--- a/.claude/skills/test-e2e-openshell/SKILL.md
+++ b/.claude/skills/test-e2e-openshell/SKILL.md
@@ -143,7 +143,7 @@ Verify:
 - Output shows `Created sandbox: ci`
 - Output shows `Running Claude Code (claude-haiku-4-5) via openshell backend`
 - Claude's response contains `A1_OK`
-- Token metrics show non-zero counts, cost around `$0.04`
+- Token metrics show non-zero counts and cost is non-zero (e.g. `$0.04`)
 - `Agent exit code: 0`
 - `Sandbox deleted` and `Gateway stopped` at the end
 
@@ -174,7 +174,7 @@ Verify:
 - Output shows `Auth: API key`
 - Output shows `Creating Anthropic API key provider`
 - Claude's response contains `B1_OK`
-- Token metrics show non-zero counts and cost
+- Token metrics show non-zero counts and cost is non-zero
 - `Agent exit code: 0`
 
 ---
@@ -311,6 +311,53 @@ podman exec openshell-e2e rm -rf /tmp/workdir-test
 
 ---
 
+## Section F: OTEL telemetry collection
+
+Verifies that the host-side OTEL collector receives metrics from the agent
+running in the sandbox and prints a token/cost summary. Uses Vertex AI auth
+and Claude Code (the only harness that supports OTEL).
+
+agentic-ci starts a lightweight OTLP receiver on the host and adds
+`host.openshell.internal:<port>` to the sandbox policy, so the agent's OTEL
+exports reach the collector through the gateway proxy. After the run, the
+summary is printed on the host from the collector's log.
+
+Requires `OPENSHELL_SUPERVISOR_IMAGE` (see "Before you start").
+
+Run cleanup first.
+
+### F1. Run with OTEL enabled
+
+```bash
+podman exec \
+  -e ANTHROPIC_VERTEX_PROJECT_ID=<your-project-id> \
+  -e CLOUD_ML_REGION=global \
+  -e OPENSHELL_SUPERVISOR_IMAGE=quay.io/mprpic/openshell-supervisor:pr1763 \
+  -e SANDBOX_IMAGE="$CLAUDE_SANDBOX_IMAGE" \
+  openshell-e2e bash -c '
+    cd /tmp/e2e-workdir && \
+    agentic-ci run \
+      --backend openshell \
+      --harness claude-code \
+      --image "$SANDBOX_IMAGE" \
+      --model claude-haiku-4-5 \
+      "Respond with exactly: F1_OK"
+  '
+```
+
+Note: `--no-otel` is deliberately omitted so that OTEL collection stays enabled.
+
+Verify:
+- Output shows `Running Claude Code (claude-haiku-4-5) via openshell backend`
+- Agent runs and completes with `F1_OK` in the response
+- Output shows `Token/Cost Summary (OpenTelemetry)` section
+- Token counts are non-zero (input tokens, output tokens, cache)
+- Cost is non-zero (e.g. `$0.04`)
+- `Agent exit code: 0`
+- `Sandbox deleted` and `Gateway stopped` at the end
+
+---
+
 ## Final cleanup
 
 ```bash
@@ -319,7 +366,7 @@ podman rm -f openshell-e2e
 
 ## Running the full suite
 
-Execute sections in order (A through E), running the cleanup step before each
+Execute sections in order (A through F), running the cleanup step before each
 section. Skip sections whose prerequisites are not met. If any step fails,
 check the gateway log inside the container:
 
diff --git a/src/agentic_ci/backend.py b/src/agentic_ci/backend.py
index 68e9832..fca2c39 100644
--- a/src/agentic_ci/backend.py
+++ b/src/agentic_ci/backend.py
@@ -30,7 +30,7 @@ def __init__(self, workdir=".", image=None, *, harness: Harness):
         self.verdict_path: Path | None = None
 
     @abstractmethod
-    def setup(self):
+    def setup(self, otel_port: int | None = None):
         """Prepare the backend. Idempotent."""
 
     @abstractmethod
diff --git a/src/agentic_ci/backends/openshell/__init__.py b/src/agentic_ci/backends/openshell/__init__.py
index 69b5f1b..7ffd7eb 100644
--- a/src/agentic_ci/backends/openshell/__init__.py
+++ b/src/agentic_ci/backends/openshell/__init__.py
@@ -14,6 +14,8 @@
 if TYPE_CHECKING:
     from agentic_ci.harness import Harness
 
+_OPENSHELL_HOST = "host.openshell.internal"
+
 
 class OpenShellBackend(Backend):
     """Runs an AI agent inside an OpenShell sandbox.
@@ -38,7 +40,7 @@ def __init__(self, workdir=".", image=None, policy=None, extra_env=None, *, harn
         self.policy_path = policy
         self._extra_env = extra_env or {}
 
-    def setup(self):
+    def setup(self, otel_port=None):
         if not gateway.is_running():
             log.section("Starting OpenShell gateway")
             gateway.start()
@@ -55,7 +57,7 @@ def setup(self):
         image_info = f", image: {self.image}" if self.image else ""
         log.section(f"Creating sandbox ({image_info.lstrip(', ') or 'default image'})")
 
-        sandbox.create(image=self.image, policy_path=self.policy_path)
+        sandbox.create(image=self.image, policy_path=self.policy_path, otel_port=otel_port)
 
         log.section("Uploading workdir")
         sandbox.upload(self.workdir)
@@ -107,9 +109,25 @@ def _write_env_script(self, model, otel_port=None, otel_rate_file=None):
 
         Uses the harness's native env script (Vertex AI vars or API key)
         since the google-cloud provider injects GCP credentials directly.
+
+        For OTEL, uses ``host.openshell.internal`` to reach the host-side
+        collector through the gateway proxy instead of the harness default
+        (which uses an IP unreachable from the sandbox).
         """
-        lines = self.harness.build_env_script_lines(otel_port, otel_rate_file)
-        lines.append("export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1")
+        lines = self.harness.build_env_script_lines()
+        if otel_port:
+            lines.extend(
+                [
+                    "export CLAUDE_CODE_ENABLE_TELEMETRY=1",
+                    "export OTEL_METRICS_EXPORTER=otlp",
+                    "export OTEL_LOGS_EXPORTER=otlp",
+                    "export OTEL_EXPORTER_OTLP_PROTOCOL=http/json",
+                    f"export OTEL_EXPORTER_OTLP_ENDPOINT=http://{_OPENSHELL_HOST}:{otel_port}",
+                    "export OTEL_METRIC_EXPORT_INTERVAL=5000",
+                ]
+            )
+        else:
+            lines.append("export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1")
 
         for key, val in self._extra_env.items():
             lines.append(f"export {key}={shlex.quote(val)}")
diff --git a/src/agentic_ci/backends/openshell/sandbox.py b/src/agentic_ci/backends/openshell/sandbox.py
index 1562112..afb40ee 100644
--- a/src/agentic_ci/backends/openshell/sandbox.py
+++ b/src/agentic_ci/backends/openshell/sandbox.py
@@ -24,7 +24,7 @@ def exists():
     return result.returncode == 0
 
 
-def create(image=None, policy_path=None):
+def create(image=None, policy_path=None, otel_port=None):
     """Create a persistent sandbox with the CI provider attached.
 
     The sandbox is created first, then the network policy is applied
@@ -47,16 +47,18 @@ def create(image=None, policy_path=None):
     args.extend(["--", "true"])
     _run(args, check=True)
 
-    _apply_policy(policy_path)
+    _apply_policy(policy_path, otel_port=otel_port)
 
 
-def _apply_policy(policy_path):
+def _apply_policy(policy_path, otel_port=None):
     """Apply network policy endpoints and wait for activation.
 
     Uses ``openshell policy update --wait`` which blocks until the
     supervisor has compiled and loaded the new policy revision.
     """
     endpoints = resolve_endpoints(policy_path)
+    if otel_port:
+        endpoints.append(f"host.openshell.internal:{otel_port}:read-write")
     if not endpoints:
         return
 
diff --git a/src/agentic_ci/backends/podman.py b/src/agentic_ci/backends/podman.py
index ffac2a8..6d1c4ba 100644
--- a/src/agentic_ci/backends/podman.py
+++ b/src/agentic_ci/backends/podman.py
@@ -41,7 +41,7 @@ def __init__(
         self._config_dir = None
         self._extra_env = extra_env or {}
 
-    def setup(self):
+    def setup(self, otel_port=None):
         self._resolve_image()
         if self.harness.auth_mode == "vertex":
             self._resolve_credentials()
diff --git a/src/agentic_ci/cli.py b/src/agentic_ci/cli.py
index 554603f..85d7072 100644
--- a/src/agentic_ci/cli.py
+++ b/src/agentic_ci/cli.py
@@ -52,8 +52,6 @@ def cmd_run(args, backend, harness):
                 sys.exit(0)
             log.info(f"{gate.name}: passed")
 
-    backend.setup()
-
     model_env = harness.model_env_var()
     if args.model:
         model = args.model
@@ -73,10 +71,16 @@ def cmd_run(args, backend, harness):
     try:
         if not args.no_otel and harness.supports_otel:
             log.section("Starting OTEL collector")
-            otel_proc, otel_port, otel_log, otel_rate = otel.start_collector(run_dir)
+            bind_addr = "0.0.0.0" if args.backend == "openshell" else "127.0.0.1"
+            otel_proc, otel_port, otel_log, otel_rate = otel.start_collector(
+                run_dir, bind_addr=bind_addr
+            )
+            os.environ["OTEL_RATE_FILE"] = otel_rate
             log.detail("pid", str(otel_proc.pid))
             log.detail("port", str(otel_port))
 
+        backend.setup(otel_port=otel_port)
+
         log.section(f"Running {harness.name} ({model}) via {args.backend} backend")
         rc = backend.run(
             prompt=args.prompt,
@@ -128,6 +132,7 @@ def cmd_run(args, backend, harness):
     finally:
         if otel_proc:
             otel.stop_collector(otel_proc)
+            os.environ.pop("OTEL_RATE_FILE", None)
         if not args.keep:
             backend.stop()
 
diff --git a/src/agentic_ci/harness.py b/src/agentic_ci/harness.py
index 8710c85..9f6f71d 100644
--- a/src/agentic_ci/harness.py
+++ b/src/agentic_ci/harness.py
@@ -10,6 +10,8 @@
 from abc import ABC, abstractmethod
 from typing import Any
 
+_OPENSHELL_GATEWAY_HOST = "10.200.0.1"
+
 
 class Harness(ABC):
     """Base class for agent CLI harnesses."""
@@ -148,12 +150,10 @@ def build_env_script_lines(self, otel_port=None, otel_rate_file=None):
                     "export OTEL_METRICS_EXPORTER=otlp",
                     "export OTEL_LOGS_EXPORTER=otlp",
                     "export OTEL_EXPORTER_OTLP_PROTOCOL=http/json",
-                    f"export OTEL_EXPORTER_OTLP_ENDPOINT=http://10.200.0.1:{otel_port}",
+                    f"export OTEL_EXPORTER_OTLP_ENDPOINT=http://{_OPENSHELL_GATEWAY_HOST}:{otel_port}",
                     "export OTEL_METRIC_EXPORT_INTERVAL=10000",
                 ]
             )
-            if otel_rate_file:
-                lines.append(f"export OTEL_RATE_FILE={shlex.quote(otel_rate_file)}")
         return lines
 
     def build_otel_exec_env(self, otel_port=None):
diff --git a/src/agentic_ci/otel.py b/src/agentic_ci/otel.py
index eb1851a..ca99705 100644
--- a/src/agentic_ci/otel.py
+++ b/src/agentic_ci/otel.py
@@ -90,7 +90,7 @@ def _update_token_rate(payload):
     os.replace(tmp, rate_file)
 
 
-def start_collector(run_dir):
+def start_collector(run_dir, bind_addr="127.0.0.1"):
     """Start the OTEL collector as a subprocess. Returns (proc, port)."""
     otel_log = os.path.join(run_dir, "claude-otel.jsonl")
     otel_rate = os.path.join(run_dir, "claude-otel-rate.json")
@@ -108,6 +108,7 @@ def start_collector(run_dir):
         "OTEL_RATE_FILE": otel_rate,
         "OTEL_COLLECTOR_PORT": "0",
         "OTEL_PORT_FILE": port_file,
+        "OTEL_BIND_ADDR": bind_addr,
     }
     proc = subprocess.Popen(
         [sys.executable, "-m", "agentic_ci.otel"],
@@ -258,7 +259,8 @@ def print_summary(log_file):
 def main():
     """Run the OTEL collector server."""
     port = int(os.environ.get("OTEL_COLLECTOR_PORT", "4318"))
-    server = HTTPServer(("127.0.0.1", port), OTLPHandler)
+    bind_addr = os.environ.get("OTEL_BIND_ADDR", "127.0.0.1")
+    server = HTTPServer((bind_addr, port), OTLPHandler)
     actual_port = server.server_address[1]
     port_file = os.environ.get("OTEL_PORT_FILE")
     if port_file:
@@ -267,7 +269,7 @@ def main():
     signal.signal(signal.SIGTERM, lambda *_: sys.exit(0))
     log_file = os.environ.get("OTEL_LOG_FILE", "/tmp/claude-otel.jsonl")
     print(
-        f"OTLP collector listening on 127.0.0.1:{actual_port}, writing to {log_file}",
+        f"OTLP collector listening on {bind_addr}:{actual_port}, writing to {log_file}",
         file=sys.stderr,
     )
     try:
diff --git a/tests/test_harness.py b/tests/test_harness.py
index f4d5271..6a8391b 100644
--- a/tests/test_harness.py
+++ b/tests/test_harness.py
@@ -127,13 +127,13 @@ def test_build_env_script_lines_api_key(self, monkeypatch):
         assert not any("CLAUDE_CODE_USE_VERTEX" in line for line in lines)
         assert not any("GOOGLE_APPLICATION_CREDENTIALS" in line for line in lines)
 
-    def test_build_env_script_lines_with_otel(self, monkeypatch, tmp_path):
+    def test_build_env_script_lines_with_otel(self, monkeypatch):
         monkeypatch.setenv("ANTHROPIC_VERTEX_PROJECT_ID", "proj")
         harness = ClaudeCodeHarness()
-        rate_file = str(tmp_path / "rate.json")
-        lines = harness.build_env_script_lines(otel_port=4318, otel_rate_file=rate_file)
+        lines = harness.build_env_script_lines(otel_port=4318)
         assert any("CLAUDE_CODE_ENABLE_TELEMETRY=1" in line for line in lines)
-        assert any(f"OTEL_RATE_FILE={rate_file}" in line for line in lines)
+        assert any("OTEL_EXPORTER_OTLP_ENDPOINT=http://10.200.0.1:4318" in line for line in lines)
+        assert not any("OTEL_RATE_FILE" in line for line in lines)
 
     def test_credential_mount_target(self):
         assert ClaudeCodeHarness().credential_mount_target() == "/home/agent-ci"