From 0abda2090cb680ed6f316598e242bd2c6b1d4dd1 Mon Sep 17 00:00:00 2001 From: Emilien Macchi Date: Wed, 10 Jun 2026 08:00:16 -0400 Subject: [PATCH] Fix OTEL telemetry collection for OpenShell backend The OTEL collector binds to 0.0.0.0 on the host but was unreachable from inside the sandbox because the harness pointed the OTEL endpoint at 10.200.0.1 (the gateway bridge IP), which doesn't route to the host network stack. Use host.openshell.internal instead, which resolves to the host inside the sandbox (added in OpenShell PR #1279). The sandbox policy now includes host.openshell.internal:PORT so the gateway proxy forwards OTEL exports to the host-side collector. The OpenShell backend handles OTEL env vars directly (instead of delegating to the harness) because it needs the OpenShell-specific hostname and a shorter export interval (5s vs 10s) to capture metrics from short-lived runs. Also adds Section F to the OpenShell e2e test skill for verifying OTEL collection works end-to-end. Signed-off-by: Emilien Macchi Co-Authored-By: Claude --- .claude/skills/test-e2e-openshell/SKILL.md | 53 +++++++++++++++++-- src/agentic_ci/backend.py | 2 +- src/agentic_ci/backends/openshell/__init__.py | 26 +++++++-- src/agentic_ci/backends/openshell/sandbox.py | 8 +-- src/agentic_ci/backends/podman.py | 2 +- src/agentic_ci/cli.py | 11 ++-- src/agentic_ci/harness.py | 6 +-- src/agentic_ci/otel.py | 8 +-- tests/test_harness.py | 8 +-- 9 files changed, 99 insertions(+), 25 deletions(-) diff --git a/.claude/skills/test-e2e-openshell/SKILL.md b/.claude/skills/test-e2e-openshell/SKILL.md index 56abff7..d726dc0 100644 --- a/.claude/skills/test-e2e-openshell/SKILL.md +++ b/.claude/skills/test-e2e-openshell/SKILL.md @@ -143,7 +143,7 @@ Verify: - Output shows `Created sandbox: ci` - Output shows `Running Claude Code (claude-haiku-4-5) via openshell backend` - Claude's response contains `A1_OK` -- Token metrics show non-zero counts, cost around `$0.04` +- Token metrics show non-zero counts and cost is 
non-zero (e.g. `$0.04`) - `Agent exit code: 0` - `Sandbox deleted` and `Gateway stopped` at the end @@ -174,7 +174,7 @@ Verify: - Output shows `Auth: API key` - Output shows `Creating Anthropic API key provider` - Claude's response contains `B1_OK` -- Token metrics show non-zero counts and cost +- Token metrics show non-zero counts and cost is non-zero - `Agent exit code: 0` --- @@ -311,6 +311,53 @@ podman exec openshell-e2e rm -rf /tmp/workdir-test --- +## Section F: OTEL telemetry collection + +Verifies that the host-side OTEL collector receives metrics from the agent +running in the sandbox and prints a token/cost summary. Uses Vertex AI auth and +Claude Code (the only harness that supports OTEL). + +The collector runs on the host, not in the sandbox. For the OpenShell backend, +agentic-ci binds it to 0.0.0.0 and points the agent at +`host.openshell.internal`, which the sandbox policy allows through the gateway +proxy. After the run, the token/cost summary is printed on the host. + +Requires `OPENSHELL_SUPERVISOR_IMAGE` (see "Before you start"). + +Run cleanup first. + +### F1. Run with OTEL enabled + +```bash +podman exec \ + -e ANTHROPIC_VERTEX_PROJECT_ID=YOUR_PROJECT_ID \ + -e CLOUD_ML_REGION=global \ + -e OPENSHELL_SUPERVISOR_IMAGE=quay.io/mprpic/openshell-supervisor:pr1763 \ + -e SANDBOX_IMAGE="$CLAUDE_SANDBOX_IMAGE" \ + openshell-e2e bash -c ' + cd /tmp/e2e-workdir && \ + agentic-ci run \ + --backend openshell \ + --harness claude-code \ + --image "$SANDBOX_IMAGE" \ + --model claude-haiku-4-5 \ + "Respond with exactly: F1_OK" + ' +``` + +Note: no `--no-otel` flag. + +Verify: +- Output shows `Running Claude Code (claude-haiku-4-5) via openshell backend` +- Agent runs and completes with `F1_OK` in the response +- Output shows `Token/Cost Summary (OpenTelemetry)` section +- Token counts are non-zero (input tokens, output tokens, cache) +- Cost is non-zero (e.g. 
`$0.04`) +- `Agent exit code: 0` +- `Sandbox deleted` and `Gateway stopped` at the end + +--- + ## Final cleanup ```bash @@ -319,7 +366,7 @@ podman rm -f openshell-e2e ## Running the full suite -Execute sections in order (A through E), running the cleanup step before each +Execute sections in order (A through F), running the cleanup step before each section. Skip sections whose prerequisites are not met. If any step fails, check the gateway log inside the container: diff --git a/src/agentic_ci/backend.py b/src/agentic_ci/backend.py index 68e9832..fca2c39 100644 --- a/src/agentic_ci/backend.py +++ b/src/agentic_ci/backend.py @@ -30,7 +30,7 @@ def __init__(self, workdir=".", image=None, *, harness: Harness): self.verdict_path: Path | None = None @abstractmethod - def setup(self): + def setup(self, otel_port: int | None = None): """Prepare the backend. Idempotent.""" @abstractmethod diff --git a/src/agentic_ci/backends/openshell/__init__.py b/src/agentic_ci/backends/openshell/__init__.py index 69b5f1b..7ffd7eb 100644 --- a/src/agentic_ci/backends/openshell/__init__.py +++ b/src/agentic_ci/backends/openshell/__init__.py @@ -14,6 +14,8 @@ if TYPE_CHECKING: from agentic_ci.harness import Harness +_OPENSHELL_HOST = "host.openshell.internal" + class OpenShellBackend(Backend): """Runs an AI agent inside an OpenShell sandbox. 
@@ -38,7 +40,7 @@ def __init__(self, workdir=".", image=None, policy=None, extra_env=None, *, harn self.policy_path = policy self._extra_env = extra_env or {} - def setup(self): + def setup(self, otel_port=None): if not gateway.is_running(): log.section("Starting OpenShell gateway") gateway.start() @@ -55,7 +57,7 @@ def setup(self): image_info = f", image: {self.image}" if self.image else "" log.section(f"Creating sandbox ({image_info.lstrip(', ') or 'default image'})") - sandbox.create(image=self.image, policy_path=self.policy_path) + sandbox.create(image=self.image, policy_path=self.policy_path, otel_port=otel_port) log.section("Uploading workdir") sandbox.upload(self.workdir) @@ -107,9 +109,25 @@ def _write_env_script(self, model, otel_port=None, otel_rate_file=None): Uses the harness's native env script (Vertex AI vars or API key) since the google-cloud provider injects GCP credentials directly. + + For OTEL, uses ``host.openshell.internal`` to reach the host-side + collector through the gateway proxy instead of the harness default + (which uses an IP unreachable from the sandbox). 
""" - lines = self.harness.build_env_script_lines(otel_port, otel_rate_file) - lines.append("export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1") + lines = self.harness.build_env_script_lines() + if otel_port: + lines.extend( + [ + "export CLAUDE_CODE_ENABLE_TELEMETRY=1", + "export OTEL_METRICS_EXPORTER=otlp", + "export OTEL_LOGS_EXPORTER=otlp", + "export OTEL_EXPORTER_OTLP_PROTOCOL=http/json", + f"export OTEL_EXPORTER_OTLP_ENDPOINT=http://{_OPENSHELL_HOST}:{otel_port}", + "export OTEL_METRIC_EXPORT_INTERVAL=5000", + ] + ) + else: + lines.append("export CLAUDE_CODE_DISABLE_NONESSENTIAL_TRAFFIC=1") for key, val in self._extra_env.items(): lines.append(f"export {key}={shlex.quote(val)}") diff --git a/src/agentic_ci/backends/openshell/sandbox.py b/src/agentic_ci/backends/openshell/sandbox.py index 1562112..afb40ee 100644 --- a/src/agentic_ci/backends/openshell/sandbox.py +++ b/src/agentic_ci/backends/openshell/sandbox.py @@ -24,7 +24,7 @@ def exists(): return result.returncode == 0 -def create(image=None, policy_path=None): +def create(image=None, policy_path=None, otel_port=None): """Create a persistent sandbox with the CI provider attached. The sandbox is created first, then the network policy is applied @@ -47,16 +47,18 @@ def create(image=None, policy_path=None): args.extend(["--", "true"]) _run(args, check=True) - _apply_policy(policy_path) + _apply_policy(policy_path, otel_port=otel_port) -def _apply_policy(policy_path): +def _apply_policy(policy_path, otel_port=None): """Apply network policy endpoints and wait for activation. Uses ``openshell policy update --wait`` which blocks until the supervisor has compiled and loaded the new policy revision. 
""" endpoints = resolve_endpoints(policy_path) + if otel_port: + endpoints.append(f"host.openshell.internal:{otel_port}:read-write") if not endpoints: return diff --git a/src/agentic_ci/backends/podman.py b/src/agentic_ci/backends/podman.py index ffac2a8..6d1c4ba 100644 --- a/src/agentic_ci/backends/podman.py +++ b/src/agentic_ci/backends/podman.py @@ -41,7 +41,7 @@ def __init__( self._config_dir = None self._extra_env = extra_env or {} - def setup(self): + def setup(self, otel_port=None): self._resolve_image() if self.harness.auth_mode == "vertex": self._resolve_credentials() diff --git a/src/agentic_ci/cli.py b/src/agentic_ci/cli.py index 554603f..85d7072 100644 --- a/src/agentic_ci/cli.py +++ b/src/agentic_ci/cli.py @@ -52,8 +52,6 @@ def cmd_run(args, backend, harness): sys.exit(0) log.info(f"{gate.name}: passed") - backend.setup() - model_env = harness.model_env_var() if args.model: model = args.model @@ -73,10 +71,16 @@ def cmd_run(args, backend, harness): try: if not args.no_otel and harness.supports_otel: log.section("Starting OTEL collector") - otel_proc, otel_port, otel_log, otel_rate = otel.start_collector(run_dir) + bind_addr = "0.0.0.0" if args.backend == "openshell" else "127.0.0.1" + otel_proc, otel_port, otel_log, otel_rate = otel.start_collector( + run_dir, bind_addr=bind_addr + ) + os.environ["OTEL_RATE_FILE"] = otel_rate log.detail("pid", str(otel_proc.pid)) log.detail("port", str(otel_port)) + backend.setup(otel_port=otel_port) + log.section(f"Running {harness.name} ({model}) via {args.backend} backend") rc = backend.run( prompt=args.prompt, @@ -128,6 +132,7 @@ def cmd_run(args, backend, harness): finally: if otel_proc: otel.stop_collector(otel_proc) + os.environ.pop("OTEL_RATE_FILE", None) if not args.keep: backend.stop() diff --git a/src/agentic_ci/harness.py b/src/agentic_ci/harness.py index 8710c85..9f6f71d 100644 --- a/src/agentic_ci/harness.py +++ b/src/agentic_ci/harness.py @@ -10,6 +10,8 @@ from abc import ABC, abstractmethod from typing 
import Any +_OPENSHELL_GATEWAY_HOST = "10.200.0.1" + class Harness(ABC): """Base class for agent CLI harnesses.""" @@ -148,12 +150,10 @@ def build_env_script_lines(self, otel_port=None, otel_rate_file=None): "export OTEL_METRICS_EXPORTER=otlp", "export OTEL_LOGS_EXPORTER=otlp", "export OTEL_EXPORTER_OTLP_PROTOCOL=http/json", - f"export OTEL_EXPORTER_OTLP_ENDPOINT=http://10.200.0.1:{otel_port}", + f"export OTEL_EXPORTER_OTLP_ENDPOINT=http://{_OPENSHELL_GATEWAY_HOST}:{otel_port}", "export OTEL_METRIC_EXPORT_INTERVAL=10000", ] ) - if otel_rate_file: - lines.append(f"export OTEL_RATE_FILE={shlex.quote(otel_rate_file)}") return lines def build_otel_exec_env(self, otel_port=None): diff --git a/src/agentic_ci/otel.py b/src/agentic_ci/otel.py index eb1851a..ca99705 100644 --- a/src/agentic_ci/otel.py +++ b/src/agentic_ci/otel.py @@ -90,7 +90,7 @@ def _update_token_rate(payload): os.replace(tmp, rate_file) -def start_collector(run_dir): +def start_collector(run_dir, bind_addr="127.0.0.1"): """Start the OTEL collector as a subprocess. 
Returns (proc, port).""" otel_log = os.path.join(run_dir, "claude-otel.jsonl") otel_rate = os.path.join(run_dir, "claude-otel-rate.json") @@ -108,6 +108,7 @@ def start_collector(run_dir): "OTEL_RATE_FILE": otel_rate, "OTEL_COLLECTOR_PORT": "0", "OTEL_PORT_FILE": port_file, + "OTEL_BIND_ADDR": bind_addr, } proc = subprocess.Popen( [sys.executable, "-m", "agentic_ci.otel"], @@ -258,7 +259,8 @@ def print_summary(log_file): def main(): """Run the OTEL collector server.""" port = int(os.environ.get("OTEL_COLLECTOR_PORT", "4318")) - server = HTTPServer(("127.0.0.1", port), OTLPHandler) + bind_addr = os.environ.get("OTEL_BIND_ADDR", "127.0.0.1") + server = HTTPServer((bind_addr, port), OTLPHandler) actual_port = server.server_address[1] port_file = os.environ.get("OTEL_PORT_FILE") if port_file: @@ -267,7 +269,7 @@ def main(): signal.signal(signal.SIGTERM, lambda *_: sys.exit(0)) log_file = os.environ.get("OTEL_LOG_FILE", "/tmp/claude-otel.jsonl") print( - f"OTLP collector listening on 127.0.0.1:{actual_port}, writing to {log_file}", + f"OTLP collector listening on {bind_addr}:{actual_port}, writing to {log_file}", file=sys.stderr, ) try: diff --git a/tests/test_harness.py b/tests/test_harness.py index f4d5271..6a8391b 100644 --- a/tests/test_harness.py +++ b/tests/test_harness.py @@ -127,13 +127,13 @@ def test_build_env_script_lines_api_key(self, monkeypatch): assert not any("CLAUDE_CODE_USE_VERTEX" in line for line in lines) assert not any("GOOGLE_APPLICATION_CREDENTIALS" in line for line in lines) - def test_build_env_script_lines_with_otel(self, monkeypatch, tmp_path): + def test_build_env_script_lines_with_otel(self, monkeypatch): monkeypatch.setenv("ANTHROPIC_VERTEX_PROJECT_ID", "proj") harness = ClaudeCodeHarness() - rate_file = str(tmp_path / "rate.json") - lines = harness.build_env_script_lines(otel_port=4318, otel_rate_file=rate_file) + lines = harness.build_env_script_lines(otel_port=4318) assert any("CLAUDE_CODE_ENABLE_TELEMETRY=1" in line for line in lines) - 
assert any(f"OTEL_RATE_FILE={rate_file}" in line for line in lines) + assert any("OTEL_EXPORTER_OTLP_ENDPOINT=http://10.200.0.1:4318" in line for line in lines) + assert not any("OTEL_RATE_FILE" in line for line in lines) def test_credential_mount_target(self): assert ClaudeCodeHarness().credential_mount_target() == "/home/agent-ci"