From bee37696994c7c0bac42bffb62b0f6951efbb0da Mon Sep 17 00:00:00 2001 From: Roshni Malani Date: Fri, 12 Jun 2026 18:08:00 -0700 Subject: [PATCH 01/10] docs(rfc): add RFC 0005 for warm-pooled sandboxes Propose adopting the upstream agent-sandbox warm-pool extension CRDs (SandboxTemplate / SandboxWarmPool / SandboxClaim, extensions.agents.x-k8s.io/v1alpha1) on the Kubernetes driver to hand out pre-warmed sandbox pods in ~milliseconds instead of cold-starting a Sandbox CR per request. Documents the claim-based create flow, what bakes into the shared template vs. late-binds over the supervisor relay, the security-sensitive identity re-anchor, risks, alternatives, and a phased rollout. Signed-off-by: Roshni Malani --- rfc/0005-warm-pooled-sandboxes/README.md | 217 +++++++++++++++++++++++ 1 file changed, 217 insertions(+) create mode 100644 rfc/0005-warm-pooled-sandboxes/README.md diff --git a/rfc/0005-warm-pooled-sandboxes/README.md b/rfc/0005-warm-pooled-sandboxes/README.md new file mode 100644 index 000000000..e1bfcc9c5 --- /dev/null +++ b/rfc/0005-warm-pooled-sandboxes/README.md @@ -0,0 +1,217 @@ +--- +authors: + - "@rmalani-nv" +state: review +links: + - https://github.com/NVIDIA/OpenShell/pull/1813 + - https://github.com/kubernetes-sigs/agent-sandbox/releases/tag/v0.4.6 + - https://github.com/kubernetes-sigs/agent-sandbox + - https://agent-sandbox.sigs.k8s.io/docs/ +--- + +# RFC 0005 - Warm-Pooled Sandboxes + +## Summary + +Add support for **warm-pooled sandboxes** on the Kubernetes compute driver by +adopting the upstream [agent-sandbox](https://github.com/kubernetes-sigs/agent-sandbox) +warm-pool extension CRDs — `SandboxTemplate`, `SandboxWarmPool`, and +`SandboxClaim` (`extensions.agents.x-k8s.io/v1alpha1`). Instead of cold-starting +a `Sandbox` CR + Pod per request, the gateway claims a pre-provisioned, ready Pod +from a pool, cutting time-to-ready from seconds to milliseconds. The extensions +ship in the same `v0.4.6` release OpenShell already pins for the core `Sandbox` +CRD; OpenShell simply does not install or use them today. + +## Motivation + +Creating a Kubernetes sandbox today is a cold start: the gateway creates a +`Sandbox` CR, the agent-sandbox controller creates a Pod, the image is pulled (or +read from cache), the supervisor boots, and only then does the sandbox become +`Ready`. Measured locally this is ~4s+ even with the image preloaded. For +interactive agent workloads and high-churn "fresh sandbox per task" usage, that +latency dominates. A warm pool keeps N ready Pods standing by so a claim binds in +**~0.1s** (measured on a local spike). + +## Non-goals + +- Changing the default (cold) sandbox-create path. Warm pooling is additive and + opt-in; sandboxes that don't match a pool fall back to a cold create. +- GPU warm pools in the initial rollout (idle accelerators are expensive — opt-in + later, per pool). +- Migrating OpenShell's core `Sandbox` usage from `v1alpha1` to `v1beta1`. The + pinned `v0.4.6` release serves `v1alpha1` for both core and extensions; + upstream `main` (`v1beta1`, mutually-exclusive claim fields) is out of scope + until OpenShell bumps the pinned version. +- Multiplayer/non-Kubernetes drivers (Docker, Podman, VM) — warm pooling is a + Kubernetes-driver capability in this RFC. + +## Proposal + +### Extension CRDs (verified against v0.4.6) + +| CRD (`extensions.agents.x-k8s.io/v1alpha1`) | Role | +|---|---| +| `SandboxTemplate` | Reusable blueprint: `spec.podTemplate`, `spec.volumeClaimTemplates`, `spec.networkPolicy` | +| `SandboxWarmPool` | Keeps N Pods warm: `spec.replicas`, `spec.sandboxTemplateRef`; `status.{readyReplicas,replicas,selector}` (HPA-scalable) | +| `SandboxClaim` | Binds a warm Pod: `spec.sandboxTemplateRef` (required), `spec.warmpool`, `spec.additionalPodMetadata.{annotations,labels}`, `spec.env[]`, `spec.lifecycle`; `status.sandbox.{name,podIPs}` | + +A `SandboxWarmPool` pre-creates real `Sandbox` CRs from a `SandboxTemplate`; each +warm Pod is owned by a *controlling* `Sandbox` ownerReference. A `SandboxClaim` +binds one of those warm `Sandbox`/Pods and reports the bound `Sandbox` in +`status.sandbox.name`. The claimed Pod's owning `Sandbox` CR is in turn owned by +the `SandboxClaim` (controlling ownerReference) and labeled +`agents.x-k8s.io/claim-uid`. + +### Claim-based create flow + +The gateway pre-declares one or more `SandboxWarmPool`s (+ their +`SandboxTemplate`s), each carrying the **shared** OpenShell Pod configuration +(image, mTLS secret mount, projected SA-token volume, supervisor sideload, Linux +capabilities, host aliases, runtimeClass, resources, workspace +`volumeClaimTemplates`). On `CreateSandbox`, when the requested shape matches a +pool, the Kubernetes driver creates a `SandboxClaim` (instead of a `Sandbox`) +that injects the per-sandbox identity via +`additionalPodMetadata.annotations[openshell.io/sandbox-id]`, then watches the +claim and maps `status.sandbox.{name,podIPs}` + conditions to `SandboxPhase`. + +What bakes vs. late-binds: + +- **Baked into the shared `SandboxTemplate`:** everything generic across pooled + Pods (TLS, SA token, supervisor, caps, workspace VCT). +- **Injected per-claim (annotation only):** `openshell.io/sandbox-id`. Per-claim + `env[]` is **rejected on the warm path** (Pod env is immutable once running), so + identity must not ride Pod env. +- **Late-bound at runtime over the supervisor relay (already works):** policy, + providers. Sandbox identity is established by the existing token exchange — the + supervisor presents its projected SA token to `IssueSandboxToken`, and the + gateway resolves identity server-side. The supervisor's `--sandbox-id` is + optional (log-push/policy labeling only). + +### Identity re-anchoring (the one security-sensitive change) + +Today `validate_sandbox_owner_reference()` in +`crates/openshell-server/src/auth/k8s_sa.rs` authenticates a sandbox by +cross-checking the owning `Sandbox` CR's `openshell.ai/sandbox-id` label against +the Pod's `openshell.io/sandbox-id` annotation. On the warm path the pool +controller creates the `Sandbox` CR generically, so it carries +`agents.x-k8s.io/claim-uid` (+ a controlling `SandboxClaim` ownerReference) +instead of OpenShell's label. + +The check must therefore **re-anchor to the gateway-created `SandboxClaim`**: +resolve Pod → owning `Sandbox` CR → controlling `SandboxClaim` (name + uid) → +the sandbox-id the gateway recorded for that claim (gateway Store, keyed by +claim-uid), and verify the claim is bound (`status.sandbox.name` equals the +owning CR) and that its recorded sandbox-id equals the Pod annotation. This +preserves the existing invariant — *the sandbox-id a Pod can obtain equals a +value only the gateway wrote, on an object the sandbox workload cannot mutate*. +The sandbox ServiceAccount has no write access to `sandboxclaims` or Pods today +(confirmed on a live cluster), and the phase-2/phase-3 RBAC must preserve that. +The TokenReview, pod-UID, and ownerReference legs are unchanged. + +## Implementation plan + +Rollout is incremental; each phase is a separate, reviewable PR. The +security-sensitive auth change (phase 3) is gated behind `state:agent-ready`. + +1. **Install the extensions (this PR).** Apply `extensions.yaml` in the local + k3d dev script and the e2e kube harness so clusters are ready for warm + pooling. No gateway behavior change yet. +2. **Driver warm path (flagged).** When a sandbox maps to a configured pool, the + Kubernetes driver creates a `SandboxClaim` (template + warmpool + + `additionalPodMetadata.annotations[openshell.io/sandbox-id]`) instead of a + `Sandbox`; watch the claim and map `status` → `SandboxPhase`. Keep the + direct-`Sandbox` path as the cold fallback. Add gateway RBAC for + `extensions.agents.x-k8s.io` (`sandboxclaims`, `sandboxtemplates`, + `sandboxwarmpools`) in the Helm chart. +3. **Auth re-anchoring.** Adapt `validate_sandbox_owner_reference()` for the + claim-based identity check above; fail closed; extend the table-driven tests + in `k8s_sa.rs` with the spoof case (Pod annotation ≠ claim record → reject). +4. **Pool management.** Gateway declares/reconciles `SandboxTemplate` + + `SandboxWarmPool` from gateway config (one per template/image shape); sizing, + `replicas`, GC of drained pools. +5. **Surface + docs.** `gateway.toml` pool config (`docs/reference/gateway-config.mdx`), + CLI/TUI visibility, OCSF events, e2e coverage, published Kubernetes docs. + +## Risks + +- **Identity binding is security-sensitive.** Mishandled, a sandbox could + impersonate another sandbox-id. Mitigated by re-anchoring to the + gateway-created claim, failing closed, threat-model unit tests, an RBAC + assertion test, an adversarial security review, and OCSF detection findings on + mismatch. See phase 3. +- **Pool shape rigidity.** A pool is one (image, resources, runtimeClass, gpu) + shape; heterogeneous sandboxes need a pool each, and unmatched requests fall + back to cold. Warm pooling pays off most for the high-churn default image. +- **Idle cost.** Warm Pods consume resources while idle; GPU pools especially. + Sizing must be operator-controlled and default conservative. +- **Upstream API drift.** `v0.4.6` extensions are `v1alpha1`; `main` is `v1beta1` + with different claim semantics. Pin and bump deliberately. + +## Alternatives + +- **Patch identity onto the claimed Pod/`Sandbox` after bind** (keep the existing + label cross-check). Rejected: requires granting the gateway `patch pods` + (currently denied for immutability) and is racy. +- **Bare-Pod warm pools** (if upstream changes the pool to create Pods, not + `Sandbox` CRs — see upstream issue #390). Would break the ownerReference auth + chain and force a larger rework. The pinned `v0.4.6` creates `Sandbox` CRs. +- **Do nothing.** Accept cold-start latency. Viable for low-churn usage but poor + for interactive agents. + +## Prior art + +Upstream agent-sandbox documents warm pooling end to end, including an +HPA-driven autoscaling example keyed on `agent_sandbox_claim_creation_total` and +the `SandboxWarmPool` `status.selector`. OpenShell already builds on the core +`Sandbox` CRD from the same project. + +## Resolved decisions (implemented per issue #1879) + +- **Workspace model:** the writable per-agent `/sandbox` on a pooled pod is an + ephemeral `emptyDir` seeded from the image — single-use and fail-safe (kubelet + reclaims it, nothing to orphan), avoiding the orphaned-PVC risk entirely. The + pooled `SandboxTemplate` carries **no** `volumeClaimTemplates`. Large shared + files live on a separate, optional, **read-only** PVC mounted into the + template. Claims use `shutdownPolicy: Delete`; claimed pods are never re-pooled. +- **Identity re-anchor:** implemented as a fail-closed chain in `auth/k8s_sa.rs` + (Pod → owning `Sandbox` → controlling `SandboxClaim` → live bound claim → + durable gateway Store mapping keyed by `(namespace, claim_name, claim_uid)`, + with the mapped sandbox-id required to equal the Pod annotation). The mapping + lives in the shared gateway Store (HA-safe). +- **Supervisor boot + session (validated live):** a pooled pod boots before its + claim assigns an identity, so the supervisor boots on the image **baseline + policy** (`OPENSHELL_SANDBOX_ID` left unset, identity-gated startup skipped), + then establishes its relay session with an empty `hello.sandbox_id`; the + gateway derives the identity from the gateway-minted JWT (`handle_connect_supervisor`), + which `IssueSandboxToken` resolved from the claim annotation + durable mapping. + The supervisor retries the bootstrap at a fixed cadence until the claim binds. +- **Egress:** the pooled `SandboxTemplate` sets `networkPolicyManagement: Unmanaged`. + The controller's default `Managed` mode imposes a rule-less, default-deny + NetworkPolicy that blocked the pod's egress to the gateway (the supervisor + could never reach `IssueSandboxToken`); OpenShell enforces egress itself via + the supervisor proxy + Landlock, matching the cold path (no NetworkPolicy). +- **Policy on the warm path:** Landlock is applied once at process start and + cannot be loosened, so a pooled pod can only enforce the **baseline** policy. + Requests carrying a custom `spec.policy` are therefore **never** warm-pooled — + the gateway sets `DriverSandboxSpec.disallow_warm_pool` and they take the cold + path, so a custom policy is never silently downgraded. Per-sandbox policy on + the warm path is a follow-up that needs a different enforcement model. +- **Sandbox-id delivery:** the gateway JWT is the source of truth (the gateway + derives identity from it for the session); no Downward API volume was needed. +- **Reserved metadata:** users cannot set `openshell.io/*`, `openshell.ai/*`, + `agents.x-k8s.io/*`, `extensions.agents.x-k8s.io/*`, or `spiffe.io/*` keys, and + the warm path never injects per-claim env. + +Validated end-to-end on a local k3d cluster: pool reconcile → claim → pooled-pod +supervisor boot → `IssueSandboxToken` (auth re-anchor) → relay session → exec → +workspace-isolation assertions all pass. + +## Open questions + +- Pool sizing / autoscaling policy (HPA on `agent_sandbox_claim_creation_total`). +- Per-tenant pool isolation for user-supplied images (today only the trusted + operator default image is pooled; user images fall back to the cold path). +- Per-sandbox **policy** on the warm path (today: baseline only; custom policies + fall back to cold). Needs an enforcement model that doesn't rely on boot-time + Landlock. +- CLI/TUI surface for warm-pool visibility. From a2e09def82b528f4e9b92c1298b2f3bb25b883f5 Mon Sep 17 00:00:00 2001 From: Roshni Malani Date: Fri, 12 Jun 2026 18:08:00 -0700 Subject: [PATCH 02/10] chore(deploy): install agent-sandbox warm-pool extension CRDs in dev and e2e Apply the agent-sandbox extensions.yaml alongside manifest.yaml in the local k3d dev bootstrap and the e2e kube harness, reusing the pinned AGENT_SANDBOX_VERSION. The e2e harness waits for the new extension CRDs to be Established and for the re-rolled controller. Note the change in the helm-dev-environment skill. Signed-off-by: Roshni Malani --- .agents/skills/helm-dev-environment/SKILL.md | 10 ++++++---- e2e/with-kube-gateway.sh | 10 +++++++++- tasks/scripts/helm-k3s-local.sh | 5 +++++ 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/.agents/skills/helm-dev-environment/SKILL.md b/.agents/skills/helm-dev-environment/SKILL.md index 10813792d..250b9ecbe 100644 --- a/.agents/skills/helm-dev-environment/SKILL.md +++ b/.agents/skills/helm-dev-environment/SKILL.md @@ -26,10 +26,12 @@ mise run helm:k3s:create ``` Creates a k3d cluster and merges its kubeconfig into the worktree-local `kubeconfig` file. -Also applies the upstream agent-sandbox CRDs/controller (pinned via `AGENT_SANDBOX_VERSION` -in `tasks/scripts/helm-k3s-local.sh`, fetched from `github.com/kubernetes-sigs/agent-sandbox` -releases) and preloads the default community sandbox image into k3d so the first sandbox -create does not wait on a large registry pull. Traefik is disabled at cluster creation time. +Also applies the upstream agent-sandbox CRDs/controller plus the warm-pool extensions +(`SandboxTemplate` / `SandboxWarmPool` / `SandboxClaim`, from `extensions.yaml`) — both +pinned via `AGENT_SANDBOX_VERSION` in `tasks/scripts/helm-k3s-local.sh`, fetched from +`github.com/kubernetes-sigs/agent-sandbox` releases — and preloads the default community +sandbox image into k3d so the first sandbox create does not wait on a large registry pull. +Traefik is disabled at cluster creation time. **Multi-worktree support:** the cluster name is derived from the last component of the current git branch (e.g. branch `kube-support/local-dev/tmutch` → cluster diff --git a/e2e/with-kube-gateway.sh b/e2e/with-kube-gateway.sh index bea1c01d3..c72c43780 100755 --- a/e2e/with-kube-gateway.sh +++ b/e2e/with-kube-gateway.sh @@ -530,10 +530,18 @@ fi # The Kubernetes compute driver creates and watches Sandbox CRs reconciled # by the upstream agent-sandbox-controller. Without the CRD + controller, # every gateway K8s call 404s and CreateSandbox never produces a Pod. -echo "Installing agent-sandbox CRDs and controller (${AGENT_SANDBOX_VERSION})..." +# The warm-pool extensions (SandboxTemplate / SandboxWarmPool / SandboxClaim) are +# applied alongside core so the e2e cluster matches the dev cluster and is ready +# for warm-pooled sandbox coverage. extensions.yaml reconfigures the +# agent-sandbox-controller deployment, so wait for the rollout after both applies. +echo "Installing agent-sandbox CRDs, controller, and warm-pool extensions (${AGENT_SANDBOX_VERSION})..." _agent_sandbox_base="https://github.com/kubernetes-sigs/agent-sandbox/releases/download/${AGENT_SANDBOX_VERSION}" kctl apply -f "${_agent_sandbox_base}/manifest.yaml" +kctl apply -f "${_agent_sandbox_base}/extensions.yaml" kctl wait --for=condition=Established crd/sandboxes.agents.x-k8s.io --timeout=120s +kctl wait --for=condition=Established crd/sandboxtemplates.extensions.agents.x-k8s.io --timeout=120s +kctl wait --for=condition=Established crd/sandboxwarmpools.extensions.agents.x-k8s.io --timeout=120s +kctl wait --for=condition=Established crd/sandboxclaims.extensions.agents.x-k8s.io --timeout=120s kctl -n agent-sandbox-system rollout status deployment/agent-sandbox-controller --timeout=300s helm_extra_args=() diff --git a/tasks/scripts/helm-k3s-local.sh b/tasks/scripts/helm-k3s-local.sh index 59d8939a6..6ed2a524f 100755 --- a/tasks/scripts/helm-k3s-local.sh +++ b/tasks/scripts/helm-k3s-local.sh @@ -143,6 +143,11 @@ apply_base_manifests() { local base="https://github.com/kubernetes-sigs/agent-sandbox/releases/download/${AGENT_SANDBOX_VERSION}" echo "Applying agent-sandbox manifest (${AGENT_SANDBOX_VERSION})..." kubectl --kubeconfig="${KUBECONFIG_TARGET}" apply -f "${base}/manifest.yaml" + # Warm-pool extensions (SandboxTemplate / SandboxWarmPool / SandboxClaim) so the + # cluster is ready for warm-pooled sandboxes. extensions.yaml reconfigures the + # existing agent-sandbox-controller deployment rather than adding a new one. + echo "Applying agent-sandbox warm-pool extensions (${AGENT_SANDBOX_VERSION})..." + kubectl --kubeconfig="${KUBECONFIG_TARGET}" apply -f "${base}/extensions.yaml" } configure_ghcr_credentials() { From b64232670796c283aaf426f49f9b691b42098a01 Mon Sep 17 00:00:00 2001 From: Roshni Malani Date: Fri, 12 Jun 2026 18:08:00 -0700 Subject: [PATCH 03/10] feat(compute-driver): add warm-pool claim binding to the driver contract Extend the ComputeDriver gRPC contract: WarmClaimBinding on CreateSandboxResponse, claim_name/claim_uid on DriverSandboxStatus, and disallow_warm_pool on DriverSandboxSpec so the gateway can force the cold path for custom-policy requests. Adapt the Docker/Podman/VM drivers (they don't warm-pool). Signed-off-by: Roshni Malani --- crates/openshell-driver-docker/src/lib.rs | 6 +++- crates/openshell-driver-docker/src/tests.rs | 1 + crates/openshell-driver-podman/src/grpc.rs | 2 +- crates/openshell-driver-podman/src/watcher.rs | 3 ++ crates/openshell-driver-vm/src/driver.rs | 6 +++- proto/compute_driver.proto | 35 ++++++++++++++++++- 6 files changed, 49 insertions(+), 4 deletions(-) diff --git a/crates/openshell-driver-docker/src/lib.rs b/crates/openshell-driver-docker/src/lib.rs index d05772760..d98eb1abe 100644 --- a/crates/openshell-driver-docker/src/lib.rs +++ b/crates/openshell-driver-docker/src/lib.rs @@ -1357,7 +1357,7 @@ impl ComputeDriver for DockerComputeDriver { .sandbox .ok_or_else(|| Status::invalid_argument("sandbox is required"))?; self.create_sandbox_inner(&sandbox).await?; - Ok(Response::new(CreateSandboxResponse {})) + Ok(Response::new(CreateSandboxResponse { warm_claim: None })) } async fn stop_sandbox( @@ -1482,6 +1482,8 @@ fn pending_sandbox_snapshot( sandbox_fd: String::new(), conditions: vec![condition], deleting, + // Warm-pool claim fields apply to the Kubernetes driver only. + ..Default::default() }), } } @@ -2624,6 +2626,8 @@ fn driver_status_from_summary( last_transition_time: String::new(), }], deleting, + // Warm-pool claim fields apply to the Kubernetes driver only. + ..Default::default() } } diff --git a/crates/openshell-driver-docker/src/tests.rs b/crates/openshell-driver-docker/src/tests.rs index 8b0921e8a..e8403d801 100644 --- a/crates/openshell-driver-docker/src/tests.rs +++ b/crates/openshell-driver-docker/src/tests.rs @@ -43,6 +43,7 @@ fn test_sandbox() -> DriverSandbox { }), gpu: false, sandbox_token: String::new(), + disallow_warm_pool: false, }), status: None, } diff --git a/crates/openshell-driver-podman/src/grpc.rs b/crates/openshell-driver-podman/src/grpc.rs index 4840ee281..ee57b74f2 100644 --- a/crates/openshell-driver-podman/src/grpc.rs +++ b/crates/openshell-driver-podman/src/grpc.rs @@ -102,7 +102,7 @@ impl ComputeDriver for ComputeDriverService { .create_sandbox(&sandbox) .await .map_err(Status::from)?; - Ok(Response::new(CreateSandboxResponse {})) + Ok(Response::new(CreateSandboxResponse { warm_claim: None })) } async fn stop_sandbox( diff --git a/crates/openshell-driver-podman/src/watcher.rs b/crates/openshell-driver-podman/src/watcher.rs index 54606ea44..54cfb6cbf 100644 --- a/crates/openshell-driver-podman/src/watcher.rs +++ b/crates/openshell-driver-podman/src/watcher.rs @@ -264,6 +264,8 @@ fn build_driver_sandbox( sandbox_fd: String::new(), conditions: vec![condition], deleting, + // Warm-pool claim fields apply to the Kubernetes driver only. + ..Default::default() }), } } @@ -496,6 +498,7 @@ mod tests { sandbox_fd: String::new(), conditions: vec![condition], deleting: false, + ..Default::default() }), }; diff --git a/crates/openshell-driver-vm/src/driver.rs b/crates/openshell-driver-vm/src/driver.rs index 30fecd8be..04601ac26 100644 --- a/crates/openshell-driver-vm/src/driver.rs +++ b/crates/openshell-driver-vm/src/driver.rs @@ -572,7 +572,7 @@ impl VmDriver { task.abort(); } - Ok(CreateSandboxResponse {}) + Ok(CreateSandboxResponse { warm_claim: None }) } async fn provision_sandbox( @@ -4906,6 +4906,8 @@ fn sandbox_snapshot(sandbox: &Sandbox, condition: SandboxCondition, deleting: bo sandbox_fd: String::new(), conditions: vec![condition], deleting, + // Warm-pool claim fields apply to the Kubernetes driver only. + ..Default::default() }), ..Default::default() } @@ -4923,6 +4925,8 @@ fn status_with_condition( sandbox_fd: String::new(), conditions: vec![condition], deleting, + // Warm-pool claim fields apply to the Kubernetes driver only. + ..Default::default() } } diff --git a/proto/compute_driver.proto b/proto/compute_driver.proto index dbcb9e818..d70b824c3 100644 --- a/proto/compute_driver.proto +++ b/proto/compute_driver.proto @@ -94,6 +94,13 @@ message DriverSandboxSpec { // ServiceAccount token bootstrap instead). Never echoed to the public // Sandbox proto. string sandbox_token = 11; + // When true, the driver must NOT satisfy this sandbox from a warm pool and + // must take the cold create path. Set by the gateway for requests that carry + // a custom policy: a warm-pooled pod enforces the pool's baseline policy at + // boot and cannot late-bind a per-sandbox policy (Landlock is applied once at + // process start), so warm-pooling a custom-policy sandbox would silently + // downgrade its policy. Drivers without warm pools ignore this field. + bool disallow_warm_pool = 12; } // Driver-owned runtime template consumed by the compute platform. @@ -162,6 +169,14 @@ message DriverSandboxStatus { repeated DriverCondition conditions = 5; // True when the compute platform has begun deleting this sandbox. bool deleting = 6; + // For warm-pooled sandboxes only: the name of the `SandboxClaim` the + // gateway created to bind this sandbox. Empty on the cold path. The gateway + // uses this (with `claim_uid`) to back-fill or garbage-collect the durable + // claim->sandbox-id mapping that anchors identity re-validation. + string claim_name = 7; + // For warm-pooled sandboxes only: the UID of the binding `SandboxClaim`. + // Empty on the cold path. + string claim_uid = 8; } // Raw compute-platform condition. @@ -225,7 +240,25 @@ message CreateSandboxRequest { DriverSandbox sandbox = 1; } -message CreateSandboxResponse {} +message CreateSandboxResponse { + // Set by drivers that satisfied the create from a warm pool by binding a + // pre-warmed `SandboxClaim`. Empty on the cold path. The gateway records a + // durable `(namespace, claim_name, claim_uid) -> sandbox_id` mapping from + // this binding so the `IssueSandboxToken` bootstrap can re-anchor the + // sandbox identity to a value only the gateway wrote. + WarmClaimBinding warm_claim = 1; +} + +// Identity of the warm-pool `SandboxClaim` the driver created to satisfy a +// `CreateSandbox`. Returned only on the warm path. +message WarmClaimBinding { + // Namespace the claim was created in. + string namespace = 1; + // Name of the created `SandboxClaim`. + string claim_name = 2; + // UID the apiserver assigned to the created `SandboxClaim`. + string claim_uid = 3; +} message StopSandboxRequest { // Stable sandbox ID stored by the gateway. From 99f4c374074749d119cd07ee560058d976eee339 Mon Sep 17 00:00:00 2001 From: Roshni Malani Date: Fri, 12 Jun 2026 18:08:00 -0700 Subject: [PATCH 04/10] refactor(core): centralize agent-sandbox CRD identity constants The gateway's warm-pool identity re-anchor must agree byte-for-byte with the CRD group/version/kind, claim-uid label, and sandbox-id annotation the Kubernetes driver writes. Hoist these into openshell-core as the single source of truth so the driver and the auth path can't drift. Signed-off-by: Roshni Malani --- crates/openshell-core/src/driver_utils.rs | 49 +++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/crates/openshell-core/src/driver_utils.rs b/crates/openshell-core/src/driver_utils.rs index 9e4411b2a..1e55ceaa4 100644 --- a/crates/openshell-core/src/driver_utils.rs +++ b/crates/openshell-core/src/driver_utils.rs @@ -27,6 +27,55 @@ pub const LABEL_SANDBOX_NAME: &str = "openshell.ai/sandbox-name"; /// Container/pod label carrying the sandbox namespace. pub const LABEL_SANDBOX_NAMESPACE: &str = "openshell.ai/sandbox-namespace"; +// --------------------------------------------------------------------------- +// agent-sandbox CRD identity (group / version / kind / contract labels) +// +// These describe the upstream agent-sandbox CRDs the Kubernetes driver creates +// and that the gateway's `IssueSandboxToken` auth path re-anchors against. They +// are a cross-crate *security* contract: the ownerReference and claim-UID +// checks in `openshell-server` must agree byte-for-byte with what +// `openshell-driver-kubernetes` writes, so they live here as the single source +// of truth rather than being duplicated per crate. +// --------------------------------------------------------------------------- + +/// API group of the core agent-sandbox `Sandbox` CRD. +pub const SANDBOX_CRD_GROUP: &str = "agents.x-k8s.io"; + +/// API version shared by the agent-sandbox CRDs (base and extensions). +pub const SANDBOX_CRD_VERSION: &str = "v1alpha1"; + +/// `apiVersion` (`group/version`) of the core `Sandbox` CRD. +pub const SANDBOX_CRD_API_VERSION: &str = "agents.x-k8s.io/v1alpha1"; + +/// Kind of the core agent-sandbox `Sandbox` CRD. +pub const SANDBOX_CRD_KIND: &str = "Sandbox"; + +/// API group of the warm-pool extension CRDs. +pub const SANDBOX_EXT_GROUP: &str = "extensions.agents.x-k8s.io"; + +/// `apiVersion` (`group/version`) of the warm-pool extension CRDs. +pub const SANDBOX_EXT_API_VERSION: &str = "extensions.agents.x-k8s.io/v1alpha1"; + +/// Kind of the warm-pool `SandboxClaim` extension CRD. +pub const SANDBOX_CLAIM_KIND: &str = "SandboxClaim"; + +/// Kind of the warm-pool `SandboxTemplate` extension CRD. +pub const SANDBOX_TEMPLATE_KIND: &str = "SandboxTemplate"; + +/// Kind of the warm-pool `SandboxWarmPool` extension CRD. +pub const SANDBOX_WARM_POOL_KIND: &str = "SandboxWarmPool"; + +/// Pod annotation carrying the per-sandbox identity (note the `.io` suffix — +/// distinct from the `.ai` [`LABEL_SANDBOX_ID`]). The gateway resolves a pod's +/// ServiceAccount token back to this value during `IssueSandboxToken`. +pub const SANDBOX_ID_ANNOTATION: &str = "openshell.io/sandbox-id"; + +/// Label the upstream warm-pool controller stamps onto a bound `Sandbox` CR +/// (and that the gateway sets on the `SandboxClaim`), carrying the binding +/// `SandboxClaim`'s apiserver-assigned UID. The auth path cross-checks it +/// against the controlling `SandboxClaim` ownerReference. +pub const CLAIM_UID_LABEL: &str = "agents.x-k8s.io/claim-uid"; + // --------------------------------------------------------------------------- /// Path to the sandbox supervisor binary inside the container image. From 7d17ff0c26d3f770eb22f3f63016d43f50419cfb Mon Sep 17 00:00:00 2001 From: Roshni Malani Date: Fri, 12 Jun 2026 18:08:13 -0700 Subject: [PATCH 05/10] feat(server): persist warm-pool claim-to-sandbox mappings Add a durable, HA-safe (namespace, claim_name, claim_uid) -> sandbox_id mapping in the shared object store, keyed so a uid mismatch fails closed and rebinding an existing key to a different sandbox id is rejected. This is the record the warm-pool identity re-anchor resolves against. Signed-off-by: Roshni Malani --- .../openshell-server/src/persistence/mod.rs | 132 +++++++++++++++++ .../openshell-server/src/persistence/tests.rs | 138 ++++++++++++++++++ 2 files changed, 270 insertions(+) diff --git a/crates/openshell-server/src/persistence/mod.rs b/crates/openshell-server/src/persistence/mod.rs index 8b6172c1c..8cbcedee0 100644 --- a/crates/openshell-server/src/persistence/mod.rs +++ b/crates/openshell-server/src/persistence/mod.rs @@ -23,6 +23,14 @@ pub use sqlite::SqliteStore; pub const POLICY_OBJECT_TYPE: &str = "sandbox_policy"; /// Object type string for draft policy chunk records. pub const DRAFT_CHUNK_OBJECT_TYPE: &str = "draft_policy_chunk"; +/// Object type string for warm-pool `SandboxClaim` → sandbox-id mappings. +/// +/// Stored in the shared `objects` table so any HA gateway replica can serve the +/// `IssueSandboxToken` bootstrap. The record `id` is keyed on +/// `(namespace, claim_name, claim_uid)` (so a lookup miss on a uid mismatch +/// fails closed) and the record `name` is the sandbox-id (so a sandbox can be +/// torn down by id without re-deriving the claim key). +pub const CLAIM_MAPPING_OBJECT_TYPE: &str = "k8s_claim_mapping"; pub type PersistenceResult = Result; @@ -507,6 +515,130 @@ impl Store { updated.set_resource_version(result.resource_version); Ok(updated) } + + // ----------------------------------------------------------------------- + // Warm-pool claim mappings + // ----------------------------------------------------------------------- + + /// Persist a durable `(namespace, claim_name, claim_uid) -> sandbox_id` + /// mapping for a gateway-created warm-pool `SandboxClaim`. + /// + /// Idempotent: a re-write of the identical mapping (same key + same + /// sandbox-id) is accepted so create, bootstrap, and reconcile back-fill + /// retries converge. A write that would rebind an existing claim key to a + /// *different* sandbox-id is rejected — that must never happen because the + /// claim UID is apiserver-assigned and unique. + pub async fn put_claim_mapping(&self, mapping: &ClaimMapping) -> PersistenceResult<()> { + let id = claim_mapping_id(&mapping.namespace, &mapping.claim_name, &mapping.claim_uid); + + if let Some(existing) = self.get_claim_mapping_by_id(&id).await? { + return if existing.sandbox_id == mapping.sandbox_id { + Ok(()) + } else { + Err(claim_mapping_rebind_error(&id)) + }; + } + + let payload = serde_json::to_vec(mapping) + .map_err(|e| PersistenceError::Encode(format!("claim mapping encode error: {e}")))?; + + match self + .put_if( + CLAIM_MAPPING_OBJECT_TYPE, + &id, + &mapping.sandbox_id, + &payload, + None, + WriteCondition::MustCreate, + ) + .await + { + Ok(_) => Ok(()), + // Lost a concurrent MustCreate race: accept iff the winner stored + // the same binding, otherwise surface the conflict. + Err(PersistenceError::UniqueViolation { .. }) => { + match self.get_claim_mapping_by_id(&id).await? { + Some(existing) if existing.sandbox_id == mapping.sandbox_id => Ok(()), + _ => Err(claim_mapping_rebind_error(&id)), + } + } + Err(e) => Err(e), + } + } + + /// Look up the sandbox-id bound to a warm-pool claim. A miss (including a + /// `claim_uid` that does not match a stored record) returns `Ok(None)` so + /// callers fail closed. + pub async fn get_claim_mapping( + &self, + namespace: &str, + claim_name: &str, + claim_uid: &str, + ) -> PersistenceResult> { + self.get_claim_mapping_by_id(&claim_mapping_id(namespace, claim_name, claim_uid)) + .await + } + + async fn get_claim_mapping_by_id(&self, id: &str) -> PersistenceResult> { + let Some(record) = self.get(CLAIM_MAPPING_OBJECT_TYPE, id).await? else { + return Ok(None); + }; + let mapping: ClaimMapping = serde_json::from_slice(&record.payload) + .map_err(|e| PersistenceError::Decode(format!("claim mapping decode error: {e}")))?; + Ok(Some(mapping)) + } + + /// Delete a warm-pool claim mapping by its sandbox-id (the record name). + /// Returns `true` when a mapping was removed. + pub async fn delete_claim_mapping_for_sandbox( + &self, + sandbox_id: &str, + ) -> PersistenceResult { + self.delete_by_name(CLAIM_MAPPING_OBJECT_TYPE, sandbox_id) + .await + } + + /// List all warm-pool claim mappings (used by reconcile GC). Bounded by + /// `limit`/`offset`. + pub async fn list_claim_mappings( + &self, + limit: u32, + offset: u32, + ) -> PersistenceResult> { + let records = self.list(CLAIM_MAPPING_OBJECT_TYPE, limit, offset).await?; + let mut mappings = Vec::with_capacity(records.len()); + for record in records { + let mapping: ClaimMapping = serde_json::from_slice(&record.payload).map_err(|e| { + PersistenceError::Decode(format!("claim mapping decode error: {e}")) + })?; + mappings.push(mapping); + } + Ok(mappings) + } +} + +/// Durable mapping that binds a gateway-created warm-pool `SandboxClaim` to the +/// sandbox identity the gateway assigned. The auth bootstrap re-anchors a warm +/// sandbox pod's identity to this record (see `auth::k8s_sa`). +#[derive(Debug, Clone, PartialEq, Eq, serde::Serialize, serde::Deserialize)] +pub struct ClaimMapping { + pub namespace: String, + pub claim_name: String, + pub claim_uid: String, + pub sandbox_id: String, +} + +fn claim_mapping_id(namespace: &str, claim_name: &str, claim_uid: &str) -> String { + format!("{namespace}/{claim_name}/{claim_uid}") +} + +fn claim_mapping_rebind_error(id: &str) -> PersistenceError { + PersistenceError::unique_violation( + Some(CLAIM_MAPPING_OBJECT_TYPE.to_string()), + Some(format!( + "claim mapping {id} is already bound to a different sandbox" + )), + ) } pub fn current_time_ms() -> i64 { diff --git a/crates/openshell-server/src/persistence/tests.rs b/crates/openshell-server/src/persistence/tests.rs index d092b68de..25ae2b103 100644 --- a/crates/openshell-server/src/persistence/tests.rs +++ b/crates/openshell-server/src/persistence/tests.rs @@ -1335,3 +1335,141 @@ async fn cas_update_message_cas_conflicts_on_concurrent_updates() { "resource_version should be 2 (initial 1 + 1 successful update)" ); } + +// --------------------------------------------------------------------------- +// Warm-pool claim mappings +// --------------------------------------------------------------------------- + +use super::ClaimMapping; + +fn claim_mapping(ns: &str, name: &str, uid: &str, sandbox_id: &str) -> ClaimMapping { + ClaimMapping { + namespace: ns.to_string(), + claim_name: name.to_string(), + claim_uid: uid.to_string(), + sandbox_id: sandbox_id.to_string(), + } +} + +#[tokio::test] +async fn claim_mapping_put_get_round_trip() { + let store = test_store().await; + let mapping = claim_mapping("openshell", "claim-a", "uid-a", "sandbox-a"); + store.put_claim_mapping(&mapping).await.unwrap(); + + let fetched = store + .get_claim_mapping("openshell", "claim-a", "uid-a") + .await + .unwrap() + .expect("mapping should exist"); + assert_eq!(fetched, mapping); +} + +#[tokio::test] +async fn claim_mapping_lookup_misses_on_uid_mismatch() { + let store = test_store().await; + store + .put_claim_mapping(&claim_mapping("openshell", "claim-a", "uid-a", "sandbox-a")) + .await + .unwrap(); + + // Same namespace + claim name but a different UID must not resolve: this is + // the fail-closed leg of the identity re-anchor chain. + let miss = store + .get_claim_mapping("openshell", "claim-a", "uid-b") + .await + .unwrap(); + assert!(miss.is_none(), "uid mismatch must not resolve a mapping"); + + // A different namespace must not resolve either (cross-namespace pinning). + let miss_ns = store + .get_claim_mapping("other", "claim-a", "uid-a") + .await + .unwrap(); + assert!(miss_ns.is_none(), "namespace mismatch must not resolve"); +} + +#[tokio::test] +async fn claim_mapping_put_is_idempotent_for_identical_record() { + let store = test_store().await; + let mapping = claim_mapping("openshell", "claim-a", "uid-a", "sandbox-a"); + store.put_claim_mapping(&mapping).await.unwrap(); + // A re-write of the identical mapping (bootstrap/reconcile retry) succeeds. + store + .put_claim_mapping(&mapping) + .await + .expect("identical re-write should be idempotent"); +} + +#[tokio::test] +async fn claim_mapping_put_rejects_rebind_to_other_sandbox() { + let store = test_store().await; + store + .put_claim_mapping(&claim_mapping("openshell", "claim-a", "uid-a", "sandbox-a")) + .await + .unwrap(); + + // The same claim key must never be rebound to a different sandbox-id. + let err = store + .put_claim_mapping(&claim_mapping("openshell", "claim-a", "uid-a", "sandbox-b")) + .await + .expect_err("rebind to a different sandbox must be rejected"); + assert!(matches!(err, PersistenceError::UniqueViolation { .. })); + + // The original binding is unchanged. + let fetched = store + .get_claim_mapping("openshell", "claim-a", "uid-a") + .await + .unwrap() + .unwrap(); + assert_eq!(fetched.sandbox_id, "sandbox-a"); +} + +#[tokio::test] +async fn claim_mapping_delete_by_sandbox_id() { + let store = test_store().await; + store + .put_claim_mapping(&claim_mapping("openshell", "claim-a", "uid-a", "sandbox-a")) + .await + .unwrap(); + + let deleted = store + .delete_claim_mapping_for_sandbox("sandbox-a") + .await + .unwrap(); + assert!(deleted, "delete should report removal"); + assert!( + store + .get_claim_mapping("openshell", "claim-a", "uid-a") + .await + .unwrap() + .is_none(), + "mapping must be gone after delete" + ); + + // Deleting an unknown sandbox-id is a no-op, not an error. + let missing = store + .delete_claim_mapping_for_sandbox("sandbox-zzz") + .await + .unwrap(); + assert!(!missing, "deleting an unknown mapping returns false"); +} + +#[tokio::test] +async fn claim_mapping_list_returns_all() { + let store = test_store().await; + store + .put_claim_mapping(&claim_mapping("openshell", "claim-a", "uid-a", "sandbox-a")) + .await + .unwrap(); + store + .put_claim_mapping(&claim_mapping("openshell", "claim-b", "uid-b", "sandbox-b")) + .await + .unwrap(); + + let mut mappings = store.list_claim_mappings(100, 0).await.unwrap(); + mappings.sort_by(|a, b| a.sandbox_id.cmp(&b.sandbox_id)); + assert_eq!(mappings.len(), 2); + assert_eq!(mappings[0].sandbox_id, "sandbox-a"); + assert_eq!(mappings[1].sandbox_id, "sandbox-b"); +} From 84d14858d521c8fc78eab70d71de0e90c3927405 Mon Sep 17 00:00:00 2001 From: Roshni Malani Date: Fri, 12 Jun 2026 18:08:13 -0700 Subject: [PATCH 06/10] feat(kubernetes): claim pre-warmed pods from agent-sandbox warm pools Reconcile operator-declared warm pools (one SandboxTemplate + SandboxWarmPool per pool), bind pre-warmed pods via a SandboxClaim instead of cold-creating a Sandbox, and watch claims to surface warm sandboxes. warm_eligible() exhaustively destructures the request spec/template so any newly added field must be classified before it can ride the warm path; custom-policy requests and per-request overrides fall back to cold. The pooled template uses an ephemeral workspace, an unmanaged network policy, and no injected identity. Signed-off-by: Roshni Malani --- crates/openshell-driver-kubernetes/README.md | 47 + .../openshell-driver-kubernetes/src/config.rs | 115 ++ .../openshell-driver-kubernetes/src/driver.rs | 1085 ++++++++++++++++- .../openshell-driver-kubernetes/src/grpc.rs | 5 +- .../openshell-driver-kubernetes/src/main.rs | 4 + 5 files changed, 1238 insertions(+), 18 deletions(-) diff --git a/crates/openshell-driver-kubernetes/README.md b/crates/openshell-driver-kubernetes/README.md index 6ad0b27c8..b8c5239d4 100644 --- a/crates/openshell-driver-kubernetes/README.md +++ b/crates/openshell-driver-kubernetes/README.md @@ -32,6 +32,53 @@ This is a stopgap persistence model. It preserves user files across pod rescheduling but duplicates the base workspace and does not automatically apply image updates to existing PVCs. Future snapshotting should replace it. +## Warm Pools + +When `warm_pool.enabled` is set, the driver pre-declares one operator-owned +`SandboxTemplate` + `SandboxWarmPool` per configured pool (extension CRDs +`extensions.agents.x-k8s.io/v1alpha1`) and satisfies a matching `CreateSandbox` +by creating a `SandboxClaim` that binds a pre-warmed pod (~0.1s) instead of +cold-creating a `Sandbox`. Only the trusted `default_image` with no per-request +template or env overrides is pooled; everything else takes the cold path. + +The pooled `SandboxTemplate` bakes the shared pod blueprint (image, mTLS mount, +projected SA token, supervisor sideload, capabilities, runtimeClass, optional +read-only shared data volume) but carries **no per-sandbox identity**. The +template sets `networkPolicyManagement: Unmanaged` — OpenShell enforces egress +itself (supervisor proxy + Landlock), and the controller's default `Managed` +mode would otherwise impose a rule-less, default-deny `NetworkPolicy` that blocks +the pod's own egress to the gateway. The writable `/sandbox` workspace is an +ephemeral `emptyDir` seeded from the image: single-use and fail-safe (the kubelet +reclaims it with the pod, so there is nothing to orphan). Claims set +`shutdownPolicy: Delete` so teardown cascades to the bound `Sandbox`/Pod, and a +claimed pod is never returned to the pool. + +### Identity and policy on the warm path + +A pooled pod boots **before** its claim assigns an identity, so the supervisor +cannot fetch a per-sandbox policy or assert a sandbox-id at startup. Instead it: + +- boots on the image's **baseline policy** (Landlock is applied once at process + start and cannot be loosened later, so the baseline is what a pooled pod + enforces); +- runs with `OPENSHELL_SANDBOX_ID` unset, skipping identity-gated startup; +- establishes its relay session with an empty `hello.sandbox_id`; the gateway + derives the identity from the gateway-minted JWT obtained via + `IssueSandboxToken` (resolved server-side from the claim-injected + `openshell.io/sandbox-id` annotation + the durable claim mapping). The + supervisor retries the bootstrap at a steady cadence until the claim binds. + +Because a pooled pod can only enforce the baseline policy, a `CreateSandbox` +request that carries a **custom policy** is never warm-pooled — the gateway sets +`DriverSandboxSpec.disallow_warm_pool`, and such requests take the cold path so +their policy is applied faithfully (no silent downgrade). Claim `env` injection +is likewise disallowed. + +The driver surfaces a warm sandbox to the gateway by watching `SandboxClaim`s +(correlated to the gateway sandbox-id via the `openshell.ai/sandbox-id` label it +stamps on each claim) and returns the bound claim's `(name, uid)` so the gateway +can record the durable claim → sandbox-id mapping used by the auth re-anchor. + ## Credentials, TLS, and Relay The driver injects gateway callback configuration, sandbox identity, TLS client diff --git a/crates/openshell-driver-kubernetes/src/config.rs b/crates/openshell-driver-kubernetes/src/config.rs index 4c1153b08..9c9c80758 100644 --- a/crates/openshell-driver-kubernetes/src/config.rs +++ b/crates/openshell-driver-kubernetes/src/config.rs @@ -211,6 +211,68 @@ pub struct KubernetesComputeConfig { deserialize_with = "deserialize_provider_spiffe_workload_api_socket_path" )] pub provider_spiffe_workload_api_socket_path: String, + /// Warm-pool configuration. When enabled, the gateway pre-declares one or + /// more operator-owned `SandboxWarmPool`s and satisfies matching + /// `CreateSandbox` requests by binding a pre-warmed pod via a `SandboxClaim` + /// instead of cold-creating a `Sandbox`. Off by default. + #[serde(default)] + pub warm_pool: KubernetesWarmPoolConfig, +} + +/// Warm-pool configuration for the Kubernetes driver. +/// +/// Only **operator-declared** pools using the gateway's trusted default image +/// are warm-pooled. Requests for a user-supplied image or that carry a custom +/// template fall back to the cold path — per-tenant pool isolation for +/// untrusted images is out of scope (see RFC 0005 / issue #1879). +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[serde(default, deny_unknown_fields)] +pub struct KubernetesWarmPoolConfig { + /// Master switch. When false the driver never creates extension CRDs and + /// every request takes the cold path. + pub enabled: bool, + /// Declared pools. Each maps to exactly one (image, runtimeClass, gpu) + /// shape; the image is always the driver `default_image`. + pub pools: Vec, +} + +/// One operator-declared warm pool. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(deny_unknown_fields)] +pub struct WarmPoolSpec { + /// Pool name. Also names the backing `SandboxWarmPool` and `SandboxTemplate` + /// (suffixed). Must be a DNS-1123 label. + pub name: String, + /// Number of pods to keep warm. + pub replicas: i32, + /// `runtimeClassName` baked into the pool's `SandboxTemplate`. Empty uses + /// the driver's `default_runtime_class_name`. + #[serde(default)] + pub runtime_class_name: String, + /// Whether pooled pods request a GPU. GPU pools hold accelerators idle, so + /// they are opt-in per pool. + #[serde(default)] + pub gpu: bool, + /// Optional shared, read-only data volume mounted into every pooled pod + /// (datasets / models / caches). Safe to share precisely because it is + /// read-only and holds no per-agent state. + #[serde(default, skip_serializing_if = "Option::is_none")] + pub shared_volume: Option, +} + +/// A pre-existing PVC mounted read-only into pooled pods. +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] +#[serde(deny_unknown_fields)] +pub struct SharedVolumeSpec { + /// Name of an existing `PersistentVolumeClaim` in the sandbox namespace. + /// Must support a read-many access mode (`ReadOnlyMany`/`ReadWriteMany`) + /// when pods land on multiple nodes. + pub claim_name: String, + /// Absolute path the volume is mounted at inside the sandbox. + pub mount_path: String, + /// Optional sub-path within the volume to mount. + #[serde(default, skip_serializing_if = "String::is_empty")] + pub sub_path: String, } /// Lower bound enforced by kubelet for projected SA tokens. @@ -246,6 +308,7 @@ impl Default for KubernetesComputeConfig { default_runtime_class_name: String::new(), sa_token_ttl_secs: 3600, provider_spiffe_workload_api_socket_path: String::new(), + warm_pool: KubernetesWarmPoolConfig::default(), } } } @@ -459,4 +522,56 @@ mod tests { let cfg: KubernetesComputeConfig = serde_json::from_value(json).unwrap(); assert_eq!(cfg.image_pull_secrets, ["regcred", "backup-regcred"]); } + + #[test] + fn default_warm_pool_is_disabled() { + let cfg = KubernetesComputeConfig::default(); + assert!(!cfg.warm_pool.enabled); + assert!(cfg.warm_pool.pools.is_empty()); + } + + #[test] + fn serde_override_warm_pool() { + let json = serde_json::json!({ + "warm_pool": { + "enabled": true, + "pools": [ + { "name": "default", "replicas": 3 }, + { + "name": "gpu", + "replicas": 1, + "gpu": true, + "runtime_class_name": "nvidia", + "shared_volume": { + "claim_name": "models", + "mount_path": "/models", + "sub_path": "llama" + } + } + ] + } + }); + let cfg: KubernetesComputeConfig = serde_json::from_value(json).unwrap(); + assert!(cfg.warm_pool.enabled); + assert_eq!(cfg.warm_pool.pools.len(), 2); + assert_eq!(cfg.warm_pool.pools[0].name, "default"); + assert_eq!(cfg.warm_pool.pools[0].replicas, 3); + assert!(!cfg.warm_pool.pools[0].gpu); + assert!(cfg.warm_pool.pools[0].shared_volume.is_none()); + let gpu = &cfg.warm_pool.pools[1]; + assert!(gpu.gpu); + assert_eq!(gpu.runtime_class_name, "nvidia"); + let shared = gpu.shared_volume.as_ref().unwrap(); + assert_eq!(shared.claim_name, "models"); + assert_eq!(shared.mount_path, "/models"); + assert_eq!(shared.sub_path, "llama"); + } + + #[test] + fn serde_rejects_unknown_warm_pool_field() { + let json = serde_json::json!({ + "warm_pool": { "enabled": true, "bogus": 1 } + }); + assert!(serde_json::from_value::(json).is_err()); + } } diff --git a/crates/openshell-driver-kubernetes/src/driver.rs b/crates/openshell-driver-kubernetes/src/driver.rs index dc636efc3..bd6722337 100644 --- a/crates/openshell-driver-kubernetes/src/driver.rs +++ b/crates/openshell-driver-kubernetes/src/driver.rs @@ -5,11 +5,11 @@ use crate::config::{ AppArmorProfile, DEFAULT_SANDBOX_SERVICE_ACCOUNT_NAME, DEFAULT_WORKSPACE_STORAGE_SIZE, - KubernetesComputeConfig, SupervisorSideloadMethod, + KubernetesComputeConfig, SharedVolumeSpec, SupervisorSideloadMethod, WarmPoolSpec, }; use futures::{Stream, StreamExt, TryStreamExt}; use k8s_openapi::api::core::v1::{Event as KubeEventObj, Node}; -use kube::api::{Api, ApiResource, DeleteParams, ListParams, PostParams}; +use kube::api::{Api, ApiResource, DeleteParams, ListParams, Patch, PatchParams, PostParams}; use kube::core::gvk::GroupVersionKind; use kube::core::{DynamicObject, ObjectMeta}; use kube::runtime::watcher::{self, Event}; @@ -25,7 +25,7 @@ use openshell_core::proto::compute::v1::{ DriverCondition as SandboxCondition, DriverPlatformEvent as PlatformEvent, DriverSandbox as Sandbox, DriverSandboxSpec as SandboxSpec, DriverSandboxStatus as SandboxStatus, DriverSandboxTemplate as SandboxTemplate, - GetCapabilitiesResponse, WatchSandboxesDeletedEvent, WatchSandboxesEvent, + GetCapabilitiesResponse, WarmClaimBinding, WatchSandboxesDeletedEvent, WatchSandboxesEvent, WatchSandboxesPlatformEvent, WatchSandboxesSandboxEvent, watch_sandboxes_event, }; use openshell_core::proto_struct::{struct_to_json_object, value_to_json}; @@ -77,9 +77,36 @@ impl From for openshell_core::ComputeDriverError { /// API server is unreachable or slow. const KUBE_API_TIMEOUT: Duration = Duration::from_secs(30); -const SANDBOX_GROUP: &str = "agents.x-k8s.io"; -const SANDBOX_VERSION: &str = "v1alpha1"; -pub const SANDBOX_KIND: &str = "Sandbox"; +// agent-sandbox CRD identity. Single source of truth in `openshell-core` so the +// gateway's auth re-anchor stays byte-identical with what this driver writes. +const SANDBOX_GROUP: &str = openshell_core::driver_utils::SANDBOX_CRD_GROUP; +const SANDBOX_VERSION: &str = openshell_core::driver_utils::SANDBOX_CRD_VERSION; +pub const SANDBOX_KIND: &str = openshell_core::driver_utils::SANDBOX_CRD_KIND; + +// agent-sandbox warm-pool extension CRDs (extensions.agents.x-k8s.io/v1alpha1). +const EXT_GROUP: &str = openshell_core::driver_utils::SANDBOX_EXT_GROUP; +const EXT_VERSION: &str = openshell_core::driver_utils::SANDBOX_CRD_VERSION; +const SANDBOX_CLAIM_KIND: &str = openshell_core::driver_utils::SANDBOX_CLAIM_KIND; +const SANDBOX_TEMPLATE_KIND: &str = openshell_core::driver_utils::SANDBOX_TEMPLATE_KIND; +const SANDBOX_WARM_POOL_KIND: &str = openshell_core::driver_utils::SANDBOX_WARM_POOL_KIND; + +/// Pod annotation carrying the per-sandbox identity. On the warm path it is +/// injected via `SandboxClaim.spec.additionalPodMetadata.annotations`; the +/// gateway resolves a pod's SA token back to this value during +/// `IssueSandboxToken`. Mirrors `auth::k8s_sa::SANDBOX_ID_ANNOTATION`. +const SANDBOX_ID_ANNOTATION: &str = openshell_core::driver_utils::SANDBOX_ID_ANNOTATION; +/// Label the upstream warm-pool controller stamps onto a bound `Sandbox` CR, +/// carrying the binding `SandboxClaim`'s UID. The driver distinguishes warm +/// Sandboxes from cold ones by the *absence* of `LABEL_SANDBOX_ID` (see +/// `is_cold_openshell_sandbox`), so this label is referenced only by tests that +/// construct representative bound-warm Sandboxes. +#[cfg(test)] +const CLAIM_UID_LABEL: &str = openshell_core::driver_utils::CLAIM_UID_LABEL; +/// Label the gateway stamps onto the `SandboxClaim`/`SandboxTemplate`/ +/// `SandboxWarmPool` it owns, naming the originating pool. +const WARM_POOL_LABEL: &str = "openshell.ai/warm-pool"; +/// Field manager used for server-side-apply of pool resources. +const WARM_POOL_FIELD_MANAGER: &str = "openshell-gateway-warmpool"; const GPU_RESOURCE_NAME: &str = "nvidia.com/gpu"; const GPU_RESOURCE_QUANTITY: &str = "1"; @@ -267,6 +294,250 @@ impl KubernetesComputeDriver { Api::namespaced_with(self.client.clone(), &self.config.namespace, &resource) } + fn ext_api(&self, client: &Client, kind: &str) -> Api { + let gvk = GroupVersionKind::gvk(EXT_GROUP, EXT_VERSION, kind); + let resource = ApiResource::from_gvk(&gvk); + Api::namespaced_with(client.clone(), &self.config.namespace, &resource) + } + + fn claims_api(&self) -> Api { + self.ext_api(&self.client, SANDBOX_CLAIM_KIND) + } + + fn watch_claims_api(&self) -> Api { + self.ext_api(&self.watch_client, SANDBOX_CLAIM_KIND) + } + + /// Whether warm pooling is enabled in driver config. + fn warm_pool_enabled(&self) -> bool { + self.config.warm_pool.enabled + } + + /// Resolve the warm pool that a `CreateSandbox` request maps to, if any. + /// + /// Only the gateway's trusted `default_image` with no per-request template + /// or env overrides is warm-pooled (issue #1879, remediation #7); anything + /// else falls back to the cold path. Pools are matched by GPU shape. + fn matching_warm_pool(&self, sandbox: &Sandbox) -> Option<&WarmPoolSpec> { + if !self.warm_pool_enabled() { + return None; + } + let spec = sandbox.spec.as_ref()?; + if !warm_eligible(spec, &self.config.default_image) { + return None; + } + self.config + .warm_pool + .pools + .iter() + .find(|pool| pool.gpu == spec.gpu) + } + + /// Reconcile the operator-declared warm pools: server-side-apply one + /// `SandboxTemplate` + one `SandboxWarmPool` per configured pool. Idempotent + /// and safe to call on every startup. Non-fatal on individual failures — + /// the cold path stays available regardless. + pub async fn reconcile_warm_pools(&self) -> Result<(), KubernetesDriverError> { + if !self.warm_pool_enabled() { + return Ok(()); + } + for pool in &self.config.warm_pool.pools { + self.apply_warm_pool(pool).await?; + } + Ok(()) + } + + async fn apply_warm_pool(&self, pool: &WarmPoolSpec) -> Result<(), KubernetesDriverError> { + let resource_name = warm_resource_name(&pool.name); + let labels = warm_pool_labels(&pool.name); + + // SandboxTemplate: the shared, identity-free pod blueprint. + let template_spec = build_warm_sandbox_template_spec(&self.config, pool); + self.apply_ext_object( + SANDBOX_TEMPLATE_KIND, + &resource_name, + &labels, + serde_json::json!({ "spec": template_spec }), + ) + .await?; + + // SandboxWarmPool: keeps `replicas` pods warm against that template. + self.apply_ext_object( + SANDBOX_WARM_POOL_KIND, + &resource_name, + &labels, + serde_json::json!({ + "spec": { + "replicas": pool.replicas, + "sandboxTemplateRef": { "name": resource_name }, + } + }), + ) + .await?; + + info!( + pool = %pool.name, + replicas = pool.replicas, + gpu = pool.gpu, + "Reconciled warm pool" + ); + Ok(()) + } + + async fn apply_ext_object( + &self, + kind: &str, + name: &str, + labels: &BTreeMap, + spec: serde_json::Value, + ) -> Result<(), KubernetesDriverError> { + let gvk = GroupVersionKind::gvk(EXT_GROUP, EXT_VERSION, kind); + let resource = ApiResource::from_gvk(&gvk); + let mut obj = DynamicObject::new(name, &resource); + obj.metadata = ObjectMeta { + name: Some(name.to_string()), + namespace: Some(self.config.namespace.clone()), + labels: Some(labels.clone()), + ..Default::default() + }; + obj.data = spec; + let api = self.ext_api(&self.client, kind); + let params = PatchParams::apply(WARM_POOL_FIELD_MANAGER).force(); + tokio::time::timeout( + KUBE_API_TIMEOUT, + api.patch(name, ¶ms, &Patch::Apply(&obj)), + ) + .await + .map_err(|_| { + KubernetesDriverError::Message(format!( + "timed out applying {kind}/{name} after {}s", + KUBE_API_TIMEOUT.as_secs() + )) + })? + .map_err(KubernetesDriverError::from_kube)?; + Ok(()) + } + + /// Bind a pre-warmed pod by creating a `SandboxClaim`. Returns the claim + /// identity so the gateway can record the durable claim->sandbox-id mapping. + async fn create_warm_claim( + &self, + sandbox: &Sandbox, + pool: &WarmPoolSpec, + ) -> Result { + let name = sandbox.name.as_str(); + let resource_name = warm_resource_name(&pool.name); + + let mut labels = warm_pool_labels(&pool.name); + labels.insert(LABEL_SANDBOX_ID.to_string(), sandbox.id.clone()); + + let gvk = GroupVersionKind::gvk(EXT_GROUP, EXT_VERSION, SANDBOX_CLAIM_KIND); + let resource = ApiResource::from_gvk(&gvk); + let mut obj = DynamicObject::new(name, &resource); + obj.metadata = ObjectMeta { + name: Some(name.to_string()), + namespace: Some(self.config.namespace.clone()), + labels: Some(labels), + ..Default::default() + }; + obj.data = warm_claim_spec(&resource_name, &sandbox.id); + + let created = tokio::time::timeout( + KUBE_API_TIMEOUT, + self.claims_api().create(&PostParams::default(), &obj), + ) + .await + .map_err(|_| { + KubernetesDriverError::Message(format!( + "timed out creating SandboxClaim after {}s", + KUBE_API_TIMEOUT.as_secs() + )) + })? + .map_err(KubernetesDriverError::from_kube)?; + + let claim_uid = created.metadata.uid.unwrap_or_default(); + if claim_uid.is_empty() { + return Err(KubernetesDriverError::Message( + "created SandboxClaim has no UID".to_string(), + )); + } + info!( + sandbox_id = %sandbox.id, + claim = %name, + claim_uid = %claim_uid, + pool = %pool.name, + "Bound warm sandbox via SandboxClaim" + ); + Ok(WarmClaimBinding { + namespace: self.config.namespace.clone(), + claim_name: name.to_string(), + claim_uid, + }) + } + + async fn get_warm_sandbox(&self, name: &str) -> Result, String> { + let api = self.claims_api(); + match tokio::time::timeout(KUBE_API_TIMEOUT, api.get_opt(name)).await { + Ok(Ok(Some(obj))) => sandbox_from_claim_object(&self.config.namespace, obj).map(Some), + Ok(Ok(None)) => Ok(None), + Ok(Err(err)) => Err(err.to_string()), + Err(_elapsed) => Err(format!( + "timed out after {}s waiting for Kubernetes API", + KUBE_API_TIMEOUT.as_secs() + )), + } + } + + async fn list_warm_sandboxes(&self) -> Result, String> { + let selector = format!("{LABEL_MANAGED_BY}={LABEL_MANAGED_BY_VALUE}"); + let params = ListParams::default().labels(&selector); + let api = self.claims_api(); + match tokio::time::timeout(KUBE_API_TIMEOUT, api.list(¶ms)).await { + Ok(Ok(list)) => list + .items + .into_iter() + .filter(|obj| { + obj.metadata + .labels + .as_ref() + .is_some_and(|labels| labels.contains_key(LABEL_SANDBOX_ID)) + }) + .map(|obj| sandbox_from_claim_object(&self.config.namespace, obj)) + .collect::, _>>(), + Ok(Err(err)) => Err(err.to_string()), + Err(_elapsed) => Err(format!( + "timed out after {}s waiting for Kubernetes API", + KUBE_API_TIMEOUT.as_secs() + )), + } + } + + /// Delete a warm-pool `SandboxClaim` by name. `shutdownPolicy: Delete` + /// cascades teardown to the bound `Sandbox` + Pod. Returns `Ok(true)` when a + /// claim was deleted; `Ok(false)` when warm pooling is disabled or no such + /// claim exists (the caller then falls through to the cold `Sandbox` path — + /// e.g. a cold sandbox created while warm pooling was on). + async fn delete_warm_claim(&self, name: &str) -> Result { + if !self.warm_pool_enabled() { + return Ok(false); + } + let api = self.claims_api(); + match tokio::time::timeout(KUBE_API_TIMEOUT, api.delete(name, &DeleteParams::default())) + .await + { + Ok(Ok(_)) => { + info!(claim = %name, "Deleted warm SandboxClaim"); + Ok(true) + } + Ok(Err(KubeError::Api(err))) if err.code == 404 => Ok(false), + Ok(Err(err)) => Err(err.to_string()), + Err(_elapsed) => Err(format!( + "timed out after {}s waiting for Kubernetes API", + KUBE_API_TIMEOUT.as_secs() + )), + } + } + async fn has_gpu_capacity(&self) -> Result { let nodes: Api = Api::all(self.client.clone()); let node_list = nodes.list(&ListParams::default()).await?; @@ -302,11 +573,11 @@ impl KubernetesComputeDriver { ); let api = self.api(); - match tokio::time::timeout(KUBE_API_TIMEOUT, api.get(name)).await { - Ok(Ok(obj)) => sandbox_from_object(&self.config.namespace, obj).map(Some), + let cold = match tokio::time::timeout(KUBE_API_TIMEOUT, api.get(name)).await { + Ok(Ok(obj)) => Some(obj), Ok(Err(KubeError::Api(err))) if err.code == 404 => { debug!(sandbox_name = %name, "Sandbox not found in Kubernetes"); - Ok(None) + None } Ok(Err(err)) => { warn!( @@ -314,7 +585,7 @@ impl KubernetesComputeDriver { error = %err, "Failed to fetch sandbox from Kubernetes" ); - Err(err.to_string()) + return Err(err.to_string()); } Err(_elapsed) => { warn!( @@ -322,12 +593,26 @@ impl KubernetesComputeDriver { timeout_secs = KUBE_API_TIMEOUT.as_secs(), "Timed out fetching sandbox from Kubernetes" ); - Err(format!( + return Err(format!( "timed out after {}s waiting for Kubernetes API", KUBE_API_TIMEOUT.as_secs() - )) + )); } + }; + + // A cold Sandbox CR named `name` resolves directly; warm-pool Sandbox + // CRs (pooled or claim-bound) lack the openshell sandbox-id label and + // are addressed via their SandboxClaim instead, so fall through to the + // warm lookup for those. + if let Some(obj) = cold + && is_cold_openshell_sandbox(&obj) + { + return sandbox_from_object(&self.config.namespace, obj).map(Some); } + if self.warm_pool_enabled() { + return self.get_warm_sandbox(name).await; + } + Ok(None) } pub async fn list_sandboxes(&self) -> Result, String> { @@ -339,11 +624,19 @@ impl KubernetesComputeDriver { let api = self.api(); match tokio::time::timeout(KUBE_API_TIMEOUT, api.list(&ListParams::default())).await { Ok(Ok(list)) => { + // Cold Sandbox CRs only; warm-pool Sandboxes (pooled or bound) + // are surfaced via their SandboxClaim below — their generic + // names carry no openshell sandbox-id and would otherwise fail + // to map. let mut sandboxes = list .items .into_iter() + .filter(is_cold_openshell_sandbox) .map(|obj| sandbox_from_object(&self.config.namespace, obj)) .collect::, _>>()?; + if self.warm_pool_enabled() { + sandboxes.extend(self.list_warm_sandboxes().await?); + } sandboxes.sort_by(|left, right| { left.name .cmp(&right.name) @@ -373,10 +666,21 @@ impl KubernetesComputeDriver { } } - pub async fn create_sandbox(&self, sandbox: &Sandbox) -> Result<(), KubernetesDriverError> { + pub async fn create_sandbox( + &self, + sandbox: &Sandbox, + ) -> Result, KubernetesDriverError> { let _ = KubernetesSandboxDriverConfig::from_sandbox(sandbox) .map_err(KubernetesDriverError::InvalidArgument)?; let name = sandbox.name.as_str(); + + // Warm path: when the request matches an operator-declared pool, bind a + // pre-warmed pod via a SandboxClaim instead of cold-creating a Sandbox. + if let Some(pool) = self.matching_warm_pool(sandbox) { + let pool = pool.clone(); + return self.create_warm_claim(sandbox, &pool).await.map(Some); + } + info!( sandbox_id = %sandbox.id, sandbox_name = %name, @@ -428,7 +732,7 @@ impl KubernetesComputeDriver { sandbox_name = %name, "Sandbox created in Kubernetes successfully" ); - Ok(()) + Ok(None) } Ok(Err(err)) => { warn!( @@ -461,6 +765,14 @@ impl KubernetesComputeDriver { "Deleting sandbox from Kubernetes" ); + // Warm path: a matching SandboxClaim is single-use; deleting it + // cascades teardown to the bound Sandbox + Pod (shutdownPolicy: Delete), + // so a claimed pod/Sandbox is never returned to the pool. A miss falls + // through to the cold Sandbox delete below. + if self.delete_warm_claim(name).await? { + return Ok(true); + } + let api = self.api(); match tokio::time::timeout(KUBE_API_TIMEOUT, api.delete(name, &DeleteParams::default())) .await @@ -516,6 +828,14 @@ impl KubernetesComputeDriver { let event_api: Api = Api::namespaced(self.watch_client.clone(), &namespace); let mut sandbox_stream = watcher::watcher(sandbox_api, watcher::Config::default()).boxed(); let mut event_stream = watcher::watcher(event_api, watcher::Config::default()).boxed(); + // Watch warm-pool SandboxClaims only when warm pooling is enabled; the + // extension CRDs may not be installed otherwise. A `pending` stream + // keeps the select arm inert when disabled. + let mut claim_stream = if self.warm_pool_enabled() { + watcher::watcher(self.watch_claims_api(), watcher::Config::default()).boxed() + } else { + futures::stream::pending().boxed() + }; let (tx, rx) = mpsc::channel(256); tokio::spawn(async move { @@ -525,7 +845,13 @@ impl KubernetesComputeDriver { loop { tokio::select! { result = sandbox_stream.try_next() => match result { + // Only cold OpenShell Sandbox CRs are mapped here; warm- + // pool Sandboxes (pooled or claim-bound) lack the + // openshell sandbox-id label and are surfaced via the + // SandboxClaim watcher below, so skip them to avoid + // failing on their identity-free generic names. Ok(Some(Event::Applied(obj))) => { + if !is_cold_openshell_sandbox(&obj) { continue; } match sandbox_from_object(&namespace, obj) { Ok(sandbox) => { update_indexes(&mut sandbox_name_to_id, &mut agent_pod_to_id, &sandbox); @@ -546,6 +872,7 @@ impl KubernetesComputeDriver { } } Ok(Some(Event::Deleted(obj))) => { + if !is_cold_openshell_sandbox(&obj) { continue; } match sandbox_id_from_object(&obj) { Ok(sandbox_id) => { remove_indexes(&mut sandbox_name_to_id, &mut agent_pod_to_id, &sandbox_id); @@ -567,6 +894,7 @@ impl KubernetesComputeDriver { } Ok(Some(Event::Restarted(objs))) => { for obj in objs { + if !is_cold_openshell_sandbox(&obj) { continue; } match sandbox_from_object(&namespace, obj) { Ok(sandbox) => { update_indexes(&mut sandbox_name_to_id, &mut agent_pod_to_id, &sandbox); @@ -598,6 +926,59 @@ impl KubernetesComputeDriver { break; } }, + result = claim_stream.try_next() => match result { + Ok(Some(Event::Applied(obj))) => { + if let Some(sandbox) = managed_claim_sandbox(&namespace, &obj) { + update_indexes(&mut sandbox_name_to_id, &mut agent_pod_to_id, &sandbox); + let event = WatchSandboxesEvent { + payload: Some(watch_sandboxes_event::Payload::Sandbox( + WatchSandboxesSandboxEvent { sandbox: Some(sandbox) } + )), + }; + if tx.send(Ok(event)).await.is_err() { + break; + } + } + } + Ok(Some(Event::Deleted(obj))) => { + if let Some(sandbox_id) = sandbox_id_from_claim_object(&obj) { + remove_indexes(&mut sandbox_name_to_id, &mut agent_pod_to_id, &sandbox_id); + let event = WatchSandboxesEvent { + payload: Some(watch_sandboxes_event::Payload::Deleted( + WatchSandboxesDeletedEvent { sandbox_id } + )), + }; + if tx.send(Ok(event)).await.is_err() { + break; + } + } + } + Ok(Some(Event::Restarted(objs))) => { + for obj in objs { + if let Some(sandbox) = managed_claim_sandbox(&namespace, &obj) { + update_indexes(&mut sandbox_name_to_id, &mut agent_pod_to_id, &sandbox); + let event = WatchSandboxesEvent { + payload: Some(watch_sandboxes_event::Payload::Sandbox( + WatchSandboxesSandboxEvent { sandbox: Some(sandbox) } + )), + }; + if tx.send(Ok(event)).await.is_err() { + return; + } + } + } + } + Ok(None) => { + let _ = tx.send(Err(KubernetesDriverError::Message( + "sandbox claim watcher stream ended".to_string() + ))).await; + break; + } + Err(err) => { + let _ = tx.send(Err(KubernetesDriverError::Message(err.to_string()))).await; + break; + } + }, result = event_stream.try_next() => match result { Ok(Some(Event::Applied(obj))) => { if let Some((sandbox_id, event)) = map_kube_event_to_platform( @@ -682,6 +1063,357 @@ fn sandbox_from_object(namespace: &str, obj: DynamicObject) -> Result bool { + obj.metadata + .labels + .as_ref() + .is_some_and(|labels| labels.contains_key(LABEL_SANDBOX_ID)) +} + +/// Name shared by a pool's `SandboxTemplate` and `SandboxWarmPool`. +fn warm_resource_name(pool_name: &str) -> String { + format!("openshell-warmpool-{pool_name}") +} + +/// Labels stamped onto gateway-owned warm-pool resources. +fn warm_pool_labels(pool_name: &str) -> BTreeMap { + let mut labels = BTreeMap::new(); + labels.insert( + LABEL_MANAGED_BY.to_string(), + LABEL_MANAGED_BY_VALUE.to_string(), + ); + labels.insert(WARM_POOL_LABEL.to_string(), pool_name.to_string()); + labels +} + +/// Whether a `CreateSandbox` request is eligible for warm pooling. Only the +/// gateway's trusted `default_image` with no per-request template or env +/// overrides qualifies (issue #1879, remediation #7); anything else falls back +/// to the cold path (where per-sandbox env/identity can be injected freely). +/// +/// Fail-closed *by construction*: both the spec and its template are +/// destructured exhaustively, so adding a field to `DriverSandboxSpec` or +/// `DriverSandboxTemplate` will not compile here until the new field is +/// explicitly classified as warm-safe or cold-only. This forecloses the +/// silent-drop class of bug where a per-request field is honored on the cold +/// path but ignored by the pooled template (issue #1879, PR #1813 review #1). +fn warm_eligible(spec: &SandboxSpec, default_image: &str) -> bool { + let SandboxSpec { + // The gateway forbids warm-pooling requests carrying a custom policy: a + // warm pod boots on the pool's baseline policy and cannot late-bind a + // per-sandbox policy (Landlock is applied once at process start), so + // warm-pooling such a request would silently downgrade its policy. + disallow_warm_pool, + // Warm-safe. `gpu` selects the matching pool shape (see + // `matching_warm_pool`); `sandbox_token` is ignored by the Kubernetes + // driver (identity comes from the projected ServiceAccount token), so + // neither forces the cold path. + gpu: _, + sandbox_token: _, + // Cold-only. The pooled pod's env/log level are fixed at warm-up time + // (the cold path threads these into the pod env via `spec_pod_env`). + // (A specific GPU device now rides in the template's `driver_config`, + // which the template check below rejects, so it also forces cold.) + log_level, + environment, + // Per-request runtime overrides — validated field-by-field below. + template, + } = spec; + + if *disallow_warm_pool || !log_level.is_empty() || !environment.is_empty() { + return false; + } + + let Some(template) = template.as_ref() else { + return true; + }; + let SandboxTemplate { + // Warm-safe only when empty or equal to the pool's default image. + image, + // Cold-only: consumed cold-side as the Sandbox spec's `agentSocket`; + // the pooled template uses the default, so a per-request value would be + // silently ignored. + agent_socket_path, + // Cold-only: the pooled pod's labels/env/resources are fixed at warm-up + // and platform/driver config is baked into the `SandboxTemplate`. + labels, + environment, + resources, + platform_config, + driver_config, + } = template; + + (image.is_empty() || image.as_str() == default_image) + && agent_socket_path.is_empty() + && labels.is_empty() + && environment.is_empty() + && resources.is_none() + && platform_config + .as_ref() + .is_none_or(|pc| pc.fields.is_empty()) + && driver_config.as_ref().is_none_or(|dc| dc.fields.is_empty()) +} + +/// Build the `SandboxTemplate.spec` for a warm pool: the shared pod blueprint +/// with **no** per-sandbox identity (no `openshell.io/sandbox-id` annotation, no +/// per-sandbox env). The writable `/sandbox` workspace is an ephemeral +/// `emptyDir` seeded from the image (single-use, nothing to orphan); an optional +/// shared volume is mounted read-only. +fn build_warm_sandbox_template_spec( + config: &KubernetesComputeConfig, + pool: &WarmPoolSpec, +) -> serde_json::Value { + let runtime_class = if pool.runtime_class_name.is_empty() { + config.default_runtime_class_name.clone() + } else { + pool.runtime_class_name.clone() + }; + let params = SandboxPodParams { + default_image: &config.default_image, + image_pull_policy: &config.image_pull_policy, + image_pull_secrets: &config.image_pull_secrets, + supervisor_image: &config.supervisor_image, + supervisor_image_pull_policy: &config.supervisor_image_pull_policy, + supervisor_sideload_method: config.supervisor_sideload_method, + service_account_name: &config.service_account_name, + // Pooled pods are identity-free; the per-sandbox id is late-bound via + // the SandboxClaim annotation + the IssueSandboxToken exchange. + sandbox_id: "", + sandbox_name: "", + grpc_endpoint: &config.grpc_endpoint, + ssh_socket_path: &config.ssh_socket_path, + client_tls_secret_name: &config.client_tls_secret_name, + host_gateway_ip: &config.host_gateway_ip, + enable_user_namespaces: config.enable_user_namespaces, + app_armor_profile: config.app_armor_profile.as_ref(), + workspace_default_storage_size: &config.workspace_default_storage_size, + default_runtime_class_name: &runtime_class, + sa_token_ttl_secs: config.effective_sa_token_ttl_secs(), + // Pooled pods are identity-free and carry no per-sandbox providers, so + // they never mount provider-token-grant SPIFFE material. If a claimed + // warm sandbox needs providers that is late-bound (out of scope here). + provider_spiffe_enabled: false, + provider_spiffe_workload_api_socket_path: "", + }; + + let empty_env = std::collections::HashMap::new(); + // inject_workspace=false: suppress the cold-path PVC workspace; an ephemeral + // emptyDir workspace is attached below instead. + let mut pod_template = sandbox_template_to_k8s( + &SandboxTemplate::default(), + pool.gpu, + &empty_env, + false, + ¶ms, + ); + apply_ephemeral_workspace( + &mut pod_template, + &config.default_image, + &config.image_pull_policy, + ); + if let Some(shared) = pool.shared_volume.as_ref() { + apply_shared_readonly_volume(&mut pod_template, shared); + } + + serde_json::json!({ + "podTemplate": pod_template, + // Reject per-claim env injection on the warm path: pod env is immutable + // once running and identity must never ride pod env. + "envVarsInjectionPolicy": "Disallowed", + // Do NOT let the warm-pool controller manage a NetworkPolicy for these + // pods. With no rules a Managed (the upstream default) NetworkPolicy is + // default-deny and blocks the pod's egress to the gateway, so the + // supervisor can never reach `IssueSandboxToken` / open its relay + // session. OpenShell enforces egress itself via the supervisor proxy + + // Landlock, so cluster NetworkPolicy management is left off here, matching + // the cold path (which has no NetworkPolicy at all). + "networkPolicyManagement": "Unmanaged", + }) +} + +/// Build the `SandboxClaim.spec` JSON that binds a pre-warmed pod: it references +/// the pool template/warmpool, injects only the per-sandbox identity annotation, +/// and forces single-use teardown (`shutdownPolicy: Delete`, never `Retain`). +fn warm_claim_spec(resource_name: &str, sandbox_id: &str) -> serde_json::Value { + serde_json::json!({ + "spec": { + "sandboxTemplateRef": { "name": resource_name }, + "warmpool": resource_name, + "additionalPodMetadata": { + "annotations": { SANDBOX_ID_ANNOTATION: sandbox_id } + }, + // Single-use: claim deletion must destroy the bound Sandbox/Pod (and + // any backing storage). Never `Retain` (orphans data). + "lifecycle": { "shutdownPolicy": "Delete" } + } + }) +} + +/// Attach an ephemeral `emptyDir` writable workspace at `/sandbox`, seeded from +/// the image on first start. Single-use and fail-safe: the kubelet reclaims it +/// with the pod, so there is nothing to orphan (vs. the cold path's PVC). +fn apply_ephemeral_workspace( + pod_template: &mut serde_json::Value, + image: &str, + image_pull_policy: &str, +) { + // The cold path relies on the Sandbox CRD controller to materialise a PVC + // volume from `volumeClaimTemplates`; warm pods have none, so add the + // emptyDir volume here. The mount + seed init-container are shared. + if let Some(spec) = pod_template.get_mut("spec").and_then(|v| v.as_object_mut()) + && let Some(volumes) = spec + .entry("volumes") + .or_insert_with(|| serde_json::json!([])) + .as_array_mut() + { + volumes.push(serde_json::json!({ + "name": WORKSPACE_VOLUME_NAME, + "emptyDir": {} + })); + } + apply_workspace_persistence(pod_template, image, image_pull_policy); +} + +/// Mount a pre-existing PVC read-only into the agent container (shared datasets +/// / models / caches). Safe to share precisely because it is read-only and +/// holds no per-agent state. +fn apply_shared_readonly_volume(pod_template: &mut serde_json::Value, shared: &SharedVolumeSpec) { + const SHARED_VOLUME_NAME: &str = "openshell-shared-data"; + let Some(spec) = pod_template.get_mut("spec").and_then(|v| v.as_object_mut()) else { + return; + }; + + if let Some(volumes) = spec + .entry("volumes") + .or_insert_with(|| serde_json::json!([])) + .as_array_mut() + { + volumes.push(serde_json::json!({ + "name": SHARED_VOLUME_NAME, + "persistentVolumeClaim": { + "claimName": shared.claim_name, + "readOnly": true + } + })); + } + + let Some(containers) = spec.get_mut("containers").and_then(|v| v.as_array_mut()) else { + return; + }; + let index = containers + .iter() + .position(|c| c.get("name").and_then(|v| v.as_str()) == Some("agent")) + .unwrap_or(0); + if let Some(container) = containers.get_mut(index).and_then(|v| v.as_object_mut()) { + let mut mount = serde_json::json!({ + "name": SHARED_VOLUME_NAME, + "mountPath": shared.mount_path, + "readOnly": true + }); + if !shared.sub_path.is_empty() { + mount["subPath"] = serde_json::json!(shared.sub_path); + } + if let Some(mounts) = container + .entry("volumeMounts") + .or_insert_with(|| serde_json::json!([])) + .as_array_mut() + { + mounts.push(mount); + } + } +} + +/// Extract the gateway sandbox-id stamped on a managed `SandboxClaim`. +fn sandbox_id_from_claim_object(obj: &DynamicObject) -> Option { + obj.metadata + .labels + .as_ref()? + .get(LABEL_SANDBOX_ID) + .filter(|v| !v.is_empty()) + .cloned() +} + +/// Map a managed warm-pool `SandboxClaim` to a driver `Sandbox`, or `None` when +/// it is not one of ours (missing the gateway sandbox-id label). +fn managed_claim_sandbox(namespace: &str, obj: &DynamicObject) -> Option { + sandbox_id_from_claim_object(obj)?; + sandbox_from_claim_object(namespace, obj.clone()).ok() +} + +/// Build a driver `Sandbox` from a warm-pool `SandboxClaim`. The id comes from +/// the gateway-set `openshell.ai/sandbox-id` label; status is derived from the +/// claim binding. +fn sandbox_from_claim_object(namespace: &str, obj: DynamicObject) -> Result { + let id = sandbox_id_from_claim_object(&obj) + .ok_or_else(|| "SandboxClaim missing openshell.ai/sandbox-id label".to_string())?; + let name = obj.metadata.name.clone().unwrap_or_default(); + let namespace = obj + .metadata + .namespace + .clone() + .unwrap_or_else(|| namespace.to_string()); + let status = Some(claim_status_to_sandbox_status(&obj)); + Ok(Sandbox { + id, + name, + namespace, + spec: None, + status, + }) +} + +/// Derive a driver status from a `SandboxClaim`. The bound `Sandbox`'s name is +/// used as the instance id (best-effort event correlation) and the claim +/// identity is surfaced so the gateway can back-fill or GC the durable claim +/// mapping. Readiness is ultimately driven by the supervisor session. +fn claim_status_to_sandbox_status(obj: &DynamicObject) -> SandboxStatus { + let claim_name = obj.metadata.name.clone().unwrap_or_default(); + let claim_uid = obj.metadata.uid.clone().unwrap_or_default(); + let status_obj = obj.data.get("status").and_then(|s| s.as_object()); + + let bound_sandbox = status_obj + .and_then(|s| s.get("sandbox")) + .and_then(|s| s.as_object()) + .and_then(|s| s.get("name")) + .and_then(|v| v.as_str()) + .unwrap_or_default() + .to_string(); + + let conditions = status_obj + .and_then(|s| s.get("conditions")) + .and_then(|val| val.as_array()) + .map(|items| { + items + .iter() + .filter_map(condition_from_value) + .collect::>() + }) + .unwrap_or_default(); + + SandboxStatus { + sandbox_name: bound_sandbox.clone(), + instance_id: bound_sandbox, + agent_fd: String::new(), + sandbox_fd: String::new(), + conditions, + deleting: obj.metadata.deletion_timestamp.is_some(), + claim_name, + claim_uid, + } +} + fn update_indexes( sandbox_name_to_id: &mut std::collections::HashMap, agent_pod_to_id: &mut std::collections::HashMap, @@ -1739,8 +2471,18 @@ fn apply_required_env( tls_enabled: bool, provider_spiffe_socket_path: Option<&str>, ) { - upsert_env(env, openshell_core::sandbox_env::SANDBOX_ID, sandbox_id); - upsert_env(env, openshell_core::sandbox_env::SANDBOX, sandbox_name); + // Identity vars are set only when known at pod-create time. Warm-pool + // template pods boot without an identity (assigned at claim time), so these + // are omitted there — leaving `OPENSHELL_SANDBOX_ID`/`OPENSHELL_SANDBOX` + // unset (clap sees `None`) so the supervisor cleanly skips identity-gated + // startup (per-sandbox policy fetch, log push) and instead boots on the + // baseline policy, then establishes identity over the relay after the claim. + if !sandbox_id.is_empty() { + upsert_env(env, openshell_core::sandbox_env::SANDBOX_ID, sandbox_id); + } + if !sandbox_name.is_empty() { + upsert_env(env, openshell_core::sandbox_env::SANDBOX, sandbox_name); + } upsert_env(env, openshell_core::sandbox_env::ENDPOINT, grpc_endpoint); upsert_env( env, @@ -1894,6 +2636,9 @@ fn status_from_object(obj: &DynamicObject) -> Option { .to_string(), conditions, deleting: obj.metadata.deletion_timestamp.is_some(), + // Cold-path Sandbox CRs are not warm-pool claims. + claim_name: String::new(), + claim_uid: String::new(), }) } @@ -3493,3 +4238,311 @@ mod tests { assert_eq!(storage, DEFAULT_WORKSPACE_STORAGE_SIZE); } } + +#[cfg(test)] +mod warm_pool_tests { + use super::*; + use crate::config::{KubernetesWarmPoolConfig, SharedVolumeSpec, WarmPoolSpec}; + + fn pool(name: &str, gpu: bool) -> WarmPoolSpec { + WarmPoolSpec { + name: name.to_string(), + replicas: 2, + runtime_class_name: String::new(), + gpu, + shared_volume: None, + } + } + + fn warm_config(pools: Vec) -> KubernetesComputeConfig { + KubernetesComputeConfig { + default_image: "ghcr.io/openshell/sandbox:latest".to_string(), + warm_pool: KubernetesWarmPoolConfig { + enabled: true, + pools, + }, + ..KubernetesComputeConfig::default() + } + } + + fn claim_object( + sandbox_id: Option<&str>, + bound: Option<&str>, + claim_uid: &str, + ) -> DynamicObject { + let gvk = GroupVersionKind::gvk(EXT_GROUP, EXT_VERSION, SANDBOX_CLAIM_KIND); + let resource = ApiResource::from_gvk(&gvk); + let mut obj = DynamicObject::new("sb-name", &resource); + obj.metadata.uid = Some(claim_uid.to_string()); + let mut labels = BTreeMap::new(); + labels.insert( + LABEL_MANAGED_BY.to_string(), + LABEL_MANAGED_BY_VALUE.to_string(), + ); + if let Some(id) = sandbox_id { + labels.insert(LABEL_SANDBOX_ID.to_string(), id.to_string()); + } + obj.metadata.labels = Some(labels); + if let Some(name) = bound { + obj.data = serde_json::json!({ "status": { "sandbox": { "name": name } } }); + } + obj + } + + #[test] + fn warm_eligible_accepts_default_image_without_overrides() { + let spec = SandboxSpec::default(); + assert!(warm_eligible(&spec, "img")); + + let spec = SandboxSpec { + template: Some(SandboxTemplate { + image: "img".to_string(), + ..SandboxTemplate::default() + }), + ..SandboxSpec::default() + }; + assert!(warm_eligible(&spec, "img")); + } + + #[test] + fn warm_eligible_rejects_overrides() { + // Custom image. + let spec = SandboxSpec { + template: Some(SandboxTemplate { + image: "other/image:tag".to_string(), + ..SandboxTemplate::default() + }), + ..SandboxSpec::default() + }; + assert!(!warm_eligible(&spec, "img")); + + // Per-request env. + let mut env = std::collections::HashMap::new(); + env.insert("FOO".to_string(), "bar".to_string()); + let spec = SandboxSpec { + environment: env, + ..SandboxSpec::default() + }; + assert!(!warm_eligible(&spec, "img")); + + // Per-request driver config (e.g. a specific GPU device, which moved + // into driver_config) must fall back to cold — the pooled template + // bakes a fixed shape. + let mut fields = BTreeMap::new(); + fields.insert( + "gpu_device".to_string(), + prost_types::Value { + kind: Some(prost_types::value::Kind::StringValue("0".to_string())), + }, + ); + let spec = SandboxSpec { + template: Some(SandboxTemplate { + driver_config: Some(prost_types::Struct { fields }), + ..SandboxTemplate::default() + }), + ..SandboxSpec::default() + }; + assert!(!warm_eligible(&spec, "img")); + + // Custom template labels. + let mut labels = std::collections::HashMap::new(); + labels.insert("team".to_string(), "x".to_string()); + let spec = SandboxSpec { + template: Some(SandboxTemplate { + labels, + ..SandboxTemplate::default() + }), + ..SandboxSpec::default() + }; + assert!(!warm_eligible(&spec, "img")); + + // Custom policy (gateway-signalled): must fall back to cold so the + // per-sandbox policy is not silently downgraded to the pool baseline. + let spec = SandboxSpec { + disallow_warm_pool: true, + ..SandboxSpec::default() + }; + assert!(!warm_eligible(&spec, "img")); + + // Per-request log level: cold-only (threaded into pod env); the warm + // template is built from defaults, so it would be silently dropped. + let spec = SandboxSpec { + log_level: "debug".to_string(), + ..SandboxSpec::default() + }; + assert!(!warm_eligible(&spec, "img")); + + // Per-request agent socket: cold-only (Sandbox `agentSocket`). + let spec = SandboxSpec { + template: Some(SandboxTemplate { + agent_socket_path: "/run/agent.sock".to_string(), + ..SandboxTemplate::default() + }), + ..SandboxSpec::default() + }; + assert!(!warm_eligible(&spec, "img")); + } + + #[test] + fn matching_warm_pool_selects_by_gpu_shape() { + let config = warm_config(vec![pool("default", false), pool("gpu", true)]); + + // Driver matching is config-only; mimic it without a live client. + let pools = &config.warm_pool.pools; + let cpu_spec = SandboxSpec::default(); + let selected = pools.iter().find(|p| p.gpu == cpu_spec.gpu).unwrap(); + assert_eq!(selected.name, "default"); + + let gpu_spec = SandboxSpec { + gpu: true, + ..SandboxSpec::default() + }; + let selected = pools.iter().find(|p| p.gpu == gpu_spec.gpu).unwrap(); + assert_eq!(selected.name, "gpu"); + } + + #[test] + fn warm_claim_spec_is_single_use_and_identity_only() { + let spec = warm_claim_spec("openshell-warmpool-default", "sandbox-123"); + let claim = &spec["spec"]; + assert_eq!( + claim["sandboxTemplateRef"]["name"], + "openshell-warmpool-default" + ); + assert_eq!(claim["warmpool"], "openshell-warmpool-default"); + assert_eq!( + claim["additionalPodMetadata"]["annotations"]["openshell.io/sandbox-id"], + "sandbox-123" + ); + // Single-use teardown is mandatory; Retain would orphan workspace data. + assert_eq!(claim["lifecycle"]["shutdownPolicy"], "Delete"); + // Per-claim env must never be set on the warm path. + assert!(claim.get("env").is_none()); + } + + #[test] + fn warm_template_uses_ephemeral_workspace_and_no_identity() { + let config = warm_config(vec![pool("default", false)]); + let spec = build_warm_sandbox_template_spec(&config, &config.warm_pool.pools[0]); + + // Disallow per-claim env injection. + assert_eq!(spec["envVarsInjectionPolicy"], "Disallowed"); + // SandboxTemplate.spec carries no PVC volumeClaimTemplates (the orphan + // risk); the writable workspace is an emptyDir instead. + assert!(spec.get("volumeClaimTemplates").is_none()); + + let pod_spec = &spec["podTemplate"]["spec"]; + let volumes = pod_spec["volumes"].as_array().expect("volumes"); + let workspace = volumes + .iter() + .find(|v| v["name"] == WORKSPACE_VOLUME_NAME) + .expect("workspace volume present"); + assert!( + workspace.get("emptyDir").is_some(), + "warm workspace must be an emptyDir, got {workspace}" + ); + assert!( + workspace.get("persistentVolumeClaim").is_none(), + "warm workspace must not be PVC-backed" + ); + + // The pooled template must not bake any per-sandbox identity annotation. + let annotations = spec["podTemplate"]["metadata"].get("annotations"); + let has_sandbox_id = annotations + .and_then(|a| a.get("openshell.io/sandbox-id")) + .is_some(); + assert!(!has_sandbox_id, "pooled template must be identity-free"); + } + + #[test] + fn warm_template_mounts_shared_volume_read_only() { + let mut config = warm_config(vec![pool("data", false)]); + config.warm_pool.pools[0].shared_volume = Some(SharedVolumeSpec { + claim_name: "models".to_string(), + mount_path: "/models".to_string(), + sub_path: "llama".to_string(), + }); + let spec = build_warm_sandbox_template_spec(&config, &config.warm_pool.pools[0]); + let pod_spec = &spec["podTemplate"]["spec"]; + + let shared_vol = pod_spec["volumes"] + .as_array() + .unwrap() + .iter() + .find(|v| v["persistentVolumeClaim"]["claimName"] == "models") + .expect("shared volume present"); + assert_eq!(shared_vol["persistentVolumeClaim"]["readOnly"], true); + + let agent = pod_spec["containers"] + .as_array() + .unwrap() + .iter() + .find(|c| c["name"] == "agent") + .expect("agent container"); + let mount = agent["volumeMounts"] + .as_array() + .unwrap() + .iter() + .find(|m| m["mountPath"] == "/models") + .expect("shared mount present"); + assert_eq!(mount["readOnly"], true); + assert_eq!(mount["subPath"], "llama"); + } + + #[test] + fn is_cold_openshell_sandbox_requires_sandbox_id_label() { + let gvk = GroupVersionKind::gvk(SANDBOX_GROUP, SANDBOX_VERSION, SANDBOX_KIND); + let resource = ApiResource::from_gvk(&gvk); + + // Cold OpenShell sandbox: carries openshell.ai/sandbox-id. + let mut cold = DynamicObject::new("cold-sb", &resource); + cold.metadata.labels = Some(BTreeMap::from([( + LABEL_SANDBOX_ID.to_string(), + "sandbox-1".to_string(), + )])); + assert!(is_cold_openshell_sandbox(&cold)); + + // Bound warm Sandbox: only the upstream claim-uid label. + let mut bound_warm = DynamicObject::new("bound-sb", &resource); + bound_warm.metadata.labels = Some(BTreeMap::from([( + CLAIM_UID_LABEL.to_string(), + "claim-uid".to_string(), + )])); + assert!(!is_cold_openshell_sandbox(&bound_warm)); + + // Unbound pooled warm Sandbox: neither identity label — this is the + // case that previously slipped through and broke the watch loop. + let mut pooled_warm = DynamicObject::new("openshell-warmpool-default-abc", &resource); + pooled_warm.metadata.labels = Some(BTreeMap::from([( + WARM_POOL_LABEL.to_string(), + "default".to_string(), + )])); + assert!(!is_cold_openshell_sandbox(&pooled_warm)); + } + + #[test] + fn sandbox_from_claim_object_maps_id_and_bound_status() { + let obj = claim_object(Some("sandbox-9"), Some("bound-sandbox-xyz"), "claim-uid-9"); + let sandbox = sandbox_from_claim_object("openshell", obj).unwrap(); + assert_eq!(sandbox.id, "sandbox-9"); + assert_eq!(sandbox.name, "sb-name"); + let status = sandbox.status.expect("status"); + assert_eq!(status.sandbox_name, "bound-sandbox-xyz"); + assert_eq!(status.instance_id, "bound-sandbox-xyz"); + assert_eq!(status.claim_name, "sb-name"); + assert_eq!(status.claim_uid, "claim-uid-9"); + } + + #[test] + fn sandbox_from_claim_object_requires_sandbox_id_label() { + let obj = claim_object(None, None, "claim-uid-0"); + assert!(sandbox_from_claim_object("openshell", obj).is_err()); + let obj = claim_object(None, None, "claim-uid-0"); + assert!(managed_claim_sandbox("openshell", &obj).is_none()); + } + + #[test] + fn warm_resource_name_is_prefixed() { + assert_eq!(warm_resource_name("default"), "openshell-warmpool-default"); + } +} diff --git a/crates/openshell-driver-kubernetes/src/grpc.rs b/crates/openshell-driver-kubernetes/src/grpc.rs index eced74f57..4ce9134ce 100644 --- a/crates/openshell-driver-kubernetes/src/grpc.rs +++ b/crates/openshell-driver-kubernetes/src/grpc.rs @@ -99,11 +99,12 @@ impl ComputeDriver for ComputeDriverService { .into_inner() .sandbox .ok_or_else(|| Status::invalid_argument("sandbox is required"))?; - self.driver + let warm_claim = self + .driver .create_sandbox(&sandbox) .await .map_err(|e| Status::from(openshell_core::ComputeDriverError::from(e)))?; - Ok(Response::new(CreateSandboxResponse {})) + Ok(Response::new(CreateSandboxResponse { warm_claim })) } async fn stop_sandbox( diff --git a/crates/openshell-driver-kubernetes/src/main.rs b/crates/openshell-driver-kubernetes/src/main.rs index f7eeeba42..5df2ed1ad 100644 --- a/crates/openshell-driver-kubernetes/src/main.rs +++ b/crates/openshell-driver-kubernetes/src/main.rs @@ -135,6 +135,10 @@ async fn main() -> Result<()> { provider_spiffe_workload_api_socket_path: args .provider_spiffe_workload_api_socket_path .unwrap_or_default(), + // Warm pooling is gateway-orchestrated (it needs the shared gateway + // Store for the claim mapping); the standalone driver binary keeps it + // disabled. + warm_pool: openshell_driver_kubernetes::config::KubernetesWarmPoolConfig::default(), }) .await .into_diagnostic()?; From 7baa15097dcfd37849dd74213d14bd93dfed8e2b Mon Sep 17 00:00:00 2001 From: Roshni Malani Date: Fri, 12 Jun 2026 18:08:13 -0700 Subject: [PATCH 07/10] feat(server): re-anchor warm sandbox identity to the gateway-created SandboxClaim A warm pod's owning Sandbox CR is created generically by the pool controller and carries no openshell sandbox-id, so IssueSandboxToken must re-anchor identity through a fail-closed chain: pod -> owning Sandbox -> controlling SandboxClaim -> live bound claim -> durable gateway claim mapping. Record the mapping synchronously at create (rolling the claim back if the write fails), back-fill it after a crash window only for sandboxes the gateway minted, and GC orphaned mappings. Signed-off-by: Roshni Malani --- crates/openshell-server/src/auth/k8s_sa.rs | 516 +++++++++++++++++- crates/openshell-server/src/compute/mod.rs | 196 ++++++- crates/openshell-server/src/grpc/sandbox.rs | 2 + .../openshell-server/src/grpc/validation.rs | 74 +++ crates/openshell-server/src/lib.rs | 4 + 5 files changed, 780 insertions(+), 12 deletions(-) diff --git a/crates/openshell-server/src/auth/k8s_sa.rs b/crates/openshell-server/src/auth/k8s_sa.rs index 59f6651bb..664d1ba6d 100644 --- a/crates/openshell-server/src/auth/k8s_sa.rs +++ b/crates/openshell-server/src/auth/k8s_sa.rs @@ -38,15 +38,29 @@ pub const ISSUE_SANDBOX_TOKEN_PATH: &str = "/openshell.v1.OpenShell/IssueSandbox /// annotation only after validating the pod's `TokenReview` binding, live UID, /// and owning Sandbox CR. The K8s `Role` granted to the gateway must not /// include `patch pods` (see plan §11.8). -pub const SANDBOX_ID_ANNOTATION: &str = "openshell.io/sandbox-id"; -const SANDBOX_API_GROUP: &str = "agents.x-k8s.io"; -const SANDBOX_API_VERSION: &str = "v1alpha1"; -const SANDBOX_API_VERSION_FULL: &str = "agents.x-k8s.io/v1alpha1"; -const SANDBOX_KIND: &str = "Sandbox"; -const SANDBOX_ID_LABEL: &str = "openshell.ai/sandbox-id"; +// agent-sandbox CRD identity. Single source of truth in `openshell-core`; the +// re-anchor checks below must stay byte-identical with what the Kubernetes +// driver writes, so they are derived from the same constants the driver uses. +pub const SANDBOX_ID_ANNOTATION: &str = openshell_core::driver_utils::SANDBOX_ID_ANNOTATION; +const SANDBOX_API_GROUP: &str = openshell_core::driver_utils::SANDBOX_CRD_GROUP; +const SANDBOX_API_VERSION: &str = openshell_core::driver_utils::SANDBOX_CRD_VERSION; +const SANDBOX_API_VERSION_FULL: &str = openshell_core::driver_utils::SANDBOX_CRD_API_VERSION; +const SANDBOX_KIND: &str = openshell_core::driver_utils::SANDBOX_CRD_KIND; +const SANDBOX_ID_LABEL: &str = openshell_core::driver_utils::LABEL_SANDBOX_ID; const POD_NAME_EXTRA: &str = "authentication.kubernetes.io/pod-name"; const POD_UID_EXTRA: &str = "authentication.kubernetes.io/pod-uid"; +// Warm-pool extension CRDs. A warm sandbox's owning `Sandbox` CR is created +// generically by the pool controller — it carries no `openshell.ai/sandbox-id` +// label and is instead controlled by a `SandboxClaim` (+ the controller's +// `agents.x-k8s.io/claim-uid` label). Identity must re-anchor to the +// gateway-created `SandboxClaim` and the durable claim mapping. +const SANDBOX_CLAIM_GROUP: &str = openshell_core::driver_utils::SANDBOX_EXT_GROUP; +const SANDBOX_CLAIM_VERSION: &str = openshell_core::driver_utils::SANDBOX_CRD_VERSION; +const SANDBOX_CLAIM_API_VERSION_FULL: &str = openshell_core::driver_utils::SANDBOX_EXT_API_VERSION; +const SANDBOX_CLAIM_KIND: &str = openshell_core::driver_utils::SANDBOX_CLAIM_KIND; +const CLAIM_UID_LABEL: &str = openshell_core::driver_utils::CLAIM_UID_LABEL; + /// Resolved identity extracted from a validated SA token + pod lookup. #[derive(Debug, Clone)] pub struct ResolvedK8sIdentity { @@ -55,6 +69,37 @@ pub struct ResolvedK8sIdentity { pub pod_uid: String, } +/// Looks up the durable warm-pool claim mapping the gateway recorded at +/// `CreateSandbox` time. Backed by the shared gateway Store (HA-safe: any +/// replica can serve the bootstrap). Split out so tests can fake it. +#[async_trait] +pub trait ClaimMappingLookup: Send + Sync + 'static { + /// Resolve the sandbox-id the gateway bound to `(namespace, claim_name, + /// claim_uid)`. `Ok(None)` (including a `claim_uid` that matches no record) + /// means the caller fails closed. + async fn lookup_sandbox_id( + &self, + namespace: &str, + claim_name: &str, + claim_uid: &str, + ) -> Result, Status>; +} + +#[async_trait] +impl ClaimMappingLookup for crate::persistence::Store { + async fn lookup_sandbox_id( + &self, + namespace: &str, + claim_name: &str, + claim_uid: &str, + ) -> Result, Status> { + self.get_claim_mapping(namespace, claim_name, claim_uid) + .await + .map(|opt| opt.map(|mapping| mapping.sandbox_id)) + .map_err(|e| Status::internal(format!("claim mapping lookup failed: {e}"))) + } +} + /// Apiserver-facing operations the authenticator depends on. Split out so /// tests can fake the apiserver without standing up a kube cluster. #[async_trait] @@ -150,6 +195,8 @@ pub struct LiveK8sResolver { token_reviews_api: Api, pods_api: Api, sandboxes_api: Api, + sandbox_claims_api: Api, + claim_mapping: Arc, expected_audience: String, sandbox_namespace: String, expected_service_account: String, @@ -161,6 +208,7 @@ impl LiveK8sResolver { namespace: &str, expected_audience: String, expected_service_account: String, + claim_mapping: Arc, ) -> Self { let token_reviews_api: Api = Api::all(client.clone()); let pods_api: Api = Api::namespaced(client.clone(), namespace); @@ -168,11 +216,21 @@ impl LiveK8sResolver { GroupVersionKind::gvk(SANDBOX_API_GROUP, SANDBOX_API_VERSION, SANDBOX_KIND); let sandbox_resource = ApiResource::from_gvk(&sandbox_gvk); let sandboxes_api: Api = - Api::namespaced_with(client, namespace, &sandbox_resource); + Api::namespaced_with(client.clone(), namespace, &sandbox_resource); + let claim_gvk = GroupVersionKind::gvk( + SANDBOX_CLAIM_GROUP, + SANDBOX_CLAIM_VERSION, + SANDBOX_CLAIM_KIND, + ); + let claim_resource = ApiResource::from_gvk(&claim_gvk); + let sandbox_claims_api: Api = + Api::namespaced_with(client, namespace, &claim_resource); Self { token_reviews_api, pods_api, sandboxes_api, + sandbox_claims_api, + claim_mapping, expected_audience, sandbox_namespace: namespace.to_string(), expected_service_account, @@ -275,7 +333,53 @@ impl K8sIdentityResolver for LiveK8sResolver { ); return Err(Status::permission_denied("sandbox owner not found")); }; - validate_sandbox_owner_reference(&owner, &sandbox_id, &sandbox_cr)?; + + // Warm vs cold is decided by the owning Sandbox CR's *ownerReference*, + // never by a label a cold pod could carry. A warm Sandbox is controlled + // by a SandboxClaim; identity then re-anchors to the gateway-created + // claim mapping. A cold Sandbox keeps the original label cross-check. + match sandbox_claim_owner_reference(&sandbox_cr)? { + Some(claim_owner) => { + let live_claim = self + .sandbox_claims_api + .get_opt(&claim_owner.name) + .await + .map_err(|e| { + warn!( + pod = %identity.pod_name, + claim = %claim_owner.name, + error = %e, + "failed to fetch SandboxClaim for warm pod identity validation" + ); + Status::internal(format!("sandboxclaim GET failed: {e}")) + })?; + let Some(live_claim) = live_claim else { + warn!( + pod = %identity.pod_name, + claim = %claim_owner.name, + "owning Sandbox references a SandboxClaim that does not exist" + ); + return Err(Status::permission_denied("sandbox claim not found")); + }; + // Cross-namespace pinning: the mapping is keyed and looked up in + // the resolver's fixed sandbox namespace only. + let store_sandbox_id = self + .claim_mapping + .lookup_sandbox_id(&self.sandbox_namespace, &claim_owner.name, &claim_owner.uid) + .await?; + validate_warm_claim( + &owner, + &sandbox_cr, + &claim_owner, + &live_claim, + store_sandbox_id.as_deref(), + &sandbox_id, + )?; + } + None => { + validate_sandbox_owner_reference(&owner, &sandbox_id, &sandbox_cr)?; + } + } Ok(Some(ResolvedK8sIdentity { sandbox_id, @@ -437,6 +541,178 @@ fn validate_sandbox_owner_reference( Ok(()) } +/// Controlling `SandboxClaim` ownerReference extracted from an owning warm +/// `Sandbox` CR. +#[derive(Debug, Clone, PartialEq, Eq)] +struct SandboxClaimOwnerReference { + name: String, + uid: String, +} + +/// Extract the controlling `SandboxClaim` ownerReference from an owning +/// `Sandbox` CR, if any. +/// +/// `Ok(None)` => the owning Sandbox is a cold `OpenShell` Sandbox (no +/// `SandboxClaim` owner) and the caller uses the original label cross-check. +/// `Ok(Some(_))` => warm path. `Err` => a malformed/ambiguous claim +/// ownerReference rejects (fail closed). +#[allow(clippy::result_large_err)] +fn sandbox_claim_owner_reference( + sandbox_cr: &DynamicObject, +) -> Result, Status> { + let owner_refs = sandbox_cr + .metadata + .owner_references + .as_deref() + .unwrap_or_default(); + let mut claim_refs = owner_refs.iter().filter(|owner| { + owner.api_version == SANDBOX_CLAIM_API_VERSION_FULL && owner.kind == SANDBOX_CLAIM_KIND + }); + let Some(owner) = claim_refs.next() else { + return Ok(None); + }; + if claim_refs.next().is_some() { + return Err(Status::permission_denied( + "sandbox has multiple SandboxClaim owners", + )); + } + if owner.controller != Some(true) { + return Err(Status::permission_denied( + "sandbox SandboxClaim ownerReference is not controlling", + )); + } + if owner.name.is_empty() || owner.uid.is_empty() { + return Err(Status::permission_denied( + "sandbox SandboxClaim ownerReference is incomplete", + )); + } + Ok(Some(SandboxClaimOwnerReference { + name: owner.name.clone(), + uid: owner.uid.clone(), + })) +} + +/// The bound `Sandbox` name a `SandboxClaim` reports in `status.sandbox.name`, +/// or empty when the claim is not yet bound. +fn claim_bound_sandbox_name(claim: &DynamicObject) -> String { + claim + .data + .get("status") + .and_then(|status| status.get("sandbox")) + .and_then(|sandbox| sandbox.get("name")) + .and_then(|name| name.as_str()) + .unwrap_or_default() + .to_string() +} + +/// Fail-closed validation of the warm-pool identity chain. Every leg must +/// agree or the bootstrap is rejected. This preserves the cold-path invariant — +/// *the sandbox-id a pod can obtain equals a value only the gateway wrote, on an +/// object the sandbox workload cannot mutate* — re-anchored to the +/// gateway-created `SandboxClaim` and the durable claim mapping. +/// +/// Arguments: +/// - `sandbox_owner`: the pod's controlling `Sandbox` ownerRef (name + uid). +/// - `sandbox_cr`: the live owning `Sandbox` CR. +/// - `claim_owner`: the `SandboxClaim` controlling ownerRef on `sandbox_cr`. +/// - `live_claim`: the live `SandboxClaim` fetched by name. +/// - `store_sandbox_id`: the sandbox-id the gateway Store recorded for +/// `(namespace, claim_owner.name, claim_owner.uid)`, if any. +/// - `pod_sandbox_id`: the pod's `openshell.io/sandbox-id` annotation. +#[allow(clippy::result_large_err)] +fn validate_warm_claim( + sandbox_owner: &SandboxOwnerReference, + sandbox_cr: &DynamicObject, + claim_owner: &SandboxClaimOwnerReference, + live_claim: &DynamicObject, + store_sandbox_id: Option<&str>, + pod_sandbox_id: &str, +) -> Result<(), Status> { + // 1. The owning Sandbox CR's UID matches the pod's ownerReference UID. + let cr_uid = sandbox_cr.metadata.uid.as_deref().unwrap_or_default(); + if cr_uid != sandbox_owner.uid { + warn!( + sandbox_owner = %sandbox_owner.name, + owner_uid = %sandbox_owner.uid, + actual_uid = %cr_uid, + "warm pod Sandbox ownerReference UID does not match live Sandbox CR" + ); + return Err(Status::permission_denied("sandbox owner UID mismatch")); + } + + // 2. The controller's claim-uid label agrees with the SandboxClaim ownerRef. + let label_uid = sandbox_cr + .metadata + .labels + .as_ref() + .and_then(|labels| labels.get(CLAIM_UID_LABEL)) + .map(String::as_str) + .unwrap_or_default(); + if label_uid != claim_owner.uid { + warn!( + sandbox_owner = %sandbox_owner.name, + claim = %claim_owner.name, + claim_owner_uid = %claim_owner.uid, + label_uid = %label_uid, + "warm Sandbox claim-uid label disagrees with SandboxClaim ownerReference" + ); + return Err(Status::permission_denied( + "sandbox claim-uid label mismatch", + )); + } + + // 3. The live SandboxClaim's UID matches the ownerReference UID. + let live_uid = live_claim.metadata.uid.as_deref().unwrap_or_default(); + if live_uid != claim_owner.uid { + warn!( + claim = %claim_owner.name, + claim_owner_uid = %claim_owner.uid, + live_uid = %live_uid, + "live SandboxClaim UID does not match ownerReference" + ); + return Err(Status::permission_denied("sandbox claim UID mismatch")); + } + + // 4. The claim is bound to this exact owning Sandbox. + let bound = claim_bound_sandbox_name(live_claim); + if bound.is_empty() || bound != sandbox_owner.name { + warn!( + claim = %claim_owner.name, + bound_sandbox = %bound, + owning_sandbox = %sandbox_owner.name, + "SandboxClaim is not bound to the owning Sandbox" + ); + return Err(Status::permission_denied( + "sandbox claim is not bound to the owning sandbox", + )); + } + + // 5. The gateway-created Store mapping resolves the same sandbox-id as the + // pod annotation. A missing record (never created, or a uid that matches + // no record) fails closed. + let Some(store_sandbox_id) = store_sandbox_id else { + warn!( + claim = %claim_owner.name, + claim_owner_uid = %claim_owner.uid, + "no gateway claim mapping for SandboxClaim; rejecting" + ); + return Err(Status::permission_denied( + "no gateway mapping for sandbox claim", + )); + }; + if store_sandbox_id != pod_sandbox_id { + warn!( + claim = %claim_owner.name, + pod_sandbox_id = %pod_sandbox_id, + mapped_sandbox_id = %store_sandbox_id, + "warm pod sandbox annotation does not match gateway claim mapping" + ); + return Err(Status::permission_denied("sandbox claim mapping mismatch")); + } + + Ok(()) +} + #[cfg(test)] pub mod test_support { use super::*; @@ -838,3 +1114,227 @@ mod tests { assert_eq!(err.code(), tonic::Code::Unavailable); } } + +#[cfg(test)] +mod warm_claim_tests { + use super::*; + use k8s_openapi::apimachinery::pkg::apis::meta::v1::OwnerReference; + use std::collections::BTreeMap; + + fn claim_owner_ref(name: &str, uid: &str, controller: bool) -> OwnerReference { + OwnerReference { + api_version: SANDBOX_CLAIM_API_VERSION_FULL.to_string(), + block_owner_deletion: None, + controller: Some(controller), + kind: SANDBOX_CLAIM_KIND.to_string(), + name: name.to_string(), + uid: uid.to_string(), + } + } + + /// Owning Sandbox CR for a warm pod: claim-uid label + controlling + /// `SandboxClaim` ownerReference. + fn warm_sandbox_cr( + name: &str, + cr_uid: &str, + claim_name: &str, + claim_uid: &str, + ) -> DynamicObject { + let gvk = GroupVersionKind::gvk(SANDBOX_API_GROUP, SANDBOX_API_VERSION, SANDBOX_KIND); + let resource = ApiResource::from_gvk(&gvk); + let mut cr = DynamicObject::new(name, &resource); + cr.metadata.uid = Some(cr_uid.to_string()); + cr.metadata.labels = Some(BTreeMap::from([( + CLAIM_UID_LABEL.to_string(), + claim_uid.to_string(), + )])); + cr.metadata.owner_references = Some(vec![claim_owner_ref(claim_name, claim_uid, true)]); + cr + } + + fn sandbox_claim(name: &str, uid: &str, bound_sandbox: &str) -> DynamicObject { + let gvk = GroupVersionKind::gvk( + SANDBOX_CLAIM_GROUP, + SANDBOX_CLAIM_VERSION, + SANDBOX_CLAIM_KIND, + ); + let resource = ApiResource::from_gvk(&gvk); + let mut claim = DynamicObject::new(name, &resource); + claim.metadata.uid = Some(uid.to_string()); + if !bound_sandbox.is_empty() { + claim.data = serde_json::json!({ + "status": { "sandbox": { "name": bound_sandbox } } + }); + } + claim + } + + fn owner(name: &str, uid: &str) -> SandboxOwnerReference { + SandboxOwnerReference { + name: name.to_string(), + uid: uid.to_string(), + } + } + + fn claim_owner(name: &str, uid: &str) -> SandboxClaimOwnerReference { + SandboxClaimOwnerReference { + name: name.to_string(), + uid: uid.to_string(), + } + } + + // A fully-consistent warm binding: pod sandbox-id "sb-1", owning Sandbox + // "sandbox-a" (cr uid "cr-1"), bound by claim "claim-a" (uid "claim-1"), + // store mapping (ns, claim-a, claim-1) -> "sb-1". + struct Fixture { + owner: SandboxOwnerReference, + cr: DynamicObject, + claim_owner: SandboxClaimOwnerReference, + claim: DynamicObject, + } + + fn fixture() -> Fixture { + Fixture { + owner: owner("sandbox-a", "cr-1"), + cr: warm_sandbox_cr("sandbox-a", "cr-1", "claim-a", "claim-1"), + claim_owner: claim_owner("claim-a", "claim-1"), + claim: sandbox_claim("claim-a", "claim-1", "sandbox-a"), + } + } + + #[allow(clippy::result_large_err)] + fn validate(f: &Fixture, store: Option<&str>, pod_id: &str) -> Result<(), Status> { + validate_warm_claim(&f.owner, &f.cr, &f.claim_owner, &f.claim, store, pod_id) + } + + #[test] + fn warm_chain_accepts_consistent_binding() { + let f = fixture(); + validate(&f, Some("sb-1"), "sb-1").expect("consistent warm binding is accepted"); + } + + #[test] + fn warm_chain_rejects_cr_uid_mismatch() { + let mut f = fixture(); + f.cr.metadata.uid = Some("cr-other".to_string()); + let err = validate(&f, Some("sb-1"), "sb-1").expect_err("cr uid mismatch must reject"); + assert_eq!(err.code(), tonic::Code::PermissionDenied); + } + + #[test] + fn warm_chain_rejects_claim_uid_label_mismatch() { + let mut f = fixture(); + f.cr.metadata.labels = Some(BTreeMap::from([( + CLAIM_UID_LABEL.to_string(), + "claim-spoof".to_string(), + )])); + let err = validate(&f, Some("sb-1"), "sb-1").expect_err("claim-uid label mismatch rejects"); + assert_eq!(err.code(), tonic::Code::PermissionDenied); + } + + #[test] + fn warm_chain_rejects_live_claim_uid_mismatch() { + let mut f = fixture(); + f.claim.metadata.uid = Some("claim-different".to_string()); + let err = validate(&f, Some("sb-1"), "sb-1").expect_err("live claim uid mismatch rejects"); + assert_eq!(err.code(), tonic::Code::PermissionDenied); + } + + #[test] + fn warm_chain_rejects_unbound_claim() { + let mut f = fixture(); + f.claim = sandbox_claim("claim-a", "claim-1", ""); + let err = validate(&f, Some("sb-1"), "sb-1").expect_err("unbound claim rejects"); + assert_eq!(err.code(), tonic::Code::PermissionDenied); + } + + #[test] + fn warm_chain_rejects_claim_bound_to_other_sandbox() { + let mut f = fixture(); + f.claim = sandbox_claim("claim-a", "claim-1", "some-other-sandbox"); + let err = + validate(&f, Some("sb-1"), "sb-1").expect_err("claim bound elsewhere must reject"); + assert_eq!(err.code(), tonic::Code::PermissionDenied); + } + + #[test] + fn warm_chain_rejects_missing_store_mapping() { + // Claim exists but the gateway never recorded (or a stale record was + // deleted) the durable mapping -> fail closed. + let f = fixture(); + let err = validate(&f, None, "sb-1").expect_err("missing store mapping must reject"); + assert_eq!(err.code(), tonic::Code::PermissionDenied); + } + + #[test] + fn warm_chain_rejects_store_mapping_spoof() { + // The pod annotation claims a victim sandbox-id, but the gateway mapping + // for this claim resolves a different (correct) id -> reject. + let f = fixture(); + let err = validate(&f, Some("sb-1"), "victim-sandbox") + .expect_err("annotation/mapping mismatch must reject"); + assert_eq!(err.code(), tonic::Code::PermissionDenied); + } + + #[test] + fn claim_owner_reference_detects_warm_sandbox() { + let cr = warm_sandbox_cr("sandbox-a", "cr-1", "claim-a", "claim-1"); + let claim_owner = sandbox_claim_owner_reference(&cr) + .expect("well-formed") + .expect("warm Sandbox has a SandboxClaim owner"); + assert_eq!(claim_owner.name, "claim-a"); + assert_eq!(claim_owner.uid, "claim-1"); + } + + #[test] + fn claim_owner_reference_cold_sandbox_with_spoofed_label_is_not_warm() { + // A cold Sandbox CR carrying a user-style claim-uid label but NO + // controlling SandboxClaim ownerReference must take the cold path: + // detection is ownerReference-based, never label-based. + let gvk = GroupVersionKind::gvk(SANDBOX_API_GROUP, SANDBOX_API_VERSION, SANDBOX_KIND); + let resource = ApiResource::from_gvk(&gvk); + let mut cr = DynamicObject::new("sandbox-a", &resource); + cr.metadata.uid = Some("cr-1".to_string()); + cr.metadata.labels = Some(BTreeMap::from([ + (CLAIM_UID_LABEL.to_string(), "spoof".to_string()), + (SANDBOX_ID_LABEL.to_string(), "sb-1".to_string()), + ])); + // No owner_references. + assert!( + sandbox_claim_owner_reference(&cr).unwrap().is_none(), + "a label alone must not trigger the warm path" + ); + } + + #[test] + fn claim_owner_reference_rejects_non_controlling_owner() { + let gvk = GroupVersionKind::gvk(SANDBOX_API_GROUP, SANDBOX_API_VERSION, SANDBOX_KIND); + let resource = ApiResource::from_gvk(&gvk); + let mut cr = DynamicObject::new("sandbox-a", &resource); + cr.metadata.owner_references = Some(vec![claim_owner_ref("claim-a", "claim-1", false)]); + let err = + sandbox_claim_owner_reference(&cr).expect_err("non-controlling claim owner rejects"); + assert_eq!(err.code(), tonic::Code::PermissionDenied); + } + + #[test] + fn claim_owner_reference_rejects_multiple_claim_owners() { + let gvk = GroupVersionKind::gvk(SANDBOX_API_GROUP, SANDBOX_API_VERSION, SANDBOX_KIND); + let resource = ApiResource::from_gvk(&gvk); + let mut cr = DynamicObject::new("sandbox-a", &resource); + cr.metadata.owner_references = Some(vec![ + claim_owner_ref("claim-a", "claim-1", true), + claim_owner_ref("claim-b", "claim-2", true), + ]); + let err = sandbox_claim_owner_reference(&cr).expect_err("multiple claim owners reject"); + assert_eq!(err.code(), tonic::Code::PermissionDenied); + } + + #[test] + fn claim_bound_sandbox_name_reads_status() { + let claim = sandbox_claim("claim-a", "claim-1", "sandbox-a"); + assert_eq!(claim_bound_sandbox_name(&claim), "sandbox-a"); + let unbound = sandbox_claim("claim-a", "claim-1", ""); + assert_eq!(claim_bound_sandbox_name(&unbound), ""); + } +} diff --git a/crates/openshell-server/src/compute/mod.rs b/crates/openshell-server/src/compute/mod.rs index 6d687fb7c..523529bfc 100644 --- a/crates/openshell-server/src/compute/mod.rs +++ b/crates/openshell-server/src/compute/mod.rs @@ -21,7 +21,7 @@ use openshell_core::proto::compute::v1::{ CreateSandboxRequest, DeleteSandboxRequest, DriverCondition, DriverPlatformEvent, DriverResourceRequirements, DriverSandbox, DriverSandboxSpec, DriverSandboxStatus, DriverSandboxTemplate, GetCapabilitiesRequest, GetSandboxRequest, ListSandboxesRequest, - ValidateSandboxCreateRequest, WatchSandboxesEvent, WatchSandboxesRequest, + ValidateSandboxCreateRequest, WarmClaimBinding, WatchSandboxesEvent, WatchSandboxesRequest, compute_driver_client::ComputeDriverClient, compute_driver_server::ComputeDriver, watch_sandboxes_event, }; @@ -338,6 +338,11 @@ impl ComputeRuntime { let driver = KubernetesComputeDriver::new(config) .await .map_err(|err| ComputeError::Message(err.to_string()))?; + // Pre-declare operator-owned warm pools. Non-fatal: a reconcile failure + // (e.g. extension CRDs not installed) leaves the cold path fully working. + if let Err(err) = driver.reconcile_warm_pools().await { + warn!(error = %err, "warm pool reconcile failed; cold create path remains available"); + } let driver: SharedComputeDriver = Arc::new(ComputeDriverService::new(driver)); Self::from_driver( ComputeDriverKind::Kubernetes, @@ -487,7 +492,18 @@ impl ComputeRuntime { })) .await { - Ok(_) => { + Ok(response) => { + // Warm path: the driver bound a pre-warmed pod via a SandboxClaim + // and returned its identity. Record the durable, gateway-created + // (namespace, claim_name, claim_uid) -> sandbox_id mapping that + // the IssueSandboxToken bootstrap re-anchors against (HA-safe in + // the shared Store). If persistence fails, tear the claim back + // down — a bound-but-unmapped pod can never bootstrap — and fail + // the create so the caller retries cleanly. + if let Some(binding) = response.into_inner().warm_claim { + self.record_warm_claim_mapping(binding, &sandbox, &sandbox_id) + .await?; + } self.sandbox_watch_bus.notify(sandbox.object_id()); if let Some(metadata) = sandbox.metadata.as_mut() { metadata.resource_version = result.resource_version; @@ -570,6 +586,15 @@ impl ComputeRuntime { warn!(sandbox_id = %id, error = %e, "Failed to clean up store after delete"); } + // Drop any warm-pool claim mapping for this sandbox. Harmless no-op for + // cold sandboxes; a leaked mapping would fail closed at auth time, but + // we clean it up eagerly so stale rows never accumulate. + if matches!(self.driver_kind, Some(ComputeDriverKind::Kubernetes)) + && let Err(e) = self.store.delete_claim_mapping_for_sandbox(&id).await + { + warn!(sandbox_id = %id, error = %e, "Failed to clean up warm claim mapping"); + } + self.cleanup_sandbox_state(&id); Ok(deleted) } @@ -889,6 +914,7 @@ impl ComputeRuntime { if let Err(err) = self.reconcile_store_with_backend(ORPHAN_GRACE_PERIOD).await { warn!(error = %err, "Store reconciliation sweep failed"); } + self.gc_claim_mappings().await; tokio::select! { () = tokio::time::sleep(RECONCILE_INTERVAL) => {} _ = cancel.changed() => return, @@ -982,11 +1008,159 @@ impl ComputeRuntime { self.apply_sandbox_update_locked(incoming, existing).await } + /// Persist the durable `(namespace, claim_name, claim_uid) -> sandbox_id` + /// mapping for a warm-pool bind, rolling the claim back on failure. + /// + /// The mapping is what `IssueSandboxToken` re-anchors a warm pod's identity + /// against, so it must exist before the pod can bootstrap. If the + /// synchronous write fails, tear the claim (and the half-created sandbox) + /// back down and surface the error so the caller retries cleanly — a + /// bound-but-unmapped pod could never bootstrap. + async fn record_warm_claim_mapping( + &self, + binding: WarmClaimBinding, + sandbox: &Sandbox, + sandbox_id: &str, + ) -> Result<(), Status> { + let mapping = crate::persistence::ClaimMapping { + namespace: binding.namespace, + claim_name: binding.claim_name, + claim_uid: binding.claim_uid, + sandbox_id: sandbox_id.to_string(), + }; + if let Err(err) = self.store.put_claim_mapping(&mapping).await { + warn!( + sandbox_id = %sandbox_id, + error = %err, + "failed to persist warm claim mapping; rolling back claim" + ); + let _ = self + .driver + .delete_sandbox(Request::new(DeleteSandboxRequest { + sandbox_id: sandbox_id.to_string(), + sandbox_name: sandbox.object_name().to_string(), + })) + .await; + let _ = self.store.delete(Sandbox::object_type(), sandbox_id).await; + self.sandbox_index.remove_sandbox(sandbox_id); + return Err(Status::internal(format!( + "persist warm claim mapping failed: {err}" + ))); + } + Ok(()) + } + + /// Ensure the durable warm-pool claim mapping exists for an observed warm + /// sandbox. The driver surfaces the binding claim's `name`/`uid` on warm + /// observations; this re-creates a mapping that a replica crash (between + /// claim creation and the synchronous write) would otherwise have lost. + /// Best-effort and idempotent; a no-op for cold sandboxes. + async fn backfill_claim_mapping(&self, incoming: &DriverSandbox) { + let Some(status) = incoming.status.as_ref() else { + return; + }; + if status.claim_name.is_empty() || status.claim_uid.is_empty() { + return; + } + // Trust boundary: `incoming.id` is reconstructed from the live + // SandboxClaim's `openshell.ai/sandbox-id` label, which is authoritative + // only because claim creation is gateway-exclusive (the sandbox pod's + // ServiceAccount has no claims write — see the chart Role). To keep + // backfill from ever *fabricating* an identity, recover a mapping only + // for a sandbox the gateway actually minted: require its Store record to + // exist. The create path persists that record (MustCreate) *before* + // creating the claim, so the legitimate crash window — claim created, + // replica died before the synchronous mapping write — always satisfies + // this. `put_claim_mapping` further refuses to rebind an existing + // (namespace, claim_name, claim_uid) key to a different sandbox id, and + // the apiserver-unique claim_uid scopes that key, so a stale or forged + // claim cannot hijack an existing mapping. + match self.store.get(Sandbox::object_type(), &incoming.id).await { + Ok(Some(_)) => {} + Ok(None) => { + debug!( + sandbox_id = %incoming.id, + claim = %status.claim_name, + "warm claim mapping backfill skipped: no gateway sandbox record" + ); + return; + } + Err(err) => { + warn!( + sandbox_id = %incoming.id, + error = %err, + "warm claim mapping backfill: sandbox lookup failed" + ); + return; + } + } + let mapping = crate::persistence::ClaimMapping { + namespace: incoming.namespace.clone(), + claim_name: status.claim_name.clone(), + claim_uid: status.claim_uid.clone(), + sandbox_id: incoming.id.clone(), + }; + if let Err(err) = self.store.put_claim_mapping(&mapping).await { + debug!( + sandbox_id = %incoming.id, + claim = %status.claim_name, + error = %err, + "warm claim mapping backfill skipped" + ); + } + } + + /// Garbage-collect warm-pool claim mappings whose sandbox record is gone + /// (a backstop for missed deletes / crashes). A leaked mapping fails closed + /// at auth time regardless; this keeps the table from accumulating cruft. + async fn gc_claim_mappings(&self) { + if !matches!(self.driver_kind, Some(ComputeDriverKind::Kubernetes)) { + return; + } + let mappings = match self.store.list_claim_mappings(1000, 0).await { + Ok(mappings) => mappings, + Err(err) => { + warn!(error = %err, "failed to list warm claim mappings for GC"); + return; + } + }; + for mapping in mappings { + match self + .store + .get(Sandbox::object_type(), &mapping.sandbox_id) + .await + { + Ok(Some(_)) => {} // sandbox still exists; keep the mapping + Ok(None) => { + if let Err(err) = self + .store + .delete_claim_mapping_for_sandbox(&mapping.sandbox_id) + .await + { + warn!( + sandbox_id = %mapping.sandbox_id, + error = %err, + "failed to GC orphaned warm claim mapping" + ); + } + } + Err(err) => { + warn!(error = %err, "warm claim mapping GC lookup failed"); + } + } + } + } + async fn apply_sandbox_update_locked( &self, incoming: DriverSandbox, existing_record: Option, ) -> Result<(), String> { + // Recover a missing warm-pool claim mapping (e.g. a replica crashed + // between claim creation and the synchronous mapping write). Idempotent + // and a no-op for cold sandboxes. + self.backfill_claim_mapping(&incoming).await; + let existing = existing_record .as_ref() .map(decode_sandbox_record) @@ -1402,6 +1576,10 @@ fn driver_sandbox_spec_from_public( .transpose()?, gpu: spec.gpu, sandbox_token: String::new(), + // A custom policy cannot be applied to a warm-pooled pod (it boots on + // the pool's baseline policy and Landlock can't be loosened later), so + // force the cold path to avoid silently downgrading the policy. + disallow_warm_pool: spec.policy.is_some(), }) } @@ -1632,6 +1810,9 @@ fn driver_status_from_public(status: &SandboxStatus) -> DriverSandboxStatus { .map(driver_condition_from_public) .collect(), deleting: SandboxPhase::try_from(status.phase) == Ok(SandboxPhase::Deleting), + // Warm-pool claim identity is set only by the Kubernetes driver on warm + // observations; the public status carries none. + ..Default::default() } } @@ -1889,7 +2070,7 @@ impl ComputeDriver for NoopTestDriver { ) -> Result, Status> { Ok(tonic::Response::new( - openshell_core::proto::compute::v1::CreateSandboxResponse {}, + openshell_core::proto::compute::v1::CreateSandboxResponse { warm_claim: None }, )) } @@ -2109,7 +2290,9 @@ mod tests { &self, _request: Request, ) -> Result, Status> { - Ok(tonic::Response::new(CreateSandboxResponse {})) + Ok(tonic::Response::new(CreateSandboxResponse { + warm_claim: None, + })) } async fn stop_sandbox( @@ -2223,6 +2406,7 @@ mod tests { sandbox_fd: String::new(), conditions: vec![condition], deleting: false, + ..Default::default() } } @@ -2561,6 +2745,7 @@ mod tests { last_transition_time: String::new(), }], deleting: false, + ..Default::default() }), }) .await @@ -2770,6 +2955,7 @@ mod tests { last_transition_time: String::new(), }], deleting: false, + ..Default::default() }), }], current_sandboxes: vec![DriverSandbox { @@ -2790,6 +2976,7 @@ mod tests { last_transition_time: String::new(), }], deleting: false, + ..Default::default() }), }], })) @@ -2888,6 +3075,7 @@ mod tests { last_transition_time: String::new(), }], deleting: false, + ..Default::default() }), }], ..Default::default() diff --git a/crates/openshell-server/src/grpc/sandbox.rs b/crates/openshell-server/src/grpc/sandbox.rs index e60ce3995..33552d631 100644 --- a/crates/openshell-server/src/grpc/sandbox.rs +++ b/crates/openshell-server/src/grpc/sandbox.rs @@ -131,6 +131,8 @@ async fn handle_create_sandbox_inner( crate::grpc::validation::validate_label_key(key)?; crate::grpc::validation::validate_label_value(value)?; } + // Reject reserved identity/platform metadata keys the caller must not set. + crate::grpc::validation::validate_reserved_metadata_keys(&request.labels, "labels")?; // Validate provider names exist (fail fast). for name in &spec.providers { diff --git a/crates/openshell-server/src/grpc/validation.rs b/crates/openshell-server/src/grpc/validation.rs index 03a69d6e9..92aa6371b 100644 --- a/crates/openshell-server/src/grpc/validation.rs +++ b/crates/openshell-server/src/grpc/validation.rs @@ -171,6 +171,7 @@ fn validate_sandbox_template(tmpl: &SandboxTemplate) -> Result<(), Status> { MAX_MAP_VALUE_LEN, "template.labels", )?; + validate_reserved_metadata_keys(&tmpl.labels, "template.labels")?; validate_string_map( &tmpl.annotations, MAX_TEMPLATE_MAP_ENTRIES, @@ -178,6 +179,7 @@ fn validate_sandbox_template(tmpl: &SandboxTemplate) -> Result<(), Status> { MAX_MAP_VALUE_LEN, "template.annotations", )?; + validate_reserved_metadata_keys(&tmpl.annotations, "template.annotations")?; validate_string_map( &tmpl.environment, MAX_TEMPLATE_MAP_ENTRIES, @@ -215,6 +217,40 @@ fn validate_sandbox_template(tmpl: &SandboxTemplate) -> Result<(), Status> { Ok(()) } +/// Metadata key prefixes/keys reserved for `OpenShell` and the agent-sandbox +/// platform. Users must not set these on a sandbox's labels or template +/// labels/annotations: they drive sandbox identity, the warm-pool claim binding, +/// SPIFFE selection, and managed-by ownership. Allowing user control would let a +/// request spoof identity or confuse the warm/cold auth re-anchor (issue #1879). +const RESERVED_METADATA_PREFIXES: &[&str] = &[ + "openshell.io/", + "openshell.ai/", + "agents.x-k8s.io/", + "extensions.agents.x-k8s.io/", + "spiffe.io/", +]; + +/// Reject any reserved-prefix keys in a user-supplied metadata map. Applied to +/// `request.labels`, `template.labels`, and `template.annotations` (the latter +/// flows through to pod annotations). +pub(super) fn validate_reserved_metadata_keys( + map: &std::collections::HashMap, + field_name: &str, +) -> Result<(), Status> { + for key in map.keys() { + if let Some(prefix) = RESERVED_METADATA_PREFIXES + .iter() + .find(|prefix| key.starts_with(**prefix)) + { + return Err(Status::invalid_argument(format!( + "{field_name} key '{key}' uses the reserved '{prefix}' prefix; \ + these are managed by OpenShell and cannot be set by the caller" + ))); + } + } + Ok(()) +} + /// Validate a `map` field: entry count, key length, value length. pub(super) fn validate_string_map( map: &std::collections::HashMap, @@ -751,6 +787,44 @@ mod tests { SandboxSpec::default() } + #[test] + fn reserved_metadata_keys_rejected() { + for reserved in [ + "openshell.io/sandbox-id", + "openshell.ai/managed-by", + "agents.x-k8s.io/claim-uid", + "extensions.agents.x-k8s.io/foo", + "spiffe.io/whatever", + ] { + let map = HashMap::from([(reserved.to_string(), "x".to_string())]); + let err = validate_reserved_metadata_keys(&map, "labels") + .expect_err("reserved key must be rejected"); + assert_eq!(err.code(), Code::InvalidArgument); + } + } + + #[test] + fn reserved_metadata_allows_ordinary_keys() { + let map = HashMap::from([ + ("team".to_string(), "platform".to_string()), + ("example.com/owner".to_string(), "me".to_string()), + ]); + validate_reserved_metadata_keys(&map, "labels").expect("ordinary keys are allowed"); + } + + #[test] + fn sandbox_template_rejects_reserved_annotation() { + let tmpl = SandboxTemplate { + annotations: HashMap::from([( + "openshell.io/sandbox-id".to_string(), + "spoof".to_string(), + )]), + ..SandboxTemplate::default() + }; + let err = validate_sandbox_template(&tmpl).expect_err("reserved annotation must reject"); + assert_eq!(err.code(), Code::InvalidArgument); + } + #[test] fn level_matches_treats_ocsf_as_info() { assert!(level_matches("OCSF", "INFO")); diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index 9f1127d0e..adbaf7098 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -318,11 +318,15 @@ pub async fn run_server( let sandbox_service_account = kubernetes_config.service_account_name; match kube::Client::try_default().await { Ok(client) => { + // The warm-path identity re-anchor reads the durable claim + // mapping from the shared Store (HA-safe across replicas). + let claim_mapping: Arc = store.clone(); let resolver = Arc::new(auth::k8s_sa::LiveK8sResolver::new( client, &sandbox_namespace, "openshell-gateway".to_string(), sandbox_service_account.clone(), + claim_mapping, )); let authenticator = auth::k8s_sa::K8sServiceAccountAuthenticator::new(resolver); state.k8s_sa_authenticator = Some(Arc::new(authenticator)); From c7db35390f7baf9d6284a3807cb9f22825a695cc Mon Sep 17 00:00:00 2001 From: Roshni Malani Date: Fri, 12 Jun 2026 18:08:24 -0700 Subject: [PATCH 08/10] feat(sandbox): derive warm-path identity from the gateway-minted JWT A pooled pod boots before it is claimed, so it cannot be handed a per-sandbox identity at create time. Let the supervisor boot identity-less on the image baseline policy and have the gateway derive the sandbox id from the gateway-minted JWT principal when the supervisor hello carries none, with a fast bootstrap retry while the claim binds. Landlock is applied once at process start, so a warm pod enforces the pool baseline policy; custom-policy requests stay on the cold path. Signed-off-by: Roshni Malani --- crates/openshell-sandbox/src/lib.rs | 47 +++++++++++++---- .../src/supervisor_session.rs | 52 +++++++++++++++---- .../src/supervisor_session.rs | 28 +++++++--- 3 files changed, 100 insertions(+), 27 deletions(-) diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index 16f6138a4..83792ddb6 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -889,14 +889,23 @@ pub async fn run_sandbox( } // Spawn the persistent supervisor session if we have a gateway endpoint - // and sandbox identity. The session provides relay channels for SSH - // connect and ExecSandbox through the gateway. - if let (Some(endpoint), Some(id), Some(socket)) = ( - openshell_endpoint.as_ref(), - sandbox_id.as_ref(), - ssh_socket_path.as_ref(), - ) { - supervisor_session::spawn(endpoint.clone(), id.clone(), socket.clone(), ssh_netns_fd); + // and a relay socket. The session provides relay channels for SSH connect + // and ExecSandbox through the gateway. + // + // A boot-time sandbox identity is NOT required: warm-pooled pods boot + // before a claim assigns their `openshell.io/sandbox-id`, so the session + // sends an empty id in its hello and the gateway derives the sandbox + // identity from the gateway-minted JWT (resolved server-side from the pod + // annotation during `IssueSandboxToken`). Cold sandboxes pass their known + // id through unchanged. + if let (Some(endpoint), Some(socket)) = (openshell_endpoint.as_ref(), ssh_socket_path.as_ref()) + { + supervisor_session::spawn( + endpoint.clone(), + sandbox_id.clone(), + socket.clone(), + ssh_netns_fd, + ); info!("supervisor session task spawned"); } @@ -2135,8 +2144,26 @@ async fn load_policy( return Ok((policy, Some(Arc::new(engine)), None)); } - // gRPC mode: fetch typed proto policy, construct OPA engine from baked rules + proto data - if let (Some(id), Some(endpoint)) = (&sandbox_id, &openshell_endpoint) { + // gRPC mode: fetch typed proto policy, construct OPA engine from baked rules + proto data. + if let Some(endpoint) = &openshell_endpoint { + let identity = sandbox_id.as_deref().filter(|id| !id.is_empty()); + + // Warm-pool path: a pooled pod boots before its claim assigns an + // identity, so there is no per-sandbox policy to fetch yet. Boot on the + // image's baseline policy (default-deny) and enforce it; the per-sandbox + // policy is NOT applied on the warm path because Landlock is applied + // once at process start and cannot be loosened afterwards. Requests that + // carry a custom policy fall back to the cold path (see the driver's + // warm-pool eligibility check), so there is no silent policy downgrade. + let Some(id) = identity else { + info!("Warm-pool sandbox: loading baseline policy (no boot-time identity)"); + let mut proto_policy = discover_policy_from_disk_or_default(); + enrich_proto_baseline_paths(&mut proto_policy); + let opa_engine = Some(Arc::new(OpaEngine::from_proto(&proto_policy)?)); + let policy = SandboxPolicy::try_from(proto_policy.clone())?; + return Ok((policy, opa_engine, Some(proto_policy))); + }; + info!( sandbox_id = %id, endpoint = %endpoint, diff --git a/crates/openshell-sandbox/src/supervisor_session.rs b/crates/openshell-sandbox/src/supervisor_session.rs index 4d7392ee3..e67ec066e 100644 --- a/crates/openshell-sandbox/src/supervisor_session.rs +++ b/crates/openshell-sandbox/src/supervisor_session.rs @@ -34,6 +34,11 @@ use crate::grpc_client; const INITIAL_BACKOFF: Duration = Duration::from_secs(1); const MAX_BACKOFF: Duration = Duration::from_secs(30); +/// Fixed, short retry cadence used while the supervisor has not yet established +/// its first session — e.g. a warm-pooled pod whose identity is not assigned +/// until its pool claim binds. A steady poll (rather than exponential backoff) +/// keeps the session coming up promptly once the gateway can mint a JWT. +const BOOTSTRAP_RETRY: Duration = Duration::from_secs(2); /// Parse a gRPC endpoint URI into an OCSF `Endpoint` (host + port). Falls back /// to treating the whole string as a domain if parsing fails. @@ -232,7 +237,7 @@ fn map_stream_message( /// exponential backoff on failures. pub fn spawn( endpoint: String, - sandbox_id: String, + sandbox_id: Option, ssh_socket_path: std::path::PathBuf, netns_fd: Option, ) -> tokio::task::JoinHandle<()> { @@ -246,19 +251,33 @@ pub fn spawn( async fn run_session_loop( endpoint: String, - sandbox_id: String, + sandbox_id: Option, ssh_socket_path: std::path::PathBuf, netns_fd: Option, ) { let mut backoff = INITIAL_BACKOFF; let mut attempt: u64 = 0; + // Tracks whether we have established a session at least once. Until then + // (e.g. a warm pod still waiting to be claimed) we retry at a steady short + // cadence rather than backing off exponentially, so the session comes up + // promptly once the gateway can resolve the pod's identity. + let ever_connected = std::sync::atomic::AtomicBool::new(false); + let log_id = sandbox_id.as_deref().unwrap_or_default(); loop { attempt += 1; - match run_single_session(&endpoint, &sandbox_id, &ssh_socket_path, netns_fd).await { + match run_single_session( + &endpoint, + sandbox_id.as_deref(), + &ssh_socket_path, + netns_fd, + &ever_connected, + ) + .await + { Ok(()) => { - let event = session_closed_event(crate::ocsf_ctx(), &endpoint, &sandbox_id); + let event = session_closed_event(crate::ocsf_ctx(), &endpoint, log_id); ocsf_emit!(event); break; } @@ -266,8 +285,12 @@ async fn run_session_loop( let event = session_failed_event(crate::ocsf_ctx(), &endpoint, attempt, &e.to_string()); ocsf_emit!(event); - tokio::time::sleep(backoff).await; - backoff = (backoff * 2).min(MAX_BACKOFF); + if ever_connected.load(std::sync::atomic::Ordering::Relaxed) { + tokio::time::sleep(backoff).await; + backoff = (backoff * 2).min(MAX_BACKOFF); + } else { + tokio::time::sleep(BOOTSTRAP_RETRY).await; + } } } } @@ -275,9 +298,10 @@ async fn run_session_loop( async fn run_single_session( endpoint: &str, - sandbox_id: &str, + sandbox_id: Option<&str>, ssh_socket_path: &std::path::Path, netns_fd: Option, + ever_connected: &std::sync::atomic::AtomicBool, ) -> Result<(), Box> { // Connect to the gateway. The same `Channel` is used for both the // long-lived control stream and all data-plane `RelayStream` calls, so @@ -292,11 +316,14 @@ async fn run_single_session( let (tx, rx) = mpsc::channel::(64); let outbound = tokio_stream::wrappers::ReceiverStream::new(rx); - // Send hello as the first message. + // Send hello as the first message. On the warm path the supervisor has no + // boot-time identity, so it sends an empty `sandbox_id` and the gateway + // derives identity from the authenticated (gateway-minted) JWT. Cold + // sandboxes assert their known id, which the gateway cross-checks. let instance_id = uuid::Uuid::new_v4().to_string(); tx.send(SupervisorMessage { payload: Some(supervisor_message::Payload::Hello(SupervisorHello { - sandbox_id: sandbox_id.to_string(), + sandbox_id: sandbox_id.unwrap_or_default().to_string(), instance_id: instance_id.clone(), })), }) @@ -324,6 +351,11 @@ async fn run_single_session( _ => return Err("expected SessionAccepted or SessionRejected".into()), }; + // We have a live, gateway-accepted session; subsequent reconnects (after a + // clean close or transient error) use exponential backoff rather than the + // bootstrap poll cadence. + ever_connected.store(true, std::sync::atomic::Ordering::Relaxed); + let heartbeat_secs = accepted.heartbeat_interval_secs.max(5); let event = session_established_event( crate::ocsf_ctx(), @@ -344,7 +376,7 @@ async fn run_single_session( let msg = map_stream_message(msg, "gateway closed stream")?; handle_gateway_message( &msg, - sandbox_id, + sandbox_id.unwrap_or_default(), ssh_socket_path, netns_fd, &channel, diff --git a/crates/openshell-server/src/supervisor_session.rs b/crates/openshell-server/src/supervisor_session.rs index 4adf9e8b6..cb7f15ece 100644 --- a/crates/openshell-server/src/supervisor_session.rs +++ b/crates/openshell-server/src/supervisor_session.rs @@ -591,13 +591,27 @@ pub async fn handle_connect_supervisor( None => return Err(Status::invalid_argument("stream closed before hello")), }; - let sandbox_id = hello.sandbox_id.clone(); - if sandbox_id.is_empty() { - return Err(Status::invalid_argument("sandbox_id is required")); - } - if let Some(principal) = principal.as_ref() { - crate::auth::guard::ensure_sandbox_principal_scope(principal, &sandbox_id)?; - } + // Resolve the session's sandbox identity. A warm-pooled supervisor boots + // without an identity and sends an empty `sandbox_id`; in that case the + // identity is taken from the authenticated (gateway-minted) JWT principal, + // which the K8s SA bootstrap re-anchor already resolved server-side from + // the pod annotation + the durable claim mapping. A supervisor that does + // assert an id must match its principal (unchanged cross-check). + let sandbox_id = if hello.sandbox_id.is_empty() { + match principal.as_ref() { + Some(Principal::Sandbox(p)) if !p.sandbox_id.is_empty() => p.sandbox_id.clone(), + _ => { + return Err(Status::unauthenticated( + "supervisor hello has no sandbox_id and no authenticated sandbox identity", + )); + } + } + } else { + if let Some(principal) = principal.as_ref() { + crate::auth::guard::ensure_sandbox_principal_scope(principal, &hello.sandbox_id)?; + } + hello.sandbox_id.clone() + }; require_persisted_sandbox(&state.store, &sandbox_id).await?; let session_id = Uuid::new_v4().to_string(); From 0bf82f0ce31c153660aa66f053193127ba0d501b Mon Sep 17 00:00:00 2001 From: Roshni Malani Date: Fri, 12 Jun 2026 18:08:24 -0700 Subject: [PATCH 09/10] feat(deploy): warm-pool gateway config and least-privilege RBAC Render warm-pool settings into gateway.toml, add a values-warm-pool.yaml CI overlay, document the fields in the config reference, and grant the gateway only the extension-CRD verbs it uses: server-side apply (create+patch) on templates/pools and create/delete/get/list/watch on claims, with no status subresources. The sandbox pod ServiceAccount is intentionally granted none of these. Signed-off-by: Roshni Malani --- deploy/helm/openshell/README.md | 2 + .../helm/openshell/ci/values-warm-pool.yaml | 19 ++++++++++ .../openshell/templates/gateway-config.yaml | 18 +++++++++ deploy/helm/openshell/templates/role.yaml | 31 ++++++++++++++++ deploy/helm/openshell/values.yaml | 24 ++++++++++++ docs/reference/gateway-config.mdx | 37 +++++++++++++++++++ 6 files changed, 131 insertions(+) create mode 100644 deploy/helm/openshell/ci/values-warm-pool.yaml diff --git a/deploy/helm/openshell/README.md b/deploy/helm/openshell/README.md index e6d539592..f095bdd68 100644 --- a/deploy/helm/openshell/README.md +++ b/deploy/helm/openshell/README.md @@ -225,6 +225,8 @@ add `ci/values-spire.yaml` to the OpenShell release values files. | server.tls.certSecretName | string | `"openshell-server-tls"` | K8s secret (type kubernetes.io/tls) with tls.crt and tls.key for the server. | | server.tls.clientCaSecretName | string | `"openshell-server-client-ca"` | K8s secret with ca.crt for client certificate verification (mTLS). Set to "" to disable mTLS and run HTTPS-only (use OIDC for auth instead). | | server.tls.clientTlsSecretName | string | `"openshell-client-tls"` | K8s secret mounted into sandbox pods for mTLS to the server. | +| server.warmPool.enabled | bool | `false` | Enable warm pooling. Off by default; only the trusted default sandbox image with no per-request template/env overrides is warm-pooled — any other request falls back to the cold create path. | +| server.warmPool.pools | list | `[]` | Operator-declared pools. Each maps to one (image, runtimeClass, gpu) shape. Example: pools: - name: default replicas: 3 - name: gpu replicas: 1 gpu: true runtimeClassName: nvidia # Optional shared, read-only data volume (datasets/models/caches). sharedVolume: claimName: models # existing PVC (ROX/RWX for multi-node) mountPath: /models subPath: "" # optional | | server.workspaceDefaultStorageSize | string | `""` | Default storage size for the workspace PVC in sandbox pods. Uses Kubernetes quantity syntax (e.g. "2Gi", "10Gi", "500Mi"). Empty = built-in default (2Gi). | | service.healthPort | int | `8081` | Gateway health service port. | | service.metricsPort | int | `9090` | Gateway metrics service port. | diff --git a/deploy/helm/openshell/ci/values-warm-pool.yaml b/deploy/helm/openshell/ci/values-warm-pool.yaml new file mode 100644 index 000000000..15da28250 --- /dev/null +++ b/deploy/helm/openshell/ci/values-warm-pool.yaml @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Overlay that enables warm-pooled sandboxes for the Kubernetes e2e harness +# (issue #1879). Layer it on top of values-skaffold.yaml, e.g.: +# +# OPENSHELL_E2E_KUBE_EXTRA_VALUES=deploy/helm/openshell/ci/values-warm-pool.yaml \ +# OPENSHELL_E2E_WARM_POOL=1 \ +# mise run e2e:kubernetes +# +# Requires the agent-sandbox warm-pool extension CRDs (the kube e2e harness +# installs them). A single CPU pool of the trusted default image is enough to +# exercise the claim path and the workspace-isolation assertions. +server: + warmPool: + enabled: true + pools: + - name: default + replicas: 2 diff --git a/deploy/helm/openshell/templates/gateway-config.yaml b/deploy/helm/openshell/templates/gateway-config.yaml index 7037be88f..4e77604e6 100644 --- a/deploy/helm/openshell/templates/gateway-config.yaml +++ b/deploy/helm/openshell/templates/gateway-config.yaml @@ -141,3 +141,21 @@ data: {{- if .Values.supervisor.image.pullPolicy }} supervisor_image_pull_policy = {{ .Values.supervisor.image.pullPolicy | quote }} {{- end }} + {{- if .Values.server.warmPool.enabled }} + + [openshell.drivers.kubernetes.warm_pool] + enabled = true + {{- range .Values.server.warmPool.pools }} + + [[openshell.drivers.kubernetes.warm_pool.pools]] + name = {{ .name | quote }} + replicas = {{ .replicas }} + gpu = {{ .gpu | default false }} + {{- if .runtimeClassName }} + runtime_class_name = {{ .runtimeClassName | quote }} + {{- end }} + {{- if .sharedVolume }} + shared_volume = { claim_name = {{ .sharedVolume.claimName | quote }}, mount_path = {{ .sharedVolume.mountPath | quote }}{{ if .sharedVolume.subPath }}, sub_path = {{ .sharedVolume.subPath | quote }}{{ end }} } + {{- end }} + {{- end }} + {{- end }} diff --git a/deploy/helm/openshell/templates/role.yaml b/deploy/helm/openshell/templates/role.yaml index 5ecc4428a..b6f1f1305 100644 --- a/deploy/helm/openshell/templates/role.yaml +++ b/deploy/helm/openshell/templates/role.yaml @@ -22,6 +22,37 @@ rules: - patch - update - watch + # Warm-pool extension CRDs (issue #1879). The gateway reconciles the + # operator-owned SandboxTemplate + SandboxWarmPool, creates/deletes a + # SandboxClaim per warm-pooled sandbox, and reads claims during the + # IssueSandboxToken identity re-anchor. The sandbox pod ServiceAccount is a + # separate identity and is intentionally NOT granted any of these — a + # compromised sandbox must not be able to mutate claims, templates, or pools. + # + # Least privilege: only the verbs the gateway actually exercises are granted. + # Templates/pools are reconciled with server-side apply (create + patch only — + # never update/get/list/watch). Claims are created, deleted, read back, and + # watched (create/delete/get/list/watch) — the driver runs a live watcher on + # SandboxClaims to surface warm/claim-bound sandboxes. The gateway never writes + # the controller-owned status subresources, so they are omitted. + - apiGroups: + - extensions.agents.x-k8s.io + resources: + - sandboxtemplates + - sandboxwarmpools + verbs: + - create + - patch + - apiGroups: + - extensions.agents.x-k8s.io + resources: + - sandboxclaims + verbs: + - create + - delete + - get + - list + - watch - apiGroups: - "" resources: diff --git a/deploy/helm/openshell/values.yaml b/deploy/helm/openshell/values.yaml index d7ff8b257..f17bff06e 100644 --- a/deploy/helm/openshell/values.yaml +++ b/deploy/helm/openshell/values.yaml @@ -195,6 +195,30 @@ server: # Linux 5.12+. When enabled, container UID 0 maps to an unprivileged host # UID and capabilities become namespaced. enableUserNamespaces: false + # Warm-pooled sandboxes (issue #1879). The gateway pre-declares operator-owned + # SandboxWarmPools and satisfies matching CreateSandbox requests by binding a + # pre-warmed pod (~0.1s) instead of cold-creating one (~4s+). Requires the + # agent-sandbox warm-pool extension CRDs installed in the cluster. + warmPool: + # -- Enable warm pooling. Off by default; only the trusted default sandbox + # image with no per-request template/env overrides is warm-pooled — any + # other request falls back to the cold create path. + enabled: false + # -- Operator-declared pools. Each maps to one (image, runtimeClass, gpu) + # shape. Example: + # pools: + # - name: default + # replicas: 3 + # - name: gpu + # replicas: 1 + # gpu: true + # runtimeClassName: nvidia + # # Optional shared, read-only data volume (datasets/models/caches). + # sharedVolume: + # claimName: models # existing PVC (ROX/RWX for multi-node) + # mountPath: /models + # subPath: "" # optional + pools: [] # -- Kubernetes AppArmor profile requested for sandbox agent containers. # Default Unconfined avoids runtime/default AppArmor blocking the supervisor's # network namespace mount setup on AppArmor-enabled nodes. Set to "" to omit diff --git a/docs/reference/gateway-config.mdx b/docs/reference/gateway-config.mdx index ff4542136..14e870f95 100644 --- a/docs/reference/gateway-config.mdx +++ b/docs/reference/gateway-config.mdx @@ -189,13 +189,50 @@ workspace_default_storage_size = "10Gi" # default_runtime_class_name = "kata-containers" # Kubelet clamps projected tokens below 600 seconds. The driver caps values at 86400. sa_token_ttl_secs = 3600 + # Optional SPIFFE Workload API socket mounted into sandbox pods for dynamic # provider token grants. Use an absolute path under a dedicated directory; # shared roots such as /run, /var, /tmp, and /etc are rejected. # Supervisor-to-gateway auth still uses gateway JWTs. provider_spiffe_workload_api_socket_path = "/spiffe-workload-api/spire-agent.sock" + +# Warm-pooled sandboxes (optional). When enabled, the gateway pre-declares +# operator-owned SandboxWarmPools and satisfies matching CreateSandbox requests +# by binding a pre-warmed pod (~0.1s) instead of cold-creating one. Requires the +# agent-sandbox warm-pool extension CRDs installed in the cluster. Only the +# trusted default image with no per-request template/env overrides is pooled; +# anything else falls back to the cold path. +[openshell.drivers.kubernetes.warm_pool] +enabled = true + +[[openshell.drivers.kubernetes.warm_pool.pools]] +name = "default" +replicas = 3 + +[[openshell.drivers.kubernetes.warm_pool.pools]] +name = "gpu" +replicas = 1 +gpu = true +runtime_class_name = "nvidia" +# Optional shared, read-only data volume (datasets / models / caches). Mounted +# read-only into every pooled pod; safe to share because it holds no per-agent +# state. The PVC must support a read-many access mode (ROX/RWX) when pods land +# on multiple nodes. +shared_volume = { claim_name = "models", mount_path = "/models" } ``` +Each pool maps to one `(image, runtimeClass, gpu)` shape. The writable per-agent +workspace (`/sandbox`) on a pooled pod is an ephemeral `emptyDir` seeded from the +image — it is never shared and is reclaimed with the pod, so claimed pods are +single-use. Large shared files belong on the read-only `shared_volume` instead. +The Helm chart exposes this as `server.warmPool.{enabled,pools}`. + +A pooled pod enforces the pool's baseline policy and cannot be assigned a +per-sandbox policy after it boots, so a `sandbox create` request that supplies a +custom policy automatically falls back to the cold create path (its policy is +applied faithfully). Requests for a non-default image, a custom template, or +extra environment variables also fall back to cold. + ### Docker Sandboxes run as containers on a local bridge network. The supervisor binary is bind-mounted from the host (no in-cluster image pull required); guest mTLS material is supplied as host paths. From 03d7fdeee999330a24939537256f0ec60686e61c Mon Sep 17 00:00:00 2001 From: Roshni Malani Date: Fri, 12 Jun 2026 18:08:24 -0700 Subject: [PATCH 10/10] test(e2e): cover warm-pool workspace isolation and cold-fallback Add a Kubernetes e2e (gated on OPENSHELL_E2E_WARM_POOL) asserting a re-claimed pod never sees the prior claim's writable workspace, the shared data volume is mounted read-only, and a custom-policy request falls back to cold (no SandboxClaim) while a default request binds one. Signed-off-by: Roshni Malani --- e2e/rust/Cargo.toml | 5 + .../tests/warm_pool_workspace_isolation.rs | 277 ++++++++++++++++++ 2 files changed, 282 insertions(+) create mode 100644 e2e/rust/tests/warm_pool_workspace_isolation.rs diff --git a/e2e/rust/Cargo.toml b/e2e/rust/Cargo.toml index 083c622df..8d4f8fd69 100644 --- a/e2e/rust/Cargo.toml +++ b/e2e/rust/Cargo.toml @@ -72,6 +72,11 @@ name = "readyz_health" path = "tests/readyz_health.rs" required-features = ["e2e-kubernetes"] +[[test]] +name = "warm_pool_workspace_isolation" +path = "tests/warm_pool_workspace_isolation.rs" +required-features = ["e2e-kubernetes"] + [[test]] name = "websocket_conformance" path = "tests/websocket_conformance.rs" diff --git a/e2e/rust/tests/warm_pool_workspace_isolation.rs b/e2e/rust/tests/warm_pool_workspace_isolation.rs new file mode 100644 index 000000000..0b7f503c2 --- /dev/null +++ b/e2e/rust/tests/warm_pool_workspace_isolation.rs @@ -0,0 +1,277 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Warm-pool workspace isolation (issue #1879, remediation #6). +//! +//! Validates the security-critical invariant that a warm-pooled sandbox's +//! writable `/sandbox` workspace is single-use: a secret written by one +//! sandbox must never be visible to a later sandbox claimed from the same +//! pool. With the ephemeral `emptyDir` workspace model the kubelet reclaims +//! `/sandbox` with the pod, so a re-claim always starts pristine. +//! +//! This test only runs when warm pooling is enabled in the deployed gateway — +//! set `OPENSHELL_E2E_WARM_POOL=1` and deploy with `server.warmPool.enabled` +//! (e.g. `OPENSHELL_E2E_KUBE_EXTRA_VALUES=deploy/helm/openshell/ci/values-warm-pool.yaml`). +//! It skips otherwise so the default Kubernetes e2e (cold path) is unaffected. + +#![cfg(feature = "e2e-kubernetes")] + +use std::io::Write as _; +use std::process::Stdio; + +use openshell_e2e::harness::binary::{openshell_cmd, openshell_tty_cmd}; +use openshell_e2e::harness::output::strip_ansi; +use openshell_e2e::harness::sandbox::SandboxGuard; +use tempfile::NamedTempFile; + +const MARKER_PATH: &str = "/sandbox/TENANT-A-SECRET"; + +/// Label the Kubernetes driver stamps onto every sandbox-bound object, carrying +/// the gateway sandbox id (`openshell.ai/sandbox-id`). The warm path also sets +/// it on the `SandboxClaim`, so it doubles as the selector that tells a bound +/// warm claim apart from a cold (claim-less) Sandbox. +const SANDBOX_ID_LABEL: &str = "openshell.ai/sandbox-id"; + +fn warm_pool_enabled() -> bool { + std::env::var("OPENSHELL_E2E_WARM_POOL") + .map(|v| v == "1" || v.eq_ignore_ascii_case("true")) + .unwrap_or(false) +} + +fn normalize(output: &str) -> String { + strip_ansi(output).replace('\r', "") +} + +/// Run `sandbox create --no-keep -- sh -c