From 6d08d7afeac7645d8fe8158449ded100fb5e016a Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 00:12:47 -0700 Subject: [PATCH 01/20] fix(onboard): skip Docker bridge probe for VM driver Signed-off-by: Aaron Erickson --- src/lib/onboard.ts | 7 +++--- .../gateway-sandbox-reachability.test.ts | 25 +++++++++++++++++++ .../onboard/gateway-sandbox-reachability.ts | 20 ++++++++++++++- 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 222dfbfb4c..ffadc0e9d6 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -4446,6 +4446,7 @@ async function startDockerDriverGateway({ ignoreError: true, }); const gatewayEnv = getDockerDriverGatewayEnv(openshellVersionOutput); + const bridgeProbeOptions = { drivers: gatewayEnv.OPENSHELL_DRIVERS }; const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true }); const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true, @@ -4461,7 +4462,7 @@ async function startDockerDriverGateway({ if (drift) { restartDockerDriverGatewayProcessForDrift(pidFileGatewayPid, drift.reason); } else if (registerDockerDriverGatewayEndpoint() && (await isDockerDriverGatewayHttpReady())) { - await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure); + await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure, bridgeProbeOptions); console.log(" ✓ Reusing existing Docker-driver gateway"); return; } else { @@ -4492,7 +4493,7 @@ async function startDockerDriverGateway({ isGatewayHealthy(adoptedStatus, adoptedGwInfo, adoptedActiveGatewayInfo) && (await isDockerDriverGatewayHttpReady()) ) { - await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure); + await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure, bridgeProbeOptions); console.log(` ✓ Reusing existing Docker-driver gateway process (PID ${portListenerPid})`); return; } @@ -4561,7 +4562,7 @@ async function startDockerDriverGateway({ isGatewayHealthy(status, namedInfo, currentInfo) && (await isGatewayTcpReady()) ) { - await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure); + await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure, bridgeProbeOptions); console.log(" ✓ Docker-driver gateway is healthy"); return; } diff --git a/src/lib/onboard/gateway-sandbox-reachability.test.ts b/src/lib/onboard/gateway-sandbox-reachability.test.ts index 261e1007a2..86be8df79a 100644 --- a/src/lib/onboard/gateway-sandbox-reachability.test.ts +++ b/src/lib/onboard/gateway-sandbox-reachability.test.ts @@ -8,6 +8,8 @@ import { describe, expect, it } from "vitest"; import { isSandboxBridgeGatewayReachable, formatSandboxBridgeUnreachableMessage, + shouldVerifySandboxBridgeGatewayReachability, + verifySandboxBridgeGatewayReachableOrExit, } from "../../../dist/lib/onboard/gateway-sandbox-reachability"; describe("isSandboxBridgeGatewayReachable", () => { @@ -67,6 +69,29 @@ describe("isSandboxBridgeGatewayReachable", () => { }); }); +describe("verifySandboxBridgeGatewayReachableOrExit", () => { + it("skips the Docker bridge probe when OpenShell is using the macOS VM driver", async () => { + let inspectCalls = 0; + await verifySandboxBridgeGatewayReachableOrExit(false, { + drivers: "vm", + inspectSubnetImpl: () => { + inspectCalls += 1; + return undefined; + }, + runImpl: () => { + throw new Error("probe should not run"); + }, + }); + expect(inspectCalls).toBe(0); + }); + + it("keeps the bridge probe enabled for Docker-driver gateways", () => { + expect(shouldVerifySandboxBridgeGatewayReachability({ drivers: "docker" })).toBe(true); + expect(shouldVerifySandboxBridgeGatewayReachability({ drivers: "vm,docker" })).toBe(true); + expect(shouldVerifySandboxBridgeGatewayReachability({ drivers: "vm" })).toBe(false); + }); +}); + describe("formatSandboxBridgeUnreachableMessage", () => { it("returns empty for an ok result", () => { expect( diff --git a/src/lib/onboard/gateway-sandbox-reachability.ts b/src/lib/onboard/gateway-sandbox-reachability.ts index 62534c9c59..b54e2f3ed7 100644 --- a/src/lib/onboard/gateway-sandbox-reachability.ts +++ b/src/lib/onboard/gateway-sandbox-reachability.ts @@ -57,6 +57,21 @@ export interface SandboxBridgeReachabilityOptions { inspectSubnetImpl?: (networkName: string) => string | undefined; } +export interface SandboxBridgeReachabilityVerifyOptions + extends SandboxBridgeReachabilityOptions { + drivers?: string; +} + +export function shouldVerifySandboxBridgeGatewayReachability( + opts: { drivers?: string } = {}, +): boolean { + const drivers = (opts.drivers ?? process.env.OPENSHELL_DRIVERS ?? "docker") + .split(/[,\s]+/) + .map((driver) => driver.trim().toLowerCase()) + .filter(Boolean); + return drivers.includes("docker"); +} + function defaultInspectSubnet(networkName: string): string | undefined { try { const out = dockerInspectFormat( @@ -186,8 +201,11 @@ export function formatSandboxBridgeUnreachableMessage( export async function verifySandboxBridgeGatewayReachableOrExit( exitOnFailure: boolean, + opts: SandboxBridgeReachabilityVerifyOptions = {}, ): Promise { - const reach = await isSandboxBridgeGatewayReachable(); + if (!shouldVerifySandboxBridgeGatewayReachability({ drivers: opts.drivers })) return; + + const reach = await isSandboxBridgeGatewayReachable(opts); if (reach.ok) return; const message = formatSandboxBridgeUnreachableMessage(reach); From c98d291f26e6cf14bd68a05ac593c701de304963 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 00:17:25 -0700 Subject: [PATCH 02/20] fix(onboard): keep bridge probe patch entrypoint-neutral Keep the macOS VM-driver bridge-probe fix under the onboard entrypoint line budget by passing the driver option inline at the existing call sites. Signed-off-by: Aaron Erickson --- src/lib/onboard.ts | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index ffadc0e9d6..5965bb80f1 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -4446,7 +4446,6 @@ async function startDockerDriverGateway({ ignoreError: true, }); const gatewayEnv = getDockerDriverGatewayEnv(openshellVersionOutput); - const bridgeProbeOptions = { drivers: gatewayEnv.OPENSHELL_DRIVERS }; const gatewayStatus = runCaptureOpenshell(["status"], { ignoreError: true }); const gwInfo = runCaptureOpenshell(["gateway", "info", "-g", GATEWAY_NAME], { ignoreError: true, @@ -4462,7 +4461,7 @@ async function startDockerDriverGateway({ if (drift) { restartDockerDriverGatewayProcessForDrift(pidFileGatewayPid, drift.reason); } else if (registerDockerDriverGatewayEndpoint() && (await isDockerDriverGatewayHttpReady())) { - await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure, bridgeProbeOptions); + await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure, { drivers: gatewayEnv.OPENSHELL_DRIVERS }); console.log(" ✓ Reusing existing Docker-driver gateway"); return; } else { @@ -4493,7 +4492,7 @@ async function startDockerDriverGateway({ isGatewayHealthy(adoptedStatus, adoptedGwInfo, adoptedActiveGatewayInfo) && (await isDockerDriverGatewayHttpReady()) ) { - await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure, bridgeProbeOptions); + await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure, { drivers: gatewayEnv.OPENSHELL_DRIVERS }); console.log(` ✓ Reusing existing Docker-driver gateway process (PID ${portListenerPid})`); return; } @@ -4562,7 +4561,7 @@ async function startDockerDriverGateway({ isGatewayHealthy(status, namedInfo, currentInfo) && (await isGatewayTcpReady()) ) { - await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure, bridgeProbeOptions); + await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure, { drivers: gatewayEnv.OPENSHELL_DRIVERS }); console.log(" ✓ Docker-driver gateway is healthy"); return; } From 26b66cd9639cfeffb541431098826f9265b1c956 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 00:32:16 -0700 Subject: [PATCH 03/20] fix(onboard): wait for VM startup output before detaching --- src/lib/onboard.ts | 9 ++++ src/lib/sandbox/create-stream.test.ts | 61 +++++++++++++++++++++++++++ src/lib/sandbox/create-stream.ts | 22 +++++++++- 3 files changed, 91 insertions(+), 1 deletion(-) diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 5965bb80f1..90cfcb2eb8 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -5918,11 +5918,20 @@ async function createSandbox( ...envArgs, "nemoclaw-start", ])} 2>&1`; + const selectedOpenShellDrivers = (process.env.OPENSHELL_DRIVERS ?? + (process.platform === "darwin" ? "vm" : "docker")) + .split(",") + .map((driver) => driver.trim()) + .filter(Boolean); + const waitForStartupOutputBeforeReadyDetach = selectedOpenShellDrivers.includes("vm"); const createResult = await streamSandboxCreate(createCommand, sandboxEnv, { readyCheck: () => { const list = runCaptureOpenshell(["sandbox", "list"], { ignoreError: true }); return isSandboxReady(list, sandboxName); }, + readyCheckOutputPatterns: waitForStartupOutputBeforeReadyDetach + ? [/Setting up NemoClaw/] + : undefined, }); if (initialSandboxPolicy.cleanup && initialSandboxPolicy.cleanup()) { diff --git a/src/lib/sandbox/create-stream.test.ts b/src/lib/sandbox/create-stream.test.ts index 9902f949c0..3fae9bec49 100644 --- a/src/lib/sandbox/create-stream.test.ts +++ b/src/lib/sandbox/create-stream.test.ts @@ -124,6 +124,67 @@ describe("sandbox-create-stream", () => { expect(child.unref).toHaveBeenCalled(); }); + it("does not detach on Ready until required startup output appears", async () => { + vi.useFakeTimers(); + + const child = new FakeChild(); + const logLine = vi.fn(); + let resolved = false; + const promise = streamSandboxCreate("echo create", process.env, { + spawnImpl: () => child, + readyCheck: () => true, + readyCheckOutputPatterns: [/Setting up NemoClaw/], + pollIntervalMs: 5, + heartbeatIntervalMs: 1_000, + silentPhaseMs: 10_000, + logLine, + }).then((result) => { + resolved = true; + return result; + }); + + child.stdout.emit("data", Buffer.from("Created sandbox: demo\n")); + await vi.advanceTimersByTimeAsync(12); + + expect(resolved).toBe(false); + expect(child.kill).not.toHaveBeenCalled(); + expect(logLine).toHaveBeenCalledWith( + " Sandbox reported Ready; waiting for startup command output before detaching.", + ); + + child.stderr.emit("data", Buffer.from("Setting up NemoClaw (Hermes)...\n")); + await vi.advanceTimersByTimeAsync(6); + + await expect(promise).resolves.toMatchObject({ + status: 0, + forcedReady: true, + output: expect.stringContaining("Setting up NemoClaw (Hermes)..."), + }); + expect(child.kill).toHaveBeenCalledWith("SIGTERM"); + }); + + it("does not recover a non-zero close before required startup output appears", async () => { + const child = new FakeChild(); + const promise = streamSandboxCreate("echo create", process.env, { + spawnImpl: () => child, + readyCheck: () => true, + readyCheckOutputPatterns: [/Setting up NemoClaw/], + pollIntervalMs: 60_000, + heartbeatIntervalMs: 1_000, + silentPhaseMs: 10_000, + logLine: vi.fn(), + }); + + child.stdout.emit("data", Buffer.from("Created sandbox: demo\n")); + child.emit("close", 255); + + await expect(promise).resolves.toMatchObject({ + status: 255, + sawProgress: true, + }); + expect((await promise).forcedReady).toBeUndefined(); + }); + it("flushes the final partial line before resolving", async () => { const child = new FakeChild(); const promise = streamSandboxCreate("echo create", process.env, { diff --git a/src/lib/sandbox/create-stream.ts b/src/lib/sandbox/create-stream.ts index 34ffd6dd25..c844799fe1 100644 --- a/src/lib/sandbox/create-stream.ts +++ b/src/lib/sandbox/create-stream.ts @@ -18,6 +18,10 @@ export interface StreamSandboxCreateOptions { heartbeatIntervalMs?: number; silentPhaseMs?: number; logLine?: (line: string) => void; + // Optional guard for the early-ready escape hatch. When set, readyCheck() + // alone cannot detach the create stream until at least one streamed output + // line matches a configured pattern. + readyCheckOutputPatterns?: readonly RegExp[]; // Initial progress phase: // build — docker-building the sandbox image // upload — pushing the built image into the gateway registry @@ -110,6 +114,9 @@ export function streamSandboxCreate( let pending = ""; let lastPrintedLine = ""; let sawProgress = false; + let readyCheckOutputMatched = + !options.readyCheckOutputPatterns || options.readyCheckOutputPatterns.length === 0; + let printedReadyCheckOutputWait = false; let settled = false; let polling = false; const pollIntervalMs = options.pollIntervalMs || 2000; @@ -172,6 +179,9 @@ export function streamSandboxCreate( if (!line) return; lines.push(line); lastOutputAt = Date.now(); + if (!readyCheckOutputMatched && matchesAny(line, options.readyCheckOutputPatterns ?? [])) { + readyCheckOutputMatched = true; + } if (matchesAny(line, BUILD_PROGRESS_PATTERNS)) { setPhase("build"); } else if (matchesAny(line, PULL_PROGRESS_PATTERNS)) { @@ -238,6 +248,16 @@ export function streamSandboxCreate( } if (!ready) return; setPhase("ready"); + if (!readyCheckOutputMatched) { + if (!printedReadyCheckOutputWait) { + const detail = + "Sandbox reported Ready; waiting for startup command output before detaching."; + lines.push(detail); + printProgressLine(` ${detail}`); + printedReadyCheckOutputWait = true; + } + return; + } const detail = "Sandbox reported Ready before create stream exited; continuing."; lines.push(detail); printProgressLine(` ${detail}`); @@ -300,7 +320,7 @@ export function streamSandboxCreate( // last poll tick and the stream exit (e.g. SSH 255 after "Created sandbox:"). if (code && code !== 0 && options.readyCheck) { try { - if (options.readyCheck()) { + if (options.readyCheck() && readyCheckOutputMatched) { finish(0, { forcedReady: true }); return; } From a458b2f9f5cc4efcfd945ab6fd4291361744f664 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 00:34:49 -0700 Subject: [PATCH 04/20] fix(onboard): keep VM startup gate out of entrypoint --- src/lib/onboard.ts | 9 -------- src/lib/sandbox/create-stream.test.ts | 17 ++++++++------- src/lib/sandbox/create-stream.ts | 30 ++++++++++++++++++++++++--- 3 files changed, 36 insertions(+), 20 deletions(-) diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 90cfcb2eb8..5965bb80f1 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -5918,20 +5918,11 @@ async function createSandbox( ...envArgs, "nemoclaw-start", ])} 2>&1`; - const selectedOpenShellDrivers = (process.env.OPENSHELL_DRIVERS ?? - (process.platform === "darwin" ? "vm" : "docker")) - .split(",") - .map((driver) => driver.trim()) - .filter(Boolean); - const waitForStartupOutputBeforeReadyDetach = selectedOpenShellDrivers.includes("vm"); const createResult = await streamSandboxCreate(createCommand, sandboxEnv, { readyCheck: () => { const list = runCaptureOpenshell(["sandbox", "list"], { ignoreError: true }); return isSandboxReady(list, sandboxName); }, - readyCheckOutputPatterns: waitForStartupOutputBeforeReadyDetach - ? [/Setting up NemoClaw/] - : undefined, }); if (initialSandboxPolicy.cleanup && initialSandboxPolicy.cleanup()) { diff --git a/src/lib/sandbox/create-stream.test.ts b/src/lib/sandbox/create-stream.test.ts index 3fae9bec49..5ba0534ed5 100644 --- a/src/lib/sandbox/create-stream.test.ts +++ b/src/lib/sandbox/create-stream.test.ts @@ -22,6 +22,9 @@ class FakeChild extends EventEmitter implements StreamableChildProcess { unref = vi.fn(); } +const dockerEnv = { ...process.env, OPENSHELL_DRIVERS: "docker" }; +const vmEnv = { ...process.env, OPENSHELL_DRIVERS: "vm" }; + describe("sandbox-create-stream", () => { afterEach(() => { vi.useRealTimers(); @@ -30,7 +33,7 @@ describe("sandbox-create-stream", () => { it("prints the initial build banner immediately", async () => { const child = new FakeChild(); const logLine = vi.fn(); - const promise = streamSandboxCreate("echo create", process.env, { + const promise = streamSandboxCreate("echo create", dockerEnv, { logLine, spawnImpl: () => child, }); @@ -43,7 +46,7 @@ describe("sandbox-create-stream", () => { it("streams visible progress lines and returns the collected output", async () => { const child = new FakeChild(); const logLine = vi.fn(); - const promise = streamSandboxCreate("echo create", process.env, { + const promise = streamSandboxCreate("echo create", dockerEnv, { logLine, spawnImpl: () => child, heartbeatIntervalMs: 1_000, @@ -99,7 +102,7 @@ describe("sandbox-create-stream", () => { const child = new FakeChild(); let checks = 0; - const promise = streamSandboxCreate("echo create", process.env, { + const promise = streamSandboxCreate("echo create", dockerEnv, { spawnImpl: () => child, readyCheck: () => { checks += 1; @@ -130,10 +133,9 @@ describe("sandbox-create-stream", () => { const child = new FakeChild(); const logLine = vi.fn(); let resolved = false; - const promise = streamSandboxCreate("echo create", process.env, { + const promise = streamSandboxCreate("echo create", vmEnv, { spawnImpl: () => child, readyCheck: () => true, - readyCheckOutputPatterns: [/Setting up NemoClaw/], pollIntervalMs: 5, heartbeatIntervalMs: 1_000, silentPhaseMs: 10_000, @@ -165,10 +167,9 @@ describe("sandbox-create-stream", () => { it("does not recover a non-zero close before required startup output appears", async () => { const child = new FakeChild(); - const promise = streamSandboxCreate("echo create", process.env, { + const promise = streamSandboxCreate("echo create", vmEnv, { spawnImpl: () => child, readyCheck: () => true, - readyCheckOutputPatterns: [/Setting up NemoClaw/], pollIntervalMs: 60_000, heartbeatIntervalMs: 1_000, silentPhaseMs: 10_000, @@ -205,7 +206,7 @@ describe("sandbox-create-stream", () => { it("recovers when sandbox is ready at the moment the stream exits non-zero", async () => { const child = new FakeChild(); const logLine = vi.fn(); - const promise = streamSandboxCreate("echo create", process.env, { + const promise = streamSandboxCreate("echo create", dockerEnv, { spawnImpl: () => child, readyCheck: () => true, // sandbox is already Ready pollIntervalMs: 60_000, // large interval so the poll doesn't fire first diff --git a/src/lib/sandbox/create-stream.ts b/src/lib/sandbox/create-stream.ts index c844799fe1..cac66cf693 100644 --- a/src/lib/sandbox/create-stream.ts +++ b/src/lib/sandbox/create-stream.ts @@ -94,10 +94,31 @@ const VISIBLE_PROGRESS_PATTERNS: readonly RegExp[] = [ /^✓ /, ]; +const VM_READY_DETACH_OUTPUT_PATTERNS: readonly RegExp[] = [/Setting up NemoClaw/]; + function matchesAny(line: string, patterns: readonly RegExp[]) { return patterns.some((pattern) => pattern.test(line)); } +function selectedDrivers(env: NodeJS.ProcessEnv): string[] { + const raw = + env.OPENSHELL_DRIVERS ?? + process.env.OPENSHELL_DRIVERS ?? + (process.platform === "darwin" ? "vm" : "docker"); + return raw + .split(",") + .map((driver) => driver.trim()) + .filter(Boolean); +} + +function getReadyCheckOutputPatterns( + env: NodeJS.ProcessEnv, + patterns: readonly RegExp[] | undefined, +): readonly RegExp[] { + if (patterns) return patterns; + return selectedDrivers(env).includes("vm") ? VM_READY_DETACH_OUTPUT_PATTERNS : []; +} + export function streamSandboxCreate( command: string, env: NodeJS.ProcessEnv = process.env, @@ -114,8 +135,11 @@ export function streamSandboxCreate( let pending = ""; let lastPrintedLine = ""; let sawProgress = false; - let readyCheckOutputMatched = - !options.readyCheckOutputPatterns || options.readyCheckOutputPatterns.length === 0; + const readyCheckOutputPatterns = getReadyCheckOutputPatterns( + env, + options.readyCheckOutputPatterns, + ); + let readyCheckOutputMatched = readyCheckOutputPatterns.length === 0; let printedReadyCheckOutputWait = false; let settled = false; let polling = false; @@ -179,7 +203,7 @@ export function streamSandboxCreate( if (!line) return; lines.push(line); lastOutputAt = Date.now(); - if (!readyCheckOutputMatched && matchesAny(line, options.readyCheckOutputPatterns ?? [])) { + if (!readyCheckOutputMatched && matchesAny(line, readyCheckOutputPatterns)) { readyCheckOutputMatched = true; } if (matchesAny(line, BUILD_PROGRESS_PATTERNS)) { From d3fbfebfe069320d74a53e09bb8fcdcd46fa7e5c Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 01:10:31 -0700 Subject: [PATCH 05/20] fix(hermes): keep macos vm startup mutable --- agents/hermes/start.sh | 7 +++++- src/lib/onboard.ts | 34 +++++++++++++++++++++++++++++ test/gateway-liveness-probe.test.ts | 6 ++--- test/onboard.test.ts | 13 +++++++++++ test/sandbox-init.test.ts | 18 +++++++++++++++ 5 files changed, 73 insertions(+), 5 deletions(-) diff --git a/agents/hermes/start.sh b/agents/hermes/start.sh index 39bf36e683..4c605d93d0 100755 --- a/agents/hermes/start.sh +++ b/agents/hermes/start.sh @@ -585,7 +585,12 @@ if [ "$(id -u)" -ne 0 ]; then export HOME=/sandbox export HERMES_HOME="${HERMES_DIR}" - if ! verify_config_integrity "${HERMES_DIR}" "${HERMES_HASH_FILE}"; then + # macOS VM startup currently runs this entrypoint as the sandbox user and + # remaps rootfs ownership to the host uid. In that mode the strict /etc hash + # cannot remain a root-owned trust anchor, so use the same locked-aware + # mutable-default verifier as OpenClaw. The root path below keeps strict + # verification against /etc/nemoclaw/hermes.config-hash. + if ! verify_config_integrity_if_locked "${HERMES_DIR}"; then echo "[SECURITY] Config integrity check failed — refusing to start (non-root mode)" >&2 exit 1 fi diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 5965bb80f1..8a5807ea7c 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -2033,6 +2033,30 @@ function getRecordedMessagingChannelsForResume( return getKnownMessagingChannels(session.messagingChannels); } +function getMessagingProviderNamesForChannel(sandboxName: string, channel: string): string[] { + if (channel === "discord") return [`${sandboxName}-discord-bridge`]; + if (channel === "telegram") return [`${sandboxName}-telegram-bridge`]; + if (channel === "slack") return [`${sandboxName}-slack-bridge`]; + return []; +} + +function getReusableStoredMessagingChannelsForNonInteractive( + sandboxName: string | null, +): string[] { + if (!sandboxName || !isNonInteractive()) return []; + + const entry = registry.getSandbox(sandboxName); + const configuredChannels = getKnownMessagingChannels(entry?.messagingChannels); + if (configuredChannels.length === 0) return []; + + const disabledChannels = new Set(registry.getDisabledChannels(sandboxName)); + return configuredChannels.filter((channel) => { + if (disabledChannels.has(channel)) return false; + const providers = getMessagingProviderNamesForChannel(sandboxName, channel); + return providers.length > 0 && providers.every((provider) => providerExistsInGateway(provider)); + }); +} + /** * Detect whether any messaging provider credential has been rotated since * the sandbox was created, by comparing SHA-256 hashes of the current @@ -10741,6 +10765,16 @@ async function onboard(opts: OnboardOptions = {}): Promise { } } else { selectedMessagingChannels = await setupMessagingChannels(); + if (selectedMessagingChannels.length === 0) { + const reusableStoredMessagingChannels = + getReusableStoredMessagingChannelsForNonInteractive(sandboxName); + if (reusableStoredMessagingChannels.length > 0) { + selectedMessagingChannels = reusableStoredMessagingChannels; + note( + ` [non-interactive] Reusing existing messaging channel configuration: ${selectedMessagingChannels.join(", ")}`, + ); + } + } } const messagingChannelConfig = readMessagingChannelConfigFromEnv(); onboardSession.updateSession((current: Session) => { diff --git a/test/gateway-liveness-probe.test.ts b/test/gateway-liveness-probe.test.ts index a77c28a176..525db9e279 100644 --- a/test/gateway-liveness-probe.test.ts +++ b/test/gateway-liveness-probe.test.ts @@ -139,9 +139,7 @@ describe("gateway liveness probe (#2020)", () => { expect(dockerStart).toBeGreaterThanOrEqual(0); expect(dockerEnd).toBeGreaterThan(dockerStart); const dockerSection = content.slice(dockerStart, dockerEnd); - const calls = dockerSection.match( - /verifySandboxBridgeGatewayReachableOrExit\(exitOnFailure\)/g, - ); + const calls = dockerSection.match(/verifySandboxBridgeGatewayReachableOrExit\(exitOnFailure/g); expect(calls?.length).toBeGreaterThanOrEqual(3); for (const marker of [ @@ -153,7 +151,7 @@ describe("gateway liveness probe (#2020)", () => { expect(markerIdx).toBeGreaterThan(0); const before = dockerSection.slice(0, markerIdx); expect( - before.lastIndexOf("verifySandboxBridgeGatewayReachableOrExit(exitOnFailure)"), + before.lastIndexOf("verifySandboxBridgeGatewayReachableOrExit(exitOnFailure"), ).toBeGreaterThan(0); } }); diff --git a/test/onboard.test.ts b/test/onboard.test.ts index c72fc52321..18c694b827 100644 --- a/test/onboard.test.ts +++ b/test/onboard.test.ts @@ -7851,6 +7851,7 @@ const { createSandbox } = require(${onboardPath}); HOME: tmpDir, PATH: `${fakeBin}:${process.env.PATH || ""}`, NEMOCLAW_NON_INTERACTIVE: "1", + OPENSHELL_DRIVERS: "docker", }, timeout: 15000, }); @@ -8545,6 +8546,18 @@ const { setupMessagingChannels } = require(${onboardPath}); }, ); + it("non-interactive onboard reuses stored messaging channels when bridge providers exist", () => { + const source = fs.readFileSync(path.join(import.meta.dirname, "../src/lib/onboard.ts"), "utf-8"); + assert.match( + source, + /function getReusableStoredMessagingChannelsForNonInteractive\([\s\S]*?registry\.getSandbox\(sandboxName\)[\s\S]*?providerExistsInGateway\(provider\)/, + ); + assert.match( + source, + /selectedMessagingChannels = await setupMessagingChannels\(\);[\s\S]*?getReusableStoredMessagingChannelsForNonInteractive\(sandboxName\)[\s\S]*?selectedMessagingChannels = reusableStoredMessagingChannels/, + ); + }); + it( "interactive setupMessagingChannels drops slack when prompted token fails tokenFormat check (#1912)", { timeout: 60_000 }, diff --git a/test/sandbox-init.test.ts b/test/sandbox-init.test.ts index 8efe0681fd..012967ed1c 100644 --- a/test/sandbox-init.test.ts +++ b/test/sandbox-init.test.ts @@ -614,6 +614,24 @@ EOF expect(src).toContain("lock_rc_files"); }); + it("hermes non-root fallback uses mutable-default config verification", () => { + const src = readFileSync(join(import.meta.dirname, "../agents/hermes/start.sh"), "utf-8"); + const nonRootStart = src.indexOf('# ── Non-root fallback'); + const rootStart = src.indexOf('# ── Root path', nonRootStart); + expect(nonRootStart).toBeGreaterThanOrEqual(0); + expect(rootStart).toBeGreaterThan(nonRootStart); + const nonRootBlock = src.slice(nonRootStart, rootStart); + const rootBlock = src.slice(rootStart); + + expect(nonRootBlock).toContain('verify_config_integrity_if_locked "${HERMES_DIR}"'); + expect(nonRootBlock).not.toContain( + 'verify_config_integrity "${HERMES_DIR}" "${HERMES_HASH_FILE}"', + ); + expect(rootBlock).toContain( + 'verify_config_integrity "${HERMES_DIR}" "${HERMES_HASH_FILE}"', + ); + }); + it("hermes start.sh rewrites configure guard rc blocks through the symlink-safe helper", () => { const src = readFileSync(join(import.meta.dirname, "../agents/hermes/start.sh"), "utf-8"); const helperFn = src.match(/rewrite_rc_marker_block\(\) \{([\s\S]*?)^}/m); From 3869623644c966ef64712df79afb818a4843ebf3 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 01:15:52 -0700 Subject: [PATCH 06/20] fix(onboard): reuse stored messaging channels --- src/lib/onboard.ts | 47 ++++---------------------- src/lib/onboard/messaging-reuse.ts | 53 ++++++++++++++++++++++++++++++ test/onboard.test.ts | 10 ++++-- 3 files changed, 66 insertions(+), 44 deletions(-) create mode 100644 src/lib/onboard/messaging-reuse.ts diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 8a5807ea7c..e933d928de 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -2025,36 +2025,11 @@ function getKnownMessagingChannels(channels: string[] | null | undefined): strin function getRecordedMessagingChannelsForResume( resume: boolean, - session: Session | null, + session: Session | null, sandboxName: string | null, ): string[] | null { - if (!resume || !isNonInteractive() || !Array.isArray(session?.messagingChannels)) { - return null; - } - return getKnownMessagingChannels(session.messagingChannels); -} - -function getMessagingProviderNamesForChannel(sandboxName: string, channel: string): string[] { - if (channel === "discord") return [`${sandboxName}-discord-bridge`]; - if (channel === "telegram") return [`${sandboxName}-telegram-bridge`]; - if (channel === "slack") return [`${sandboxName}-slack-bridge`]; - return []; -} - -function getReusableStoredMessagingChannelsForNonInteractive( - sandboxName: string | null, -): string[] { - if (!sandboxName || !isNonInteractive()) return []; - - const entry = registry.getSandbox(sandboxName); - const configuredChannels = getKnownMessagingChannels(entry?.messagingChannels); - if (configuredChannels.length === 0) return []; - - const disabledChannels = new Set(registry.getDisabledChannels(sandboxName)); - return configuredChannels.filter((channel) => { - if (disabledChannels.has(channel)) return false; - const providers = getMessagingProviderNamesForChannel(sandboxName, channel); - return providers.length > 0 && providers.every((provider) => providerExistsInGateway(provider)); - }); + return require("./onboard/messaging-reuse").getNonInteractiveStoredMessagingChannels( + resume, session?.messagingChannels, sandboxName, MESSAGING_CHANNELS, (envKey: string) => Boolean(getCredential(envKey) || normalizeCredentialValue(process.env[envKey])), + registry.getSandbox.bind(registry), registry.getDisabledChannels.bind(registry), providerExistsInGateway, isNonInteractive()); } /** @@ -10755,26 +10730,16 @@ async function onboard(opts: OnboardOptions = {}): Promise { nextWebSearchConfig = await configureWebSearch(null, agent, webSearchSupportProbePath); } startRecordedStep("sandbox", { provider, model }); - const recordedMessagingChannels = getRecordedMessagingChannelsForResume(resume, session); + const recordedMessagingChannels = getRecordedMessagingChannelsForResume(resume, session, sandboxName); if (recordedMessagingChannels) { selectedMessagingChannels = recordedMessagingChannels; if (selectedMessagingChannels.length > 0) { note( - ` [resume] Reusing messaging channel configuration: ${selectedMessagingChannels.join(", ")}`, + ` [non-interactive] Reusing messaging channel configuration: ${selectedMessagingChannels.join(", ")}`, ); } } else { selectedMessagingChannels = await setupMessagingChannels(); - if (selectedMessagingChannels.length === 0) { - const reusableStoredMessagingChannels = - getReusableStoredMessagingChannelsForNonInteractive(sandboxName); - if (reusableStoredMessagingChannels.length > 0) { - selectedMessagingChannels = reusableStoredMessagingChannels; - note( - ` [non-interactive] Reusing existing messaging channel configuration: ${selectedMessagingChannels.join(", ")}`, - ); - } - } } const messagingChannelConfig = readMessagingChannelConfigFromEnv(); onboardSession.updateSession((current: Session) => { diff --git a/src/lib/onboard/messaging-reuse.ts b/src/lib/onboard/messaging-reuse.ts new file mode 100644 index 0000000000..11a3954f5e --- /dev/null +++ b/src/lib/onboard/messaging-reuse.ts @@ -0,0 +1,53 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +type MessagingChannel = { name: string; envKey: string }; +type SandboxEntry = { messagingChannels?: string[] | null } | null | undefined; + +export function getMessagingProviderNamesForChannel(sandboxName: string, channel: string): string[] { + if (channel === "discord") return [`${sandboxName}-discord-bridge`]; + if (channel === "telegram") return [`${sandboxName}-telegram-bridge`]; + if (channel === "slack") return [`${sandboxName}-slack-bridge`]; + return []; +} + +function getKnownMessagingChannels( + channels: string[] | null | undefined, + messagingChannels: readonly MessagingChannel[], +): string[] { + if (!Array.isArray(channels)) return []; + const known = new Set(messagingChannels.map((channel) => channel.name)); + return [...new Set(channels.filter((channel) => known.has(channel)))]; +} + +export function getNonInteractiveStoredMessagingChannels( + resume: boolean, + sessionChannels: string[] | null | undefined, + sandboxName: string | null, + messagingChannels: readonly MessagingChannel[], + hasMessagingToken: (envKey: string) => boolean, + getSandbox: (sandboxName: string) => SandboxEntry, + getDisabledChannels: (sandboxName: string) => string[], + providerExists: (providerName: string) => boolean, + nonInteractive: boolean, +): string[] | null { + if (!nonInteractive) return null; + if (resume && Array.isArray(sessionChannels)) { + return getKnownMessagingChannels(sessionChannels, messagingChannels); + } + if (resume || !sandboxName || messagingChannels.some((channel) => hasMessagingToken(channel.envKey))) { + return null; + } + + const configuredChannels = getKnownMessagingChannels( + getSandbox(sandboxName)?.messagingChannels, + messagingChannels, + ); + const disabledChannels = new Set(getDisabledChannels(sandboxName)); + const reusableChannels = configuredChannels.filter((channel) => { + if (disabledChannels.has(channel)) return false; + const providers = getMessagingProviderNamesForChannel(sandboxName, channel); + return providers.length > 0 && providers.every((provider) => providerExists(provider)); + }); + return reusableChannels.length > 0 ? reusableChannels : null; +} diff --git a/test/onboard.test.ts b/test/onboard.test.ts index 18c694b827..58b12b7de3 100644 --- a/test/onboard.test.ts +++ b/test/onboard.test.ts @@ -8548,13 +8548,17 @@ const { setupMessagingChannels } = require(${onboardPath}); it("non-interactive onboard reuses stored messaging channels when bridge providers exist", () => { const source = fs.readFileSync(path.join(import.meta.dirname, "../src/lib/onboard.ts"), "utf-8"); + const reuseSource = fs.readFileSync( + path.join(import.meta.dirname, "../src/lib/onboard/messaging-reuse.ts"), + "utf-8", + ); assert.match( - source, - /function getReusableStoredMessagingChannelsForNonInteractive\([\s\S]*?registry\.getSandbox\(sandboxName\)[\s\S]*?providerExistsInGateway\(provider\)/, + reuseSource, + /function getNonInteractiveStoredMessagingChannels\([\s\S]*?getSandbox\(sandboxName\)[\s\S]*?providerExists\(provider\)/, ); assert.match( source, - /selectedMessagingChannels = await setupMessagingChannels\(\);[\s\S]*?getReusableStoredMessagingChannelsForNonInteractive\(sandboxName\)[\s\S]*?selectedMessagingChannels = reusableStoredMessagingChannels/, + /getRecordedMessagingChannelsForResume\(resume, session, sandboxName\)[\s\S]*?selectedMessagingChannels = await setupMessagingChannels\(\)/, ); }); From cef00797c2e1b57df293d51502a8abd403af9194 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 01:37:19 -0700 Subject: [PATCH 07/20] fix(connect): avoid legacy dns repair for vm sandboxes --- src/lib/actions/sandbox/connect.ts | 35 +++++++++++++++++++++++- src/lib/onboard.ts | 2 +- test/sandbox-connect-inference.test.ts | 37 ++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/src/lib/actions/sandbox/connect.ts b/src/lib/actions/sandbox/connect.ts index 04ce645a7e..fe320867d7 100644 --- a/src/lib/actions/sandbox/connect.ts +++ b/src/lib/actions/sandbox/connect.ts @@ -34,6 +34,7 @@ import { resolveOpenshell } from "../../adapters/openshell/resolve"; const agentRuntime = require("../../../../bin/lib/agent-runtime"); const NEMOCLAW_GATEWAY_NAME = "nemoclaw"; +const LEGACY_CLUSTER_DRIVERS = new Set([null, undefined, "", "kubernetes"]); export type SandboxConnectOptions = { probeOnly?: boolean; @@ -155,13 +156,45 @@ function isSandboxInferenceRouteHealthy(sandboxName: string): boolean { return probe.status === 0 && /^OK\s+[0-9]{3}\b/.test(probe.output.trim()); } +function shouldUseLegacyDnsProxyRepair(sb: SandboxEntry | null): boolean { + return LEGACY_CLUSTER_DRIVERS.has(sb?.openshellDriver); +} + +function reapplyVmInferenceRoute(sandboxName: string, sb: SandboxEntry | null): boolean { + if (!sb?.provider || !sb.model) return false; + const result = runOpenshell( + ["inference", "set", "--provider", sb.provider, "--model", sb.model], + { ignoreError: true }, + ); + return result.status === 0 && isSandboxInferenceRouteHealthy(sandboxName); +} + function repairSandboxInferenceRouteIfNeeded( sandboxName: string, + sb: SandboxEntry | null, { quiet = false }: { quiet?: boolean } = {}, ): boolean { if (process.env.NEMOCLAW_DISABLE_INFERENCE_ROUTE_REPAIR === "1") return false; if (isSandboxInferenceRouteHealthy(sandboxName)) return false; + if (!shouldUseLegacyDnsProxyRepair(sb)) { + if (!quiet) { + console.log(""); + console.log(` inference.local is unavailable inside '${sandboxName}'. Reapplying OpenShell inference route...`); + } + const healthy = reapplyVmInferenceRoute(sandboxName, sb); + if (!quiet) { + if (healthy) { + console.log(" inference.local route repaired."); + } else { + console.error( + ` Warning: inference.local is still unavailable through the OpenShell ${sb?.openshellDriver || "non-legacy"} gateway path.`, + ); + } + } + return healthy; + } + if (!quiet) { console.log(""); console.log(` inference.local is unavailable inside '${sandboxName}'. Repairing sandbox DNS proxy...`); @@ -219,7 +252,7 @@ function ensureSandboxInferenceRoute( ); } } - repairSandboxInferenceRouteIfNeeded(sandboxName, { quiet }); + repairSandboxInferenceRouteIfNeeded(sandboxName, sb, { quiet }); } } catch { /* non-fatal — don't block connect on inference route repair */ diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index e933d928de..085ffffcb0 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -6147,7 +6147,7 @@ async function createSandbox( // DNS proxy — run a forwarder in the sandbox pod so the isolated // sandbox namespace can resolve hostnames (fixes #626). - if (!isLinuxDockerDriverGatewayPlatform()) { + if (getSandboxRuntimeRegistryFields(effectiveSandboxGpuConfig).openshellDriver === "kubernetes") { console.log(" Setting up sandbox DNS proxy..."); runFile("bash", [path.join(SCRIPTS, "setup-dns-proxy.sh"), GATEWAY_NAME, sandboxName], { ignoreError: true, diff --git a/test/sandbox-connect-inference.test.ts b/test/sandbox-connect-inference.test.ts index 98e5a4a225..955dc67e39 100644 --- a/test/sandbox-connect-inference.test.ts +++ b/test/sandbox-connect-inference.test.ts @@ -21,6 +21,7 @@ type SandboxEntryFixture = { provider?: string | null; nimContainer?: string | null; gpuEnabled?: boolean; + openshellDriver?: string | null; policies?: string[]; }; @@ -357,4 +358,40 @@ describe("sandbox connect inference route swap (#1248)", () => { expect(combined).toContain("inference.local route repaired"); }, ); + + it( + "does not run legacy DNS proxy repair for VM sandboxes", + testTimeoutOptions(20_000), + () => { + const { tmpDir, stateFile, sandboxName } = setupFixture( + { + name: "vm-sandbox", + model: "nvidia/nemotron-3-super-120b-a12b", + provider: "nvidia-prod", + gpuEnabled: false, + openshellDriver: "vm", + policies: [], + }, + "nvidia-prod", + "nvidia/nemotron-3-super-120b-a12b", + { + inferenceProbeResponses: [ + 'BROKEN 503 {"error":"inference service unavailable"}', + 'BROKEN 503 {"error":"inference service unavailable"}', + ], + }, + ); + + const result = runConnect(tmpDir, sandboxName); + expect(result.status).toBe(0); + + const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); + expect(state.inferenceSetCalls.length).toBe(1); + expect(state.dockerCalls.length).toBe(0); + + const combined = (result.stdout || "") + (result.stderr || ""); + expect(combined).toContain("Reapplying OpenShell inference route"); + expect(combined).toContain("OpenShell vm gateway path"); + }, + ); }); From 834331133fe3ae0e08001dbfc6f4c1151dddef75 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 02:57:24 -0700 Subject: [PATCH 08/20] fix: monkeypatch macos vm dns for inference --- src/lib/actions/sandbox/connect.ts | 25 ++++ src/lib/actions/sandbox/vm-dns-monkeypatch.ts | 126 ++++++++++++++++++ src/lib/onboard.ts | 19 ++- test/onboard.test.ts | 2 +- test/sandbox-connect-inference.test.ts | 83 +++++++++++- test/shellquote-sandbox.test.ts | 1 + 6 files changed, 251 insertions(+), 5 deletions(-) create mode 100644 src/lib/actions/sandbox/vm-dns-monkeypatch.ts diff --git a/src/lib/actions/sandbox/connect.ts b/src/lib/actions/sandbox/connect.ts index fe320867d7..8c44be07b5 100644 --- a/src/lib/actions/sandbox/connect.ts +++ b/src/lib/actions/sandbox/connect.ts @@ -22,6 +22,10 @@ import type { SandboxEntry } from "../../state/registry"; import { ROOT } from "../../runner"; import { runSetupDnsProxy } from "../dns"; import { ensureLiveSandboxOrExit } from "./gateway-state"; +import { + applyOpenShellVmDnsMonkeypatch, + shouldApplyVmDnsMonkeypatch, +} from "./vm-dns-monkeypatch"; import { createSystemDeps as createSessionDeps, getActiveSandboxSessions, @@ -178,6 +182,27 @@ function repairSandboxInferenceRouteIfNeeded( if (isSandboxInferenceRouteHealthy(sandboxName)) return false; if (!shouldUseLegacyDnsProxyRepair(sb)) { + if (shouldApplyVmDnsMonkeypatch(sb)) { + if (!quiet) { + console.log(""); + console.log( + ` inference.local is unavailable inside '${sandboxName}'. Applying OpenShell VM DNS monkeypatch...`, + ); + } + const patch = applyOpenShellVmDnsMonkeypatch(sandboxName, sb); + if (patch.ok && isSandboxInferenceRouteHealthy(sandboxName)) { + if (!quiet) { + console.log(" inference.local route repaired."); + } + return true; + } + if (!quiet && !patch.ok && patch.reason) { + console.error( + ` Warning: OpenShell VM DNS monkeypatch did not apply: ${patch.reason}`, + ); + } + } + if (!quiet) { console.log(""); console.log(` inference.local is unavailable inside '${sandboxName}'. Reapplying OpenShell inference route...`); diff --git a/src/lib/actions/sandbox/vm-dns-monkeypatch.ts b/src/lib/actions/sandbox/vm-dns-monkeypatch.ts new file mode 100644 index 0000000000..56a78055d5 --- /dev/null +++ b/src/lib/actions/sandbox/vm-dns-monkeypatch.ts @@ -0,0 +1,126 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { + type CaptureOpenshellResult, + stripAnsi, +} from "../../adapters/openshell/client"; +import { captureOpenshell } from "../../adapters/openshell/runtime"; +import type { SandboxEntry } from "../../state/registry"; + +const GVPROXY_DNS = "192.168.127.1"; +const LEGACY_PUBLIC_DNS_BLOCK = ` if [ ! -s /etc/resolv.conf ]; then + echo "nameserver 8.8.8.8" > /etc/resolv.conf + echo "nameserver 8.8.4.4" >> /etc/resolv.conf + fi`; +const GVPROXY_DNS_BLOCK = ` echo "nameserver \${GVPROXY_GATEWAY_IP}" > /etc/resolv.conf`; + +type CaptureFn = ( + args: string[], + opts: { ignoreError?: boolean; timeout?: number }, +) => CaptureOpenshellResult; + +export type VmDnsMonkeypatchResult = { + attempted: boolean; + changed: boolean; + ok: boolean; + reason?: string; + rootfs?: string; +}; + +export function shouldApplyVmDnsMonkeypatch( + entry: Pick | null | undefined, + platform: NodeJS.Platform = process.platform, + env: NodeJS.ProcessEnv = process.env, +): boolean { + if (env.NEMOCLAW_DISABLE_VM_DNS_MONKEYPATCH === "1") return false; + if (entry?.openshellDriver !== "vm") return false; + return platform === "darwin" || env.NEMOCLAW_FORCE_VM_DNS_MONKEYPATCH === "1"; +} + +function dockerDriverGatewayStateDir(env: NodeJS.ProcessEnv, homeDir: string): string { + const configured = env.NEMOCLAW_OPENSHELL_GATEWAY_STATE_DIR; + if (configured && configured.trim()) return path.resolve(configured.trim()); + return path.join(homeDir, ".local", "state", "nemoclaw", "openshell-docker-gateway"); +} + +export function parseSandboxIdFromGetOutput(output: string): string | null { + const match = stripAnsi(output).match(/^\s*(?:Id|ID):\s*([A-Za-z0-9._-]+)\s*$/m); + return match?.[1] ?? null; +} + +function patchGuestInit(initPath: string): boolean { + if (!fs.existsSync(initPath)) return false; + const original = fs.readFileSync(initPath, "utf-8"); + if (original.includes('nameserver ${GVPROXY_GATEWAY_IP}')) return false; + const patched = original.replace(LEGACY_PUBLIC_DNS_BLOCK, GVPROXY_DNS_BLOCK); + if (patched === original) return false; + fs.writeFileSync(initPath, patched); + return true; +} + +export function applyOpenShellVmDnsMonkeypatch( + sandboxName: string, + entry: Pick | null | undefined, + deps: { + capture?: CaptureFn; + env?: NodeJS.ProcessEnv; + homeDir?: string; + platform?: NodeJS.Platform; + stateDir?: string; + } = {}, +): VmDnsMonkeypatchResult { + const env = deps.env ?? process.env; + if (!shouldApplyVmDnsMonkeypatch(entry, deps.platform ?? process.platform, env)) { + return { + attempted: false, + changed: false, + ok: false, + reason: "not a macOS OpenShell VM sandbox", + }; + } + + const capture = deps.capture ?? captureOpenshell; + const get = capture(["sandbox", "get", sandboxName], { + ignoreError: true, + timeout: 10_000, + }); + const sandboxId = parseSandboxIdFromGetOutput(get.output || ""); + if (!sandboxId) { + return { + attempted: true, + changed: false, + ok: false, + reason: "could not resolve OpenShell sandbox id", + }; + } + + const stateDir = + deps.stateDir ?? dockerDriverGatewayStateDir(env, deps.homeDir ?? os.homedir()); + const rootfs = path.join(stateDir, "vm-driver", "sandboxes", sandboxId, "rootfs"); + const resolvConf = path.join(rootfs, "etc", "resolv.conf"); + if (!fs.existsSync(rootfs)) { + return { + attempted: true, + changed: false, + ok: false, + reason: `VM rootfs not found: ${rootfs}`, + }; + } + + fs.mkdirSync(path.dirname(resolvConf), { recursive: true }); + const desired = `nameserver ${GVPROXY_DNS}\n`; + const current = fs.existsSync(resolvConf) ? fs.readFileSync(resolvConf, "utf-8") : ""; + let changed = current !== desired; + if (changed) { + fs.writeFileSync(resolvConf, desired); + } + changed = + patchGuestInit(path.join(rootfs, "srv", "openshell-vm-sandbox-init.sh")) || changed; + + return { attempted: true, changed, ok: true, rootfs }; +} diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 085ffffcb0..b7f573d964 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -61,6 +61,9 @@ const { const { getSelectionDrift, }: typeof import("./onboard/selection-drift") = require("./onboard/selection-drift"); +const { + applyOpenShellVmDnsMonkeypatch, +}: typeof import("./actions/sandbox/vm-dns-monkeypatch") = require("./actions/sandbox/vm-dns-monkeypatch"); const crypto = require("node:crypto"); const fs = require("fs"); const os = require("os"); @@ -6104,11 +6107,12 @@ async function createSandbox( ? builtImageMatch[1] : `openshell/sandbox-from:${buildId}`; + const sandboxRuntimeFields = getSandboxRuntimeRegistryFields(effectiveSandboxGpuConfig); registry.registerSandbox({ name: sandboxName, model: model || null, provider: provider || null, - ...getSandboxRuntimeRegistryFields(effectiveSandboxGpuConfig), + ...sandboxRuntimeFields, ...getSandboxAgentRegistryFields(agent, !fromDockerfile), imageTag: resolvedImageTag, providerCredentialHashes: @@ -6147,13 +6151,24 @@ async function createSandbox( // DNS proxy — run a forwarder in the sandbox pod so the isolated // sandbox namespace can resolve hostnames (fixes #626). - if (getSandboxRuntimeRegistryFields(effectiveSandboxGpuConfig).openshellDriver === "kubernetes") { + if (sandboxRuntimeFields.openshellDriver === "kubernetes") { console.log(" Setting up sandbox DNS proxy..."); runFile("bash", [path.join(SCRIPTS, "setup-dns-proxy.sh"), GATEWAY_NAME, sandboxName], { ignoreError: true, }); } + const vmDnsPatch = applyOpenShellVmDnsMonkeypatch(sandboxName, { + openshellDriver: sandboxRuntimeFields.openshellDriver, + }); + if (vmDnsPatch.ok && vmDnsPatch.changed) { + console.log(" ✓ Applied OpenShell VM DNS monkeypatch"); + } else if (vmDnsPatch.attempted && !vmDnsPatch.ok && vmDnsPatch.reason) { + console.error( + ` Warning: OpenShell VM DNS monkeypatch did not apply: ${vmDnsPatch.reason}`, + ); + } + // Check that messaging providers exist in the gateway (sandbox attachment // cannot be verified via CLI yet — only gateway-level existence is checked). for (const p of messagingProviders) { diff --git a/test/onboard.test.ts b/test/onboard.test.ts index 58b12b7de3..5caf983c10 100644 --- a/test/onboard.test.ts +++ b/test/onboard.test.ts @@ -5042,7 +5042,7 @@ ${webSearchVerifySource}`; source, // #2753: sandboxName is intentionally absent from the options here so // the session does not record a name before createSandbox completes. - /startRecordedStep\("sandbox", \{ provider, model \}\);\s*const recordedMessagingChannels = getRecordedMessagingChannelsForResume\(resume, session\);[\s\S]*?selectedMessagingChannels = recordedMessagingChannels;[\s\S]*?selectedMessagingChannels = await setupMessagingChannels\(\);[\s\S]*?const messagingChannelConfig = readMessagingChannelConfigFromEnv\(\);[\s\S]*?onboardSession\.updateSession\(\(current[^)]*\) => \{\s*current\.messagingChannels = selectedMessagingChannels;\s*current\.messagingChannelConfig = messagingChannelConfig;\s*return current;\s*\}\);[\s\S]*?sandboxName = await createSandbox\(\s*gpu,\s*model,\s*provider,\s*preferredInferenceApi,\s*sandboxName,\s*nextWebSearchConfig,\s*selectedMessagingChannels,\s*fromDockerfile,\s*agent,\s*opts\.controlUiPort \|\| null,\s*sandboxGpuConfig,\s*\);/, + /startRecordedStep\("sandbox", \{ provider, model \}\);\s*const recordedMessagingChannels = getRecordedMessagingChannelsForResume\(resume, session(?:, sandboxName)?\);[\s\S]*?selectedMessagingChannels = recordedMessagingChannels;[\s\S]*?selectedMessagingChannels = await setupMessagingChannels\(\);[\s\S]*?const messagingChannelConfig = readMessagingChannelConfigFromEnv\(\);[\s\S]*?onboardSession\.updateSession\(\(current[^)]*\) => \{\s*current\.messagingChannels = selectedMessagingChannels;\s*current\.messagingChannelConfig = messagingChannelConfig;\s*return current;\s*\}\);[\s\S]*?sandboxName = await createSandbox\(\s*gpu,\s*model,\s*provider,\s*preferredInferenceApi,\s*sandboxName,\s*nextWebSearchConfig,\s*selectedMessagingChannels,\s*fromDockerfile,\s*agent,\s*opts\.controlUiPort \|\| null,\s*sandboxGpuConfig,\s*\);/, ); }); diff --git a/test/sandbox-connect-inference.test.ts b/test/sandbox-connect-inference.test.ts index 955dc67e39..bea7fcec7c 100644 --- a/test/sandbox-connect-inference.test.ts +++ b/test/sandbox-connect-inference.test.ts @@ -93,7 +93,7 @@ if (args[0] === "gateway" && args[1] === "info") { } if (args[0] === "sandbox" && args[1] === "get" && args[2] === ${JSON.stringify(sandboxName)}) { - process.stdout.write("Sandbox:\\n\\n Id: abc\\n Name: ${sandboxName}\\n Phase: Ready\\n"); + process.stdout.write("Sandbox:\\n\\n \\x1b[2mId:\\x1b[0m abc\\n Name: ${sandboxName}\\n Phase: Ready\\n"); process.exit(0); } @@ -214,7 +214,40 @@ process.exit(0); return { tmpDir, stateFile, sandboxName }; } -function runConnect(tmpDir: string, sandboxName: string) { +function createVmRootfs(tmpDir: string, sandboxId = "abc") { + const rootfs = path.join( + tmpDir, + ".local", + "state", + "nemoclaw", + "openshell-docker-gateway", + "vm-driver", + "sandboxes", + sandboxId, + "rootfs", + ); + fs.mkdirSync(path.join(rootfs, "etc"), { recursive: true }); + fs.mkdirSync(path.join(rootfs, "srv"), { recursive: true }); + fs.writeFileSync( + path.join(rootfs, "etc", "resolv.conf"), + "nameserver 8.8.8.8\nnameserver 8.8.4.4\n", + ); + fs.writeFileSync( + path.join(rootfs, "srv", "openshell-vm-sandbox-init.sh"), + [ + "elif ip link show eth0 >/dev/null 2>&1; then", + " if [ ! -s /etc/resolv.conf ]; then", + ' echo "nameserver 8.8.8.8" > /etc/resolv.conf', + ' echo "nameserver 8.8.4.4" >> /etc/resolv.conf', + " fi", + "fi", + "", + ].join("\n"), + ); + return rootfs; +} + +function runConnect(tmpDir: string, sandboxName: string, extraEnv: NodeJS.ProcessEnv = {}) { const repoRoot = path.join(import.meta.dirname, ".."); return spawnSync( process.execPath, @@ -227,6 +260,7 @@ function runConnect(tmpDir: string, sandboxName: string) { HOME: tmpDir, PATH: `${path.join(tmpDir, ".local", "bin")}:/usr/bin:/bin`, NEMOCLAW_NO_CONNECT_HINT: "1", + ...extraEnv, }, timeout: execTimeout(15_000), }, @@ -394,4 +428,49 @@ describe("sandbox connect inference route swap (#1248)", () => { expect(combined).toContain("OpenShell vm gateway path"); }, ); + + it( + "applies the macOS VM DNS monkeypatch before falling back to route reapply", + testTimeoutOptions(20_000), + () => { + const { tmpDir, stateFile, sandboxName } = setupFixture( + { + name: "vm-dns-sandbox", + model: "nvidia/nemotron-3-super-120b-a12b", + provider: "nvidia-prod", + gpuEnabled: false, + openshellDriver: "vm", + policies: [], + }, + "nvidia-prod", + "nvidia/nemotron-3-super-120b-a12b", + { + inferenceProbeResponses: [ + 'BROKEN 503 {"error":"inference service unavailable"}', + "OK 200", + ], + }, + ); + const rootfs = createVmRootfs(tmpDir); + + const result = runConnect(tmpDir, sandboxName, { + NEMOCLAW_FORCE_VM_DNS_MONKEYPATCH: "1", + }); + expect(result.status).toBe(0); + + const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); + expect(state.inferenceSetCalls.length).toBe(0); + expect(state.dockerCalls.length).toBe(0); + expect(fs.readFileSync(path.join(rootfs, "etc", "resolv.conf"), "utf-8")).toBe( + "nameserver 192.168.127.1\n", + ); + expect( + fs.readFileSync(path.join(rootfs, "srv", "openshell-vm-sandbox-init.sh"), "utf-8"), + ).toContain('nameserver ${GVPROXY_GATEWAY_IP}'); + + const combined = (result.stdout || "") + (result.stderr || ""); + expect(combined).toContain("Applying OpenShell VM DNS monkeypatch"); + expect(combined).toContain("inference.local route repaired"); + }, + ); }); diff --git a/test/shellquote-sandbox.test.ts b/test/shellquote-sandbox.test.ts index f74dea52ff..de6eb34722 100644 --- a/test/shellquote-sandbox.test.ts +++ b/test/shellquote-sandbox.test.ts @@ -105,6 +105,7 @@ try { process.env.NEMOCLAW_NON_INTERACTIVE = "1"; process.env.NEMOCLAW_HEALTH_POLL_COUNT = "1"; Object.defineProperty(process, "platform", { value: "darwin" }); + Object.defineProperty(process, "arch", { value: "x64" }); const sandboxName = await createSandbox(null, "gpt-5.4", "nvidia-prod", null, "my-assistant"); console.log(JSON.stringify({ sandboxName, commands })); } catch (error) { From 696be2af5eb0fc2ceea5b245a58af93689ce3f74 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 03:34:27 -0700 Subject: [PATCH 09/20] fix: allow discord guild users for hermes --- agents/hermes/config/messaging-config.ts | 2 ++ test/generate-hermes-config.test.ts | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/agents/hermes/config/messaging-config.ts b/agents/hermes/config/messaging-config.ts index 3e28556b7a..a7f4c1cd6d 100644 --- a/agents/hermes/config/messaging-config.ts +++ b/agents/hermes/config/messaging-config.ts @@ -32,6 +32,8 @@ export function buildMessagingEnvLines( const discordAllowedUsers = collectDiscordAllowedUsers(allowedIds, discordGuilds); if (discordAllowedUsers.length > 0) { envLines.push(`DISCORD_ALLOWED_USERS=${discordAllowedUsers.join(",")}`); + } else if (enabledChannels.has("discord") && Object.keys(discordGuilds).length > 0) { + envLines.push("DISCORD_ALLOW_ALL_USERS=true"); } if (allowedIds.telegram?.length) { envLines.push(`TELEGRAM_ALLOWED_USERS=${allowedIds.telegram.map(String).join(",")}`); diff --git a/test/generate-hermes-config.test.ts b/test/generate-hermes-config.test.ts index af032383ed..8377b8681c 100644 --- a/test/generate-hermes-config.test.ts +++ b/test/generate-hermes-config.test.ts @@ -162,6 +162,20 @@ describe("agents/hermes/generate-config.ts", () => { expect(config.discord.require_mention).toBe(false); }); + it("allows Discord server members when no explicit user allowlist is configured", () => { + const { envFile } = runConfigScript({ + NEMOCLAW_MESSAGING_CHANNELS_B64: encodeJson(["discord"]), + NEMOCLAW_DISCORD_GUILDS_B64: encodeJson({ + "1491590992753590594": { + requireMention: false, + }, + }), + }); + + expect(envFile).toContain("DISCORD_ALLOW_ALL_USERS=true\n"); + expect(envFile).not.toContain("DISCORD_ALLOWED_USERS="); + }); + it("does not emit generic platforms blocks for Telegram or Slack messaging tokens", () => { const { config, envFile } = runConfigScript({ NEMOCLAW_MESSAGING_CHANNELS_B64: encodeJson(["telegram", "slack"]), From db6b0d454898a885824faf5216fbfcc9bb522de4 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 03:40:26 -0700 Subject: [PATCH 10/20] refactor: keep onboard entrypoint net neutral --- src/lib/onboard.ts | 16 +--------------- src/lib/onboard/vm-dns-monkeypatch.ts | 18 ++++++++++++++++++ 2 files changed, 19 insertions(+), 15 deletions(-) create mode 100644 src/lib/onboard/vm-dns-monkeypatch.ts diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index b7f573d964..2344f9e5cc 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -61,9 +61,6 @@ const { const { getSelectionDrift, }: typeof import("./onboard/selection-drift") = require("./onboard/selection-drift"); -const { - applyOpenShellVmDnsMonkeypatch, -}: typeof import("./actions/sandbox/vm-dns-monkeypatch") = require("./actions/sandbox/vm-dns-monkeypatch"); const crypto = require("node:crypto"); const fs = require("fs"); const os = require("os"); @@ -6149,8 +6146,6 @@ async function createSandbox( } } - // DNS proxy — run a forwarder in the sandbox pod so the isolated - // sandbox namespace can resolve hostnames (fixes #626). if (sandboxRuntimeFields.openshellDriver === "kubernetes") { console.log(" Setting up sandbox DNS proxy..."); runFile("bash", [path.join(SCRIPTS, "setup-dns-proxy.sh"), GATEWAY_NAME, sandboxName], { @@ -6158,16 +6153,7 @@ async function createSandbox( }); } - const vmDnsPatch = applyOpenShellVmDnsMonkeypatch(sandboxName, { - openshellDriver: sandboxRuntimeFields.openshellDriver, - }); - if (vmDnsPatch.ok && vmDnsPatch.changed) { - console.log(" ✓ Applied OpenShell VM DNS monkeypatch"); - } else if (vmDnsPatch.attempted && !vmDnsPatch.ok && vmDnsPatch.reason) { - console.error( - ` Warning: OpenShell VM DNS monkeypatch did not apply: ${vmDnsPatch.reason}`, - ); - } + require("./onboard/vm-dns-monkeypatch").applyOnboardVmDnsMonkeypatch(sandboxName, sandboxRuntimeFields); // Check that messaging providers exist in the gateway (sandbox attachment // cannot be verified via CLI yet — only gateway-level existence is checked). diff --git a/src/lib/onboard/vm-dns-monkeypatch.ts b/src/lib/onboard/vm-dns-monkeypatch.ts new file mode 100644 index 0000000000..be4fae375a --- /dev/null +++ b/src/lib/onboard/vm-dns-monkeypatch.ts @@ -0,0 +1,18 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { applyOpenShellVmDnsMonkeypatch } from "../actions/sandbox/vm-dns-monkeypatch"; + +export function applyOnboardVmDnsMonkeypatch( + sandboxName: string, + runtime: { openshellDriver?: string | null }, +): void { + const vmDnsPatch = applyOpenShellVmDnsMonkeypatch(sandboxName, { + openshellDriver: runtime.openshellDriver, + }); + if (vmDnsPatch.ok && vmDnsPatch.changed) { + console.log(" ✓ Applied OpenShell VM DNS monkeypatch"); + } else if (vmDnsPatch.attempted && !vmDnsPatch.ok && vmDnsPatch.reason) { + console.error(` Warning: OpenShell VM DNS monkeypatch did not apply: ${vmDnsPatch.reason}`); + } +} From 11cc7f315ce339c771a04a656ce4d4cbe3480b4c Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 04:01:45 -0700 Subject: [PATCH 11/20] fix: allow discord regional websocket gateways --- agents/hermes/policy-additions.yaml | 8 ++++++++ agents/hermes/policy-permissive.yaml | 8 ++++++++ agents/openclaw/policy-permissive.yaml | 15 +++++++++++++-- .../policies/openclaw-sandbox-permissive.yaml | 8 ++++++++ nemoclaw-blueprint/policies/presets/discord.yaml | 8 ++++++++ test/policies.test.ts | 6 ++++++ test/validate-blueprint.test.ts | 8 ++++++++ 7 files changed, 59 insertions(+), 2 deletions(-) diff --git a/agents/hermes/policy-additions.yaml b/agents/hermes/policy-additions.yaml index 82c98dd093..97cc72d928 100644 --- a/agents/hermes/policy-additions.yaml +++ b/agents/hermes/policy-additions.yaml @@ -229,6 +229,14 @@ network_policies: rules: - allow: { method: GET, path: "/**" } - allow: { method: WEBSOCKET_TEXT, path: "/**" } + - host: "*.discord.gg" + port: 443 + protocol: websocket + enforcement: enforce + websocket_credential_rewrite: true + rules: + - allow: { method: GET, path: "/**" } + - allow: { method: WEBSOCKET_TEXT, path: "/**" } - host: cdn.discordapp.com port: 443 protocol: rest diff --git a/agents/hermes/policy-permissive.yaml b/agents/hermes/policy-permissive.yaml index 0134c667bd..1106e4fff2 100644 --- a/agents/hermes/policy-permissive.yaml +++ b/agents/hermes/policy-permissive.yaml @@ -194,6 +194,14 @@ network_policies: rules: - allow: { method: GET, path: "/**" } - allow: { method: WEBSOCKET_TEXT, path: "/**" } + - host: "*.discord.gg" + port: 443 + protocol: websocket + enforcement: enforce + websocket_credential_rewrite: true + rules: + - allow: { method: GET, path: "/**" } + - allow: { method: WEBSOCKET_TEXT, path: "/**" } - host: cdn.discordapp.com port: 443 protocol: rest diff --git a/agents/openclaw/policy-permissive.yaml b/agents/openclaw/policy-permissive.yaml index 142a1139dd..eb8c58fd8e 100644 --- a/agents/openclaw/policy-permissive.yaml +++ b/agents/openclaw/policy-permissive.yaml @@ -160,9 +160,20 @@ network_policies: access: full - host: gateway.discord.gg port: 443 - protocol: rest + protocol: websocket enforcement: enforce - access: full + websocket_credential_rewrite: true + rules: + - allow: { method: GET, path: "/**" } + - allow: { method: WEBSOCKET_TEXT, path: "/**" } + - host: "*.discord.gg" + port: 443 + protocol: websocket + enforcement: enforce + websocket_credential_rewrite: true + rules: + - allow: { method: GET, path: "/**" } + - allow: { method: WEBSOCKET_TEXT, path: "/**" } - host: cdn.discordapp.com port: 443 protocol: rest diff --git a/nemoclaw-blueprint/policies/openclaw-sandbox-permissive.yaml b/nemoclaw-blueprint/policies/openclaw-sandbox-permissive.yaml index 817c50661b..0369c48280 100644 --- a/nemoclaw-blueprint/policies/openclaw-sandbox-permissive.yaml +++ b/nemoclaw-blueprint/policies/openclaw-sandbox-permissive.yaml @@ -195,6 +195,14 @@ network_policies: rules: - allow: { method: GET, path: "/**" } - allow: { method: WEBSOCKET_TEXT, path: "/**" } + - host: "*.discord.gg" + port: 443 + protocol: websocket + enforcement: enforce + websocket_credential_rewrite: true + rules: + - allow: { method: GET, path: "/**" } + - allow: { method: WEBSOCKET_TEXT, path: "/**" } - host: cdn.discordapp.com port: 443 protocol: rest diff --git a/nemoclaw-blueprint/policies/presets/discord.yaml b/nemoclaw-blueprint/policies/presets/discord.yaml index af8cad661c..775e580524 100644 --- a/nemoclaw-blueprint/policies/presets/discord.yaml +++ b/nemoclaw-blueprint/policies/presets/discord.yaml @@ -34,6 +34,14 @@ network_policies: rules: - allow: { method: GET, path: "/**" } - allow: { method: WEBSOCKET_TEXT, path: "/**" } + - host: "*.discord.gg" + port: 443 + protocol: websocket + enforcement: enforce + websocket_credential_rewrite: true + rules: + - allow: { method: GET, path: "/**" } + - allow: { method: WEBSOCKET_TEXT, path: "/**" } - host: cdn.discordapp.com port: 443 protocol: rest diff --git a/test/policies.test.ts b/test/policies.test.ts index 7a136f6ced..539f46e346 100644 --- a/test/policies.test.ts +++ b/test/policies.test.ts @@ -756,6 +756,11 @@ describe("policies", () => { host: "gateway.discord.gg", credentialRewrite: true, }, + { + preset: "discord", + host: "*.discord.gg", + credentialRewrite: true, + }, { preset: "slack", host: "wss-primary.slack.com", @@ -853,6 +858,7 @@ describe("policies", () => { ]; const cases = [ "gateway.discord.gg", + "*.discord.gg", "wss-primary.slack.com", "wss-backup.slack.com", ]; diff --git a/test/validate-blueprint.test.ts b/test/validate-blueprint.test.ts index b79f677537..398e95518a 100644 --- a/test/validate-blueprint.test.ts +++ b/test/validate-blueprint.test.ts @@ -344,6 +344,7 @@ describe("base sandbox policy", () => { (h) => h === "discord.com" || h === "gateway.discord.gg" || + h === "*.discord.gg" || h === "cdn.discordapp.com" || h === "media.discordapp.net", ); @@ -522,6 +523,13 @@ describe("messaging WebSocket presets", () => { credentialRewrite: true, data: loadYaml(DISCORD_PRESET_PATH), }, + { + name: "discord", + policyKey: "discord", + host: "*.discord.gg", + credentialRewrite: true, + data: loadYaml(DISCORD_PRESET_PATH), + }, { name: "slack", policyKey: "slack", From 0f54432db3779761458f02891f7228f5a5caf654 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 04:18:07 -0700 Subject: [PATCH 12/20] fix: address messaging reuse review feedback --- src/lib/actions/sandbox/vm-dns-monkeypatch.ts | 15 ++++-- src/lib/onboard/messaging-reuse.test.ts | 54 +++++++++++++++++++ src/lib/onboard/messaging-reuse.ts | 2 +- 3 files changed, 67 insertions(+), 4 deletions(-) create mode 100644 src/lib/onboard/messaging-reuse.test.ts diff --git a/src/lib/actions/sandbox/vm-dns-monkeypatch.ts b/src/lib/actions/sandbox/vm-dns-monkeypatch.ts index 56a78055d5..4008b92b49 100644 --- a/src/lib/actions/sandbox/vm-dns-monkeypatch.ts +++ b/src/lib/actions/sandbox/vm-dns-monkeypatch.ts @@ -53,9 +53,18 @@ export function parseSandboxIdFromGetOutput(output: string): string | null { return match?.[1] ?? null; } +function readTextFileIfPresent(filePath: string): string | null { + try { + return fs.readFileSync(filePath, "utf-8"); + } catch (error) { + if ((error as NodeJS.ErrnoException).code === "ENOENT") return null; + throw error; + } +} + function patchGuestInit(initPath: string): boolean { - if (!fs.existsSync(initPath)) return false; - const original = fs.readFileSync(initPath, "utf-8"); + const original = readTextFileIfPresent(initPath); + if (original === null) return false; if (original.includes('nameserver ${GVPROXY_GATEWAY_IP}')) return false; const patched = original.replace(LEGACY_PUBLIC_DNS_BLOCK, GVPROXY_DNS_BLOCK); if (patched === original) return false; @@ -114,7 +123,7 @@ export function applyOpenShellVmDnsMonkeypatch( fs.mkdirSync(path.dirname(resolvConf), { recursive: true }); const desired = `nameserver ${GVPROXY_DNS}\n`; - const current = fs.existsSync(resolvConf) ? fs.readFileSync(resolvConf, "utf-8") : ""; + const current = readTextFileIfPresent(resolvConf) ?? ""; let changed = current !== desired; if (changed) { fs.writeFileSync(resolvConf, desired); diff --git a/src/lib/onboard/messaging-reuse.test.ts b/src/lib/onboard/messaging-reuse.test.ts new file mode 100644 index 0000000000..e2ea2f6935 --- /dev/null +++ b/src/lib/onboard/messaging-reuse.test.ts @@ -0,0 +1,54 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import { describe, expect, it } from "vitest"; + +import { + getMessagingProviderNamesForChannel, + getNonInteractiveStoredMessagingChannels, +} from "./messaging-reuse"; + +const messagingChannels = [ + { name: "discord", envKey: "DISCORD_BOT_TOKEN" }, + { name: "slack", envKey: "SLACK_BOT_TOKEN" }, +]; + +describe("onboard messaging reuse", () => { + it("requires both Slack providers before reusing a stored Slack channel", () => { + expect(getMessagingProviderNamesForChannel("assistant", "slack")).toEqual([ + "assistant-slack-bridge", + "assistant-slack-app", + ]); + + const reusedChannels = getNonInteractiveStoredMessagingChannels( + false, + null, + "assistant", + messagingChannels, + () => false, + () => ({ messagingChannels: ["slack"] }), + () => [], + (provider) => provider === "assistant-slack-bridge", + true, + ); + + expect(reusedChannels).toBeNull(); + }); + + it("reuses stored Slack channels when both Slack providers exist", () => { + const reusedChannels = getNonInteractiveStoredMessagingChannels( + false, + null, + "assistant", + messagingChannels, + () => false, + () => ({ messagingChannels: ["slack"] }), + () => [], + (provider) => + provider === "assistant-slack-bridge" || provider === "assistant-slack-app", + true, + ); + + expect(reusedChannels).toEqual(["slack"]); + }); +}); diff --git a/src/lib/onboard/messaging-reuse.ts b/src/lib/onboard/messaging-reuse.ts index 11a3954f5e..6c3eb740c2 100644 --- a/src/lib/onboard/messaging-reuse.ts +++ b/src/lib/onboard/messaging-reuse.ts @@ -7,7 +7,7 @@ type SandboxEntry = { messagingChannels?: string[] | null } | null | undefined; export function getMessagingProviderNamesForChannel(sandboxName: string, channel: string): string[] { if (channel === "discord") return [`${sandboxName}-discord-bridge`]; if (channel === "telegram") return [`${sandboxName}-telegram-bridge`]; - if (channel === "slack") return [`${sandboxName}-slack-bridge`]; + if (channel === "slack") return [`${sandboxName}-slack-bridge`, `${sandboxName}-slack-app`]; return []; } From 2b290c02532dec02ad20b0d598fa198e82e60bea Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 09:01:08 -0700 Subject: [PATCH 13/20] fix: flush sandbox create tail before ready recovery --- src/lib/sandbox/create-stream.test.ts | 21 +++++++++++++++++++++ src/lib/sandbox/create-stream.ts | 10 +++++++++- 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/lib/sandbox/create-stream.test.ts b/src/lib/sandbox/create-stream.test.ts index 5ba0534ed5..d670d82a9f 100644 --- a/src/lib/sandbox/create-stream.test.ts +++ b/src/lib/sandbox/create-stream.test.ts @@ -226,6 +226,27 @@ describe("sandbox-create-stream", () => { }); }); + it("recovers when required startup output is the final partial line", async () => { + const child = new FakeChild(); + const promise = streamSandboxCreate("echo create", vmEnv, { + spawnImpl: () => child, + readyCheck: () => true, + pollIntervalMs: 60_000, + heartbeatIntervalMs: 1_000, + silentPhaseMs: 10_000, + logLine: vi.fn(), + }); + + child.stderr.emit("data", Buffer.from("Created sandbox: demo\nSetting up NemoClaw")); + child.emit("close", 255); + + await expect(promise).resolves.toMatchObject({ + status: 0, + forcedReady: true, + output: expect.stringContaining("Setting up NemoClaw"), + }); + }); + it("returns non-zero when readyCheck is false at close time", async () => { const child = new FakeChild(); const promise = streamSandboxCreate("echo create", process.env, { diff --git a/src/lib/sandbox/create-stream.ts b/src/lib/sandbox/create-stream.ts index cac66cf693..9882433bfb 100644 --- a/src/lib/sandbox/create-stream.ts +++ b/src/lib/sandbox/create-stream.ts @@ -232,10 +232,17 @@ export function streamSandboxCreate( parts.forEach(flushLine); } + function flushPendingLine() { + if (!pending) return; + const trailing = pending; + pending = ""; + flushLine(trailing); + } + function finish(status: number, overrides: Partial = {}) { if (settled) return; settled = true; - if (pending) flushLine(pending); + flushPendingLine(); if (readyTimer) clearInterval(readyTimer); clearInterval(heartbeatTimer); resolvePromise({ @@ -342,6 +349,7 @@ export function streamSandboxCreate( child.on("close", (code) => { // One last ready-check: the sandbox may have become Ready between the // last poll tick and the stream exit (e.g. SSH 255 after "Created sandbox:"). + flushPendingLine(); if (code && code !== 0 && options.readyCheck) { try { if (options.readyCheck() && readyCheckOutputMatched) { From 3a9f58f21936fc6b9c5b038621becadffb3b3e0a Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 09:06:00 -0700 Subject: [PATCH 14/20] fix: keep VM DNS monkeypatch best-effort --- .../sandbox/vm-dns-monkeypatch.test.ts | 58 +++++++++++++++++++ src/lib/actions/sandbox/vm-dns-monkeypatch.ts | 35 +++++++---- 2 files changed, 83 insertions(+), 10 deletions(-) create mode 100644 src/lib/actions/sandbox/vm-dns-monkeypatch.test.ts diff --git a/src/lib/actions/sandbox/vm-dns-monkeypatch.test.ts b/src/lib/actions/sandbox/vm-dns-monkeypatch.test.ts new file mode 100644 index 0000000000..5c3373268c --- /dev/null +++ b/src/lib/actions/sandbox/vm-dns-monkeypatch.test.ts @@ -0,0 +1,58 @@ +// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +import fs from "node:fs"; +import os from "node:os"; +import path from "node:path"; + +import { afterEach, describe, expect, it, vi } from "vitest"; + +vi.mock("../../adapters/openshell/runtime", () => ({ + captureOpenshell: vi.fn(), +})); + +import { applyOpenShellVmDnsMonkeypatch } from "./vm-dns-monkeypatch"; + +const tempDirs: string[] = []; + +function makeTempDir(): string { + const dir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-vm-dns-monkeypatch-")); + tempDirs.push(dir); + return dir; +} + +function sandboxRootfs(stateDir: string, sandboxId = "abc"): string { + return path.join(stateDir, "vm-driver", "sandboxes", sandboxId, "rootfs"); +} + +describe("OpenShell VM DNS monkeypatch", () => { + afterEach(() => { + for (const dir of tempDirs.splice(0)) { + fs.rmSync(dir, { recursive: true, force: true }); + } + }); + + it("returns a warning result instead of throwing when rootfs files cannot be patched", () => { + const stateDir = makeTempDir(); + const rootfs = sandboxRootfs(stateDir); + fs.mkdirSync(path.join(rootfs, "etc", "resolv.conf"), { recursive: true }); + + const result = applyOpenShellVmDnsMonkeypatch( + "demo", + { openshellDriver: "vm" }, + { + capture: () => ({ status: 0, output: "Id: abc\n" }), + platform: "darwin", + stateDir, + }, + ); + + expect(result).toMatchObject({ + attempted: true, + changed: false, + ok: false, + rootfs, + }); + expect(result.reason).toContain("failed to patch VM DNS files"); + }); +}); diff --git a/src/lib/actions/sandbox/vm-dns-monkeypatch.ts b/src/lib/actions/sandbox/vm-dns-monkeypatch.ts index 4008b92b49..5a2aa07b9d 100644 --- a/src/lib/actions/sandbox/vm-dns-monkeypatch.ts +++ b/src/lib/actions/sandbox/vm-dns-monkeypatch.ts @@ -62,6 +62,10 @@ function readTextFileIfPresent(filePath: string): string | null { } } +function errorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error); +} + function patchGuestInit(initPath: string): boolean { const original = readTextFileIfPresent(initPath); if (original === null) return false; @@ -121,15 +125,26 @@ export function applyOpenShellVmDnsMonkeypatch( }; } - fs.mkdirSync(path.dirname(resolvConf), { recursive: true }); - const desired = `nameserver ${GVPROXY_DNS}\n`; - const current = readTextFileIfPresent(resolvConf) ?? ""; - let changed = current !== desired; - if (changed) { - fs.writeFileSync(resolvConf, desired); - } - changed = - patchGuestInit(path.join(rootfs, "srv", "openshell-vm-sandbox-init.sh")) || changed; + let changed = false; + try { + fs.mkdirSync(path.dirname(resolvConf), { recursive: true }); + const desired = `nameserver ${GVPROXY_DNS}\n`; + const current = readTextFileIfPresent(resolvConf) ?? ""; + changed = current !== desired; + if (changed) { + fs.writeFileSync(resolvConf, desired); + } + changed = + patchGuestInit(path.join(rootfs, "srv", "openshell-vm-sandbox-init.sh")) || changed; - return { attempted: true, changed, ok: true, rootfs }; + return { attempted: true, changed, ok: true, rootfs }; + } catch (error) { + return { + attempted: true, + changed, + ok: false, + reason: `failed to patch VM DNS files: ${errorMessage(error)}`, + rootfs, + }; + } } From 242a624310f2b50afce8cf0918a989b83c03c0e8 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 12:40:47 -0700 Subject: [PATCH 15/20] fix(onboard): address messaging reuse feedback --- agents/hermes/config/messaging-config.ts | 5 ++++- src/lib/onboard/messaging-reuse.test.ts | 16 ++++++++++++++++ src/lib/onboard/messaging-reuse.ts | 3 ++- test/generate-hermes-config.test.ts | 14 ++++++++++++++ 4 files changed, 36 insertions(+), 2 deletions(-) diff --git a/agents/hermes/config/messaging-config.ts b/agents/hermes/config/messaging-config.ts index a7f4c1cd6d..5cf584d86d 100644 --- a/agents/hermes/config/messaging-config.ts +++ b/agents/hermes/config/messaging-config.ts @@ -32,7 +32,10 @@ export function buildMessagingEnvLines( const discordAllowedUsers = collectDiscordAllowedUsers(allowedIds, discordGuilds); if (discordAllowedUsers.length > 0) { envLines.push(`DISCORD_ALLOWED_USERS=${discordAllowedUsers.join(",")}`); - } else if (enabledChannels.has("discord") && Object.keys(discordGuilds).length > 0) { + } else if ( + enabledChannels.has("discord") && + Object.keys(discordGuilds).filter((guildId) => guildId.trim()).length > 0 + ) { envLines.push("DISCORD_ALLOW_ALL_USERS=true"); } if (allowedIds.telegram?.length) { diff --git a/src/lib/onboard/messaging-reuse.test.ts b/src/lib/onboard/messaging-reuse.test.ts index e2ea2f6935..a41aa107c1 100644 --- a/src/lib/onboard/messaging-reuse.test.ts +++ b/src/lib/onboard/messaging-reuse.test.ts @@ -51,4 +51,20 @@ describe("onboard messaging reuse", () => { expect(reusedChannels).toEqual(["slack"]); }); + + it("normalizes empty resume messaging channels to null", () => { + const reusedChannels = getNonInteractiveStoredMessagingChannels( + true, + ["unknown"], + "assistant", + messagingChannels, + () => false, + () => ({ messagingChannels: ["discord"] }), + () => [], + () => true, + true, + ); + + expect(reusedChannels).toBeNull(); + }); }); diff --git a/src/lib/onboard/messaging-reuse.ts b/src/lib/onboard/messaging-reuse.ts index 6c3eb740c2..10b71a0e55 100644 --- a/src/lib/onboard/messaging-reuse.ts +++ b/src/lib/onboard/messaging-reuse.ts @@ -33,7 +33,8 @@ export function getNonInteractiveStoredMessagingChannels( ): string[] | null { if (!nonInteractive) return null; if (resume && Array.isArray(sessionChannels)) { - return getKnownMessagingChannels(sessionChannels, messagingChannels); + const knownSessionChannels = getKnownMessagingChannels(sessionChannels, messagingChannels); + return knownSessionChannels.length > 0 ? knownSessionChannels : null; } if (resume || !sandboxName || messagingChannels.some((channel) => hasMessagingToken(channel.envKey))) { return null; diff --git a/test/generate-hermes-config.test.ts b/test/generate-hermes-config.test.ts index 8377b8681c..db3e799bc1 100644 --- a/test/generate-hermes-config.test.ts +++ b/test/generate-hermes-config.test.ts @@ -176,6 +176,20 @@ describe("agents/hermes/generate-config.ts", () => { expect(envFile).not.toContain("DISCORD_ALLOWED_USERS="); }); + it("does not allow all Discord users for empty guild config keys", () => { + const { envFile } = runConfigScript({ + NEMOCLAW_MESSAGING_CHANNELS_B64: encodeJson(["discord"]), + NEMOCLAW_DISCORD_GUILDS_B64: encodeJson({ + " ": { + requireMention: false, + }, + }), + }); + + expect(envFile).not.toContain("DISCORD_ALLOW_ALL_USERS=true\n"); + expect(envFile).not.toContain("DISCORD_ALLOWED_USERS="); + }); + it("does not emit generic platforms blocks for Telegram or Slack messaging tokens", () => { const { config, envFile } = runConfigScript({ NEMOCLAW_MESSAGING_CHANNELS_B64: encodeJson(["telegram", "slack"]), From 32056fadde7b8d22de5ede97312864fd8c547742 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 12:31:07 -0700 Subject: [PATCH 16/20] fix(macos): harden VM DNS monkeypatch --- src/lib/actions/sandbox/connect.ts | 14 +- .../sandbox/vm-dns-monkeypatch.test.ts | 300 ++++++++++++++++- src/lib/actions/sandbox/vm-dns-monkeypatch.ts | 315 +++++++++++++++--- src/lib/onboard/vm-dns-monkeypatch.ts | 29 +- test/onboard.test.ts | 111 +++++- test/sandbox-connect-inference.test.ts | 56 +++- 6 files changed, 764 insertions(+), 61 deletions(-) diff --git a/src/lib/actions/sandbox/connect.ts b/src/lib/actions/sandbox/connect.ts index 8c44be07b5..f8b38d5d25 100644 --- a/src/lib/actions/sandbox/connect.ts +++ b/src/lib/actions/sandbox/connect.ts @@ -196,10 +196,16 @@ function repairSandboxInferenceRouteIfNeeded( } return true; } - if (!quiet && !patch.ok && patch.reason) { - console.error( - ` Warning: OpenShell VM DNS monkeypatch did not apply: ${patch.reason}`, - ); + if (!quiet) { + if (!patch.ok && patch.reason) { + console.error( + ` Warning: OpenShell VM DNS monkeypatch did not apply: ${patch.reason}`, + ); + } else if (patch.ok) { + console.error( + " Warning: OpenShell VM DNS monkeypatch completed but inference.local is still unavailable.", + ); + } } } diff --git a/src/lib/actions/sandbox/vm-dns-monkeypatch.test.ts b/src/lib/actions/sandbox/vm-dns-monkeypatch.test.ts index 5c3373268c..13f36cf8b0 100644 --- a/src/lib/actions/sandbox/vm-dns-monkeypatch.test.ts +++ b/src/lib/actions/sandbox/vm-dns-monkeypatch.test.ts @@ -11,7 +11,10 @@ vi.mock("../../adapters/openshell/runtime", () => ({ captureOpenshell: vi.fn(), })); -import { applyOpenShellVmDnsMonkeypatch } from "./vm-dns-monkeypatch"; +import { + applyOpenShellVmDnsMonkeypatch, + shouldApplyVmDnsMonkeypatch, +} from "./vm-dns-monkeypatch"; const tempDirs: string[] = []; @@ -25,6 +28,32 @@ function sandboxRootfs(stateDir: string, sandboxId = "abc"): string { return path.join(stateDir, "vm-driver", "sandboxes", sandboxId, "rootfs"); } +function sandboxDir(stateDir: string, sandboxId = "abc"): string { + return path.join(stateDir, "vm-driver", "sandboxes", sandboxId); +} + +function writeRecognizedInit(rootfs: string): void { + fs.mkdirSync(path.join(rootfs, "srv"), { recursive: true }); + fs.writeFileSync( + path.join(rootfs, "srv", "openshell-vm-sandbox-init.sh"), + [ + "elif ip link show eth0 >/dev/null 2>&1; then", + " if [ ! -s /etc/resolv.conf ]; then", + ' echo "nameserver 8.8.8.8" > /etc/resolv.conf', + ' echo "nameserver 8.8.4.4" >> /etc/resolv.conf', + " fi", + "fi", + "", + ].join("\n"), + ); +} + +function writeRootfsFiles(rootfs: string, resolver: string): void { + fs.mkdirSync(path.join(rootfs, "etc"), { recursive: true }); + fs.writeFileSync(path.join(rootfs, "etc", "resolv.conf"), resolver); + writeRecognizedInit(rootfs); +} + describe("OpenShell VM DNS monkeypatch", () => { afterEach(() => { for (const dir of tempDirs.splice(0)) { @@ -32,10 +61,276 @@ describe("OpenShell VM DNS monkeypatch", () => { } }); + it("does not attempt non-VM sandboxes", () => { + const capture = vi.fn(); + + const result = applyOpenShellVmDnsMonkeypatch( + "demo", + { openshellDriver: "kubernetes" }, + { + capture, + env: {}, + platform: "darwin", + stateDir: makeTempDir(), + }, + ); + + expect(result).toMatchObject({ + attempted: false, + changed: false, + ok: false, + status: "skipped", + }); + expect(result.reason).toContain("not an OpenShell VM sandbox"); + expect(capture).not.toHaveBeenCalled(); + }); + + it("does not attempt non-Darwin VM sandboxes unless force-enabled", () => { + const capture = vi.fn(); + + const result = applyOpenShellVmDnsMonkeypatch( + "demo", + { openshellDriver: "vm" }, + { + capture, + env: {}, + platform: "linux", + stateDir: makeTempDir(), + }, + ); + + expect(result).toMatchObject({ + attempted: false, + changed: false, + ok: false, + status: "skipped", + }); + expect(result.reason).toContain("not running on macOS"); + expect(capture).not.toHaveBeenCalled(); + expect(shouldApplyVmDnsMonkeypatch({ openshellDriver: "vm" }, "linux", {})).toBe(false); + expect( + shouldApplyVmDnsMonkeypatch({ openshellDriver: "vm" }, "linux", { + NEMOCLAW_FORCE_VM_DNS_MONKEYPATCH: "1", + }), + ).toBe(true); + }); + + it("honors the VM DNS monkeypatch kill switch", () => { + const capture = vi.fn(); + + const result = applyOpenShellVmDnsMonkeypatch( + "demo", + { openshellDriver: "vm" }, + { + capture, + env: { NEMOCLAW_DISABLE_VM_DNS_MONKEYPATCH: "1" }, + platform: "darwin", + stateDir: makeTempDir(), + }, + ); + + expect(result).toMatchObject({ + attempted: false, + changed: false, + ok: false, + status: "skipped", + }); + expect(result.reason).toContain("disabled"); + expect(capture).not.toHaveBeenCalled(); + }); + + it("puts gvproxy DNS first while preserving resolver options and private resolvers", () => { + const stateDir = makeTempDir(); + const rootfs = sandboxRootfs(stateDir); + writeRootfsFiles( + rootfs, + [ + "search corp.example", + "options ndots:5", + "nameserver 8.8.8.8", + "nameserver 10.0.0.2", + "nameserver 192.168.127.1", + "nameserver 10.0.0.2", + "nameserver 8.8.4.4", + "", + ].join("\n"), + ); + + const result = applyOpenShellVmDnsMonkeypatch( + "demo", + { openshellDriver: "vm" }, + { + capture: () => ({ status: 0, output: "Id: abc\n" }), + platform: "darwin", + stateDir, + }, + ); + + expect(result).toMatchObject({ + attempted: true, + changed: true, + ok: true, + status: "applied", + }); + expect(fs.readFileSync(path.join(rootfs, "etc", "resolv.conf"), "utf-8")).toBe( + [ + "nameserver 192.168.127.1", + "search corp.example", + "options ndots:5", + "nameserver 10.0.0.2", + "", + ].join("\n"), + ); + expect( + fs.readFileSync(path.join(rootfs, "srv", "openshell-vm-sandbox-init.sh"), "utf-8"), + ).toContain('echo "nameserver ${GVPROXY_GATEWAY_IP}" > /etc/resolv.conf'); + }); + + it("is idempotent when resolver and init script are already patched", () => { + const stateDir = makeTempDir(); + const rootfs = sandboxRootfs(stateDir); + fs.mkdirSync(path.join(rootfs, "etc"), { recursive: true }); + fs.mkdirSync(path.join(rootfs, "srv"), { recursive: true }); + fs.writeFileSync( + path.join(rootfs, "etc", "resolv.conf"), + "nameserver 192.168.127.1\nsearch corp.example\noptions ndots:5\n", + ); + fs.writeFileSync( + path.join(rootfs, "srv", "openshell-vm-sandbox-init.sh"), + 'echo "nameserver ${GVPROXY_GATEWAY_IP}" > /etc/resolv.conf\n', + ); + + const result = applyOpenShellVmDnsMonkeypatch( + "demo", + { openshellDriver: "vm" }, + { + capture: () => ({ status: 0, output: "Id: abc\n" }), + platform: "darwin", + stateDir, + }, + ); + + expect(result).toMatchObject({ + attempted: true, + changed: false, + ok: true, + status: "already-present", + }); + }); + + it("returns a soft failure when the VM rootfs is missing", () => { + const stateDir = makeTempDir(); + fs.mkdirSync(sandboxDir(stateDir), { recursive: true }); + + const result = applyOpenShellVmDnsMonkeypatch( + "demo", + { openshellDriver: "vm" }, + { + capture: () => ({ status: 0, output: "Id: abc\n" }), + platform: "darwin", + stateDir, + }, + ); + + expect(result).toMatchObject({ + attempted: true, + changed: false, + ok: false, + status: "failed", + }); + expect(result.reason).toContain("VM rootfs not found"); + }); + + it("returns a specific unsupported-layout reason for ext4-style VM root disks", () => { + const stateDir = makeTempDir(); + const dir = sandboxDir(stateDir); + fs.mkdirSync(dir, { recursive: true }); + fs.writeFileSync(path.join(dir, "rootfs.ext4"), ""); + + const result = applyOpenShellVmDnsMonkeypatch( + "demo", + { openshellDriver: "vm" }, + { + capture: () => ({ status: 0, output: "Id: abc\n" }), + platform: "darwin", + stateDir, + }, + ); + + expect(result).toMatchObject({ + attempted: true, + changed: false, + ok: false, + status: "failed", + }); + expect(result.reason).toContain("ext4 root disk layout"); + expect(result.reason).toContain("rootfs DNS monkeypatch no longer applies"); + }); + + it("refuses unknown init-script shapes without rewriting resolver files", () => { + const stateDir = makeTempDir(); + const rootfs = sandboxRootfs(stateDir); + fs.mkdirSync(path.join(rootfs, "etc"), { recursive: true }); + fs.mkdirSync(path.join(rootfs, "srv"), { recursive: true }); + const resolverPath = path.join(rootfs, "etc", "resolv.conf"); + const originalResolver = "nameserver 8.8.8.8\n"; + fs.writeFileSync(resolverPath, originalResolver); + fs.writeFileSync(path.join(rootfs, "srv", "openshell-vm-sandbox-init.sh"), "echo unknown\n"); + + const result = applyOpenShellVmDnsMonkeypatch( + "demo", + { openshellDriver: "vm" }, + { + capture: () => ({ status: 0, output: "Id: abc\n" }), + platform: "darwin", + stateDir, + }, + ); + + expect(result).toMatchObject({ + attempted: true, + changed: false, + ok: false, + status: "failed", + }); + expect(result.reason).toContain("init script shape not recognized"); + expect(fs.readFileSync(resolverPath, "utf-8")).toBe(originalResolver); + }); + + it("refuses resolver symlinks that escape the VM rootfs", () => { + const stateDir = makeTempDir(); + const rootfs = sandboxRootfs(stateDir); + const outside = path.join(stateDir, "outside-resolv.conf"); + fs.mkdirSync(path.join(rootfs, "etc"), { recursive: true }); + fs.writeFileSync(outside, "nameserver 8.8.8.8\n"); + fs.symlinkSync(outside, path.join(rootfs, "etc", "resolv.conf")); + writeRecognizedInit(rootfs); + + const result = applyOpenShellVmDnsMonkeypatch( + "demo", + { openshellDriver: "vm" }, + { + capture: () => ({ status: 0, output: "Id: abc\n" }), + platform: "darwin", + stateDir, + }, + ); + + expect(result).toMatchObject({ + attempted: true, + changed: false, + ok: false, + status: "failed", + }); + expect(result.reason).toContain("resolves outside VM rootfs"); + expect(fs.readFileSync(outside, "utf-8")).toBe("nameserver 8.8.8.8\n"); + }); + it("returns a warning result instead of throwing when rootfs files cannot be patched", () => { const stateDir = makeTempDir(); const rootfs = sandboxRootfs(stateDir); fs.mkdirSync(path.join(rootfs, "etc", "resolv.conf"), { recursive: true }); + writeRecognizedInit(rootfs); const result = applyOpenShellVmDnsMonkeypatch( "demo", @@ -51,7 +346,8 @@ describe("OpenShell VM DNS monkeypatch", () => { attempted: true, changed: false, ok: false, - rootfs, + rootfs: fs.realpathSync.native(rootfs), + status: "failed", }); expect(result.reason).toContain("failed to patch VM DNS files"); }); diff --git a/src/lib/actions/sandbox/vm-dns-monkeypatch.ts b/src/lib/actions/sandbox/vm-dns-monkeypatch.ts index 5a2aa07b9d..8b62d43cfe 100644 --- a/src/lib/actions/sandbox/vm-dns-monkeypatch.ts +++ b/src/lib/actions/sandbox/vm-dns-monkeypatch.ts @@ -13,23 +13,29 @@ import { captureOpenshell } from "../../adapters/openshell/runtime"; import type { SandboxEntry } from "../../state/registry"; const GVPROXY_DNS = "192.168.127.1"; -const LEGACY_PUBLIC_DNS_BLOCK = ` if [ ! -s /etc/resolv.conf ]; then - echo "nameserver 8.8.8.8" > /etc/resolv.conf - echo "nameserver 8.8.4.4" >> /etc/resolv.conf - fi`; -const GVPROXY_DNS_BLOCK = ` echo "nameserver \${GVPROXY_GATEWAY_IP}" > /etc/resolv.conf`; +const INIT_SCRIPT_RELATIVE_PATH = ["srv", "openshell-vm-sandbox-init.sh"] as const; +const RESOLV_CONF_RELATIVE_PATH = ["etc", "resolv.conf"] as const; +const GVPROXY_RESOLVER_LINE = "nameserver ${GVPROXY_GATEWAY_IP}"; +const PUBLIC_FALLBACK_DNS = new Set(["8.8.8.8", "8.8.4.4"]); +const INIT_PUBLIC_FALLBACK_BLOCK_RE = + /^([ \t]*)if\s+\[\s*!\s+-s\s+\/etc\/resolv\.conf\s*\]\s*;\s*then\s*\r?\n[ \t]*(?:echo|printf)\b[^\n]*8\.8\.8\.8[^\n]*>\s*\/etc\/resolv\.conf[^\n]*\r?\n[ \t]*(?:echo|printf)\b[^\n]*8\.8\.4\.4[^\n]*>>\s*\/etc\/resolv\.conf[^\n]*\r?\n[ \t]*fi/gm; +const INIT_ETH0_PUBLIC_FALLBACK_RE = + /ip\s+link\s+show\s+eth0[\s\S]{0,2000}nameserver\s+8\.8\.8\.8[\s\S]{0,2000}nameserver\s+8\.8\.4\.4/; type CaptureFn = ( args: string[], opts: { ignoreError?: boolean; timeout?: number }, ) => CaptureOpenshellResult; +export type VmDnsMonkeypatchStatus = "skipped" | "applied" | "already-present" | "failed"; + export type VmDnsMonkeypatchResult = { attempted: boolean; changed: boolean; ok: boolean; reason?: string; rootfs?: string; + status?: VmDnsMonkeypatchStatus; }; export function shouldApplyVmDnsMonkeypatch( @@ -66,14 +72,186 @@ function errorMessage(error: unknown): string { return error instanceof Error ? error.message : String(error); } -function patchGuestInit(initPath: string): boolean { +function isPathInside(childPath: string, parentPath: string): boolean { + const relative = path.relative(parentPath, childPath); + return relative === "" || (!!relative && !relative.startsWith("..") && !path.isAbsolute(relative)); +} + +function realpathIfPresent(filePath: string): string | null { + try { + return fs.realpathSync.native(filePath); + } catch (error) { + if ((error as NodeJS.ErrnoException).code === "ENOENT") return null; + throw error; + } +} + +function fail(reason: string, rootfs?: string, changed = false): VmDnsMonkeypatchResult { + return { + attempted: true, + changed, + ok: false, + reason, + rootfs, + status: "failed", + }; +} + +function skipped(reason: string): VmDnsMonkeypatchResult { + return { + attempted: false, + changed: false, + ok: false, + reason, + status: "skipped", + }; +} + +function shouldSkipVmDnsMonkeypatch( + entry: Pick | null | undefined, + platform: NodeJS.Platform, + env: NodeJS.ProcessEnv, +): string | null { + if (env.NEMOCLAW_DISABLE_VM_DNS_MONKEYPATCH === "1") { + return "disabled by NEMOCLAW_DISABLE_VM_DNS_MONKEYPATCH=1"; + } + if (entry?.openshellDriver !== "vm") return "not an OpenShell VM sandbox"; + if (platform !== "darwin" && env.NEMOCLAW_FORCE_VM_DNS_MONKEYPATCH !== "1") { + return "not running on macOS"; + } + return null; +} + +function ext4RootDiskCandidates(sandboxDir: string): string[] { + try { + return fs + .readdirSync(sandboxDir) + .filter((entry) => /(?:^|[-_.])(?:rootfs|root|disk).*(?:ext4|\.img$|\.raw$)/i.test(entry)); + } catch (error) { + if ((error as NodeJS.ErrnoException).code === "ENOENT") return []; + throw error; + } +} + +function resolveTargetInsideRootfs( + rootfsReal: string, + relativePath: readonly string[], + opts: { mustExist?: boolean } = {}, +): { ok: true; path: string } | { ok: false; reason: string } { + const target = path.join(rootfsReal, ...relativePath); + const targetReal = realpathIfPresent(target); + if (targetReal) { + if (!isPathInside(targetReal, rootfsReal)) { + return { + ok: false, + reason: `refusing to patch ${path.join(...relativePath)} because it resolves outside VM rootfs: ${targetReal}`, + }; + } + return { ok: true, path: targetReal }; + } + + if (opts.mustExist) { + return { + ok: false, + reason: `OpenShell VM file not found: ${target}`, + }; + } + + const parentReal = realpathIfPresent(path.dirname(target)); + if (!parentReal) { + return { + ok: false, + reason: `OpenShell VM directory not found: ${path.dirname(target)}`, + }; + } + const resolvedTarget = path.join(parentReal, path.basename(target)); + if (!isPathInside(resolvedTarget, rootfsReal)) { + return { + ok: false, + reason: `refusing to patch ${path.join(...relativePath)} because its parent resolves outside VM rootfs: ${resolvedTarget}`, + }; + } + return { ok: true, path: resolvedTarget }; +} + +function normalizeResolver(current: string): string { + const lines = current.replace(/\r\n/g, "\n").split("\n"); + const next: string[] = [`nameserver ${GVPROXY_DNS}`]; + const seenNameservers = new Set([GVPROXY_DNS, ...PUBLIC_FALLBACK_DNS]); + + for (const line of lines) { + const trimmed = line.trim(); + if (!trimmed) continue; + + const nameserverMatch = trimmed.match(/^nameserver\s+(\S+)(?:\s+.*)?$/); + if (nameserverMatch) { + const resolver = nameserverMatch[1]; + if (seenNameservers.has(resolver)) continue; + seenNameservers.add(resolver); + next.push(line.trimEnd()); + continue; + } + + next.push(line.trimEnd()); + } + + return `${next.join("\n")}\n`; +} + +function buildGvproxyDnsBlock(indent: string): string { + return [ + `${indent}if [ -n "\${GVPROXY_GATEWAY_IP:-}" ]; then`, + `${indent} echo "${GVPROXY_RESOLVER_LINE}" > /etc/resolv.conf`, + `${indent}else`, + `${indent} echo "nameserver ${GVPROXY_DNS}" > /etc/resolv.conf`, + `${indent}fi`, + ].join("\n"); +} + +function buildGuestInitPatch(initPath: string): + | { ok: true; changed: boolean; content?: string } + | { ok: false; reason: string } { + if (path.basename(initPath) !== INIT_SCRIPT_RELATIVE_PATH.at(-1)) { + return { + ok: false, + reason: `refusing to patch unexpected OpenShell VM init script path: ${initPath}`, + }; + } + const original = readTextFileIfPresent(initPath); - if (original === null) return false; - if (original.includes('nameserver ${GVPROXY_GATEWAY_IP}')) return false; - const patched = original.replace(LEGACY_PUBLIC_DNS_BLOCK, GVPROXY_DNS_BLOCK); - if (patched === original) return false; - fs.writeFileSync(initPath, patched); - return true; + if (original === null) { + return { + ok: false, + reason: `OpenShell VM init script not found: ${initPath}`, + }; + } + if (original.includes(GVPROXY_RESOLVER_LINE)) return { ok: true, changed: false }; + + const hasGvproxyEvidence = + original.includes("GVPROXY_GATEWAY_IP") || INIT_ETH0_PUBLIC_FALLBACK_RE.test(original); + if (!hasGvproxyEvidence) { + return { + ok: false, + reason: "OpenShell VM init script shape not recognized; no gvproxy DNS evidence found", + }; + } + + const patched = original.replace(INIT_PUBLIC_FALLBACK_BLOCK_RE, (match, indent: string) => + buildGvproxyDnsBlock(indent), + ); + if (patched === original) { + return { + ok: false, + reason: "OpenShell VM init script public-DNS fallback block was not recognized", + }; + } + if (!patched.includes(GVPROXY_RESOLVER_LINE)) { + return { + ok: false, + reason: "OpenShell VM init script patch did not produce the gvproxy resolver line", + }; + } + return { ok: true, changed: true, content: patched }; } export function applyOpenShellVmDnsMonkeypatch( @@ -88,13 +266,10 @@ export function applyOpenShellVmDnsMonkeypatch( } = {}, ): VmDnsMonkeypatchResult { const env = deps.env ?? process.env; - if (!shouldApplyVmDnsMonkeypatch(entry, deps.platform ?? process.platform, env)) { - return { - attempted: false, - changed: false, - ok: false, - reason: "not a macOS OpenShell VM sandbox", - }; + const platform = deps.platform ?? process.platform; + const skipReason = shouldSkipVmDnsMonkeypatch(entry, platform, env); + if (skipReason) { + return skipped(skipReason); } const capture = deps.capture ?? captureOpenshell; @@ -104,47 +279,99 @@ export function applyOpenShellVmDnsMonkeypatch( }); const sandboxId = parseSandboxIdFromGetOutput(get.output || ""); if (!sandboxId) { - return { - attempted: true, - changed: false, - ok: false, - reason: "could not resolve OpenShell sandbox id", - }; + return fail("could not resolve OpenShell sandbox id"); } const stateDir = deps.stateDir ?? dockerDriverGatewayStateDir(env, deps.homeDir ?? os.homedir()); - const rootfs = path.join(stateDir, "vm-driver", "sandboxes", sandboxId, "rootfs"); - const resolvConf = path.join(rootfs, "etc", "resolv.conf"); - if (!fs.existsSync(rootfs)) { - return { - attempted: true, - changed: false, - ok: false, - reason: `VM rootfs not found: ${rootfs}`, - }; + const stateDirPath = path.resolve(stateDir); + const stateDirReal = realpathIfPresent(stateDirPath); + if (!stateDirReal) { + return fail(`OpenShell VM state directory not found: ${stateDirPath}`); } let changed = false; + let rootfsContext: string | undefined; try { - fs.mkdirSync(path.dirname(resolvConf), { recursive: true }); - const desired = `nameserver ${GVPROXY_DNS}\n`; - const current = readTextFileIfPresent(resolvConf) ?? ""; - changed = current !== desired; - if (changed) { - fs.writeFileSync(resolvConf, desired); + const sandboxDir = path.join(stateDirReal, "vm-driver", "sandboxes", sandboxId); + const sandboxDirReal = realpathIfPresent(sandboxDir); + if (!sandboxDirReal) { + return fail(`OpenShell VM sandbox directory not found: ${sandboxDir}`); + } + + const sandboxesDirReal = path.join(stateDirReal, "vm-driver", "sandboxes"); + if (!isPathInside(sandboxDirReal, sandboxesDirReal)) { + return fail( + `refusing to patch VM sandbox because its directory resolves outside OpenShell state: ${sandboxDirReal}`, + ); + } + + const rootfs = path.join(sandboxDirReal, "rootfs"); + const rootfsReal = realpathIfPresent(rootfs); + if (!rootfsReal) { + const diskCandidates = ext4RootDiskCandidates(sandboxDirReal); + if (diskCandidates.length > 0) { + return fail( + `OpenShell VM sandbox appears to use an ext4 root disk layout (${diskCandidates.join(", ")}); NemoClaw's rootfs DNS monkeypatch no longer applies`, + ); + } + return fail(`VM rootfs not found: ${rootfs}`); + } + rootfsContext = rootfsReal; + if (!isPathInside(rootfsReal, sandboxDirReal)) { + return fail( + `refusing to patch VM DNS because rootfs resolves outside OpenShell sandbox directory: ${rootfsReal}`, + rootfsReal, + ); } - changed = - patchGuestInit(path.join(rootfs, "srv", "openshell-vm-sandbox-init.sh")) || changed; - return { attempted: true, changed, ok: true, rootfs }; + const initScript = resolveTargetInsideRootfs(rootfsReal, INIT_SCRIPT_RELATIVE_PATH, { + mustExist: true, + }); + if (!initScript.ok) return fail(initScript.reason, rootfsReal); + + const resolvConf = resolveTargetInsideRootfs(rootfsReal, RESOLV_CONF_RELATIVE_PATH); + if (!resolvConf.ok) return fail(resolvConf.reason, rootfsReal); + + const initPatch = buildGuestInitPatch(initScript.path); + if (!initPatch.ok) return fail(initPatch.reason, rootfsReal); + + const currentResolver = readTextFileIfPresent(resolvConf.path) ?? ""; + const desiredResolver = normalizeResolver(currentResolver); + if (currentResolver !== desiredResolver) { + fs.writeFileSync(resolvConf.path, desiredResolver); + changed = true; + } + + if (initPatch.changed && initPatch.content !== undefined) { + fs.writeFileSync(initScript.path, initPatch.content); + changed = true; + } + + const verifiedInit = readTextFileIfPresent(initScript.path) ?? ""; + if (!verifiedInit.includes(GVPROXY_RESOLVER_LINE)) { + return fail( + "OpenShell VM init script patch verification failed: gvproxy resolver line missing", + rootfsReal, + changed, + ); + } + + return { + attempted: true, + changed, + ok: true, + rootfs: rootfsReal, + status: changed ? "applied" : "already-present", + }; } catch (error) { return { attempted: true, changed, ok: false, reason: `failed to patch VM DNS files: ${errorMessage(error)}`, - rootfs, + rootfs: rootfsContext, + status: "failed", }; } } diff --git a/src/lib/onboard/vm-dns-monkeypatch.ts b/src/lib/onboard/vm-dns-monkeypatch.ts index be4fae375a..6ed57f7c0a 100644 --- a/src/lib/onboard/vm-dns-monkeypatch.ts +++ b/src/lib/onboard/vm-dns-monkeypatch.ts @@ -1,18 +1,39 @@ // SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -import { applyOpenShellVmDnsMonkeypatch } from "../actions/sandbox/vm-dns-monkeypatch"; +import { + applyOpenShellVmDnsMonkeypatch, + type VmDnsMonkeypatchResult, +} from "../actions/sandbox/vm-dns-monkeypatch"; + +type OnboardVmDnsMonkeypatchDeps = { + apply?: typeof applyOpenShellVmDnsMonkeypatch; + log?: (message: string) => void; + warn?: (message: string) => void; +}; export function applyOnboardVmDnsMonkeypatch( sandboxName: string, runtime: { openshellDriver?: string | null }, + deps: OnboardVmDnsMonkeypatchDeps = {}, ): void { - const vmDnsPatch = applyOpenShellVmDnsMonkeypatch(sandboxName, { + const apply = deps.apply ?? applyOpenShellVmDnsMonkeypatch; + const log = deps.log ?? console.log; + const warn = deps.warn ?? console.error; + const vmDnsPatch: VmDnsMonkeypatchResult = apply(sandboxName, { openshellDriver: runtime.openshellDriver, }); if (vmDnsPatch.ok && vmDnsPatch.changed) { - console.log(" ✓ Applied OpenShell VM DNS monkeypatch"); + log(" ✓ Applied OpenShell VM DNS monkeypatch"); + } else if (vmDnsPatch.ok && vmDnsPatch.attempted) { + log(" OpenShell VM DNS monkeypatch already present"); + } else if ( + vmDnsPatch.status === "skipped" && + runtime.openshellDriver === "vm" && + vmDnsPatch.reason + ) { + log(` OpenShell VM DNS monkeypatch skipped: ${vmDnsPatch.reason}`); } else if (vmDnsPatch.attempted && !vmDnsPatch.ok && vmDnsPatch.reason) { - console.error(` Warning: OpenShell VM DNS monkeypatch did not apply: ${vmDnsPatch.reason}`); + warn(` Warning: OpenShell VM DNS monkeypatch did not apply: ${vmDnsPatch.reason}`); } } diff --git a/test/onboard.test.ts b/test/onboard.test.ts index 5caf983c10..af8d5ae1c9 100644 --- a/test/onboard.test.ts +++ b/test/onboard.test.ts @@ -15,6 +15,7 @@ import { loadAgent } from "../dist/lib/agent/defs.js"; import { buildChain, buildControlUiUrls } from "../dist/lib/dashboard/contract.js"; import { NAME_ALLOWED_FORMAT } from "../dist/lib/name-validation.js"; import { stageOptimizedSandboxBuildContext } from "../dist/lib/sandbox/build-context.js"; +import { applyOnboardVmDnsMonkeypatch } from "../dist/lib/onboard/vm-dns-monkeypatch.js"; import { testTimeoutOptions } from "./helpers/timeouts"; type ShimScalar = string | number | boolean | null | undefined; @@ -2660,7 +2661,7 @@ const { loadAgent } = require(${agentDefsPath}); expect(getGatewayReuseState("", "")).toBe("missing"); }); - it("prints doctor logs automatically when gateway fails to start (#1605)", () => { + it("prints doctor logs automatically when gateway fails to start (#1605)", testTimeoutOptions(20_000), () => { const repoRoot = path.join(import.meta.dirname, ".."); const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-gateway-diag-")); const fakeBin = path.join(tmpDir, "bin"); @@ -3090,6 +3091,106 @@ startGateway(null).catch(() => {}); ).toBe(false); }); + it("runs the OpenShell VM DNS monkeypatch after sandbox registration", () => { + const onboardSource = fs.readFileSync( + path.join(import.meta.dirname, "..", "src", "lib", "onboard.ts"), + "utf8", + ); + + assert.match( + onboardSource, + /registry\.setDefault\(sandboxName\);[\s\S]*applyOnboardVmDnsMonkeypatch\(sandboxName, sandboxRuntimeFields\)/, + ); + }); + + it("logs applied only when the onboard VM DNS monkeypatch changes files", () => { + const changedLogs: string[] = []; + applyOnboardVmDnsMonkeypatch( + "demo", + { openshellDriver: "vm" }, + { + apply: () => ({ + attempted: true, + changed: true, + ok: true, + status: "applied", + }), + log: (message) => changedLogs.push(message), + warn: (message) => changedLogs.push(message), + }, + ); + + const unchangedLogs: string[] = []; + applyOnboardVmDnsMonkeypatch( + "demo", + { openshellDriver: "vm" }, + { + apply: () => ({ + attempted: true, + changed: false, + ok: true, + status: "already-present", + }), + log: (message) => unchangedLogs.push(message), + warn: (message) => unchangedLogs.push(message), + }, + ); + + expect(changedLogs).toEqual([" ✓ Applied OpenShell VM DNS monkeypatch"]); + expect(unchangedLogs).toEqual([" OpenShell VM DNS monkeypatch already present"]); + expect(unchangedLogs.join("\n")).not.toContain("Applied"); + }); + + it("logs skipped VM DNS monkeypatch state for VM sandboxes", () => { + const logs: string[] = []; + + applyOnboardVmDnsMonkeypatch( + "demo", + { openshellDriver: "vm" }, + { + apply: () => ({ + attempted: false, + changed: false, + ok: false, + reason: "disabled by NEMOCLAW_DISABLE_VM_DNS_MONKEYPATCH=1", + status: "skipped", + }), + log: (message) => logs.push(message), + warn: (message) => logs.push(message), + }, + ); + + expect(logs).toEqual([ + " OpenShell VM DNS monkeypatch skipped: disabled by NEMOCLAW_DISABLE_VM_DNS_MONKEYPATCH=1", + ]); + }); + + it("warns without aborting when the onboard VM DNS monkeypatch fails", () => { + const warnings: string[] = []; + + expect(() => + applyOnboardVmDnsMonkeypatch( + "demo", + { openshellDriver: "vm" }, + { + apply: () => ({ + attempted: true, + changed: false, + ok: false, + reason: "VM rootfs not found", + status: "failed", + }), + log: (message) => warnings.push(message), + warn: (message) => warnings.push(message), + }, + ), + ).not.toThrow(); + + expect(warnings).toEqual([ + " Warning: OpenShell VM DNS monkeypatch did not apply: VM rootfs not found", + ]); + }); + it("writes sandbox sync scripts to a temp file for stdin redirection", () => { const scriptFile = writeSandboxConfigSyncFile("echo test"); try { @@ -3522,7 +3623,7 @@ const { setupInference } = require(${onboardPath}); ); }); - it("configures Model Router as a host provider while sandboxes keep inference.local", () => { + it("configures Model Router as a host provider while sandboxes keep inference.local", testTimeoutOptions(60_000), () => { const repoRoot = path.join(import.meta.dirname, ".."); const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-router-inference-")); const fakeBin = path.join(tmpDir, "bin"); @@ -3931,7 +4032,7 @@ const { setupInference } = require(${onboardPath}); } }); - it("prefers the managed Model Router command over PATH", () => { + it("prefers the managed Model Router command over PATH", testTimeoutOptions(60_000), () => { const repoRoot = path.join(import.meta.dirname, ".."); const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-router-managed-")); const fakeBin = path.join(tmpDir, "bin"); @@ -4112,7 +4213,7 @@ const { setupInference } = require(${onboardPath}); } }); - it("refreshes stale managed Model Router command when source fingerprint changes", () => { + it("refreshes stale managed Model Router command when source fingerprint changes", testTimeoutOptions(60_000), () => { const repoRoot = path.join(import.meta.dirname, ".."); const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-router-refresh-")); const fakeBin = path.join(tmpDir, "bin"); @@ -8874,7 +8975,7 @@ const { setupMessagingChannels, MESSAGING_CHANNELS } = require(${onboardPath}); } }); - it("uses the custom Dockerfile parent directory as build context when --from is given", async () => { + it("uses the custom Dockerfile parent directory as build context when --from is given", testTimeoutOptions(60_000), async () => { const repoRoot = path.join(import.meta.dirname, ".."); const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "nemoclaw-onboard-from-dockerfile-")); const fakeBin = path.join(tmpDir, "bin"); diff --git a/test/sandbox-connect-inference.test.ts b/test/sandbox-connect-inference.test.ts index bea7fcec7c..16178c689e 100644 --- a/test/sandbox-connect-inference.test.ts +++ b/test/sandbox-connect-inference.test.ts @@ -416,7 +416,9 @@ describe("sandbox connect inference route swap (#1248)", () => { }, ); - const result = runConnect(tmpDir, sandboxName); + const result = runConnect(tmpDir, sandboxName, { + NEMOCLAW_FORCE_VM_DNS_MONKEYPATCH: "1", + }); expect(result.status).toBe(0); const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); @@ -424,13 +426,14 @@ describe("sandbox connect inference route swap (#1248)", () => { expect(state.dockerCalls.length).toBe(0); const combined = (result.stdout || "") + (result.stderr || ""); + expect(combined).toContain("OpenShell VM DNS monkeypatch did not apply"); expect(combined).toContain("Reapplying OpenShell inference route"); expect(combined).toContain("OpenShell vm gateway path"); }, ); it( - "applies the macOS VM DNS monkeypatch before falling back to route reapply", + "uses the macOS VM DNS monkeypatch without legacy DNS repair or route reset when it restores inference.local", testTimeoutOptions(20_000), () => { const { tmpDir, stateFile, sandboxName } = setupFixture( @@ -471,6 +474,55 @@ describe("sandbox connect inference route swap (#1248)", () => { const combined = (result.stdout || "") + (result.stderr || ""); expect(combined).toContain("Applying OpenShell VM DNS monkeypatch"); expect(combined).toContain("inference.local route repaired"); + expect(combined).not.toContain("Reapplying OpenShell inference route"); + expect(combined).not.toContain("Repairing sandbox DNS proxy"); + }, + ); + + it( + "falls back to OpenShell inference route reapply when the VM DNS monkeypatch applies but inference.local stays broken", + testTimeoutOptions(20_000), + () => { + const { tmpDir, stateFile, sandboxName } = setupFixture( + { + name: "vm-dns-still-broken", + model: "nvidia/nemotron-3-super-120b-a12b", + provider: "nvidia-prod", + gpuEnabled: false, + openshellDriver: "vm", + policies: [], + }, + "nvidia-prod", + "nvidia/nemotron-3-super-120b-a12b", + { + inferenceProbeResponses: [ + 'BROKEN 503 {"error":"inference service unavailable"}', + 'BROKEN 503 {"error":"inference service unavailable"}', + "OK 200", + ], + }, + ); + const rootfs = createVmRootfs(tmpDir); + + const result = runConnect(tmpDir, sandboxName, { + NEMOCLAW_FORCE_VM_DNS_MONKEYPATCH: "1", + }); + expect(result.status).toBe(0); + + const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); + expect(state.inferenceSetCalls.length).toBe(1); + expect(state.dockerCalls.length).toBe(0); + expect(fs.readFileSync(path.join(rootfs, "etc", "resolv.conf"), "utf-8")).toBe( + "nameserver 192.168.127.1\n", + ); + + const combined = (result.stdout || "") + (result.stderr || ""); + expect(combined).toContain("Applying OpenShell VM DNS monkeypatch"); + expect(combined).toContain( + "OpenShell VM DNS monkeypatch completed but inference.local is still unavailable", + ); + expect(combined).toContain("Reapplying OpenShell inference route"); + expect(combined).toContain("inference.local route repaired"); }, ); }); From 793666ca1fb1d23e215f58c6b2cf541de9576d89 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 14:14:13 -0700 Subject: [PATCH 17/20] fix(macos): address VM DNS review feedback --- src/lib/actions/sandbox/connect.ts | 2 +- .../sandbox/vm-dns-monkeypatch.test.ts | 28 +++++++++++++++++++ src/lib/actions/sandbox/vm-dns-monkeypatch.ts | 16 +++++++++++ test/onboard.test.ts | 6 ++-- test/sandbox-connect-inference.test.ts | 1 + 5 files changed, 49 insertions(+), 4 deletions(-) diff --git a/src/lib/actions/sandbox/connect.ts b/src/lib/actions/sandbox/connect.ts index f8b38d5d25..800bbbb7a1 100644 --- a/src/lib/actions/sandbox/connect.ts +++ b/src/lib/actions/sandbox/connect.ts @@ -38,7 +38,7 @@ import { resolveOpenshell } from "../../adapters/openshell/resolve"; const agentRuntime = require("../../../../bin/lib/agent-runtime"); const NEMOCLAW_GATEWAY_NAME = "nemoclaw"; -const LEGACY_CLUSTER_DRIVERS = new Set([null, undefined, "", "kubernetes"]); +const LEGACY_CLUSTER_DRIVERS = new Set([null, undefined, "", "docker", "kubernetes"]); export type SandboxConnectOptions = { probeOnly?: boolean; diff --git a/src/lib/actions/sandbox/vm-dns-monkeypatch.test.ts b/src/lib/actions/sandbox/vm-dns-monkeypatch.test.ts index 13f36cf8b0..2c50f10822 100644 --- a/src/lib/actions/sandbox/vm-dns-monkeypatch.test.ts +++ b/src/lib/actions/sandbox/vm-dns-monkeypatch.test.ts @@ -326,6 +326,34 @@ describe("OpenShell VM DNS monkeypatch", () => { expect(fs.readFileSync(outside, "utf-8")).toBe("nameserver 8.8.8.8\n"); }); + it("refuses dangling resolver symlinks before writing", () => { + const stateDir = makeTempDir(); + const rootfs = sandboxRootfs(stateDir); + const outside = path.join(stateDir, "missing-resolv.conf"); + fs.mkdirSync(path.join(rootfs, "etc"), { recursive: true }); + fs.symlinkSync(outside, path.join(rootfs, "etc", "resolv.conf")); + writeRecognizedInit(rootfs); + + const result = applyOpenShellVmDnsMonkeypatch( + "demo", + { openshellDriver: "vm" }, + { + capture: () => ({ status: 0, output: "Id: abc\n" }), + platform: "darwin", + stateDir, + }, + ); + + expect(result).toMatchObject({ + attempted: true, + changed: false, + ok: false, + status: "failed", + }); + expect(result.reason).toContain("dangling symlink"); + expect(fs.existsSync(outside)).toBe(false); + }); + it("returns a warning result instead of throwing when rootfs files cannot be patched", () => { const stateDir = makeTempDir(); const rootfs = sandboxRootfs(stateDir); diff --git a/src/lib/actions/sandbox/vm-dns-monkeypatch.ts b/src/lib/actions/sandbox/vm-dns-monkeypatch.ts index 8b62d43cfe..73235f8084 100644 --- a/src/lib/actions/sandbox/vm-dns-monkeypatch.ts +++ b/src/lib/actions/sandbox/vm-dns-monkeypatch.ts @@ -86,6 +86,15 @@ function realpathIfPresent(filePath: string): string | null { } } +function lstatIfPresent(filePath: string): fs.Stats | null { + try { + return fs.lstatSync(filePath); + } catch (error) { + if ((error as NodeJS.ErrnoException).code === "ENOENT") return null; + throw error; + } +} + function fail(reason: string, rootfs?: string, changed = false): VmDnsMonkeypatchResult { return { attempted: true, @@ -150,6 +159,13 @@ function resolveTargetInsideRootfs( return { ok: true, path: targetReal }; } + if (lstatIfPresent(target)?.isSymbolicLink()) { + return { + ok: false, + reason: `refusing to patch ${path.join(...relativePath)} because it is a dangling symlink: ${target}`, + }; + } + if (opts.mustExist) { return { ok: false, diff --git a/test/onboard.test.ts b/test/onboard.test.ts index af8d5ae1c9..d6d267c1ec 100644 --- a/test/onboard.test.ts +++ b/test/onboard.test.ts @@ -5141,9 +5141,9 @@ ${webSearchVerifySource}`; assert.match( source, - // #2753: sandboxName is intentionally absent from the options here so - // the session does not record a name before createSandbox completes. - /startRecordedStep\("sandbox", \{ provider, model \}\);\s*const recordedMessagingChannels = getRecordedMessagingChannelsForResume\(resume, session(?:, sandboxName)?\);[\s\S]*?selectedMessagingChannels = recordedMessagingChannels;[\s\S]*?selectedMessagingChannels = await setupMessagingChannels\(\);[\s\S]*?const messagingChannelConfig = readMessagingChannelConfigFromEnv\(\);[\s\S]*?onboardSession\.updateSession\(\(current[^)]*\) => \{\s*current\.messagingChannels = selectedMessagingChannels;\s*current\.messagingChannelConfig = messagingChannelConfig;\s*return current;\s*\}\);[\s\S]*?sandboxName = await createSandbox\(\s*gpu,\s*model,\s*provider,\s*preferredInferenceApi,\s*sandboxName,\s*nextWebSearchConfig,\s*selectedMessagingChannels,\s*fromDockerfile,\s*agent,\s*opts\.controlUiPort \|\| null,\s*sandboxGpuConfig,\s*\);/, + // #2753: sandboxName is read for resume hints here, but the session still + // does not persist a sandbox name before createSandbox completes. + /startRecordedStep\("sandbox", \{ provider, model \}\);\s*const recordedMessagingChannels = getRecordedMessagingChannelsForResume\(resume, session, sandboxName\);[\s\S]*?selectedMessagingChannels = recordedMessagingChannels;[\s\S]*?selectedMessagingChannels = await setupMessagingChannels\(\);[\s\S]*?const messagingChannelConfig = readMessagingChannelConfigFromEnv\(\);[\s\S]*?onboardSession\.updateSession\(\(current[^)]*\) => \{\s*current\.messagingChannels = selectedMessagingChannels;\s*current\.messagingChannelConfig = messagingChannelConfig;\s*return current;\s*\}\);[\s\S]*?sandboxName = await createSandbox\(\s*gpu,\s*model,\s*provider,\s*preferredInferenceApi,\s*sandboxName,\s*nextWebSearchConfig,\s*selectedMessagingChannels,\s*fromDockerfile,\s*agent,\s*opts\.controlUiPort \|\| null,\s*sandboxGpuConfig,\s*\);/, ); }); diff --git a/test/sandbox-connect-inference.test.ts b/test/sandbox-connect-inference.test.ts index 16178c689e..db917eed62 100644 --- a/test/sandbox-connect-inference.test.ts +++ b/test/sandbox-connect-inference.test.ts @@ -362,6 +362,7 @@ describe("sandbox connect inference route swap (#1248)", () => { model: "nvidia/nemotron-3-super-120b-a12b", provider: "nvidia-prod", gpuEnabled: false, + openshellDriver: "docker", policies: [], }, "nvidia-prod", From 70a488731bef68c4d3090b32f8a8e905f87d67a6 Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Wed, 13 May 2026 14:21:28 -0700 Subject: [PATCH 18/20] fix(macos): special-case only VM DNS repair --- src/lib/actions/sandbox/connect.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/lib/actions/sandbox/connect.ts b/src/lib/actions/sandbox/connect.ts index 800bbbb7a1..c5e291f541 100644 --- a/src/lib/actions/sandbox/connect.ts +++ b/src/lib/actions/sandbox/connect.ts @@ -38,7 +38,6 @@ import { resolveOpenshell } from "../../adapters/openshell/resolve"; const agentRuntime = require("../../../../bin/lib/agent-runtime"); const NEMOCLAW_GATEWAY_NAME = "nemoclaw"; -const LEGACY_CLUSTER_DRIVERS = new Set([null, undefined, "", "docker", "kubernetes"]); export type SandboxConnectOptions = { probeOnly?: boolean; @@ -161,7 +160,7 @@ function isSandboxInferenceRouteHealthy(sandboxName: string): boolean { } function shouldUseLegacyDnsProxyRepair(sb: SandboxEntry | null): boolean { - return LEGACY_CLUSTER_DRIVERS.has(sb?.openshellDriver); + return sb?.openshellDriver !== "vm"; } function reapplyVmInferenceRoute(sandboxName: string, sb: SandboxEntry | null): boolean { From 4c36539fb1de09f5161254449564dd9443fad04d Mon Sep 17 00:00:00 2001 From: Aaron Erickson Date: Thu, 14 May 2026 08:01:44 -0500 Subject: [PATCH 19/20] fix(connect): probe VM inference after route reapply --- src/lib/actions/sandbox/connect.ts | 6 +-- test/sandbox-connect-inference.test.ts | 52 +++++++++++++++++++++++++- 2 files changed, 54 insertions(+), 4 deletions(-) diff --git a/src/lib/actions/sandbox/connect.ts b/src/lib/actions/sandbox/connect.ts index c5e291f541..cb59f11b61 100644 --- a/src/lib/actions/sandbox/connect.ts +++ b/src/lib/actions/sandbox/connect.ts @@ -165,11 +165,11 @@ function shouldUseLegacyDnsProxyRepair(sb: SandboxEntry | null): boolean { function reapplyVmInferenceRoute(sandboxName: string, sb: SandboxEntry | null): boolean { if (!sb?.provider || !sb.model) return false; - const result = runOpenshell( - ["inference", "set", "--provider", sb.provider, "--model", sb.model], + runOpenshell( + ["inference", "set", "--provider", sb.provider, "--model", sb.model, "--no-verify"], { ignoreError: true }, ); - return result.status === 0 && isSandboxInferenceRouteHealthy(sandboxName); + return isSandboxInferenceRouteHealthy(sandboxName); } function repairSandboxInferenceRouteIfNeeded( diff --git a/test/sandbox-connect-inference.test.ts b/test/sandbox-connect-inference.test.ts index db917eed62..192ce25bf4 100644 --- a/test/sandbox-connect-inference.test.ts +++ b/test/sandbox-connect-inference.test.ts @@ -27,6 +27,7 @@ type SandboxEntryFixture = { type SetupFixtureOptions = { inferenceProbeResponses?: string[]; + inferenceSetStatus?: number; }; function setupFixture( @@ -129,7 +130,7 @@ if (args[0] === "inference" && args[1] === "get") { if (args[0] === "inference" && args[1] === "set") { state.inferenceSetCalls.push(args.slice(2)); fs.writeFileSync(stateFile, JSON.stringify(state)); - process.exit(0); + process.exit(${JSON.stringify(options.inferenceSetStatus ?? 0)}); } if (args[0] === "logs") { @@ -526,4 +527,53 @@ describe("sandbox connect inference route swap (#1248)", () => { expect(combined).toContain("inference.local route repaired"); }, ); + + it( + "probes VM inference health after route reapply even when inference set exits nonzero", + testTimeoutOptions(20_000), + () => { + const { tmpDir, stateFile, sandboxName } = setupFixture( + { + name: "vm-route-set-nonzero", + model: "nvidia/nemotron-3-super-120b-a12b", + provider: "nvidia-prod", + gpuEnabled: false, + openshellDriver: "vm", + policies: [], + }, + "nvidia-prod", + "nvidia/nemotron-3-super-120b-a12b", + { + inferenceProbeResponses: [ + 'BROKEN 503 {"error":"inference service unavailable"}', + "OK 200", + ], + inferenceSetStatus: 1, + }, + ); + + const result = runConnect(tmpDir, sandboxName, { + NEMOCLAW_FORCE_VM_DNS_MONKEYPATCH: "1", + }); + expect(result.status).toBe(0); + + const state = JSON.parse(fs.readFileSync(stateFile, "utf-8")); + expect(state.inferenceSetCalls).toEqual([ + [ + "--provider", + "nvidia-prod", + "--model", + "nvidia/nemotron-3-super-120b-a12b", + "--no-verify", + ], + ]); + expect(state.dockerCalls.length).toBe(0); + + const combined = (result.stdout || "") + (result.stderr || ""); + expect(combined).toContain("OpenShell VM DNS monkeypatch did not apply"); + expect(combined).toContain("Reapplying OpenShell inference route"); + expect(combined).toContain("inference.local route repaired"); + expect(combined).not.toContain("OpenShell vm gateway path"); + }, + ); }); From 69698b7c5f0838519623a0d040f819eecfa6ab5d Mon Sep 17 00:00:00 2001 From: Carlos Villela Date: Thu, 14 May 2026 11:24:27 -0700 Subject: [PATCH 20/20] fix(onboard): satisfy entrypoint budget --- src/lib/onboard.ts | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/lib/onboard.ts b/src/lib/onboard.ts index 7dc93e0a13..edd7702b86 100644 --- a/src/lib/onboard.ts +++ b/src/lib/onboard.ts @@ -1971,11 +1971,6 @@ function getMessagingChannelForEnvKey(envKey: string): string | null { return null; } -function getKnownMessagingChannels(channels: string[] | null | undefined): string[] { - if (!Array.isArray(channels)) return []; - const known = new Set(MESSAGING_CHANNELS.map((channel) => channel.name)); - return [...new Set(channels.filter((channel) => known.has(channel)))]; -} function getRecordedMessagingChannelsForResume( resume: boolean, @@ -4832,11 +4827,7 @@ function getSandboxRuntimeRegistryFields( sandboxGpuEnabled: config.sandboxGpuEnabled, sandboxGpuMode: config.mode, sandboxGpuDevice: config.sandboxGpuDevice, - openshellDriver: isLinuxDockerDriverGatewayEnabled() - ? process.platform === "darwin" - ? "vm" - : "docker" - : "kubernetes", + openshellDriver: isLinuxDockerDriverGatewayEnabled() ? (process.platform === "darwin" ? "vm" : "docker") : "kubernetes", openshellVersion: getInstalledOpenshellVersion( runCaptureOpenshell(["--version"], { ignoreError: true }), ),