Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
6d08d7a
fix(onboard): skip Docker bridge probe for VM driver
ericksoa May 13, 2026
c98d291
fix(onboard): keep bridge probe patch entrypoint-neutral
ericksoa May 13, 2026
26b66cd
fix(onboard): wait for VM startup output before detaching
ericksoa May 13, 2026
a458b2f
fix(onboard): keep VM startup gate out of entrypoint
ericksoa May 13, 2026
d3fbfeb
fix(hermes): keep macos vm startup mutable
ericksoa May 13, 2026
3869623
fix(onboard): reuse stored messaging channels
ericksoa May 13, 2026
cef0079
fix(connect): avoid legacy dns repair for vm sandboxes
ericksoa May 13, 2026
8343311
fix: monkeypatch macos vm dns for inference
ericksoa May 13, 2026
696be2a
fix: allow discord guild users for hermes
ericksoa May 13, 2026
db6b0d4
refactor: keep onboard entrypoint net neutral
ericksoa May 13, 2026
aed17c3
Merge remote-tracking branch 'origin/main' into fix/macos-vm-skip-doc…
ericksoa May 13, 2026
11cc7f3
fix: allow discord regional websocket gateways
ericksoa May 13, 2026
0f54432
fix: address messaging reuse review feedback
ericksoa May 13, 2026
46ab1f6
Merge remote-tracking branch 'origin/main' into fix/macos-vm-skip-doc…
ericksoa May 13, 2026
2b290c0
fix: flush sandbox create tail before ready recovery
ericksoa May 13, 2026
3a9f58f
fix: keep VM DNS monkeypatch best-effort
ericksoa May 13, 2026
242a624
fix(onboard): address messaging reuse feedback
ericksoa May 13, 2026
32056fa
fix(macos): harden VM DNS monkeypatch
ericksoa May 13, 2026
d7a3b25
Merge branch 'main' into fix/macos-vm-skip-docker-bridge-probe
cv May 13, 2026
793666c
fix(macos): address VM DNS review feedback
ericksoa May 13, 2026
70a4887
fix(macos): special-case only VM DNS repair
ericksoa May 13, 2026
cd513ea
Merge branch 'main' into fix/macos-vm-skip-docker-bridge-probe
cv May 13, 2026
5a84abb
Merge remote-tracking branch 'origin/main' into fix/macos-vm-skip-doc…
cv May 13, 2026
2646709
Merge remote-tracking branch 'origin/main' into fix/macos-vm-skip-doc…
ericksoa May 14, 2026
4c36539
fix(connect): probe VM inference after route reapply
ericksoa May 14, 2026
55296c4
merge: main into fix/macos-vm-skip-docker-bridge-probe
cv May 14, 2026
69698b7
fix(onboard): satisfy entrypoint budget
cv May 14, 2026
7a22871
Merge branch 'main' into fix/macos-vm-skip-docker-bridge-probe
ericksoa May 14, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion agents/hermes/start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,12 @@ if [ "$(id -u)" -ne 0 ]; then
export HOME=/sandbox
export HERMES_HOME="${HERMES_DIR}"

if ! verify_config_integrity "${HERMES_DIR}" "${HERMES_HASH_FILE}"; then
# macOS VM startup currently runs this entrypoint as the sandbox user and
# remaps rootfs ownership to the host uid. In that mode the strict /etc hash
# cannot remain a root-owned trust anchor, so use the same locked-aware
# mutable-default verifier as OpenClaw. The root path below keeps strict
# verification against /etc/nemoclaw/hermes.config-hash.
if ! verify_config_integrity_if_locked "${HERMES_DIR}"; then
echo "[SECURITY] Config integrity check failed — refusing to start (non-root mode)" >&2
exit 1
fi
Expand Down
60 changes: 59 additions & 1 deletion src/lib/actions/sandbox/connect.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ import type { SandboxEntry } from "../../state/registry";
import { ROOT } from "../../runner";
import { runSetupDnsProxy } from "../dns";
import { ensureLiveSandboxOrExit } from "./gateway-state";
import {
applyOpenShellVmDnsMonkeypatch,
shouldApplyVmDnsMonkeypatch,
} from "./vm-dns-monkeypatch";
import {
createSystemDeps as createSessionDeps,
getActiveSandboxSessions,
Expand All @@ -34,6 +38,7 @@ import { resolveOpenshell } from "../../adapters/openshell/resolve";
const agentRuntime = require("../../../../bin/lib/agent-runtime");

const NEMOCLAW_GATEWAY_NAME = "nemoclaw";
const LEGACY_CLUSTER_DRIVERS = new Set([null, undefined, "", "kubernetes"]);

Comment thread
coderabbitai[bot] marked this conversation as resolved.
Outdated
export type SandboxConnectOptions = {
probeOnly?: boolean;
Expand Down Expand Up @@ -155,13 +160,66 @@ function isSandboxInferenceRouteHealthy(sandboxName: string): boolean {
return probe.status === 0 && /^OK\s+[0-9]{3}\b/.test(probe.output.trim());
}

/**
 * Decide whether the legacy sandbox DNS proxy repair applies to a sandbox.
 *
 * This is a membership test of the registry entry's `openshellDriver` against
 * `LEGACY_CLUSTER_DRIVERS`. A missing entry is looked up as `undefined`.
 *
 * @param sb - Registry entry for the sandbox, or `null` when none is recorded.
 * @returns `true` when the driver value is a member of `LEGACY_CLUSTER_DRIVERS`.
 */
function shouldUseLegacyDnsProxyRepair(sb: SandboxEntry | null): boolean {
  const driver = sb?.openshellDriver;
  return LEGACY_CLUSTER_DRIVERS.has(driver);
}

/**
 * Re-run `openshell inference set` with the sandbox's recorded provider and
 * model, then probe the inference route from inside the sandbox.
 *
 * @param sandboxName - Sandbox whose route is probed after the reapply.
 * @param sb - Registry entry supplying `provider` and `model`.
 * @returns `true` only when the command exits with status 0 and the route
 *   probe reports healthy; `false` when provider or model is not recorded.
 */
function reapplyVmInferenceRoute(sandboxName: string, sb: SandboxEntry | null): boolean {
  const provider = sb?.provider;
  const model = sb?.model;
  if (!provider || !model) return false;

  const setRoute = runOpenshell(
    ["inference", "set", "--provider", provider, "--model", model],
    { ignoreError: true },
  );
  if (setRoute.status !== 0) return false;

  // A zero exit status alone is not trusted: confirm the route actually answers.
  return isSandboxInferenceRouteHealthy(sandboxName);
}

function repairSandboxInferenceRouteIfNeeded(
sandboxName: string,
sb: SandboxEntry | null,
{ quiet = false }: { quiet?: boolean } = {},
): boolean {
if (process.env.NEMOCLAW_DISABLE_INFERENCE_ROUTE_REPAIR === "1") return false;
if (isSandboxInferenceRouteHealthy(sandboxName)) return false;

if (!shouldUseLegacyDnsProxyRepair(sb)) {
if (shouldApplyVmDnsMonkeypatch(sb)) {
if (!quiet) {
console.log("");
console.log(
` inference.local is unavailable inside '${sandboxName}'. Applying OpenShell VM DNS monkeypatch...`,
);
}
const patch = applyOpenShellVmDnsMonkeypatch(sandboxName, sb);
if (patch.ok && isSandboxInferenceRouteHealthy(sandboxName)) {
if (!quiet) {
console.log(" inference.local route repaired.");
}
return true;
}
if (!quiet && !patch.ok && patch.reason) {
console.error(
` Warning: OpenShell VM DNS monkeypatch did not apply: ${patch.reason}`,
);
}
}

if (!quiet) {
console.log("");
console.log(` inference.local is unavailable inside '${sandboxName}'. Reapplying OpenShell inference route...`);
}
const healthy = reapplyVmInferenceRoute(sandboxName, sb);
if (!quiet) {
if (healthy) {
console.log(" inference.local route repaired.");
} else {
console.error(
` Warning: inference.local is still unavailable through the OpenShell ${sb?.openshellDriver || "non-legacy"} gateway path.`,
);
}
}
return healthy;
}

if (!quiet) {
console.log("");
console.log(` inference.local is unavailable inside '${sandboxName}'. Repairing sandbox DNS proxy...`);
Expand Down Expand Up @@ -219,7 +277,7 @@ function ensureSandboxInferenceRoute(
);
}
}
repairSandboxInferenceRouteIfNeeded(sandboxName, { quiet });
repairSandboxInferenceRouteIfNeeded(sandboxName, sb, { quiet });
}
} catch {
/* non-fatal — don't block connect on inference route repair */
Expand Down
126 changes: 126 additions & 0 deletions src/lib/actions/sandbox/vm-dns-monkeypatch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
// SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

import fs from "node:fs";
import os from "node:os";
import path from "node:path";

import {
type CaptureOpenshellResult,
stripAnsi,
} from "../../adapters/openshell/client";
import { captureOpenshell } from "../../adapters/openshell/runtime";
import type { SandboxEntry } from "../../state/registry";

const GVPROXY_DNS = "192.168.127.1";
const LEGACY_PUBLIC_DNS_BLOCK = ` if [ ! -s /etc/resolv.conf ]; then
echo "nameserver 8.8.8.8" > /etc/resolv.conf
echo "nameserver 8.8.4.4" >> /etc/resolv.conf
fi`;
const GVPROXY_DNS_BLOCK = ` echo "nameserver \${GVPROXY_GATEWAY_IP}" > /etc/resolv.conf`;

/**
 * Shape of the OpenShell capture function used to run `sandbox get`.
 * Callers can inject one through `deps.capture`; `captureOpenshell` is the default.
 */
type CaptureFn = (
args: string[],
opts: { ignoreError?: boolean; timeout?: number },
) => CaptureOpenshellResult;

/** Outcome reported by `applyOpenShellVmDnsMonkeypatch`. */
export type VmDnsMonkeypatchResult = {
// false only when the sandbox did not qualify for the patch and nothing was tried.
attempted: boolean;
// true when resolv.conf was rewritten or the guest init script was patched.
changed: boolean;
// true when the patch routine ran to completion (even if nothing needed changing).
ok: boolean;
// Human-readable explanation, set on the failure results.
reason?: string;
// Host path of the sandbox rootfs, set on the success result.
rootfs?: string;
};

/**
 * Report whether the OpenShell VM DNS monkeypatch should run for a sandbox.
 *
 * The patch is considered only for entries whose `openshellDriver` is `"vm"`.
 * `NEMOCLAW_DISABLE_VM_DNS_MONKEYPATCH=1` always turns it off. Otherwise it is
 * enabled on `darwin`, or on any platform when
 * `NEMOCLAW_FORCE_VM_DNS_MONKEYPATCH=1` is set.
 *
 * @param entry - Registry entry (only `openshellDriver` is read); may be absent.
 * @param platform - Platform identifier; defaults to `process.platform`.
 * @param env - Environment to read the toggles from; defaults to `process.env`.
 * @returns `true` when the patch should be applied.
 */
export function shouldApplyVmDnsMonkeypatch(
  entry: Pick<SandboxEntry, "openshellDriver"> | null | undefined,
  platform: NodeJS.Platform = process.platform,
  env: NodeJS.ProcessEnv = process.env,
): boolean {
  const disabled = env.NEMOCLAW_DISABLE_VM_DNS_MONKEYPATCH === "1";
  const isVmDriver = entry?.openshellDriver === "vm";
  if (disabled || !isVmDriver) return false;

  const forced = env.NEMOCLAW_FORCE_VM_DNS_MONKEYPATCH === "1";
  return forced || platform === "darwin";
}

/**
 * Resolve the gateway state directory.
 *
 * A non-blank `NEMOCLAW_OPENSHELL_GATEWAY_STATE_DIR` wins and is trimmed and
 * resolved to an absolute path. Otherwise the directory is
 * `<homeDir>/.local/state/nemoclaw/openshell-docker-gateway`.
 *
 * @param env - Environment to read the override from.
 * @param homeDir - Home directory used for the default location.
 * @returns The state directory path.
 */
function dockerDriverGatewayStateDir(env: NodeJS.ProcessEnv, homeDir: string): string {
  const override = env.NEMOCLAW_OPENSHELL_GATEWAY_STATE_DIR?.trim();
  if (override) return path.resolve(override);
  return path.join(homeDir, ".local", "state", "nemoclaw", "openshell-docker-gateway");
}

/**
 * Extract the sandbox id from `openshell sandbox get` output.
 *
 * ANSI escapes are stripped first. The id is then taken from the first line of
 * the form `Id: <value>` or `ID: <value>`, where the value consists of
 * letters, digits, `.`, `_` and `-`.
 *
 * @param output - Raw command output.
 * @returns The id, or `null` when no such line is present.
 */
export function parseSandboxIdFromGetOutput(output: string): string | null {
  const plain = stripAnsi(output);
  const idLine = /^\s*(?:Id|ID):\s*([A-Za-z0-9._-]+)\s*$/m.exec(plain);
  if (!idLine) return null;
  return idLine[1] ?? null;
}

/**
 * Rewrite the guest init script so it points DNS at the gvproxy gateway
 * instead of falling back to public resolvers.
 *
 * The file is read directly rather than probed with `existsSync` first, so
 * there is no window between an existence check and the read in which the
 * file can change (the check-then-use race reported by code scanning).
 *
 * @param initPath - Host path of the guest init script inside the VM rootfs.
 * @returns `true` when the script was rewritten; `false` when it is missing,
 *   already patched, or does not contain the legacy DNS block.
 * @throws Any read/write error other than the file (or a parent path
 *   component) not existing.
 */
function patchGuestInit(initPath: string): boolean {
  let original: string;
  try {
    original = fs.readFileSync(initPath, "utf-8");
  } catch (err: unknown) {
    const code = err instanceof Error ? (err as NodeJS.ErrnoException).code : undefined;
    // A missing script (or missing parent directory) means there is nothing to patch.
    if (code === "ENOENT" || code === "ENOTDIR") return false;
    throw err;
  }

  // Already patched: leave the file untouched.
  if (original.includes('nameserver ${GVPROXY_GATEWAY_IP}')) return false;

  // A replacer function keeps `$` sequences in the replacement text literal.
  const patched = original.replace(LEGACY_PUBLIC_DNS_BLOCK, () => GVPROXY_DNS_BLOCK);
  if (patched === original) return false;

  fs.writeFileSync(initPath, patched);
  return true;
}

/**
 * Point a macOS OpenShell VM sandbox at the gvproxy DNS server.
 *
 * Steps: confirm the sandbox qualifies, resolve its id through
 * `openshell sandbox get`, locate its rootfs under the gateway state
 * directory, write `etc/resolv.conf` there, and patch the guest init script.
 *
 * The patch is best-effort. Every failure, including filesystem errors, is
 * reported through the returned `ok`/`reason` fields instead of an exception,
 * so callers can warn and continue.
 *
 * @param sandboxName - Sandbox name passed to `openshell sandbox get`.
 * @param entry - Registry entry (only `openshellDriver` is read).
 * @param deps - Optional overrides for the capture function, environment,
 *   home directory, platform and state directory.
 * @returns What was attempted, whether any file changed, and on failure why.
 */
export function applyOpenShellVmDnsMonkeypatch(
  sandboxName: string,
  entry: Pick<SandboxEntry, "openshellDriver"> | null | undefined,
  deps: {
    capture?: CaptureFn;
    env?: NodeJS.ProcessEnv;
    homeDir?: string;
    platform?: NodeJS.Platform;
    stateDir?: string;
  } = {},
): VmDnsMonkeypatchResult {
  const env = deps.env ?? process.env;
  if (!shouldApplyVmDnsMonkeypatch(entry, deps.platform ?? process.platform, env)) {
    return {
      attempted: false,
      changed: false,
      ok: false,
      reason: "not a macOS OpenShell VM sandbox",
    };
  }

  const capture = deps.capture ?? captureOpenshell;
  const get = capture(["sandbox", "get", sandboxName], {
    ignoreError: true,
    timeout: 10_000,
  });
  const sandboxId = parseSandboxIdFromGetOutput(get.output || "");
  if (!sandboxId) {
    return {
      attempted: true,
      changed: false,
      ok: false,
      reason: "could not resolve OpenShell sandbox id",
    };
  }

  const stateDir =
    deps.stateDir ?? dockerDriverGatewayStateDir(env, deps.homeDir ?? os.homedir());
  const rootfs = path.join(stateDir, "vm-driver", "sandboxes", sandboxId, "rootfs");
  const resolvConf = path.join(rootfs, "etc", "resolv.conf");
  // Checked up front because the recursive mkdir below would otherwise create
  // a rootfs that the VM driver never made.
  if (!fs.existsSync(rootfs)) {
    return {
      attempted: true,
      changed: false,
      ok: false,
      reason: `VM rootfs not found: ${rootfs}`,
    };
  }

  const desired = `nameserver ${GVPROXY_DNS}\n`;
  let changed = false;
  try {
    fs.mkdirSync(path.dirname(resolvConf), { recursive: true });

    // Read directly instead of exists-then-read; a missing file reads as empty.
    let current = "";
    try {
      current = fs.readFileSync(resolvConf, "utf-8");
    } catch (err: unknown) {
      const code = err instanceof Error ? (err as NodeJS.ErrnoException).code : undefined;
      if (code !== "ENOENT") throw err;
    }

    if (current !== desired) {
      fs.writeFileSync(resolvConf, desired);
      changed = true;
    }
    // The init script is patched whether or not resolv.conf needed rewriting.
    if (patchGuestInit(path.join(rootfs, "srv", "openshell-vm-sandbox-init.sh"))) {
      changed = true;
    }
  } catch (err: unknown) {
    const detail = err instanceof Error ? err.message : String(err);
    return {
      attempted: true,
      changed,
      ok: false,
      reason: `could not update VM DNS configuration: ${detail}`,
      rootfs,
    };
  }

  return { attempted: true, changed, ok: true, rootfs };
}
38 changes: 26 additions & 12 deletions src/lib/onboard.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,9 @@ const {
const {
getSelectionDrift,
}: typeof import("./onboard/selection-drift") = require("./onboard/selection-drift");
const {
applyOpenShellVmDnsMonkeypatch,
}: typeof import("./actions/sandbox/vm-dns-monkeypatch") = require("./actions/sandbox/vm-dns-monkeypatch");
const crypto = require("node:crypto");
const fs = require("fs");
const os = require("os");
Expand Down Expand Up @@ -2025,12 +2028,11 @@ function getKnownMessagingChannels(channels: string[] | null | undefined): strin

function getRecordedMessagingChannelsForResume(
resume: boolean,
session: Session | null,
session: Session | null, sandboxName: string | null,
): string[] | null {
if (!resume || !isNonInteractive() || !Array.isArray(session?.messagingChannels)) {
return null;
}
return getKnownMessagingChannels(session.messagingChannels);
return require("./onboard/messaging-reuse").getNonInteractiveStoredMessagingChannels(
resume, session?.messagingChannels, sandboxName, MESSAGING_CHANNELS, (envKey: string) => Boolean(getCredential(envKey) || normalizeCredentialValue(process.env[envKey])),
registry.getSandbox.bind(registry), registry.getDisabledChannels.bind(registry), providerExistsInGateway, isNonInteractive());
}

/**
Expand Down Expand Up @@ -4461,7 +4463,7 @@ async function startDockerDriverGateway({
if (drift) {
restartDockerDriverGatewayProcessForDrift(pidFileGatewayPid, drift.reason);
} else if (registerDockerDriverGatewayEndpoint() && (await isDockerDriverGatewayHttpReady())) {
await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure);
await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure, { drivers: gatewayEnv.OPENSHELL_DRIVERS });
console.log(" ✓ Reusing existing Docker-driver gateway");
return;
} else {
Expand Down Expand Up @@ -4492,7 +4494,7 @@ async function startDockerDriverGateway({
isGatewayHealthy(adoptedStatus, adoptedGwInfo, adoptedActiveGatewayInfo) &&
(await isDockerDriverGatewayHttpReady())
) {
await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure);
await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure, { drivers: gatewayEnv.OPENSHELL_DRIVERS });
console.log(` ✓ Reusing existing Docker-driver gateway process (PID ${portListenerPid})`);
return;
}
Expand Down Expand Up @@ -4561,7 +4563,7 @@ async function startDockerDriverGateway({
isGatewayHealthy(status, namedInfo, currentInfo) &&
(await isGatewayTcpReady())
) {
await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure);
await verifySandboxBridgeGatewayReachableOrExit(exitOnFailure, { drivers: gatewayEnv.OPENSHELL_DRIVERS });
console.log(" ✓ Docker-driver gateway is healthy");
return;
}
Expand Down Expand Up @@ -6105,11 +6107,12 @@ async function createSandbox(
? builtImageMatch[1]
: `openshell/sandbox-from:${buildId}`;

const sandboxRuntimeFields = getSandboxRuntimeRegistryFields(effectiveSandboxGpuConfig);
registry.registerSandbox({
name: sandboxName,
model: model || null,
provider: provider || null,
...getSandboxRuntimeRegistryFields(effectiveSandboxGpuConfig),
...sandboxRuntimeFields,
...getSandboxAgentRegistryFields(agent, !fromDockerfile),
imageTag: resolvedImageTag,
providerCredentialHashes:
Expand Down Expand Up @@ -6148,13 +6151,24 @@ async function createSandbox(

// DNS proxy — run a forwarder in the sandbox pod so the isolated
// sandbox namespace can resolve hostnames (fixes #626).
if (!isLinuxDockerDriverGatewayPlatform()) {
if (sandboxRuntimeFields.openshellDriver === "kubernetes") {
console.log(" Setting up sandbox DNS proxy...");
runFile("bash", [path.join(SCRIPTS, "setup-dns-proxy.sh"), GATEWAY_NAME, sandboxName], {
ignoreError: true,
});
}

const vmDnsPatch = applyOpenShellVmDnsMonkeypatch(sandboxName, {
openshellDriver: sandboxRuntimeFields.openshellDriver,
});
if (vmDnsPatch.ok && vmDnsPatch.changed) {
console.log(" ✓ Applied OpenShell VM DNS monkeypatch");
} else if (vmDnsPatch.attempted && !vmDnsPatch.ok && vmDnsPatch.reason) {
console.error(
` Warning: OpenShell VM DNS monkeypatch did not apply: ${vmDnsPatch.reason}`,
);
}

// Check that messaging providers exist in the gateway (sandbox attachment
// cannot be verified via CLI yet — only gateway-level existence is checked).
for (const p of messagingProviders) {
Expand Down Expand Up @@ -10731,12 +10745,12 @@ async function onboard(opts: OnboardOptions = {}): Promise<void> {
nextWebSearchConfig = await configureWebSearch(null, agent, webSearchSupportProbePath);
}
startRecordedStep("sandbox", { provider, model });
const recordedMessagingChannels = getRecordedMessagingChannelsForResume(resume, session);
const recordedMessagingChannels = getRecordedMessagingChannelsForResume(resume, session, sandboxName);
if (recordedMessagingChannels) {
selectedMessagingChannels = recordedMessagingChannels;
if (selectedMessagingChannels.length > 0) {
note(
` [resume] Reusing messaging channel configuration: ${selectedMessagingChannels.join(", ")}`,
` [non-interactive] Reusing messaging channel configuration: ${selectedMessagingChannels.join(", ")}`,
);
}
} else {
Expand Down
25 changes: 25 additions & 0 deletions src/lib/onboard/gateway-sandbox-reachability.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import { describe, expect, it } from "vitest";
import {
isSandboxBridgeGatewayReachable,
formatSandboxBridgeUnreachableMessage,
shouldVerifySandboxBridgeGatewayReachability,
verifySandboxBridgeGatewayReachableOrExit,
} from "../../../dist/lib/onboard/gateway-sandbox-reachability";

describe("isSandboxBridgeGatewayReachable", () => {
Expand Down Expand Up @@ -67,6 +69,29 @@ describe("isSandboxBridgeGatewayReachable", () => {
});
});

// Covers the driver gate in front of the Docker bridge reachability probe.
describe("verifySandboxBridgeGatewayReachableOrExit", () => {
it("skips the Docker bridge probe when OpenShell is using the macOS VM driver", async () => {
// Counts subnet inspections; it must stay at zero when the probe is skipped.
let inspectCalls = 0;
await verifySandboxBridgeGatewayReachableOrExit(false, {
drivers: "vm",
inspectSubnetImpl: () => {
inspectCalls += 1;
return undefined;
},
// Running the probe at all fails the test.
runImpl: () => {
throw new Error("probe should not run");
},
});
expect(inspectCalls).toBe(0);
});

it("keeps the bridge probe enabled for Docker-driver gateways", () => {
expect(shouldVerifySandboxBridgeGatewayReachability({ drivers: "docker" })).toBe(true);
// A driver list that includes docker alongside vm still gets the probe.
expect(shouldVerifySandboxBridgeGatewayReachability({ drivers: "vm,docker" })).toBe(true);
// Only a vm-only driver list turns the probe off.
expect(shouldVerifySandboxBridgeGatewayReachability({ drivers: "vm" })).toBe(false);
});
});

describe("formatSandboxBridgeUnreachableMessage", () => {
it("returns empty for an ok result", () => {
expect(
Expand Down
Loading
Loading