From 73092543e4b553d6952713e4b517e4a529dc8fb3 Mon Sep 17 00:00:00 2001 From: austinpower1258 Date: Fri, 29 May 2026 22:00:03 -0700 Subject: [PATCH 1/4] Add failing test: Hermes per-turn session-end must not destroy restore record Hermes fires on_session_end once per conversation turn, not at the true session boundary. cmux maps it to the destructive session-end action, so after the first turn the restore record and surface resume binding are consumed and nothing survives a quit/relaunch. This test asserts the per-turn session-end routes through the non-destructive turn-boundary path (recordPromptStop) and does not call store.consume / clearAgentSurfaceResumeBinding. It fails on current code. Refs #5000 Co-Authored-By: Claude Opus 4.8 --- .../CLIGenericHookPersistenceTests.swift | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/cmuxTests/CLIGenericHookPersistenceTests.swift b/cmuxTests/CLIGenericHookPersistenceTests.swift index 9f7d205470..77cefc9c13 100644 --- a/cmuxTests/CLIGenericHookPersistenceTests.swift +++ b/cmuxTests/CLIGenericHookPersistenceTests.swift @@ -679,6 +679,131 @@ extension CLINotifyProcessIntegrationRegressionTests { XCTAssertEqual(responseSession["runtimeStatus"] as? String, "running") } + func testHermesAgentSessionEndIsTurnBoundaryAndPreservesRestoreRecord() throws { + // Hermes fires the `on_session_end` plugin hook once per conversation turn + // (end of every run_conversation()), not at the true session boundary. cmux + // maps that event to the `session-end` subcommand, so the per-turn hook must + // route through the non-destructive turn-boundary path (recordPromptStop) and + // must NOT consume the session or clear the surface resume binding. Otherwise + // the restore record is destroyed after the first turn and nothing survives a + // quit/relaunch. See https://github.com/manaflow-ai/cmux/issues/5000. + let cliPath = try bundledCLIPath() + let socketPath = makeSocketPath("hermes-session-end") + let listenerFD = try bindUnixSocket(at: socketPath) + let state = MockSocketServerState() + let root = FileManager.default.temporaryDirectory + .appendingPathComponent("cmux-hermes-session-end-\(UUID().uuidString)", isDirectory: true) + let workspaceId = "11111111-1111-1111-1111-111111111111" + let surfaceId = "22222222-2222-2222-2222-222222222222" + let sessionId = "hermes-session-end-123" + + try FileManager.default.createDirectory(at: root, withIntermediateDirectories: true) + defer { + Darwin.close(listenerFD) + unlink(socketPath) + try? FileManager.default.removeItem(at: root) + } + + let environment: [String: String] = [ + "HOME": root.path, + "PATH": "/usr/bin:/bin:/usr/sbin:/sbin", + "PWD": root.path, + "CMUX_SOCKET_PATH": socketPath, + "CMUX_WORKSPACE_ID": workspaceId, + "CMUX_SURFACE_ID": surfaceId, + "CMUX_AGENT_HOOK_STATE_DIR": root.path, + "CMUX_CLI_SENTRY_DISABLED": "1", + ] + + func runHermesHook(_ subcommand: String, input: String) -> ProcessRunResult { + let serverHandled = startMockServer(listenerFD: listenerFD, state: state) { line in + guard let payload = self.jsonObject(line) else { + return "OK" + } + guard let id = payload["id"] as? String, let method = payload["method"] as? String else { + return self.malformedRequestResponse(id: payload["id"] as? String, raw: line) + } + switch method { + case "surface.list": + return self.surfaceListResponse(id: id, surfaceId: surfaceId) + case "feed.push": + return self.v2Response(id: id, ok: true, result: [:]) + default: + return self.v2Response(id: id, ok: false, error: ["code": "unrecognized_method", "message": "unexpected method: \(method)"]) + } + } + let result = runProcess( + executablePath: cliPath, + arguments: ["hooks", "hermes-agent", subcommand], + environment: environment, + standardInput: input, + timeout: 5 + ) + wait(for: [serverHandled], timeout: 5) + return result + } + + func storedHermesSessionIfPresent() throws -> [String: Any]? { + let storeURL = root.appendingPathComponent("hermes-agent-hook-sessions.json", isDirectory: false) + guard let data = try? Data(contentsOf: storeURL), + let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any], + let sessions = json["sessions"] as? [String: Any] + else { + return nil + } + return sessions[sessionId] as? [String: Any] + } + + let start = runHermesHook( + "session-start", + input: #"{"session_id":"\#(sessionId)","cwd":"\#(root.path)","hook_event_name":"on_session_start"}"# + ) + XCTAssertFalse(start.timedOut, start.stderr) + XCTAssertEqual(start.status, 0, start.stderr) + + // Finish a turn so a restorable record exists for the session. + let stop = runHermesHook( + "agent-response", + input: #"{"session_id":"\#(sessionId)","cwd":"\#(root.path)","hook_event_name":"post_llm_call","extra":{"user_message":"do the thing","assistant_response":"done","model":"gpt-4","platform":"cli"}}"# + ) + XCTAssertFalse(stop.timedOut, stop.stderr) + XCTAssertEqual(stop.status, 0, stop.stderr) + + XCTAssertNotNil( + try storedHermesSessionIfPresent(), + "Expected a Hermes session record to exist before the per-turn session-end hook fires" + ) + + // The per-turn on_session_end hook. Hermes is a restorable agent, so this is a + // turn boundary, not a true session teardown. + let sessionEndCommandStart = state.commands.count + let sessionEnd = runHermesHook( + "session-end", + input: #"{"session_id":"\#(sessionId)","cwd":"\#(root.path)","hook_event_name":"on_session_end"}"# + ) + XCTAssertFalse(sessionEnd.timedOut, sessionEnd.stderr) + XCTAssertEqual(sessionEnd.status, 0, sessionEnd.stderr) + XCTAssertEqual(sessionEnd.stdout, "{}\n") + + let sessionEndCommands = Array(state.commands.dropFirst(sessionEndCommandStart)) + XCTAssertTrue( + sessionEndCommands.contains { $0.contains("feed.push") }, + "Expected Hermes session-end to emit feed telemetry, saw \(sessionEndCommands)" + ) + XCTAssertFalse( + sessionEndCommands.contains { $0.hasPrefix("clear_agent_pid hermes-agent.") }, + "Hermes on_session_end fires per turn and must not clear saved routing, saw \(sessionEndCommands)" + ) + XCTAssertFalse( + sessionEndCommands.contains { $0.contains("surface.resume.clear") }, + "Hermes on_session_end fires per turn and must not clear the surface resume binding, saw \(sessionEndCommands)" + ) + XCTAssertNotNil( + try storedHermesSessionIfPresent(), + "Hermes on_session_end fires per turn and must not consume the restore record, saw it removed from the store" + ) + } + func testAntigravityHookInstallUsesNativeHooksJSONShape() throws { let cliPath = try bundledCLIPath() let root = FileManager.default.temporaryDirectory From ca4a7125df95a46ac4d3cafa9312ca52c13ad10c Mon Sep 17 00:00:00 2001 From: austinpower1258 Date: Fri, 29 May 2026 22:01:10 -0700 Subject: [PATCH 2/4] Fix: treat restorable agents' per-turn session-end as a turn boundary Hermes fires the on_session_end plugin hook once per conversation turn (end of every run_conversation()), not at the true session boundary. cmux maps both on_session_end and on_session_finalize to the destructive session-end action, and the .sessionEnd handler only spared grok and antigravity from consuming the session via a hardcoded name check. Because hermes-agent was not in that list, the restore record and surface resume binding were destroyed after the first turn, so a quit/relaunch had nothing to restore. Replace the hardcoded name check with a declared sessionEndIsTurnBoundary property on AgentHookDef (alongside publishesStopNotification), set true for grok, antigravity, and hermes-agent. This turns an implicit, drift-prone name registry into a typed invariant on the definition and fixes the entire class of 'per-turn session-end destroys the restore record' bugs for any restorable agent that opts in. grok and antigravity behavior is unchanged. Fixes #5000 Co-Authored-By: Claude Opus 4.8 --- CLI/CMUXCLI+AgentHookDefinitions.swift | 16 ++++++++++++++++ CLI/cmux.swift | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/CLI/CMUXCLI+AgentHookDefinitions.swift b/CLI/CMUXCLI+AgentHookDefinitions.swift index 91fe5a7c1d..be3cee6c7e 100644 --- a/CLI/CMUXCLI+AgentHookDefinitions.swift +++ b/CLI/CMUXCLI+AgentHookDefinitions.swift @@ -21,6 +21,17 @@ extension CMUXCLI { let events: [HookEvent] let aliases: Set let publishesStopNotification: Bool + /// Whether this agent's `SessionEnd`/`session-end` hook fires once per + /// conversation turn rather than at a true session teardown. + /// + /// Restorable agents (grok, antigravity, hermes-agent) re-emit their + /// session-end event after every turn, so the `.sessionEnd` handler must + /// treat it as a non-destructive turn boundary (`recordPromptStop`) and + /// must not consume the session or clear the surface resume binding — + /// otherwise the restore record is destroyed after the first turn and + /// nothing survives a quit/relaunch. See + /// https://github.com/manaflow-ai/cmux/issues/5000. + let sessionEndIsTurnBoundary: Bool /// Feed-hook events. Each entry installs a second hook for /// `agentEvent` that invokes `cmux hooks feed --source ` /// with a 120s timeout so the socket reply wait doesn't trip the @@ -81,6 +92,7 @@ extension CMUXCLI { format: HookFormat, events: [HookEvent], aliases: Set = [], publishesStopNotification: Bool = true, + sessionEndIsTurnBoundary: Bool = false, feedHookEvents: [String] = [], postInstallAction: PostInstallAction? = nil) { self.name = name; self.displayName = displayName; self.statusKey = statusKey @@ -92,6 +104,7 @@ extension CMUXCLI { self.sessionStoreSuffix = sessionStoreSuffix; self.disableEnvVar = disableEnvVar self.hookMarker = hookMarker; self.format = format; self.events = events self.publishesStopNotification = publishesStopNotification + self.sessionEndIsTurnBoundary = sessionEndIsTurnBoundary self.aliases = Set(aliases.compactMap { alias in let normalized = alias.trimmingCharacters(in: .whitespacesAndNewlines).lowercased() return normalized.isEmpty ? nil : normalized @@ -149,6 +162,7 @@ extension CMUXCLI { .init(agentEvent: "SessionEnd", cmuxSubcommand: "session-end"), ], publishesStopNotification: false, + sessionEndIsTurnBoundary: true, feedHookEvents: ["PreToolUse"] ), AgentHookDef( @@ -214,6 +228,7 @@ extension CMUXCLI { .init(agentEvent: "SessionEnd", cmuxSubcommand: "session-end"), ], aliases: ["agy"], + sessionEndIsTurnBoundary: true, feedHookEvents: ["PreToolUse", "PostToolUse"] ), AgentHookDef( @@ -244,6 +259,7 @@ extension CMUXCLI { .init(agentEvent: "on_session_finalize", cmuxSubcommand: "session-end"), .init(agentEvent: "on_session_reset", cmuxSubcommand: "session-start"), ], + sessionEndIsTurnBoundary: true, feedHookEvents: ["pre_tool_call", "post_tool_call", "pre_approval_request", "post_approval_response"] ), AgentHookDef( diff --git a/CLI/cmux.swift b/CLI/cmux.swift index 0158b64168..65ded3663d 100644 --- a/CLI/cmux.swift +++ b/CLI/cmux.swift @@ -28040,7 +28040,7 @@ export default function cmuxPiSessionExtension(pi: ExtensionAPI) { if def.name == "codex", !sessionId.isEmpty { retireCodexMonitorLeases(sessionId: sessionId, turnId: nil, env: env) } - if def.name == "grok" || def.name == "antigravity" { + if def.sessionEndIsTurnBoundary { if let mapped = sessionId.isEmpty ? nil : (try? store.lookup(sessionId: sessionId)) { sendAgentFeedTelemetry(workspaceId: mapped.workspaceId) _ = try? store.recordPromptStop( From 1abf05fce41a582ddc0f7d035be4d79e9bf76390 Mon Sep 17 00:00:00 2001 From: austinpower1258 Date: Fri, 29 May 2026 22:16:33 -0700 Subject: [PATCH 3/4] Split hermes on_session_finalize into a dedicated destructive teardown action MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cursor Bugbot correctly flagged that hermes maps both on_session_end (per-turn) and on_session_finalize (true teardown) to the session-end subcommand, so the sessionEndIsTurnBoundary flag made the genuine teardown non-destructive too — leaking the restore record, PID routing, and resume binding indefinitely. Route on_session_finalize to a new session-finalize subcommand backed by a new AgentHookAction.sessionFinalize that owns the destructive cleanup. The per-turn session-end still takes the non-destructive turn-boundary path; a non-turn-boundary session-end (gemini/copilot/etc.) falls through to the shared teardown, so their behavior is unchanged. Only hermes routes to the new action. Extend the regression test to also assert session-finalize consumes the record, clears the resume binding, and clears agent PID routing. Refs #5000 Co-Authored-By: Claude Opus 4.8 --- CLI/CMUXCLI+AgentHookDefinitions.swift | 11 ++++- CLI/cmux.swift | 4 ++ .../CLIGenericHookPersistenceTests.swift | 44 ++++++++++++++++--- 3 files changed, 50 insertions(+), 9 deletions(-) diff --git a/CLI/CMUXCLI+AgentHookDefinitions.swift b/CLI/CMUXCLI+AgentHookDefinitions.swift index be3cee6c7e..1346387ea7 100644 --- a/CLI/CMUXCLI+AgentHookDefinitions.swift +++ b/CLI/CMUXCLI+AgentHookDefinitions.swift @@ -31,6 +31,12 @@ extension CMUXCLI { /// otherwise the restore record is destroyed after the first turn and /// nothing survives a quit/relaunch. See /// https://github.com/manaflow-ai/cmux/issues/5000. + /// + /// Agents whose runtime distinguishes a per-turn boundary from a genuine + /// session teardown (hermes-agent emits both `on_session_end` per turn and + /// `on_session_finalize` once at the end) route the teardown event to the + /// separate `session-finalize` subcommand / ``AgentHookAction/sessionFinalize`` + /// action, which performs the destructive cleanup this flag suppresses. let sessionEndIsTurnBoundary: Bool /// Feed-hook events. Each entry installs a second hook for /// `agentEvent` that invokes `cmux hooks feed --source ` @@ -115,7 +121,7 @@ extension CMUXCLI { } enum AgentHookAction { - case sessionStart, promptSubmit, stop, notification, approvalResponse, sessionEnd, noop + case sessionStart, promptSubmit, stop, notification, approvalResponse, sessionEnd, sessionFinalize, noop } static let subcommandActions: [String: AgentHookAction] = [ @@ -129,6 +135,7 @@ extension CMUXCLI { "shell-exec": .promptSubmit, "shell-done": .noop, "session-end": .sessionEnd, + "session-finalize": .sessionFinalize, ] // MARK: Agent definitions @@ -256,7 +263,7 @@ extension CMUXCLI { .init(agentEvent: "pre_approval_request", cmuxSubcommand: "notification"), .init(agentEvent: "post_approval_response", cmuxSubcommand: "approval-response"), .init(agentEvent: "on_session_end", cmuxSubcommand: "session-end"), - .init(agentEvent: "on_session_finalize", cmuxSubcommand: "session-end"), + .init(agentEvent: "on_session_finalize", cmuxSubcommand: "session-finalize"), .init(agentEvent: "on_session_reset", cmuxSubcommand: "session-start"), ], sessionEndIsTurnBoundary: true, diff --git a/CLI/cmux.swift b/CLI/cmux.swift index 65ded3663d..83aede0950 100644 --- a/CLI/cmux.swift +++ b/CLI/cmux.swift @@ -28064,6 +28064,10 @@ export default function cmuxPiSessionExtension(pi: ExtensionAPI) { #endif break } + // A non-turn-boundary session-end is a genuine teardown; share the + // destructive cleanup with the dedicated finalize action. + fallthrough + case .sessionFinalize: if let mapped = sessionId.isEmpty ? nil : (try? store.lookup(sessionId: sessionId)) { sendAgentFeedTelemetry(workspaceId: mapped.workspaceId) let suppressVisibleMutations = shouldSuppressNestedAgentVisibleMutations(currentAgentPID: mapped.pid, env: env) diff --git a/cmuxTests/CLIGenericHookPersistenceTests.swift b/cmuxTests/CLIGenericHookPersistenceTests.swift index 77cefc9c13..200c9f3a30 100644 --- a/cmuxTests/CLIGenericHookPersistenceTests.swift +++ b/cmuxTests/CLIGenericHookPersistenceTests.swift @@ -679,14 +679,17 @@ extension CLINotifyProcessIntegrationRegressionTests { XCTAssertEqual(responseSession["runtimeStatus"] as? String, "running") } - func testHermesAgentSessionEndIsTurnBoundaryAndPreservesRestoreRecord() throws { + func testHermesAgentSessionEndIsTurnBoundaryButFinalizeTearsDown() throws { // Hermes fires the `on_session_end` plugin hook once per conversation turn - // (end of every run_conversation()), not at the true session boundary. cmux - // maps that event to the `session-end` subcommand, so the per-turn hook must - // route through the non-destructive turn-boundary path (recordPromptStop) and - // must NOT consume the session or clear the surface resume binding. Otherwise - // the restore record is destroyed after the first turn and nothing survives a - // quit/relaunch. See https://github.com/manaflow-ai/cmux/issues/5000. + // (end of every run_conversation()), not at the true session boundary, and a + // separate `on_session_finalize` hook once at genuine teardown. cmux maps the + // per-turn event to the `session-end` subcommand and the teardown event to the + // `session-finalize` subcommand. The per-turn hook must route through the + // non-destructive turn-boundary path (recordPromptStop) and must NOT consume + // the session or clear the surface resume binding — otherwise the restore + // record is destroyed after the first turn and nothing survives a + // quit/relaunch. The finalize hook must perform the destructive cleanup. + // See https://github.com/manaflow-ai/cmux/issues/5000. let cliPath = try bundledCLIPath() let socketPath = makeSocketPath("hermes-session-end") let listenerFD = try bindUnixSocket(at: socketPath) @@ -802,6 +805,33 @@ extension CLINotifyProcessIntegrationRegressionTests { try storedHermesSessionIfPresent(), "Hermes on_session_end fires per turn and must not consume the restore record, saw it removed from the store" ) + + // The genuine teardown hook (on_session_finalize) routes to the dedicated + // session-finalize subcommand and must perform the destructive cleanup the + // per-turn path suppresses: consume the record, clear the resume binding, and + // clear the agent PID routing. + let finalizeCommandStart = state.commands.count + let finalize = runHermesHook( + "session-finalize", + input: #"{"session_id":"\#(sessionId)","cwd":"\#(root.path)","hook_event_name":"on_session_finalize"}"# + ) + XCTAssertFalse(finalize.timedOut, finalize.stderr) + XCTAssertEqual(finalize.status, 0, finalize.stderr) + XCTAssertEqual(finalize.stdout, "{}\n") + + let finalizeCommands = Array(state.commands.dropFirst(finalizeCommandStart)) + XCTAssertTrue( + finalizeCommands.contains { $0.hasPrefix("clear_agent_pid hermes-agent.") }, + "Hermes on_session_finalize is a true teardown and must clear agent PID routing, saw \(finalizeCommands)" + ) + XCTAssertTrue( + finalizeCommands.contains { $0.contains("surface.resume.clear") }, + "Hermes on_session_finalize is a true teardown and must clear the surface resume binding, saw \(finalizeCommands)" + ) + XCTAssertNil( + try storedHermesSessionIfPresent(), + "Hermes on_session_finalize is a true teardown and must consume the restore record" + ) } func testAntigravityHookInstallUsesNativeHooksJSONShape() throws { From d94357faf7505c18e7286fbb0bb307e9debaa694 Mon Sep 17 00:00:00 2001 From: austinpower1258 Date: Fri, 29 May 2026 22:48:42 -0700 Subject: [PATCH 4/4] Extract shared session teardown helper; drop fallthrough Addresses CodeRabbit review: replace the fallthrough from .sessionEnd into .sessionFinalize with an explicit, documented performAgentSessionTeardown() nested helper called from both the non-turn-boundary session-end path and the dedicated .sessionFinalize action. Behavior is unchanged; this removes the SwiftLint 'fallthrough should be avoided' warning and makes the shared destructive cleanup explicit. Refs #5000 Co-Authored-By: Claude Opus 4.8 --- CLI/cmux.swift | 47 ++++++++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/CLI/cmux.swift b/CLI/cmux.swift index 83aede0950..da845b98af 100644 --- a/CLI/cmux.swift +++ b/CLI/cmux.swift @@ -27006,6 +27006,28 @@ export default function cmuxPiSessionExtension(pi: ExtensionAPI) { #endif let pidKey = "\(def.statusKey).\(sessionId.isEmpty ? "default" : sessionId)" var didSendFeedTelemetry = false + // Destructive session teardown shared by a genuine (non-turn-boundary) + // `session-end` and the dedicated `session-finalize` action: consume the + // restore record, clear the surface resume binding, and clear PID routing. + func performAgentSessionTeardown() { + guard let mapped = sessionId.isEmpty ? nil : (try? store.lookup(sessionId: sessionId)) else { return } + sendAgentFeedTelemetry(workspaceId: mapped.workspaceId) + let suppressVisibleMutations = shouldSuppressNestedAgentVisibleMutations(currentAgentPID: mapped.pid, env: env) + if suppressVisibleMutations { + telemetry.breadcrumb("\(def.name)-hook.session-end.nested-suppressed") + } else if let consumed = try? store.consume(sessionId: sessionId, workspaceId: nil, surfaceId: nil) { + clearAgentSurfaceResumeBinding( + client: client, + workspaceId: consumed.workspaceId, + surfaceId: consumed.surfaceId, + sessionId: consumed.sessionId + ) + _ = try? sendV1Command( + "clear_agent_pid \(pidKey) --tab=\(consumed.workspaceId)\(socketPanelOption(consumed.surfaceId)) --clear-status", + client: client + ) + } + } func runtimeStatus(for notificationStatus: AgentHookNotificationStatus?) -> AgentHookRuntimeStatus? { switch notificationStatus { case .idle?: @@ -28064,28 +28086,11 @@ export default function cmuxPiSessionExtension(pi: ExtensionAPI) { #endif break } - // A non-turn-boundary session-end is a genuine teardown; share the - // destructive cleanup with the dedicated finalize action. - fallthrough + // A non-turn-boundary session-end is a genuine teardown. + performAgentSessionTeardown() + case .sessionFinalize: - if let mapped = sessionId.isEmpty ? nil : (try? store.lookup(sessionId: sessionId)) { - sendAgentFeedTelemetry(workspaceId: mapped.workspaceId) - let suppressVisibleMutations = shouldSuppressNestedAgentVisibleMutations(currentAgentPID: mapped.pid, env: env) - if suppressVisibleMutations { - telemetry.breadcrumb("\(def.name)-hook.session-end.nested-suppressed") - } else if let consumed = try? store.consume(sessionId: sessionId, workspaceId: nil, surfaceId: nil) { - clearAgentSurfaceResumeBinding( - client: client, - workspaceId: consumed.workspaceId, - surfaceId: consumed.surfaceId, - sessionId: consumed.sessionId - ) - _ = try? sendV1Command( - "clear_agent_pid \(pidKey) --tab=\(consumed.workspaceId)\(socketPanelOption(consumed.surfaceId)) --clear-status", - client: client - ) - } - } + performAgentSessionTeardown() case .noop: break