Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 25 additions & 2 deletions CLI/CMUXCLI+AgentHookDefinitions.swift
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,23 @@ extension CMUXCLI {
let events: [HookEvent]
let aliases: Set<String>
let publishesStopNotification: Bool
/// Whether this agent's `SessionEnd`/`session-end` hook fires once per
/// conversation turn rather than at a true session teardown.
///
/// Restorable agents (grok, antigravity, hermes-agent) re-emit their
/// session-end event after every turn, so the `.sessionEnd` handler must
/// treat it as a non-destructive turn boundary (`recordPromptStop`) and
/// must not consume the session or clear the surface resume binding —
/// otherwise the restore record is destroyed after the first turn and
/// nothing survives a quit/relaunch. See
/// https://github.com/manaflow-ai/cmux/issues/5000.
///
/// Agents whose runtime distinguishes a per-turn boundary from a genuine
/// session teardown (hermes-agent emits both `on_session_end` per turn and
/// `on_session_finalize` once at the end) route the teardown event to the
/// separate `session-finalize` subcommand / ``AgentHookAction/sessionFinalize``
/// action, which performs the destructive cleanup this flag suppresses.
let sessionEndIsTurnBoundary: Bool
/// Feed-hook events. Each entry installs a second hook for
/// `agentEvent` that invokes `cmux hooks feed --source <name>`
/// with a 120s timeout so the socket reply wait doesn't trip the
Expand Down Expand Up @@ -81,6 +98,7 @@ extension CMUXCLI {
format: HookFormat, events: [HookEvent],
aliases: Set<String> = [],
publishesStopNotification: Bool = true,
sessionEndIsTurnBoundary: Bool = false,
feedHookEvents: [String] = [],
postInstallAction: PostInstallAction? = nil) {
self.name = name; self.displayName = displayName; self.statusKey = statusKey
Expand All @@ -92,6 +110,7 @@ extension CMUXCLI {
self.sessionStoreSuffix = sessionStoreSuffix; self.disableEnvVar = disableEnvVar
self.hookMarker = hookMarker; self.format = format; self.events = events
self.publishesStopNotification = publishesStopNotification
self.sessionEndIsTurnBoundary = sessionEndIsTurnBoundary
self.aliases = Set(aliases.compactMap { alias in
let normalized = alias.trimmingCharacters(in: .whitespacesAndNewlines).lowercased()
return normalized.isEmpty ? nil : normalized
Expand All @@ -102,7 +121,7 @@ extension CMUXCLI {
}

enum AgentHookAction {
case sessionStart, promptSubmit, stop, notification, approvalResponse, sessionEnd, noop
case sessionStart, promptSubmit, stop, notification, approvalResponse, sessionEnd, sessionFinalize, noop
}

static let subcommandActions: [String: AgentHookAction] = [
Expand All @@ -116,6 +135,7 @@ extension CMUXCLI {
"shell-exec": .promptSubmit,
"shell-done": .noop,
"session-end": .sessionEnd,
"session-finalize": .sessionFinalize,
]

// MARK: Agent definitions
Expand Down Expand Up @@ -149,6 +169,7 @@ extension CMUXCLI {
.init(agentEvent: "SessionEnd", cmuxSubcommand: "session-end"),
],
publishesStopNotification: false,
sessionEndIsTurnBoundary: true,
feedHookEvents: ["PreToolUse"]
),
AgentHookDef(
Expand Down Expand Up @@ -214,6 +235,7 @@ extension CMUXCLI {
.init(agentEvent: "SessionEnd", cmuxSubcommand: "session-end"),
],
aliases: ["agy"],
sessionEndIsTurnBoundary: true,
feedHookEvents: ["PreToolUse", "PostToolUse"]
),
AgentHookDef(
Expand Down Expand Up @@ -241,9 +263,10 @@ extension CMUXCLI {
.init(agentEvent: "pre_approval_request", cmuxSubcommand: "notification"),
.init(agentEvent: "post_approval_response", cmuxSubcommand: "approval-response"),
.init(agentEvent: "on_session_end", cmuxSubcommand: "session-end"),
.init(agentEvent: "on_session_finalize", cmuxSubcommand: "session-end"),
.init(agentEvent: "on_session_finalize", cmuxSubcommand: "session-finalize"),
.init(agentEvent: "on_session_reset", cmuxSubcommand: "session-start"),
],
sessionEndIsTurnBoundary: true,
Comment thread
cursor[bot] marked this conversation as resolved.
feedHookEvents: ["pre_tool_call", "post_tool_call", "pre_approval_request", "post_approval_response"]
),
AgentHookDef(
Expand Down
47 changes: 28 additions & 19 deletions CLI/cmux.swift
Original file line number Diff line number Diff line change
Expand Up @@ -27007,6 +27007,28 @@ export default function cmuxPiSessionExtension(pi: ExtensionAPI) {
#endif
let pidKey = "\(def.statusKey).\(sessionId.isEmpty ? "default" : sessionId)"
var didSendFeedTelemetry = false
/// Destructive teardown for the agent session identified by `sessionId`.
///
/// Used by both a genuine (non-turn-boundary) `session-end` and the dedicated
/// `session-finalize` action. Emits feed telemetry for the mapped workspace,
/// then — unless nested-agent visible mutations are suppressed — consumes the
/// stored session record, clears the surface resume binding, and clears the
/// agent PID routing (including status) for the consumed workspace/surface.
///
/// Does nothing when `sessionId` is empty or no record can be looked up.
func performAgentSessionTeardown() {
    // No session id or no stored record: there is nothing to tear down.
    guard !sessionId.isEmpty, let mapped = try? store.lookup(sessionId: sessionId) else {
        return
    }
    sendAgentFeedTelemetry(workspaceId: mapped.workspaceId)

    // A suppressed nested agent only leaves a breadcrumb; the record stays intact.
    if shouldSuppressNestedAgentVisibleMutations(currentAgentPID: mapped.pid, env: env) {
        telemetry.breadcrumb("\(def.name)-hook.session-end.nested-suppressed")
        return
    }

    // Consuming the record is best-effort; without it there is nothing to clear.
    guard let consumed = try? store.consume(sessionId: sessionId, workspaceId: nil, surfaceId: nil) else {
        return
    }
    clearAgentSurfaceResumeBinding(
        client: client,
        workspaceId: consumed.workspaceId,
        surfaceId: consumed.surfaceId,
        sessionId: consumed.sessionId
    )
    // Failure to clear PID routing is deliberately ignored (best-effort cleanup).
    let clearCommand = "clear_agent_pid \(pidKey) --tab=\(consumed.workspaceId)\(socketPanelOption(consumed.surfaceId)) --clear-status"
    _ = try? sendV1Command(clearCommand, client: client)
}
func runtimeStatus(for notificationStatus: AgentHookNotificationStatus?) -> AgentHookRuntimeStatus? {
switch notificationStatus {
case .idle?:
Expand Down Expand Up @@ -28041,7 +28063,7 @@ export default function cmuxPiSessionExtension(pi: ExtensionAPI) {
if def.name == "codex", !sessionId.isEmpty {
retireCodexMonitorLeases(sessionId: sessionId, turnId: nil, env: env)
}
if def.name == "grok" || def.name == "antigravity" {
if def.sessionEndIsTurnBoundary {
if let mapped = sessionId.isEmpty ? nil : (try? store.lookup(sessionId: sessionId)) {
sendAgentFeedTelemetry(workspaceId: mapped.workspaceId)
_ = try? store.recordPromptStop(
Expand All @@ -28065,24 +28087,11 @@ export default function cmuxPiSessionExtension(pi: ExtensionAPI) {
#endif
break
}
if let mapped = sessionId.isEmpty ? nil : (try? store.lookup(sessionId: sessionId)) {
sendAgentFeedTelemetry(workspaceId: mapped.workspaceId)
let suppressVisibleMutations = shouldSuppressNestedAgentVisibleMutations(currentAgentPID: mapped.pid, env: env)
if suppressVisibleMutations {
telemetry.breadcrumb("\(def.name)-hook.session-end.nested-suppressed")
} else if let consumed = try? store.consume(sessionId: sessionId, workspaceId: nil, surfaceId: nil) {
clearAgentSurfaceResumeBinding(
client: client,
workspaceId: consumed.workspaceId,
surfaceId: consumed.surfaceId,
sessionId: consumed.sessionId
)
_ = try? sendV1Command(
"clear_agent_pid \(pidKey) --tab=\(consumed.workspaceId)\(socketPanelOption(consumed.surfaceId)) --clear-status",
client: client
)
}
}
// A non-turn-boundary session-end is a genuine teardown.
performAgentSessionTeardown()

case .sessionFinalize:
performAgentSessionTeardown()

case .noop:
break
Expand Down
155 changes: 155 additions & 0 deletions cmuxTests/CLIGenericHookPersistenceTests.swift
Original file line number Diff line number Diff line change
Expand Up @@ -679,6 +679,161 @@ extension CLINotifyProcessIntegrationRegressionTests {
XCTAssertEqual(responseSession["runtimeStatus"] as? String, "running")
}

func testHermesAgentSessionEndIsTurnBoundaryButFinalizeTearsDown() throws {
// Hermes fires the `on_session_end` plugin hook once per conversation turn
// (end of every run_conversation()), not at the true session boundary, and a
// separate `on_session_finalize` hook once at genuine teardown. cmux maps the
// per-turn event to the `session-end` subcommand and the teardown event to the
// `session-finalize` subcommand. The per-turn hook must route through the
// non-destructive turn-boundary path (recordPromptStop) and must NOT consume
// the session or clear the surface resume binding — otherwise the restore
// record is destroyed after the first turn and nothing survives a
// quit/relaunch. The finalize hook must perform the destructive cleanup.
// See https://github.com/manaflow-ai/cmux/issues/5000.
//
// Fixtures: the bundled CLI binary, a Unix socket for the mock server, shared
// mock-server state, and a unique temp directory used as HOME and as the hook
// state directory.
let cliPath = try bundledCLIPath()
let socketPath = makeSocketPath("hermes-session-end")
let listenerFD = try bindUnixSocket(at: socketPath)
let state = MockSocketServerState()
let root = FileManager.default.temporaryDirectory
.appendingPathComponent("cmux-hermes-session-end-\(UUID().uuidString)", isDirectory: true)
let workspaceId = "11111111-1111-1111-1111-111111111111"
let surfaceId = "22222222-2222-2222-2222-222222222222"
let sessionId = "hermes-session-end-123"

try FileManager.default.createDirectory(at: root, withIntermediateDirectories: true)
// Close the listener, remove the socket file, and delete the temp directory
// on every exit path.
defer {
Darwin.close(listenerFD)
unlink(socketPath)
try? FileManager.default.removeItem(at: root)
}

// Environment handed to the CLI process: HOME, PWD, and the hook state dir all
// point at the temp root, and the socket path targets the mock server.
let environment: [String: String] = [
"HOME": root.path,
"PATH": "/usr/bin:/bin:/usr/sbin:/sbin",
"PWD": root.path,
"CMUX_SOCKET_PATH": socketPath,
"CMUX_WORKSPACE_ID": workspaceId,
"CMUX_SURFACE_ID": surfaceId,
"CMUX_AGENT_HOOK_STATE_DIR": root.path,
"CMUX_CLI_SENTRY_DISABLED": "1",
]

// Runs `hooks hermes-agent <subcommand>` with `input` on stdin against a mock
// server started on the listener socket, waits for the server expectation,
// and returns the process result.
func runHermesHook(_ subcommand: String, input: String) -> ProcessRunResult {
let serverHandled = startMockServer(listenerFD: listenerFD, state: state) { line in
// Lines that do not parse as a JSON object get a plain "OK" reply
// (presumably v1 text commands such as `clear_agent_pid` — TODO confirm).
guard let payload = self.jsonObject(line) else {
return "OK"
}
guard let id = payload["id"] as? String, let method = payload["method"] as? String else {
return self.malformedRequestResponse(id: payload["id"] as? String, raw: line)
}
// Only `surface.list` and `feed.push` are answered successfully; any other
// method gets an `unrecognized_method` error response.
switch method {
case "surface.list":
return self.surfaceListResponse(id: id, surfaceId: surfaceId)
case "feed.push":
return self.v2Response(id: id, ok: true, result: [:])
default:
return self.v2Response(id: id, ok: false, error: ["code": "unrecognized_method", "message": "unexpected method: \(method)"])
}
}
let result = runProcess(
executablePath: cliPath,
arguments: ["hooks", "hermes-agent", subcommand],
environment: environment,
standardInput: input,
timeout: 5
)
wait(for: [serverHandled], timeout: 5)
return result
}

// Reads `hermes-agent-hook-sessions.json` from the temp root and returns the
// entry stored under `sessionId`. Returns nil when the file cannot be read,
// is not a JSON object with a `sessions` dictionary, or has no such entry.
func storedHermesSessionIfPresent() throws -> [String: Any]? {
let storeURL = root.appendingPathComponent("hermes-agent-hook-sessions.json", isDirectory: false)
guard let data = try? Data(contentsOf: storeURL),
let json = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
let sessions = json["sessions"] as? [String: Any]
else {
return nil
}
return sessions[sessionId] as? [String: Any]
}

// Register the session through the session-start hook.
let start = runHermesHook(
"session-start",
input: #"{"session_id":"\#(sessionId)","cwd":"\#(root.path)","hook_event_name":"on_session_start"}"#
)
XCTAssertFalse(start.timedOut, start.stderr)
XCTAssertEqual(start.status, 0, start.stderr)

// Finish a turn so a restorable record exists for the session.
let stop = runHermesHook(
"agent-response",
input: #"{"session_id":"\#(sessionId)","cwd":"\#(root.path)","hook_event_name":"post_llm_call","extra":{"user_message":"do the thing","assistant_response":"done","model":"gpt-4","platform":"cli"}}"#
)
XCTAssertFalse(stop.timedOut, stop.stderr)
XCTAssertEqual(stop.status, 0, stop.stderr)

XCTAssertNotNil(
try storedHermesSessionIfPresent(),
"Expected a Hermes session record to exist before the per-turn session-end hook fires"
)

// The per-turn on_session_end hook. Hermes is a restorable agent, so this is a
// turn boundary, not a true session teardown.
// Snapshot the command count so only traffic from this hook run is inspected.
let sessionEndCommandStart = state.commands.count
let sessionEnd = runHermesHook(
"session-end",
input: #"{"session_id":"\#(sessionId)","cwd":"\#(root.path)","hook_event_name":"on_session_end"}"#
)
XCTAssertFalse(sessionEnd.timedOut, sessionEnd.stderr)
XCTAssertEqual(sessionEnd.status, 0, sessionEnd.stderr)
XCTAssertEqual(sessionEnd.stdout, "{}\n")

// Telemetry is expected, but none of the destructive commands may appear and
// the stored record must still be present.
let sessionEndCommands = Array(state.commands.dropFirst(sessionEndCommandStart))
XCTAssertTrue(
sessionEndCommands.contains { $0.contains("feed.push") },
"Expected Hermes session-end to emit feed telemetry, saw \(sessionEndCommands)"
)
XCTAssertFalse(
sessionEndCommands.contains { $0.hasPrefix("clear_agent_pid hermes-agent.") },
"Hermes on_session_end fires per turn and must not clear saved routing, saw \(sessionEndCommands)"
)
XCTAssertFalse(
sessionEndCommands.contains { $0.contains("surface.resume.clear") },
"Hermes on_session_end fires per turn and must not clear the surface resume binding, saw \(sessionEndCommands)"
)
XCTAssertNotNil(
try storedHermesSessionIfPresent(),
"Hermes on_session_end fires per turn and must not consume the restore record, saw it removed from the store"
)

// The genuine teardown hook (on_session_finalize) routes to the dedicated
// session-finalize subcommand and must perform the destructive cleanup the
// per-turn path suppresses: consume the record, clear the resume binding, and
// clear the agent PID routing.
// Snapshot the command count again to isolate the finalize hook's traffic.
let finalizeCommandStart = state.commands.count
let finalize = runHermesHook(
"session-finalize",
input: #"{"session_id":"\#(sessionId)","cwd":"\#(root.path)","hook_event_name":"on_session_finalize"}"#
)
XCTAssertFalse(finalize.timedOut, finalize.stderr)
XCTAssertEqual(finalize.status, 0, finalize.stderr)
XCTAssertEqual(finalize.stdout, "{}\n")

let finalizeCommands = Array(state.commands.dropFirst(finalizeCommandStart))
XCTAssertTrue(
finalizeCommands.contains { $0.hasPrefix("clear_agent_pid hermes-agent.") },
"Hermes on_session_finalize is a true teardown and must clear agent PID routing, saw \(finalizeCommands)"
)
XCTAssertTrue(
finalizeCommands.contains { $0.contains("surface.resume.clear") },
"Hermes on_session_finalize is a true teardown and must clear the surface resume binding, saw \(finalizeCommands)"
)
XCTAssertNil(
try storedHermesSessionIfPresent(),
"Hermes on_session_finalize is a true teardown and must consume the restore record"
)
}

func testAntigravityHookInstallUsesNativeHooksJSONShape() throws {
let cliPath = try bundledCLIPath()
let root = FileManager.default.temporaryDirectory
Expand Down
Loading