From 876feb451b0a0f6655f98cbd527959636790edb3 Mon Sep 17 00:00:00 2001 From: austinpower1258 Date: Fri, 29 May 2026 22:06:26 -0700 Subject: [PATCH 1/4] Add failing test for Hermes pre_tool_call approval misclassification cmux's feed-event classifier maps Hermes `pre_tool_call` (a tool *starting*, no approval pending) to a blocking PermissionRequest whenever the tool is side-effecting (e.g. `terminal`). That fires a spurious "Terminal needs approval" notification while the Hermes TUI shows nothing to approve. Hermes reserves `pre_approval_request` for real approvals. This commit adds the regression test only (no fix) so CI proves the test catches the bug. classifyFeedEvent is made internal so it can be exercised directly. Refs #4985 Co-Authored-By: Claude Opus 4.8 --- CLI/cmux.swift | 2 +- cmux.xcodeproj/project.pbxproj | 4 + cmuxTests/FeedEventClassificationTests.swift | 107 +++++++++++++++++++ 3 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 cmuxTests/FeedEventClassificationTests.swift diff --git a/CLI/cmux.swift b/CLI/cmux.swift index 0158b64168..19d79909c1 100644 --- a/CLI/cmux.swift +++ b/CLI/cmux.swift @@ -30164,7 +30164,7 @@ export default function cmuxPiSessionExtension(pi: ExtensionAPI) { /// blocks waiting for a user decision. Claude Code owns decisions /// through its native PermissionRequest hook. Its PreToolUse hook is /// telemetry/status only. 
- private static func classifyFeedEvent( + static func classifyFeedEvent( source: String, event: String, toolName: String diff --git a/cmux.xcodeproj/project.pbxproj b/cmux.xcodeproj/project.pbxproj index 0510406821..c6d39408e3 100644 --- a/cmux.xcodeproj/project.pbxproj +++ b/cmux.xcodeproj/project.pbxproj @@ -204,6 +204,7 @@ FEED0000000000000000F00D /* FeedButtonStyleDebugWindowController.swift in Sources */ = {isa = PBXBuildFile; fileRef = FEED0000000000000000F00C /* FeedButtonStyleDebugWindowController.swift */; }; FEED0000000000000000F002 /* FeedCoordinator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FEED0000000000000000F001 /* FeedCoordinator.swift */; }; FEEDC0DEC0DEC0DEC0DE0001 /* FeedCoordinatorTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = FEEDC0DEC0DEC0DEC0DE0002 /* FeedCoordinatorTests.swift */; }; + FEED49850000000000000001 /* FeedEventClassificationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = FEED49850000000000000002 /* FeedEventClassificationTests.swift */; }; FEED0000000000000000F005 /* FeedPanelView.swift in Sources */ = {isa = PBXBuildFile; fileRef = FEED0000000000000000F004 /* FeedPanelView.swift */; }; FEED0000000000000000F011 /* FeedPanelViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FEED0000000000000000F010 /* FeedPanelViewModel.swift */; }; FEED0000000000000000F013 /* FeedPermissionActionPolicy.swift in Sources */ = {isa = PBXBuildFile; fileRef = FEED0000000000000000F012 /* FeedPermissionActionPolicy.swift */; }; @@ -820,6 +821,7 @@ FEED0000000000000000F00C /* FeedButtonStyleDebugWindowController.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = FeedButtonStyleDebugWindowController.swift; sourceTree = ""; }; FEED0000000000000000F001 /* FeedCoordinator.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = FeedCoordinator.swift; sourceTree = ""; }; FEEDC0DEC0DEC0DEC0DE0002 /* 
FeedCoordinatorTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedCoordinatorTests.swift; sourceTree = ""; }; + FEED49850000000000000002 /* FeedEventClassificationTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedEventClassificationTests.swift; sourceTree = ""; }; FEED0000000000000000F004 /* FeedPanelView.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = FeedPanelView.swift; sourceTree = ""; }; FEED0000000000000000F010 /* FeedPanelViewModel.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = FeedPanelViewModel.swift; sourceTree = ""; }; FEED0000000000000000F012 /* FeedPermissionActionPolicy.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = FeedPermissionActionPolicy.swift; sourceTree = ""; }; @@ -1805,6 +1807,7 @@ D2C075029771815DD5DA1332 /* NotificationAndMenuBarTests.swift */, 42092CDB2109E250F7F2A76E /* TabManagerUnitTests.swift */, C9A57002C9A57002C9A57002 /* WorkspaceGroupTests.swift */, + FEED49850000000000000002 /* FeedEventClassificationTests.swift */, 42D69572C8D276745E502B94 /* SessionIndexViewTests.swift */, F1C3F1DBF6BF5D7223C4A30C /* SidebarMarkdownRendererTests.swift */, 14A7DC53B9CA33BE2A421711 /* WorkspacePullRequestSidebarTests.swift */, @@ -2608,6 +2611,7 @@ C0DEF4120000000000000001 /* CommandPaletteSettingsToggleTests.swift in Sources */, C1713006C1713006C1713006 /* CommandPaletteShortcutCustomizationTests.swift in Sources */, FEEDC0DEC0DEC0DEC0DE0001 /* FeedCoordinatorTests.swift in Sources */, + FEED49850000000000000001 /* FeedEventClassificationTests.swift in Sources */, D0B10018A1B2C3D4E5F60001 /* FileDropOverlayViewTests.swift in Sources */, FE002101 /* FileExplorerRootResolverTests.swift in Sources */, B37A0000000000000000000B /* FileExplorerStateModePersistenceTests.swift in Sources */, diff --git 
a/cmuxTests/FeedEventClassificationTests.swift b/cmuxTests/FeedEventClassificationTests.swift new file mode 100644 index 0000000000..7cdc0273e9 --- /dev/null +++ b/cmuxTests/FeedEventClassificationTests.swift @@ -0,0 +1,107 @@ +import Testing + +#if canImport(cmux_DEV) +@testable import cmux_DEV +#elseif canImport(cmux) +@testable import cmux +#endif + +/// Regression coverage for the feed-event → user-attention classification. +/// +/// The "Terminal needs approval" notification (see `FeedCoordinator`) fires +/// only for events that `classifyFeedEvent` marks actionable and whose wire +/// `hook_event_name` is `PermissionRequest` / `ExitPlanMode` / +/// `AskUserQuestion`. The class of bug this guards against is broad +/// pattern-matching that maps a *tool-starting* lifecycle event to an +/// approval, over-triggering the notification. +/// +/// https://github.com/manaflow-ai/cmux/issues/4985 +@Suite("Feed event classification") +struct FeedEventClassificationTests { + private func classify(_ source: String, _ event: String, tool: String = "") + -> (name: String, actionable: Bool) + { + let result = CMUXCLI.classifyFeedEvent(source: source, event: event, toolName: tool) + return (result.0, result.1) + } + + // MARK: Hermes Agent (the reported bug) + + /// Hermes emits `pre_tool_call` when a tool *starts* — no approval is + /// pending. It has a distinct `pre_approval_request` event for real + /// approvals. `pre_tool_call` must never be actionable, even for a + /// side-effecting tool like `terminal`, or the user sees a spurious + /// "Terminal needs approval" banner with nothing pending in the TUI. 
+ @Test func hermesPreToolCallIsTelemetryEvenForSideEffectingTools() { + #expect(classify("hermes-agent", "pre_tool_call", tool: "terminal").actionable == false) + #expect(classify("hermes-agent", "pre_tool_call", tool: "Bash").actionable == false) + #expect(classify("hermes-agent", "pre_tool_call", tool: "Write").actionable == false) + #expect(classify("hermes-agent", "pre_tool_call", tool: "Read").actionable == false) + #expect(classify("hermes-agent", "pre_tool_call", tool: "terminal").name == "PreToolUse") + } + + /// Lifecycle bookends are telemetry only. + @Test func hermesLifecycleEventsAreNotActionable() { + #expect(classify("hermes-agent", "post_tool_call").actionable == false) + #expect(classify("hermes-agent", "pre_llm_call").actionable == false) + #expect(classify("hermes-agent", "post_llm_call").actionable == false) + #expect(classify("hermes-agent", "on_session_start").actionable == false) + #expect(classify("hermes-agent", "on_session_end").actionable == false) + } + + /// `pre_approval_request` carries the real approval semantic. The + /// "needs approval" notification fires for it via the dedicated + /// `notification` hook subcommand, so on the feed path it stays a + /// non-blocking `Notification` (avoids a double banner). + @Test func hermesApprovalRequestStaysNonBlockingOnFeedPath() { + let approval = classify("hermes-agent", "pre_approval_request") + #expect(approval.name == "Notification") + #expect(approval.actionable == false) + } + + /// Future Hermes event names must be safe by default: unknown → no + /// notification (non-actionable telemetry). + @Test func hermesUnknownEventIsSafeByDefault() { + let unknown = classify("hermes-agent", "some_future_event", tool: "terminal") + #expect(unknown.actionable == false) + } + + // MARK: Claude (dedicated-approval agent — must not regress) + + /// Claude owns approvals through its `PermissionRequest` hook; its + /// `PreToolUse` is telemetry and must not escalate side-effecting tools. 
+ @Test func claudePreToolUseDoesNotEscalate() { + #expect(classify("claude", "PreToolUse", tool: "Bash").actionable == false) + #expect(classify("claude", "PreToolUse", tool: "Write").actionable == false) + } + + @Test func claudePermissionRequestIsActionable() { + #expect(classify("claude", "PermissionRequest", tool: "Bash").name == "PermissionRequest") + #expect(classify("claude", "PermissionRequest", tool: "Bash").actionable == true) + #expect(classify("claude", "PermissionRequest", tool: "ExitPlanMode").name == "ExitPlanMode") + #expect(classify("claude", "PermissionRequest", tool: "AskUserQuestion").name == "AskUserQuestion") + } + + // MARK: Generic agents without a dedicated approval event + + /// Agents whose only signal is `PreToolUse` (gemini, copilot, …) still + /// escalate side-effecting tools to an approval — that path is correct + /// and must be preserved. + @Test func genericPreToolUseEscalatesSideEffectingTools() { + #expect(classify("gemini", "PreToolUse", tool: "Bash").name == "PermissionRequest") + #expect(classify("gemini", "PreToolUse", tool: "Bash").actionable == true) + #expect(classify("gemini", "PreToolUse", tool: "Read").actionable == false) + } + + /// Codex has a dedicated `PermissionRequest` feed event, so its + /// `PreToolUse` is telemetry only. + @Test func codexPreToolUseIsTelemetry() { + #expect(classify("codex", "PreToolUse", tool: "shell").actionable == false) + #expect(classify("codex", "PermissionRequest", tool: "shell").actionable == true) + } + + /// Unknown source + unknown event is safe by default. 
+ @Test func unknownSourceUnknownEventIsSafe() { + #expect(classify("totally-new-agent", "some_future_event", tool: "Bash").actionable == false) + } +} From 1538585294cdd4165988d6853f5c2ee05e4db1f8 Mon Sep 17 00:00:00 2001 From: austinpower1258 Date: Fri, 29 May 2026 22:08:12 -0700 Subject: [PATCH 2/4] Classify feed events via typed semantic registry; fix Hermes pre_tool_call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Replace the raw event-name pattern matching in classifyFeedEvent with an explicit, typed registry keyed on (source, event) -> FeedEventSemantic. Notification eligibility and the blocking Feed wait are derived from the resolved semantic, never from string matching, so a tool-*starting* lifecycle event can no longer be mistaken for an approval request. The class of bug: the heuristic "pre-tool event + side-effecting tool => approval" is correct only for agents whose *only* signal is the pre-tool event (gemini, copilot, …). Agents with a dedicated approval event (Claude PermissionRequest, Codex PermissionRequest, Hermes pre_approval_request) must treat their pre-tool event as telemetry, or it double-counts and fires a spurious "needs approval" notification. Hermes `pre_tool_call` (tool starting, no approval) was being escalated to PermissionRequest for side-effecting tools like `terminal`, producing the false-positive banner in #4985. The registry encodes this distinction explicitly (toolStart vs toolStartMaybeApproval) and makes unknown/future event names safe by default: they resolve to non-actionable telemetry that never notifies. 
Fixes #4985 Co-Authored-By: Claude Opus 4.8 --- CLI/cmux.swift | 247 +++++++++++++++++++++++++++++++++---------------- 1 file changed, 166 insertions(+), 81 deletions(-) diff --git a/CLI/cmux.swift b/CLI/cmux.swift index 19d79909c1..8af1941ff0 100644 --- a/CLI/cmux.swift +++ b/CLI/cmux.swift @@ -30161,110 +30161,195 @@ export default function cmuxPiSessionExtension(pi: ExtensionAPI) { /// Classifies a raw agent hook event into our wire `hook_event_name` /// plus an `isActionable` flag that drives whether the Feed bridge - /// blocks waiting for a user decision. Claude Code owns decisions - /// through its native PermissionRequest hook. Its PreToolUse hook is - /// telemetry/status only. + /// blocks waiting for a user decision (and whether `FeedCoordinator` + /// posts a "needs approval" notification). + /// + /// The mapping is driven by an explicit, typed registry + /// (`feedEventSemantic`) keyed on `(source, event)` rather than by + /// pattern-matching raw event-name strings. Notification eligibility + /// is derived only from the resolved ``FeedEventSemantic``, so a + /// tool-*starting* lifecycle event can never be mistaken for an + /// approval request — and unknown / future event names default to + /// non-actionable telemetry that never notifies. 
static func classifyFeedEvent( source: String, event: String, toolName: String ) -> (String, Bool) { - if source == "claude" { - switch event { - case "PermissionRequest": - switch toolName { - case "ExitPlanMode": - return ("ExitPlanMode", true) - case "AskUserQuestion": - return ("AskUserQuestion", true) - default: - return ("PermissionRequest", true) - } - case "PostToolUse": - return ("PostToolUse", false) - case "UserPromptSubmit": - return ("UserPromptSubmit", false) - case "SessionStart": - return ("SessionStart", false) - case "SessionEnd": - return ("SessionEnd", false) - case "Stop": - return ("Stop", false) - case "SubagentStop": - return ("SubagentStop", false) - case "Notification": - return ("Notification", false) - default: - return ("PreToolUse", false) - } - } + let semantic = feedEventSemantic(source: source, event: event) + return wireMapping(for: semantic, toolName: toolName) + } + + /// User-attention semantic of a hook/feed event, independent of the + /// agent-specific raw event name. Notifications and blocking waits are + /// keyed off this — never off raw event-name string matching — so the + /// same misclassification cannot recur as new event names are added. + enum FeedEventSemantic { + /// A real approval is pending; the user must approve/deny. Drives + /// the blocking Feed wait and the "needs approval" notification. + /// Resolved against the tool name so Claude's `ExitPlanMode` / + /// `AskUserQuestion` approvals route to their dedicated kinds. + case approvalRequest + /// A tool is about to run but no approval is pending. Telemetry + /// only. Used by agents that expose a *separate* approval event + /// (Claude, Codex, Hermes) so their pre-tool hook never escalates. + case toolStart + /// A tool is about to run and the agent has *no* dedicated approval + /// event, so a side-effecting tool is escalated to an approval and + /// read-only tools stay telemetry. Resolved against the tool name. 
+ case toolStartMaybeApproval + /// A tool finished. Telemetry only. + case toolEnd + /// A new turn / prompt started. Telemetry only. + case promptSubmit + /// The agent finished responding. Telemetry only. + case response + /// A subagent finished responding. Telemetry only. + case subagentResponse + case sessionStart + case sessionEnd + /// A generic status/notification event. Telemetry only — real + /// approval banners for these agents fire through the dedicated + /// `notification` hook subcommand, not the feed path. + case statusNotification + /// Unknown / unregistered event. Safe default: telemetry only, + /// never actionable, never notifies. + case unknown + } - if source == "hermes-agent" { - switch event { - case "pre_tool_call": - if Self.sideEffectingTools.contains(toolName) { - return ("PermissionRequest", true) - } - return ("PreToolUse", false) - case "post_tool_call": - return ("PostToolUse", false) - case "pre_approval_request": - return ("Notification", false) - case "post_approval_response": - return ("Notification", false) - case "pre_llm_call": - return ("UserPromptSubmit", false) - case "post_llm_call": - return ("Stop", false) - case "on_session_start", "on_session_reset": - return ("SessionStart", false) - case "on_session_end", "on_session_finalize": - return ("SessionEnd", false) - default: - return ("PreToolUse", false) - } - } + /// Resolves the semantic for a `(source, event)` pair. A registered + /// source uses its own table (unmatched events fall to ``unknown``); + /// unregistered sources use the generic table. + private static func feedEventSemantic( + source: String, + event: String + ) -> FeedEventSemantic { + let table = feedEventSemanticRegistry[source] ?? genericFeedEventSemantics + return table[event] ?? 
.unknown + } - switch event { - case "PreToolUse", "beforeShellExecution": - if source == "codex" { return ("PreToolUse", false) } + /// Maps a resolved semantic to the wire `hook_event_name` plus the + /// `isActionable` flag, using `toolName` for the two tool-dependent + /// semantics. + private static func wireMapping( + for semantic: FeedEventSemantic, + toolName: String + ) -> (String, Bool) { + switch semantic { + case .approvalRequest: + switch toolName { + case "ExitPlanMode": return ("ExitPlanMode", true) + case "AskUserQuestion": return ("AskUserQuestion", true) + default: return ("PermissionRequest", true) + } + case .toolStartMaybeApproval: switch toolName { - case "ExitPlanMode": - return ("ExitPlanMode", true) - case "AskUserQuestion": - return ("AskUserQuestion", true) + case "ExitPlanMode": return ("ExitPlanMode", true) + case "AskUserQuestion": return ("AskUserQuestion", true) default: - // Any tool that can mutate the environment surfaces as - // a permission request so the user can approve/deny - // from the Feed sidebar. Read-only tools stay as - // non-actionable telemetry so we don't flood the - // Actionable view with every file read. + // Any tool that can mutate the environment surfaces as a + // permission request so the user can approve/deny from the + // Feed sidebar. Read-only tools stay non-actionable + // telemetry so we don't flood the Actionable view. 
if Self.sideEffectingTools.contains(toolName) { return ("PermissionRequest", true) } return ("PreToolUse", false) } - case "PermissionRequest": - return ("PermissionRequest", true) - case "PostToolUse": + case .toolStart: + return ("PreToolUse", false) + case .toolEnd: return ("PostToolUse", false) - case "UserPromptSubmit": + case .promptSubmit: return ("UserPromptSubmit", false) - case "SessionStart": - return ("SessionStart", false) - case "SessionEnd": - return ("SessionEnd", false) - case "Stop": + case .response: return ("Stop", false) - case "SubagentStop": + case .subagentResponse: return ("SubagentStop", false) - case "Notification": + case .sessionStart: + return ("SessionStart", false) + case .sessionEnd: + return ("SessionEnd", false) + case .statusNotification: return ("Notification", false) - default: + case .unknown: + // Safe default: telemetry, no approval, no notification. return ("PreToolUse", false) } } + /// Per-agent event-semantic tables. Each entry is the source of truth + /// for that agent's `(event) -> semantic` mapping; events absent here + /// resolve to ``FeedEventSemantic/unknown``. + /// + /// The key distinction the registry encodes: agents with a *dedicated* + /// approval event (Claude `PermissionRequest`, Codex `PermissionRequest`, + /// Hermes `pre_approval_request`) classify their pre-tool event as + /// ``FeedEventSemantic/toolStart`` (always telemetry). Agents whose only + /// signal is the pre-tool event (gemini, copilot, …, handled by + /// ``genericFeedEventSemantics``) use + /// ``FeedEventSemantic/toolStartMaybeApproval`` so side-effecting tools + /// still escalate. Conflating the two is the bug behind #4985. 
+ private static let feedEventSemanticRegistry: [String: [String: FeedEventSemantic]] = [ + "claude": [ + "PermissionRequest": .approvalRequest, + "PreToolUse": .toolStart, + "PostToolUse": .toolEnd, + "UserPromptSubmit": .promptSubmit, + "SessionStart": .sessionStart, + "SessionEnd": .sessionEnd, + "Stop": .response, + "SubagentStop": .subagentResponse, + "Notification": .statusNotification, + ], + "codex": [ + "PermissionRequest": .approvalRequest, + "PreToolUse": .toolStart, + "beforeShellExecution": .toolStart, + "PostToolUse": .toolEnd, + "UserPromptSubmit": .promptSubmit, + "SessionStart": .sessionStart, + "SessionEnd": .sessionEnd, + "Stop": .response, + "SubagentStop": .subagentResponse, + "Notification": .statusNotification, + ], + "hermes-agent": [ + // `pre_tool_call` is a tool *starting* — Hermes raises a + // separate `pre_approval_request` for real approvals, so this + // must stay telemetry even for side-effecting tools (#4985). + "pre_tool_call": .toolStart, + "post_tool_call": .toolEnd, + // The approval banner for Hermes fires through the dedicated + // `notification` hook subcommand; on the feed path this stays a + // non-blocking notification to avoid a duplicate banner. + "pre_approval_request": .statusNotification, + "post_approval_response": .statusNotification, + "pre_llm_call": .promptSubmit, + "post_llm_call": .response, + "on_session_start": .sessionStart, + "on_session_reset": .sessionStart, + "on_session_end": .sessionEnd, + "on_session_finalize": .sessionEnd, + ], + ] + + /// Fallback table for agents without a dedicated entry in + /// ``feedEventSemanticRegistry``. These agents expose only a pre-tool + /// event, so it carries ``FeedEventSemantic/toolStartMaybeApproval``. 
+ private static let genericFeedEventSemantics: [String: FeedEventSemantic] = [ + "PreToolUse": .toolStartMaybeApproval, + "beforeShellExecution": .toolStartMaybeApproval, + "PermissionRequest": .approvalRequest, + "PostToolUse": .toolEnd, + "UserPromptSubmit": .promptSubmit, + "SessionStart": .sessionStart, + "SessionEnd": .sessionEnd, + "Stop": .response, + "SubagentStop": .subagentResponse, + "Notification": .statusNotification, + ] + /// Tools that mutate state and deserve a user-visible approve/ /// deny prompt in Feed. Keyed on the canonical tool names Claude, /// Codex, and similar agents emit. Read-only tools (Read, Grep, From a6dce30e7ef6464113cb5ebaea2ab46bb2a933ac Mon Sep 17 00:00:00 2001 From: austinpower1258 Date: Fri, 29 May 2026 23:13:57 -0700 Subject: [PATCH 3/4] Extract dedicatedApprovalEvent helper; strengthen classification tests DRY the ExitPlanMode / AskUserQuestion tool-name routing shared by the .approvalRequest and .toolStartMaybeApproval wire mappings into a single dedicatedApprovalEvent(for:) helper, and scope FeedEventSemantic to private. Add coverage that the dedicated-approval tool names route correctly on the generic pre-tool path and that Codex's beforeShellExecution stays telemetry. Refs #4985 Co-Authored-By: Claude Opus 4.8 --- CLI/cmux.swift | 41 +++++++++++--------- cmuxTests/FeedEventClassificationTests.swift | 16 +++++++- 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/CLI/cmux.swift b/CLI/cmux.swift index 8af1941ff0..2947fd4eff 100644 --- a/CLI/cmux.swift +++ b/CLI/cmux.swift @@ -30184,7 +30184,7 @@ export default function cmuxPiSessionExtension(pi: ExtensionAPI) { /// agent-specific raw event name. Notifications and blocking waits are /// keyed off this — never off raw event-name string matching — so the /// same misclassification cannot recur as new event names are added. - enum FeedEventSemantic { + private enum FeedEventSemantic { /// A real approval is pending; the user must approve/deny. 
Drives /// the blocking Feed wait and the "needs approval" notification. /// Resolved against the tool name so Claude's `ExitPlanMode` / @@ -30228,6 +30228,17 @@ export default function cmuxPiSessionExtension(pi: ExtensionAPI) { return table[event] ?? .unknown } + /// Tool names that carry their own dedicated approval wire event rather + /// than the generic `PermissionRequest`. Returns the actionable wire + /// mapping for such a tool, or `nil` for ordinary tools. + private static func dedicatedApprovalEvent(for toolName: String) -> (String, Bool)? { + switch toolName { + case "ExitPlanMode": return ("ExitPlanMode", true) + case "AskUserQuestion": return ("AskUserQuestion", true) + default: return nil + } + } + /// Maps a resolved semantic to the wire `hook_event_name` plus the /// `isActionable` flag, using `toolName` for the two tool-dependent /// semantics. @@ -30237,25 +30248,19 @@ export default function cmuxPiSessionExtension(pi: ExtensionAPI) { ) -> (String, Bool) { switch semantic { case .approvalRequest: - switch toolName { - case "ExitPlanMode": return ("ExitPlanMode", true) - case "AskUserQuestion": return ("AskUserQuestion", true) - default: return ("PermissionRequest", true) - } + return dedicatedApprovalEvent(for: toolName) ?? ("PermissionRequest", true) case .toolStartMaybeApproval: - switch toolName { - case "ExitPlanMode": return ("ExitPlanMode", true) - case "AskUserQuestion": return ("AskUserQuestion", true) - default: - // Any tool that can mutate the environment surfaces as a - // permission request so the user can approve/deny from the - // Feed sidebar. Read-only tools stay non-actionable - // telemetry so we don't flood the Actionable view. 
- if Self.sideEffectingTools.contains(toolName) { - return ("PermissionRequest", true) - } - return ("PreToolUse", false) + if let dedicated = dedicatedApprovalEvent(for: toolName) { + return dedicated } + // Any tool that can mutate the environment surfaces as a + // permission request so the user can approve/deny from the + // Feed sidebar. Read-only tools stay non-actionable + // telemetry so we don't flood the Actionable view. + if Self.sideEffectingTools.contains(toolName) { + return ("PermissionRequest", true) + } + return ("PreToolUse", false) case .toolStart: return ("PreToolUse", false) case .toolEnd: diff --git a/cmuxTests/FeedEventClassificationTests.swift b/cmuxTests/FeedEventClassificationTests.swift index 7cdc0273e9..299607207d 100644 --- a/cmuxTests/FeedEventClassificationTests.swift +++ b/cmuxTests/FeedEventClassificationTests.swift @@ -93,10 +93,24 @@ struct FeedEventClassificationTests { #expect(classify("gemini", "PreToolUse", tool: "Read").actionable == false) } + /// Even on the maybe-approval (generic pre-tool) path, the two dedicated + /// approval tool names route to their own wire kinds — they are never + /// collapsed into a generic `PermissionRequest`. Guards the shared + /// `dedicatedApprovalEvent(for:)` branch inside `.toolStartMaybeApproval`. + @Test func genericPreToolUseRoutesDedicatedApprovalTools() { + #expect(classify("gemini", "PreToolUse", tool: "ExitPlanMode").name == "ExitPlanMode") + #expect(classify("gemini", "PreToolUse", tool: "ExitPlanMode").actionable == true) + #expect(classify("gemini", "PreToolUse", tool: "AskUserQuestion").name == "AskUserQuestion") + #expect(classify("gemini", "PreToolUse", tool: "AskUserQuestion").actionable == true) + } + /// Codex has a dedicated `PermissionRequest` feed event, so its - /// `PreToolUse` is telemetry only. + /// pre-tool events (`PreToolUse` and the Codex-specific + /// `beforeShellExecution`) are telemetry only. 
@Test func codexPreToolUseIsTelemetry() { #expect(classify("codex", "PreToolUse", tool: "shell").actionable == false) + #expect(classify("codex", "beforeShellExecution", tool: "shell").actionable == false) + #expect(classify("codex", "beforeShellExecution", tool: "shell").name == "PreToolUse") #expect(classify("codex", "PermissionRequest", tool: "shell").actionable == true) } From 6d59f6333d65eb6d938bbfd59e1ecdc7dd3942e9 Mon Sep 17 00:00:00 2001 From: austinpower1258 Date: Fri, 29 May 2026 23:55:03 -0700 Subject: [PATCH 4/4] Make Hermes feed-event regression test compile and run; address review feedback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The FeedEventClassificationTests regression test never compiled on CI. `CMUXCLI.classifyFeedEvent` lives in the `cmux_cli` executable module, but the test (hosted by `cmux.app`) imported `cmux_DEV` and could neither see nor link the CLI tool's symbols. The `tests` job was failing at the cmuxTests *build* step (`cannot find 'CMUXCLI' in scope`) before any test ran — so the PR's two-commit red/green never actually validated the fix. Extract the pure classification logic into `CLI/FeedEventClassifier.swift` (`struct FeedEventClassifier`, single source of truth), compiled into both the `cmux-cli` and `cmuxTests` targets. The decision is now a genuine compiled unit test: 10 tests run and pass. `@testable import cmux_cli` is not an option — the app-hosted test bundle cannot link the CLI executable's symbols (the same reason the existing CLI test uses a subprocess). Behavior is unchanged for production payloads. Also addresses review feedback folded into the extracted file: - `FeedEventSemantic` is now `private` (Greptile P2). - `ExitPlanMode`/`AskUserQuestion` routing deduplicated via a shared `dedicatedApprovalEvent(for:)` helper (CodeRabbit). 
- Added coverage for Codex `beforeShellExecution` (telemetry) and generic-agent dedicated-approval-tool routing through the `.toolStartMaybeApproval` branch. Co-Authored-By: Claude Opus 4.8 --- CLI/FeedEventClassifier.swift | 244 +++++++++++++++++++ CLI/cmux.swift | 225 +---------------- cmux.xcodeproj/project.pbxproj | 6 + cmuxTests/FeedEventClassificationTests.swift | 12 +- 4 files changed, 257 insertions(+), 230 deletions(-) create mode 100644 CLI/FeedEventClassifier.swift diff --git a/CLI/FeedEventClassifier.swift b/CLI/FeedEventClassifier.swift new file mode 100644 index 0000000000..d42b18fbdc --- /dev/null +++ b/CLI/FeedEventClassifier.swift @@ -0,0 +1,244 @@ +import Foundation + +/// Classifies a raw agent hook event into our wire `hook_event_name` plus an +/// `isActionable` flag. +/// +/// This is the single source of truth behind both the running `cmux` CLI +/// (`cmux hooks feed …`) and the `FeedEventClassificationTests` regression +/// suite — the file is compiled into the `cmux-cli` target and the +/// `cmuxTests` target so the pure decision can be unit-tested without +/// launching the app or running the CLI as a subprocess. +/// +/// The mapping is driven by an explicit, typed registry +/// (``feedEventSemantic(source:event:)``) keyed on `(source, event)` rather +/// than by pattern-matching raw event-name strings. Notification eligibility +/// is derived only from the resolved ``FeedEventSemantic``, so a +/// tool-*starting* lifecycle event can never be mistaken for an approval +/// request — and unknown / future event names default to non-actionable +/// telemetry that never notifies. Conflating a tool-start with an approval +/// is the bug behind https://github.com/manaflow-ai/cmux/issues/4985. 
+struct FeedEventClassifier { + /// Classifies a raw agent hook event into our wire `hook_event_name` + /// plus an `isActionable` flag that drives whether the Feed bridge + /// blocks waiting for a user decision (and whether `FeedCoordinator` + /// posts a "needs approval" notification). + /// + /// - Parameters: + /// - source: The agent id that emitted the event (`claude`, `codex`, + /// `hermes-agent`, …). Unregistered sources use the generic table. + /// - event: The agent's raw hook event name. + /// - toolName: The tool the event refers to, used only for the two + /// tool-dependent semantics. + /// - Returns: The wire `hook_event_name` and whether the event is + /// Feed-actionable (blocks + may notify). + static func classify( + source: String, + event: String, + toolName: String + ) -> (String, Bool) { + let semantic = feedEventSemantic(source: source, event: event) + return wireMapping(for: semantic, toolName: toolName) + } + + /// User-attention semantic of a hook/feed event, independent of the + /// agent-specific raw event name. Notifications and blocking waits are + /// keyed off this — never off raw event-name string matching — so the + /// same misclassification cannot recur as new event names are added. + private enum FeedEventSemantic { + /// A real approval is pending; the user must approve/deny. Drives + /// the blocking Feed wait and the "needs approval" notification. + /// Resolved against the tool name so Claude's `ExitPlanMode` / + /// `AskUserQuestion` approvals route to their dedicated kinds. + case approvalRequest + /// A tool is about to run but no approval is pending. Telemetry + /// only. Used by agents that expose a *separate* approval event + /// (Claude, Codex, Hermes) so their pre-tool hook never escalates. + case toolStart + /// A tool is about to run and the agent has *no* dedicated approval + /// event, so a side-effecting tool is escalated to an approval and + /// read-only tools stay telemetry. Resolved against the tool name. 
+ case toolStartMaybeApproval + /// A tool finished. Telemetry only. + case toolEnd + /// A new turn / prompt started. Telemetry only. + case promptSubmit + /// The agent finished responding. Telemetry only. + case response + /// A subagent finished responding. Telemetry only. + case subagentResponse + case sessionStart + case sessionEnd + /// A generic status/notification event. Telemetry only — real + /// approval banners for these agents fire through the dedicated + /// `notification` hook subcommand, not the feed path. + case statusNotification + /// Unknown / unregistered event. Safe default: telemetry only, + /// never actionable, never notifies. + case unknown + } + + /// Resolves the semantic for a `(source, event)` pair. A registered + /// source uses its own table (unmatched events fall to ``FeedEventSemantic/unknown``); + /// unregistered sources use the generic table. + private static func feedEventSemantic( + source: String, + event: String + ) -> FeedEventSemantic { + let table = feedEventSemanticRegistry[source] ?? genericFeedEventSemantics + return table[event] ?? .unknown + } + + /// Tool names that carry their own dedicated approval wire event rather + /// than the generic `PermissionRequest`. Returns the actionable wire + /// mapping for such a tool, or `nil` for ordinary tools. + private static func dedicatedApprovalEvent(for toolName: String) -> (String, Bool)? { + switch toolName { + case "ExitPlanMode": return ("ExitPlanMode", true) + case "AskUserQuestion": return ("AskUserQuestion", true) + default: return nil + } + } + + /// Maps a resolved semantic to the wire `hook_event_name` plus the + /// `isActionable` flag, using `toolName` for the two tool-dependent + /// semantics. + private static func wireMapping( + for semantic: FeedEventSemantic, + toolName: String + ) -> (String, Bool) { + switch semantic { + case .approvalRequest: + return dedicatedApprovalEvent(for: toolName) ?? 
("PermissionRequest", true) + case .toolStartMaybeApproval: + if let dedicated = dedicatedApprovalEvent(for: toolName) { + return dedicated + } + // Any tool that can mutate the environment surfaces as a + // permission request so the user can approve/deny from the + // Feed sidebar. Read-only tools stay non-actionable + // telemetry so we don't flood the Actionable view. + if Self.sideEffectingTools.contains(toolName) { + return ("PermissionRequest", true) + } + return ("PreToolUse", false) + case .toolStart: + return ("PreToolUse", false) + case .toolEnd: + return ("PostToolUse", false) + case .promptSubmit: + return ("UserPromptSubmit", false) + case .response: + return ("Stop", false) + case .subagentResponse: + return ("SubagentStop", false) + case .sessionStart: + return ("SessionStart", false) + case .sessionEnd: + return ("SessionEnd", false) + case .statusNotification: + return ("Notification", false) + case .unknown: + // Safe default: telemetry, no approval, no notification. + return ("PreToolUse", false) + } + } + + /// Per-agent event-semantic tables. Each entry is the source of truth + /// for that agent's `(event) -> semantic` mapping; events absent here + /// resolve to ``FeedEventSemantic/unknown``. + /// + /// The key distinction the registry encodes: agents with a *dedicated* + /// approval event (Claude `PermissionRequest`, Codex `PermissionRequest`, + /// Hermes `pre_approval_request`) classify their pre-tool event as + /// ``FeedEventSemantic/toolStart`` (always telemetry). Agents whose only + /// signal is the pre-tool event (gemini, copilot, …, handled by + /// ``genericFeedEventSemantics``) use + /// ``FeedEventSemantic/toolStartMaybeApproval`` so side-effecting tools + /// still escalate. Conflating the two is the bug behind #4985. 
+ private static let feedEventSemanticRegistry: [String: [String: FeedEventSemantic]] = [ + "claude": [ + "PermissionRequest": .approvalRequest, + "PreToolUse": .toolStart, + "PostToolUse": .toolEnd, + "UserPromptSubmit": .promptSubmit, + "SessionStart": .sessionStart, + "SessionEnd": .sessionEnd, + "Stop": .response, + "SubagentStop": .subagentResponse, + "Notification": .statusNotification, + ], + "codex": [ + "PermissionRequest": .approvalRequest, + "PreToolUse": .toolStart, + "beforeShellExecution": .toolStart, + "PostToolUse": .toolEnd, + "UserPromptSubmit": .promptSubmit, + "SessionStart": .sessionStart, + "SessionEnd": .sessionEnd, + "Stop": .response, + "SubagentStop": .subagentResponse, + "Notification": .statusNotification, + ], + "hermes-agent": [ + // `pre_tool_call` is a tool *starting* — Hermes raises a + // separate `pre_approval_request` for real approvals, so this + // must stay telemetry even for side-effecting tools (#4985). + "pre_tool_call": .toolStart, + "post_tool_call": .toolEnd, + // The approval banner for Hermes fires through the dedicated + // `notification` hook subcommand; on the feed path this stays a + // non-blocking notification to avoid a duplicate banner. + "pre_approval_request": .statusNotification, + "post_approval_response": .statusNotification, + "pre_llm_call": .promptSubmit, + "post_llm_call": .response, + "on_session_start": .sessionStart, + "on_session_reset": .sessionStart, + "on_session_end": .sessionEnd, + "on_session_finalize": .sessionEnd, + ], + ] + + /// Fallback table for agents without a dedicated entry in + /// ``feedEventSemanticRegistry``. These agents expose only a pre-tool + /// event, so it carries ``FeedEventSemantic/toolStartMaybeApproval``. 
+ private static let genericFeedEventSemantics: [String: FeedEventSemantic] = [ + "PreToolUse": .toolStartMaybeApproval, + "beforeShellExecution": .toolStartMaybeApproval, + "PermissionRequest": .approvalRequest, + "PostToolUse": .toolEnd, + "UserPromptSubmit": .promptSubmit, + "SessionStart": .sessionStart, + "SessionEnd": .sessionEnd, + "Stop": .response, + "SubagentStop": .subagentResponse, + "Notification": .statusNotification, + ] + + /// Tools that mutate state and deserve a user-visible approve/ + /// deny prompt in Feed. Keyed on the canonical tool names Claude, + /// Codex, and similar agents emit. Read-only tools (Read, Grep, + /// Glob, Task, WebFetch, WebSearch, LS, TodoWrite, …) are + /// intentionally excluded. + private static let sideEffectingTools: Set = [ + "Bash", + "Write", + "Edit", + "MultiEdit", + "NotebookEdit", + "apply_patch", // Codex + "shell", // Codex / other agents + "terminal", // Hermes Agent + "run_command", // Antigravity + "write_to_file", + "replace_file_content", + "multi_replace_file_content", + "manage_task", + "schedule", + "ask_permission", + "invoke_subagent", + "define_subagent", + "manage_subagents", + "generate_image", + ] +} diff --git a/CLI/cmux.swift b/CLI/cmux.swift index 2947fd4eff..eb9dfe848d 100644 --- a/CLI/cmux.swift +++ b/CLI/cmux.swift @@ -30042,7 +30042,7 @@ export default function cmuxPiSessionExtension(pi: ExtensionAPI) { // Decide whether this event is Feed-actionable. Non-actionable // events are forwarded as telemetry (non-blocking) and exit `{}` // so the agent proceeds without a decision. 
- let (hookEventName, isActionable) = Self.classifyFeedEvent( + let (hookEventName, isActionable) = FeedEventClassifier.classify( source: source, event: rawEvent, toolName: toolName @@ -30159,229 +30159,6 @@ export default function cmuxPiSessionExtension(pi: ExtensionAPI) { print("{}") } - /// Classifies a raw agent hook event into our wire `hook_event_name` - /// plus an `isActionable` flag that drives whether the Feed bridge - /// blocks waiting for a user decision (and whether `FeedCoordinator` - /// posts a "needs approval" notification). - /// - /// The mapping is driven by an explicit, typed registry - /// (`feedEventSemantic`) keyed on `(source, event)` rather than by - /// pattern-matching raw event-name strings. Notification eligibility - /// is derived only from the resolved ``FeedEventSemantic``, so a - /// tool-*starting* lifecycle event can never be mistaken for an - /// approval request — and unknown / future event names default to - /// non-actionable telemetry that never notifies. - static func classifyFeedEvent( - source: String, - event: String, - toolName: String - ) -> (String, Bool) { - let semantic = feedEventSemantic(source: source, event: event) - return wireMapping(for: semantic, toolName: toolName) - } - - /// User-attention semantic of a hook/feed event, independent of the - /// agent-specific raw event name. Notifications and blocking waits are - /// keyed off this — never off raw event-name string matching — so the - /// same misclassification cannot recur as new event names are added. - private enum FeedEventSemantic { - /// A real approval is pending; the user must approve/deny. Drives - /// the blocking Feed wait and the "needs approval" notification. - /// Resolved against the tool name so Claude's `ExitPlanMode` / - /// `AskUserQuestion` approvals route to their dedicated kinds. - case approvalRequest - /// A tool is about to run but no approval is pending. Telemetry - /// only. 
Used by agents that expose a *separate* approval event - /// (Claude, Codex, Hermes) so their pre-tool hook never escalates. - case toolStart - /// A tool is about to run and the agent has *no* dedicated approval - /// event, so a side-effecting tool is escalated to an approval and - /// read-only tools stay telemetry. Resolved against the tool name. - case toolStartMaybeApproval - /// A tool finished. Telemetry only. - case toolEnd - /// A new turn / prompt started. Telemetry only. - case promptSubmit - /// The agent finished responding. Telemetry only. - case response - /// A subagent finished responding. Telemetry only. - case subagentResponse - case sessionStart - case sessionEnd - /// A generic status/notification event. Telemetry only — real - /// approval banners for these agents fire through the dedicated - /// `notification` hook subcommand, not the feed path. - case statusNotification - /// Unknown / unregistered event. Safe default: telemetry only, - /// never actionable, never notifies. - case unknown - } - - /// Resolves the semantic for a `(source, event)` pair. A registered - /// source uses its own table (unmatched events fall to ``unknown``); - /// unregistered sources use the generic table. - private static func feedEventSemantic( - source: String, - event: String - ) -> FeedEventSemantic { - let table = feedEventSemanticRegistry[source] ?? genericFeedEventSemantics - return table[event] ?? .unknown - } - - /// Tool names that carry their own dedicated approval wire event rather - /// than the generic `PermissionRequest`. Returns the actionable wire - /// mapping for such a tool, or `nil` for ordinary tools. - private static func dedicatedApprovalEvent(for toolName: String) -> (String, Bool)? 
{ - switch toolName { - case "ExitPlanMode": return ("ExitPlanMode", true) - case "AskUserQuestion": return ("AskUserQuestion", true) - default: return nil - } - } - - /// Maps a resolved semantic to the wire `hook_event_name` plus the - /// `isActionable` flag, using `toolName` for the two tool-dependent - /// semantics. - private static func wireMapping( - for semantic: FeedEventSemantic, - toolName: String - ) -> (String, Bool) { - switch semantic { - case .approvalRequest: - return dedicatedApprovalEvent(for: toolName) ?? ("PermissionRequest", true) - case .toolStartMaybeApproval: - if let dedicated = dedicatedApprovalEvent(for: toolName) { - return dedicated - } - // Any tool that can mutate the environment surfaces as a - // permission request so the user can approve/deny from the - // Feed sidebar. Read-only tools stay non-actionable - // telemetry so we don't flood the Actionable view. - if Self.sideEffectingTools.contains(toolName) { - return ("PermissionRequest", true) - } - return ("PreToolUse", false) - case .toolStart: - return ("PreToolUse", false) - case .toolEnd: - return ("PostToolUse", false) - case .promptSubmit: - return ("UserPromptSubmit", false) - case .response: - return ("Stop", false) - case .subagentResponse: - return ("SubagentStop", false) - case .sessionStart: - return ("SessionStart", false) - case .sessionEnd: - return ("SessionEnd", false) - case .statusNotification: - return ("Notification", false) - case .unknown: - // Safe default: telemetry, no approval, no notification. - return ("PreToolUse", false) - } - } - - /// Per-agent event-semantic tables. Each entry is the source of truth - /// for that agent's `(event) -> semantic` mapping; events absent here - /// resolve to ``FeedEventSemantic/unknown``. 
- /// - /// The key distinction the registry encodes: agents with a *dedicated* - /// approval event (Claude `PermissionRequest`, Codex `PermissionRequest`, - /// Hermes `pre_approval_request`) classify their pre-tool event as - /// ``FeedEventSemantic/toolStart`` (always telemetry). Agents whose only - /// signal is the pre-tool event (gemini, copilot, …, handled by - /// ``genericFeedEventSemantics``) use - /// ``FeedEventSemantic/toolStartMaybeApproval`` so side-effecting tools - /// still escalate. Conflating the two is the bug behind #4985. - private static let feedEventSemanticRegistry: [String: [String: FeedEventSemantic]] = [ - "claude": [ - "PermissionRequest": .approvalRequest, - "PreToolUse": .toolStart, - "PostToolUse": .toolEnd, - "UserPromptSubmit": .promptSubmit, - "SessionStart": .sessionStart, - "SessionEnd": .sessionEnd, - "Stop": .response, - "SubagentStop": .subagentResponse, - "Notification": .statusNotification, - ], - "codex": [ - "PermissionRequest": .approvalRequest, - "PreToolUse": .toolStart, - "beforeShellExecution": .toolStart, - "PostToolUse": .toolEnd, - "UserPromptSubmit": .promptSubmit, - "SessionStart": .sessionStart, - "SessionEnd": .sessionEnd, - "Stop": .response, - "SubagentStop": .subagentResponse, - "Notification": .statusNotification, - ], - "hermes-agent": [ - // `pre_tool_call` is a tool *starting* — Hermes raises a - // separate `pre_approval_request` for real approvals, so this - // must stay telemetry even for side-effecting tools (#4985). - "pre_tool_call": .toolStart, - "post_tool_call": .toolEnd, - // The approval banner for Hermes fires through the dedicated - // `notification` hook subcommand; on the feed path this stays a - // non-blocking notification to avoid a duplicate banner. 
- "pre_approval_request": .statusNotification, - "post_approval_response": .statusNotification, - "pre_llm_call": .promptSubmit, - "post_llm_call": .response, - "on_session_start": .sessionStart, - "on_session_reset": .sessionStart, - "on_session_end": .sessionEnd, - "on_session_finalize": .sessionEnd, - ], - ] - - /// Fallback table for agents without a dedicated entry in - /// ``feedEventSemanticRegistry``. These agents expose only a pre-tool - /// event, so it carries ``FeedEventSemantic/toolStartMaybeApproval``. - private static let genericFeedEventSemantics: [String: FeedEventSemantic] = [ - "PreToolUse": .toolStartMaybeApproval, - "beforeShellExecution": .toolStartMaybeApproval, - "PermissionRequest": .approvalRequest, - "PostToolUse": .toolEnd, - "UserPromptSubmit": .promptSubmit, - "SessionStart": .sessionStart, - "SessionEnd": .sessionEnd, - "Stop": .response, - "SubagentStop": .subagentResponse, - "Notification": .statusNotification, - ] - - /// Tools that mutate state and deserve a user-visible approve/ - /// deny prompt in Feed. Keyed on the canonical tool names Claude, - /// Codex, and similar agents emit. Read-only tools (Read, Grep, - /// Glob, Task, WebFetch, WebSearch, LS, TodoWrite, …) are - /// intentionally excluded. 
- private static let sideEffectingTools: Set = [ - "Bash", - "Write", - "Edit", - "MultiEdit", - "NotebookEdit", - "apply_patch", // Codex - "shell", // Codex / other agents - "terminal", // Hermes Agent - "run_command", // Antigravity - "write_to_file", - "replace_file_content", - "multi_replace_file_content", - "manage_task", - "schedule", - "ask_permission", - "invoke_subagent", - "define_subagent", - "manage_subagents", - "generate_image", - ] - private static let skipInterviewAndPlanAnswer = "Skip interview and plan immediately" /// Encodes the user's decision in the agent's expected hook stdout diff --git a/cmux.xcodeproj/project.pbxproj b/cmux.xcodeproj/project.pbxproj index bd9a339d52..7d8304728c 100644 --- a/cmux.xcodeproj/project.pbxproj +++ b/cmux.xcodeproj/project.pbxproj @@ -205,6 +205,8 @@ FEED0000000000000000F002 /* FeedCoordinator.swift in Sources */ = {isa = PBXBuildFile; fileRef = FEED0000000000000000F001 /* FeedCoordinator.swift */; }; FEEDC0DEC0DEC0DEC0DE0001 /* FeedCoordinatorTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = FEEDC0DEC0DEC0DEC0DE0002 /* FeedCoordinatorTests.swift */; }; FEED49850000000000000001 /* FeedEventClassificationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = FEED49850000000000000002 /* FeedEventClassificationTests.swift */; }; + FEEDC1A50000000000000001 /* FeedEventClassifier.swift in Sources */ = {isa = PBXBuildFile; fileRef = FEEDC1A50000000000000002 /* FeedEventClassifier.swift */; }; + FEEDC1A50000000000000003 /* FeedEventClassifier.swift in Sources */ = {isa = PBXBuildFile; fileRef = FEEDC1A50000000000000002 /* FeedEventClassifier.swift */; }; FEED0000000000000000F005 /* FeedPanelView.swift in Sources */ = {isa = PBXBuildFile; fileRef = FEED0000000000000000F004 /* FeedPanelView.swift */; }; FEED0000000000000000F011 /* FeedPanelViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = FEED0000000000000000F010 /* FeedPanelViewModel.swift */; }; FEED0000000000000000F013 /* 
FeedPermissionActionPolicy.swift in Sources */ = {isa = PBXBuildFile; fileRef = FEED0000000000000000F012 /* FeedPermissionActionPolicy.swift */; }; @@ -823,6 +825,7 @@ FEED0000000000000000F001 /* FeedCoordinator.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = FeedCoordinator.swift; sourceTree = "<group>"; }; FEEDC0DEC0DEC0DEC0DE0002 /* FeedCoordinatorTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedCoordinatorTests.swift; sourceTree = "<group>"; }; FEED49850000000000000002 /* FeedEventClassificationTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedEventClassificationTests.swift; sourceTree = "<group>"; }; + FEEDC1A50000000000000002 /* FeedEventClassifier.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = FeedEventClassifier.swift; sourceTree = "<group>"; }; FEED0000000000000000F004 /* FeedPanelView.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = FeedPanelView.swift; sourceTree = "<group>"; }; FEED0000000000000000F010 /* FeedPanelViewModel.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = FeedPanelViewModel.swift; sourceTree = "<group>"; }; FEED0000000000000000F012 /* FeedPermissionActionPolicy.swift */ = {isa = PBXFileReference; includeInIndex = 1; lastKnownFileType = sourcecode.swift; path = FeedPermissionActionPolicy.swift; sourceTree = "<group>"; }; @@ -1687,6 +1690,7 @@ isa = PBXGroup; children = ( B9000001A1B2C3D4E5F60719 /* cmux.swift */, + FEEDC1A50000000000000002 /* FeedEventClassifier.swift */, B900004BA1B2C3D4E5F60719 /* CLISocketPathResolver.swift */, C510C1E00000000000000001 /* SocketOperationTelemetry.swift */, B9000030A1B2C3D4E5F60719 /* cmux_open.swift */, @@ -2509,6 +2513,7 @@ B9000048A1B2C3D4E5F60719 /* CMUXCLI+TmuxCompatHUDSupport.swift in Sources */, B9000044A1B2C3D4E5F60719 /* CMUXCLI+TmuxCompatSupport.swift in Sources */, 
B9000033A1B2C3D4E5F60719 /* CMUXCLI+TopRendering.swift in Sources */, + FEEDC1A50000000000000001 /* FeedEventClassifier.swift in Sources */, C0DEF0B10000000000000003 /* JSONCParser.swift in Sources */, C47110020000000000000003 /* ProcessPipeReader.swift in Sources */, B9000027A1B2C3D4E5F60719 /* RemoteRelayZshBootstrap.swift in Sources */, @@ -2615,6 +2620,7 @@ C1713006C1713006C1713006 /* CommandPaletteShortcutCustomizationTests.swift in Sources */, FEEDC0DEC0DEC0DEC0DE0001 /* FeedCoordinatorTests.swift in Sources */, FEED49850000000000000001 /* FeedEventClassificationTests.swift in Sources */, + FEEDC1A50000000000000003 /* FeedEventClassifier.swift in Sources */, D0B10018A1B2C3D4E5F60001 /* FileDropOverlayViewTests.swift in Sources */, FE002101 /* FileExplorerRootResolverTests.swift in Sources */, B37A0000000000000000000B /* FileExplorerStateModePersistenceTests.swift in Sources */, diff --git a/cmuxTests/FeedEventClassificationTests.swift b/cmuxTests/FeedEventClassificationTests.swift index 299607207d..01675158c8 100644 --- a/cmuxTests/FeedEventClassificationTests.swift +++ b/cmuxTests/FeedEventClassificationTests.swift @@ -1,10 +1,10 @@ import Testing -#if canImport(cmux_DEV) -@testable import cmux_DEV -#elseif canImport(cmux) -@testable import cmux -#endif +// `FeedEventClassifier` lives in `CLI/FeedEventClassifier.swift`, which is +// compiled into both the `cmux-cli` target and this test target — so the pure +// classification decision can be unit-tested directly, without `@testable` +// importing the `cmux_cli` executable module (whose symbols the app-hosted +// test bundle cannot link). /// Regression coverage for the feed-event → user-attention classification. 
/// @@ -21,7 +21,7 @@ struct FeedEventClassificationTests { private func classify(_ source: String, _ event: String, tool: String = "") -> (name: String, actionable: Bool) { - let result = CMUXCLI.classifyFeedEvent(source: source, event: event, toolName: tool) + let result = FeedEventClassifier.classify(source: source, event: event, toolName: tool) return (result.0, result.1) }