diff --git a/HANDOFF.md b/HANDOFF.md new file mode 100644 index 00000000..f76f78f0 --- /dev/null +++ b/HANDOFF.md @@ -0,0 +1,52 @@ +# Handoff + +> Updated 2026-06-10 after `ln-sync`. Volatile transfer state only. Delete or overwrite once the FE-847 scaffold is laid, or if `memory/SPEC.md` / `memory/PLAN.md` remain sufficient for re-entry. + +## Canonical state + +- `memory/SPEC.md` now owns D76-L–D78-L and I45-L–I47-L for the turn-boundary choreography layer. +- `memory/PLAN.md` now owns the frontier split: + - `dx-tier-2-harness` (FE-847, active): thin Tier-2 DX chassis + coverage-first scaffold only. + - `turn-boundary-reconciliation` (M7, next): assistant-visible watermark projection, `prepareNextTurn` reconciler / `worldUpdate`, submit-time mention ledger + staleness. + - `kick-and-context-seeding` (next): honest assistant origination via `session.triggerExchange` plus boot/resume context seeding. +- **Branch decision (user, 2026-06-10): all of S0–S5 build on the single `ln/fe-847-dx-introspection-tier-2` branch under FE-847.** The three groupings stay distinct planning units (seams + traceability) but execute as sequential slices — no separate Linear issues / Graphite branches. PLAN reconciled to match (Linear/Branch lines, sequencing, dependency-edge notes). + +## Sync notes + +- Approved the PLAN split: keep FE-847 to S0 chassis/scaffold; keep kick+seeding a distinct planning unit from M7 reconciliation (but same FE-847 branch per the decision above). +- Tightened topology sync in `src/session/README.md`: the write-side is planned, not already implemented. +- Removed stale dependency-graph horizon residue for `turn-boundary-reconciliation` after it moved to Next. + +## Oracle pre-build review (2026-06-10) + +Endorsed the architecture; four hazards folded into SPEC (D76–D78, I45–I47, coverage rows, lexicon): + +1. **same-session capture** — `worldUpdate` covers any not-yet-visible write incl. 
submit-time/freestyle capture (D18-L/D66-L), not only foreign writes. +2. **kick = conversational-debt classification** (ignore trailing continuity-only entries) → idempotent reboot-after-notice. +3. **compaction preserves the watermark carrier** so projection never regresses. +4. **guard-as-retry** — `before_provider_request` re-runs prepare once on drift, never writes; reconciler runs before prompt composition. + +Plus: S1 = separate watermark projection (not `runtimeState.world.latestLsn` overload). Optional S2 split (S2a watermark+reconciler+worldUpdate / S2b adapter stamping + drains) deferred to `ln-scope`. + +## Oracle final pre-scope review (2026-06-10) + +Verdict: **ready to scope**, S0→S1→S2→S3→S4 sequencing on one branch sound, no reorder forced. One seam tightening + two scaffold-authoring guards folded in: + +- **D78-L / I46-L** — resume-debt ignore set now explicitly includes reconciler-inserted **side-task & reviewer drains** (D15-L), not just seed / `worldUpdate` / `brunch.mention*` / `brunch.session_lifecycle`. Generalized to "any reconciler-inserted notice owing no assistant continuation." Closes an I46 fixture ambiguity where a persisted side-task notice could be misread as tail debt. +- **S0 scaffold (PLAN)** — stub **one shared continuity-entry classifier** (`isWatermarkCarrier` / `isContinuityOnlyNonDebtEntry`) so S1/S2 and S4 share one carrier/continuity-only/debt taxonomy instead of duplicating lists. +- **S0 scaffold (PLAN)** — assert `worldUpdate.items` / watermark / kick as **sets and `{specId, lsn}` properties, not payload-order goldens** (no canonical item sort specified) to keep the suite deterministic. + +## Next step + +Run `ln-scope` for `dx-tier-2-harness` (FE-847): real `runBrunchTui` boot + one faux turn + provider-payload/transcript oracle + fixture resume, then lay skipped scaffold tests and intentional topology stubs. + +Scaffold must preserve these edge cases (now 8, post-oracle): + +1.
seed/full-overview snapshots advance the watermark; narrow `getNodes` / `queryNodes` reads do not +2. no redundant `worldUpdate` immediately after a seed that named the current snapshot LSN +3. resume kick uses latest-conversational-debt (ignoring trailing continuity entries), so a user tail still earns a kick after reconciler-inserted notices +4. crash-after-notice-before-provider reboot still kicks when debt is unanswered (idempotent) +5. same-session capture bumps `current_lsn` and is surfaced by next `worldUpdate` (not swallowed) +6. foreign write between snapshot read and seed insertion is not masked by the seed +7. compaction+resume preserves the watermark (no spurious `worldUpdate`) +8. a trailing reconciler-inserted side-task / reviewer drain is ignored by kick classification (owes no continuation), so it neither masks an older user/assistant debt nor manufactures a kick over a satisfied leaf diff --git a/memory/PLAN.md b/memory/PLAN.md index 0e10804d..47034ca3 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -41,6 +41,8 @@ A new graph-mutation planning result has been promoted into the rolling plan as **Readiness / elicitation-gaps remodel promoted (2026-06-09 ln-plan, post-`ln-spec`).** A SPEC pass reconceived the readiness and prospective-agenda model and must now land in code (D45-L, D57-L, D64-L, D65-L, D73-L, D74-L; A24-L, A27-L; I25-L, I30-L, I31-L). 
Four coupled implications: (1) **`elicitation_backlog` → `elicitation_gaps`** — the FE-823 question-instance / `open|closed` table is remodeled into typed coverage *obligations* (each gap carries a `name` typology key + meta `rationale`, a band, a `presence|field|coverage|manual` predicate union, an `importance` + derived `coverage`, and a `disposition`), seeded from the collated **grounding typology catalog** (floor `domain`/`protagonist`/`pain_pull`/`constraint` + progressive drivers `value`/`context_of_use`/`success_sketch`/`solution_boundary`) instead of four literal anchor questions; (2) **JIT capability-readiness** replaces the stored grade gate — readiness is judged on a capability request against the relevant gaps (proceed / proceed-at-low-epistemic-status / negotiate), retiring `readiness_grade`, `updateReadinessGrade`, `READINESS_GRADES`, and the `MIN_GRADE` proxy tables in `runtime-policy.ts`; (3) a soft derived **readiness estimate** (UI-only, gates nothing) plus removal of the vestigial `chrome.phase` / `chrome.chatMode` fields; (4) a small follow-on **session/runtime vocabulary leaf** (`src/session/schema/kinds.ts`) mirroring `graph/schema/kinds.ts` for the `op_mode`/`strategy`/`lens`/`goal` axes. These are promoted as `elicitation-gaps-remodel` → `capability-readiness` (hard chain) plus the parallel `runtime-vocab-leaf`; none are POC-ship-critical (the delivery cut de-scopes elicitation quality). **Sequencing tension with the trio:** `capability-readiness` mutates exactly the shapes the trio would lock (`workspace/workspace-state` drops phase/chatMode and gains the readiness estimate; `session/runtime-state` + composition drop grade). By the trio's own "lock upstream shape before downstream output" principle, the gaps/readiness remodel is *upstream* of the trio's readiness/chrome-touching locks and should land before stage 1 (`projection-shape-coverage`) freezes those shapes — otherwise the locks churn. 
Recommended order: `elicitation-gaps-remodel` → `capability-readiness` first, then the trio; or, if the trio leads, it must explicitly bracket the grade/phase/chatMode fields until the remodel lands. `elicitation-driver` now rides the remodeled gaps substrate, not the FE-823 backlog shape. **2026-06-10 follow-on (D75-L):** a further SPEC pass collapsed the parallel grounding-typology vocabulary onto the node-kind ontology — gaps now reference graph node kinds (`refersTo: NodeKind`) instead of a closed typology `name` enum. This inserts `gaps-node-kind-reference` at the head of the chain (`elicitation-gaps-remodel` → `gaps-node-kind-reference` → `capability-readiness`); it reshapes the gaps substrate and the `capability → NodeKind[]` map, and absorbs the now-retired refactor plan (which had planned to enshrine the typology catalog). +**Turn-boundary choreography promoted as core mechanics (2026-06-10 ln-plan, post-`ln-spec` D76-L–D78-L / I45-L–I47-L).** The runtime "Tier-2" layer — what enters the transcript at a turn boundary and who originates the next turn — is being specced and scoped *now*, not deferred to M7-as-fog, because it is core product choreography and the concept is fresh. SPEC locked three decisions (assistant-visible watermark D76-L; one-writer reconciler + aux seams/guard D77-L; honest kick + context seeding D78-L), sharpened I9-L, and added I45-L–I47-L plus a **coverage-first scaffold** design note (author the layer's whole invariant suite up front, skip/`todo` each test until its enabling slice lands). The layer decomposes into a slice map S0–S5: **S0** is the Tier-2 *chassis* (DX only, thin) on **FE-847** — real `runBrunchTui` boot, one faux model turn, provider-payload capture, transcript inspection, fixture resume — plus authoring the skipped coverage-first scaffold and the topology stubs the product slices fill. 
**S1–S3 + S5(share)** are product write-side mechanics owned by **`turn-boundary-reconciliation` (M7)**: S1 assistant-visible watermark projection, S2 the `prepareNextTurn` reconciler + `worldUpdate` + own-write/full-overview watermark stamping, S3 the submit-time mention ledger + staleness. **S4 + S5(share)** are the **`kick-and-context-seeding`** grouping: honest assistant-origination behind `session.triggerExchange` plus boot/resume context seeding. S5 (boot idempotence + carrier discipline, I47-L) is a cross-cutting obligation threaded through both product groupings, not its own frontier. **Branch decision (user, 2026-06-10): the entire S0–S5 layer is built on the single `ln/fe-847-dx-introspection-tier-2` branch under the FE-847 issue** — the three groupings are distinct planning units (seams + traceability) executed as sequential slices, not separate Linear issues/Graphite branches (AGENTS.md permits multiple slices per issue+branch). The scaffold's first tests must encode three edge cases locked into SPEC: (a) seed/full-overview snapshots advance the watermark while narrow `getNodes`/`queryNodes` reads do not; (b) no redundant `worldUpdate` immediately after a seed that named the current snapshot LSN; (c) the resume kick decision is taken on the **pre-reconcile** tail, so a user tail still earns a kick even after the reconciler inserts seed/staleness notices ahead of it. None of this is POC-ship-critical; the S0 chassis is buildable now. + ### Context-pipeline coverage (the next design/lock spine) The four LLM-facing context concerns are not independent — they are the stages of **one pipeline** (D60-L): **PULL → PROJECT → RENDER → COMPOSE → surface**. Coverage means *each stage carries its appropriate oracle over a complete, ledgered inventory*. 
The stages must be closed **in dependency order**, because each downstream lock is only stable once its upstream shape is locked (projection invariants churn while read shapes still move; renderer goldens churn while projection shapes still move; prompt goldens churn while renderer output still moves). @@ -82,7 +84,14 @@ per ledger row: ### Active -- No active frontier. `capability-readiness` completed the stored-grade deletion sweep on 2026-06-11; see §Next for the next sequenced frontier. +- `turn-boundary-reconciliation` (FE-847) — remaining FE-847 closure on the shared branch: flip the skipped Tier-2 I45/I47 scaffold live, prove submit-time mention resolution and staleness through the real session path, and preserve the latest watermark carrier across compaction/resume. + +### Turn-boundary choreography (Tier-2 layer) + +Core runtime choreography specced/scoped now (Context §Turn-boundary choreography; SPEC D76-L–D78-L, I45-L–I47-L). FE-847 lays the chassis + coverage-first scaffold; the product write-side then fills the scaffold slice by slice. **Branch-mechanics override (user, 2026-06-11): `dx-tier-2-harness` stays on `ln/fe-847-dx-introspection-tier-2`, while the remaining product closures (`turn-boundary-reconciliation` and `kick-and-context-seeding`) continue together on the stacked successor branch `ln/fe-847-turn-boundary-closure`.** This is a stack-management exception only: same FE-847 issue, same sequential closure, no new frontier or Linear split. Each grouping still flips its own scaffold tests live. + +1. `turn-boundary-reconciliation` (M7 product mechanics; slice group on FE-847) — S1 assistant-visible watermark projection (D76-L), S2 the `prepareNextTurn` one-writer reconciler + `worldUpdate` + own-write/full-overview watermark stamping (D77-L), S3 submit-time mention ledger + staleness (I9-L). Carries its share of S5 (carrier discipline / no-redundant-`worldUpdate`-after-seed idempotence, I47-L). +2. 
`kick-and-context-seeding` (product mechanics; slice group on FE-847) — **sequenced after `turn-boundary-reconciliation` S1/S2** (the seed must advance the watermark and the kick decision interacts with reconciler-inserted notices). S4 honest assistant-origination behind `session.triggerExchange` (`startAssistantTurn({ origin })`) + boot/resume context seeding (D78-L). Carries its share of S5 (boot/resume seed idempotence, pre-reconcile-tail kick policy, I46-L/I47-L). ### Readiness & elicitation-gaps remodel (recommended ahead of the trio) @@ -113,12 +122,11 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - `probes-and-transcripts-evolution` — continuous probe/report/transcript hardening as each delivery frontier lands evidence. - `topology-readmes-and-boundaries` — small doc/test hardening when a frontier moves files or exposes a boundary; should remain attached to the frontier when possible rather than becoming an abstract cleanup project. - `dev-seed-fixtures` — rich, real seed data for local dev / manual / observer testing: the consolidated seed contract, the `npm run seed` loader, and growing/enhancing fixture sets (Bilal-port + legacy). Its semantic curation mutation slice is folded into / blocked by `role-safe-graph-mutations`; ongoing seed-data maintenance remains low-conflict. -- `dx-introspection-live` — DX follow-on to `dx-feedback-loops`: harden the four-role `.fixtures/` topology + `--cwd` launch (D70-L), unify dev gating under `BRUNCH_DEV` and wire the dormant introspection extension into the real TUI (D71-L), and make introspection conversational (A26-L). Three sequenced slices; ready for a scoping thread. Low-conflict with the product trio; touches `.fixtures/`, `src/app/`, `src/dev/`, `src/.pi/extensions/introspection/`. +- `dx-introspection-live` — done 2026-06-11. 
DX follow-on to `dx-feedback-loops`: hardened the four-role `.fixtures/` topology + `--cwd` launch (D70-L), unified dev gating under `BRUNCH_DEV`, wired introspection into the real TUI (D71-L), made introspection conversational (A26-L), and added the workspace-local `.brunch/debug/` cache for final system prompt + Brunch-owned tool-result contents. `tool-renders` flattening remains deferred until a concrete renderer-debugging need appears. - `runtime-vocab-leaf` — establish `src/session/schema/kinds.ts` as the drizzle-free source-of-truth leaf for the session/runtime axis enums (`op_mode`, `strategy`, `lens`, `goal`, `auto` sentinel), mirroring `graph/schema/kinds.ts` (D73-L ownership direction). The decision-3 follow-on; independent of the remodel chain and the trio. Must not recreate `READINESS_GRADES` (retired by `capability-readiness`). ### Horizon -- `turn-boundary-reconciliation` — M7; graph revisions, `worldUpdate`, mention staleness, side-task/reviewer drains. - `coherence-first-class` — M8; bounded coherence verdicts backed by reconciliation needs. - `compaction-and-conflict-widening` — M9; long-horizon continuity through compaction. - `subagents-for-proposal-diversity` — optional proposal-quality enhancement; never a POC blocker. @@ -129,6 +137,87 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai ## Frontier Definitions +### dx-tier-2-harness + +- **Name:** Tier-2 DX chassis — real-boot + faux-turn + payload/transcript oracle + fixture resume +- **Linear:** FE-847 — DX introspection Tier 2 +- **Branch:** `ln/fe-847-dx-introspection-tier-2` +- **Kind:** structural / dev-substrate +- **Status:** done +- **Certainty:** proving +- **Retires:** part of A25-L — extends the DX-loop proof from faux-provider scripted turns (`dx-feedback-loops`) to a reusable *real-boot* Tier-2 chassis that captures the provider payload and inspects the resulting transcript. 
+- **Lights up:** A Tier-2 test chassis that did not exist — `runBrunchTui` boots for real, one faux model turn runs, the provider payload is captured, the resulting transcript is inspected, and a session resumes from a fixture transcript — the harness every turn-boundary-choreography product slice asserts its mechanics through. +- **Stabilizes:** The Tier-2 harness seam plus the coverage-first scaffold for I45-L–I47-L (the skipped invariant suite + intentional topology stubs the watermark projection, the `prepareNextTurn` reconciler, and the origination primitive will fill). +- **Objective:** Build the thin Tier-2 chassis (S0) only: (1) a real `runBrunchTui` boot path usable in test, (2) one faux model turn driven end-to-end with no network/keys, (3) provider-payload capture + transcript-inspection oracles, (4) fixture-transcript resume. Then **author the coverage-first scaffold** for the whole turn-boundary-choreography layer: the I45-L–I47-L invariant suite as `it.todo` / `describe.skip` keyed to its enabling slice, plus intentional `export {}` topology stubs (ownership comment per AGENTS.md) for the not-yet-built modules — including **one shared continuity-entry classifier** (`isWatermarkCarrier` / `isContinuityOnlyNonDebtEntry`) so S1/S2 watermark projection and S4 resume-kick classification share one taxonomy of carrier vs. continuity-only-non-debt vs. debt-bearing entries rather than duplicating hardcoded lists. 
The scaffold's first tests must encode the three SPEC edge cases — seed/full-overview snapshots advance the watermark while narrow reads do not; no redundant `worldUpdate` immediately after a seed naming the current snapshot LSN; the resume kick decision is taken on the pre-reconcile tail (a user tail still earns a kick after the reconciler inserts seed/staleness notices) — and assert `worldUpdate.items` / watermark / kick outcomes as **sets and `{specId, lsn}` properties, not payload-order goldens** (no canonical item sort is specified), so the suite stays deterministic. +- **Why now / unlocks:** The user has elevated the turn-boundary-choreography layer to core mechanics and wants the proving infrastructure laid in while the concept is fresh. The chassis is buildable now and is the harness through which S1–S5 product mechanics are proven; authoring the skipped scaffold now stops the edge cases from being lost before their slices exist. +- **Acceptance:** + - A test can boot the real `runBrunchTui` orchestration, run one faux model turn, capture the exact provider payload, and inspect the resulting transcript entries — with no network, keys, or tokens. + - A session can resume from a fixture transcript through the same chassis. + - The I45-L–I47-L invariant suite exists as skipped (`it.todo` / `describe.skip`) tests keyed to their enabling slices (`turn-boundary-reconciliation`, `kick-and-context-seeding`), and the three SPEC edge cases are each present as a named skipped case. + - Intentional topology stubs exist for the assistant-visible watermark projection, the `prepareNextTurn` reconciler, and the origination primitive — `export {}` + ownership/IO/future-callers comment per AGENTS.md. + - No product mechanics land on this frontier: the watermark/reconciler/kick modules stay stubs; `npm run verify` is green with the scaffold tests skipped (no slice lands green by leaving its own tests skipped — that obligation is on the product frontiers). 
+- **Verification:** Inner — chassis unit tests (boot, faux turn, payload capture, transcript inspect, fixture resume); a test asserting the scaffold suite is present-but-skipped and the topology stubs compile. The skip ledger is itself the layer's live coverage map (SPEC §Design Notes, coverage-first scaffold). +- **Cross-cutting obligations:** Preserve the D39-L sealed-profile boundary and the `dx-feedback-loops`/`dx-introspection-live` DX conventions — the chassis is a dev/test substrate, observes but does not shape product behavior, and stays distinct from `src/probes/` product-verification runs. Do not fold S1–S5 product mechanics into S0. Topology stubs follow AGENTS.md §intentional topology stubs. +- **Topology materialization:** Chassis/harness lives under `src/dev/` (Tier-2 test front door) reusing the shared faux harness; topology stubs land at their final product homes (assistant-visible watermark projection under `src/projections/session/`, the `prepareNextTurn` reconciler and origination primitive under `src/session/` per their READMEs, and the shared continuity-entry classifier at the boundary both consume — `src/projections/session/` if read-side-owned) so the dependency direction is legible before behavior exists. +- **Traceability:** D37-L, D39-L, D43-L, D68-L, D69-L, D76-L, D77-L, D78-L; A25-L; I45-L, I46-L, I47-L. +- **Design docs:** `memory/SPEC.md` D76-L–D78-L, I45-L–I47-L, §Verification Design (coverage-first scaffold design note); `src/dev/README.md`; `src/session/README.md`; `src/projections/README.md`. +- **Current execution pointer:** Done 2026-06-10 with 2026-06-11 closure on FE-847. The real `runBrunchTui` boot chassis, faux-turn payload/transcript oracle, fixture resume path, skipped I45-L–I47-L scaffold, and topology stubs are in place; the final follow-on tightened Tier-1 proof so Brunch-configured faux sessions now own the definitive provider-facing prompt/tool payload assertion. 
+ +### turn-boundary-reconciliation + +- **Name:** Turn-boundary reconciliation — assistant-visible watermark, `worldUpdate`, mention staleness +- **Linear:** FE-847 — built as a slice group under the FE-847 issue (2026-06-10 single-issue decision; the remaining closure continues on the stacked successor branch per the 2026-06-11 branch-mechanics override); no separate issue. +- **Branch:** `ln/fe-847-turn-boundary-closure` (stacked successor FE-847 branch, shared with `kick-and-context-seeding`). +- **Kind:** structural / product mechanics (M7) +- **Status:** active (turn-boundary choreography; not POC-ship-critical) +- **Certainty:** proving +- **Retires:** A4-L (the remaining "M7 still needs generated `worldUpdate` traces" subclaim) and A9-L (session-scoped `(entity_id, seen_lsn)` mention-ledger granularity is the right staleness grain). +- **Depends on:** `dx-tier-2-harness` chassis + scaffold (same FE-847 issue, on the parent branch `ln/fe-847-dx-introspection-tier-2` of the stack; the chassis is the oracle these slices assert through and supplies the topology stubs they fill). +- **Lights up:** The write-side of continuity — a single `prepareNextTurn` reconciler that projects the assistant-visible watermark, samples `current_lsn`, and inserts `worldUpdate` / mention-staleness / side-task drains, plus submit-time mention resolution and own-write watermark stamping. +- **Stabilizes:** I45-L (watermark advance correctness), I9-L (submit-time mention resolution, `(entity_id, seen_lsn)` ledger), and its share of I47-L (carrier discipline / boot idempotence). +- **Objective:** Build the product write-side of turn-boundary choreography behind the FE-847 chassis. **S1** — assistant-visible watermark projection (D76-L): project `{specId, lsn}` from the session's watermark carriers (boot/context seed + whole-spec overview snapshot, `worldUpdate`, own graph-mutation `toolResult`); narrow `getNodes`/`queryNodes` reads update per-entity read ledgers, never the global watermark.
**S2** — the one-writer `prepareNextTurn` reconciler (D77-L): compute watermark, sample `current_lsn`, insert `worldUpdate` naming only strictly-greater items (I4-L), with own-mutation + full-overview watermark stamping and `before_provider_request` as a guard only. **S3** — submit-time mention resolution + staleness (I9-L): resolve `#` handles to stable graph ids at `session.submitMessage`, append `brunch.mention` ledger facts, emit discretionary staleness hints when an entity changed since `seen_lsn`. Flip the corresponding FE-847 scaffold tests live. +- **Why now / unlocks:** Specced now as core mechanics while the concept is fresh (Context §Turn-boundary choreography). The watermark + reconciler are the substrate `kick-and-context-seeding` and later M8 coherence build on. Not POC-ship-critical. +- **Acceptance:** + - The watermark advances only via seed/full-overview snapshot, `worldUpdate`, or own mutation; narrow reads never advance the global watermark; a freshly seeded session whose seed named the current snapshot LSN does not synthesize a redundant `worldUpdate` (I45-L edge cases live). + - `worldUpdate` is synthesized only when `current_lsn > watermark`, names only strictly-greater items, and is carried as a Brunch custom transcript entry (never a synthetic `toolCall` or prompt-only injection). + - Mentions resolve to stable graph ids at submit time (not autocomplete time), the ledger stores `(entity_id, seen_lsn)`, and staleness hints fire only when an entity changed since it was last seen (I9-L). + - The reconciler is the single continuity writer; `before_provider_request` only guards (asserts no stale unresolved continuity) and never double-writes. + - The relevant FE-847 scaffold tests are flipped live (no slice lands green leaving its own tests skipped). 
+- **Verification:** Inner — watermark-projection property/unit tests (own-write stamping vs foreign `worldUpdate`; strict-greater set per I4-L; no-`worldUpdate` when `current==watermark`; seed/overview advance vs narrow-read no-advance). Middle — Tier-2 faux-turn-through-real-boot assertions over change-log-range fixtures driving a foreign writer; mention resolution against fixture graph data. (SPEC §Verification Design rows I45-L, I47-L.) +- **Cross-cutting obligations:** Continuity facts ride Brunch custom transcript entries (D37-L), never synthetic `toolCall`s or prompt-manifest injection (carrier discipline, I47-L). Multi-spec discipline: watermark is `{specId, lsn}`; never compare bare LSNs across sibling specs (I4-L). The reconciler runs **before prompt composition**; `before_provider_request` is a guard that on post-prepare drift **re-runs preparation once** (abort/retry), never a second writer (D77-L). Same-session submit/capture writes (D18-L/D66-L) are not own-mutation `toolResult`s — they advance `current_lsn` and must be surfaced by the next `worldUpdate`, not swallowed (I45-L). The watermark must survive compaction (preserved-anchor set retains the latest watermark carrier so projection never regresses, I47-L). Boot/resume reconciliation is idempotent, deriving dedupe from projected transcript state, not hidden flags (I47-L, shared with `kick-and-context-seeding`). Side-task/reviewer drains (D15-L) belong to this reconciler seam. +- **Topology materialization:** The `prepareNextTurn` reconciler and watermark projection land at their final homes (`src/session/` reconciler, `src/projections/session/` watermark) filling the FE-847 topology stubs; submit-time mention resolution at `session.submitMessage`; tool-result watermark stamping at the graph read/mutation adapters. +- **Traceability:** D14-L, D15-L, D17-L, D37-L, D43-L, D49-L, D76-L, D77-L; A4-L, A9-L; I1-L, I4-L, I9-L, I45-L, I47-L. 
+- **Design docs:** `memory/SPEC.md` D76-L–D77-L, I9-L, I45-L, I47-L; `src/session/README.md`; `src/projections/README.md`; `src/projections/session/runtime-state.ts`. +- **Current execution pointer:** Core S1–S3 mechanics landed on FE-847; the remaining builder entry is `memory/cards/turn-boundary-reconciliation--continuity-chain.md`, which closes the frontier by flipping the skipped Tier-2 I45/I47 scaffold live, proving mention resolution/staleness through the real submit path, and preserving the latest watermark carrier across compaction/resume. + +### kick-and-context-seeding + +- **Name:** Session origination — honest kick + boot/resume context seeding +- **Linear:** FE-847 — built as a slice group under the FE-847 issue (2026-06-10 single-issue decision; continues on the stacked successor branch per the 2026-06-11 branch-mechanics override); no separate issue. +- **Branch:** `ln/fe-847-turn-boundary-closure` (stacked successor FE-847 branch, shared with `turn-boundary-reconciliation`). +- **Kind:** structural / product mechanics +- **Status:** next (turn-boundary choreography; not POC-ship-critical) +- **Certainty:** proving +- **Retires:** the R16 origination gap — proof that a structured-strategy session can originate its own offer-first turn honestly (no fabricated user entry) and seed context idempotently across real restart/resume. +- **Depends on:** `turn-boundary-reconciliation` (S1 watermark projection + S2 reconciler — the seed must advance the watermark and the kick decision interacts with reconciler-inserted notices) and the `dx-tier-2-harness` chassis. Sequenced last in the FE-847 slice chain. +- **Lights up:** Honest session origination — `startAssistantTurn({ origin })` surfaced through `session.triggerExchange`, plus boot/resume context seeding as custom continuity entries. +- **Stabilizes:** I46-L (honest origination + pre-reconcile-tail resume policy) and its share of I47-L (boot/resume seed idempotence + carrier discipline).
+- **Objective:** Build the write-side of origination (S4) behind the FE-847 chassis (same FE-847 branch, sequenced after the reconciliation slices). A **new** session seeds workspace/spec-overview context as custom continuity entries (D76-L; the seed names the snapshot LSN and so initializes the watermark), then kicks an assistant-originated `present_*` exchange. A **resumed** session takes the kick decision from the **pre-reconcile** transcript tail: kick iff that tail owed assistant continuation (user message or incomplete exchange-tuple), even after the reconciler inserts seed/staleness notices ahead of it; otherwise rest at a `request_*`/system leaf. AUTO always originates offer-first (D66-L: AUTO never selects `freestyle`); only an explicit `freestyle` pin yields a wait-for-user idle. Carries its share of S5 — boot/resume seeding is idempotent (dedupe derived from projected transcript state, survives real restart) and continuity rides custom entries only. Flip the corresponding FE-847 scaffold tests live. +- **Why now / unlocks:** The offer-first default (R16, D12-L, I13-L) has a read side but no honest write-side origination; specced now as core mechanics. Kept a distinct planning unit from M7 reconciliation because it is origination, not reconciliation; executed as the final FE-847 slice group, not a separate branch. Not POC-ship-critical. +- **Acceptance:** + - Origination never writes a fabricated user transcript entry and never injects a "user said begin" prompt; the kick is `startAssistantTurn({ origin })` surfaced via `session.triggerExchange`. + - A new session seeds-then-kicks before the first provider call; the seed names the snapshot LSN so no redundant `worldUpdate` is synthesized immediately after seeding (I45-L edge case, with M7). 
+ - A resumed session's kick decision classifies the latest unresolved conversational debt (ignoring trailing continuity-only entries): a user tail still earns a kick after the reconciler inserts seed/staleness notices; a `request_*`/system leaf stays idle; a crash-after-notice-before-provider reboot still kicks when the underlying debt is unanswered (idempotent re-boot, I46-L edge cases). + - AUTO never originates a `freestyle` turn; only an explicit `freestyle` pin idles for the user. + - Boot/resume seeding is idempotent (repeated boot does not duplicate seed/`worldUpdate`; dedupe derived from projection) and survives real restart/resume (I47-L). + - The relevant FE-847 scaffold tests are flipped live. +- **Verification:** Middle — Tier-2 faux-turn-through-real-boot assertions: new session seeds-then-kicks before the first provider call; resumed-session kick fires on a user pre-reconcile tail even behind inserted notices, and stays silent at a `request_*`/system leaf; no fabricated user entry in any path; AUTO never originates `freestyle`. Restart/resume idempotence property tests (repeated boot does not duplicate seed/`worldUpdate`). Outer — manual walkthrough of opening-offer quality (tracked, not gated). (SPEC §Verification Design rows I46-L, I47-L.) +- **Cross-cutting obligations:** Honest origination — no fabricated user turns, ever (I46-L). Continuity facts ride Brunch custom transcript entries (D37-L), never synthetic `toolCall`s or prompt-only injection (I47-L). Boot idempotence derives from projected transcript state, not hidden flags (I47-L, shared with `turn-boundary-reconciliation`). This is product behavior on the non-D39-L-seal side, not a `BRUNCH_DEV` affordance. 
+- **Topology materialization:** The origination primitive (`startAssistantTurn`) lands in the session orchestration layer (`src/session/`) filling the FE-847 stub; `session.triggerExchange` is the public surface (D49-L); context seeding writes custom continuity entries through the same carrier as `worldUpdate`. +- **Traceability:** D12-L, D37-L, D49-L, D66-L, D75-L, D76-L, D78-L; R16; I13-L, I46-L, I47-L. +- **Design docs:** `memory/SPEC.md` D78-L, I46-L, I47-L; `src/session/README.md`. +- **Current execution pointer:** Core S4 helper logic landed on FE-847; the remaining builder entry is `memory/cards/kick-and-context-seeding--honest-origination.md`, which closes I46/I47 through real boot/resume origination proofs rather than more local helper-only tests. + ### project-graph-review-cycle - **Name:** Project-graph review-set proposal and atomic acceptance @@ -565,7 +654,7 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - **Name:** Live, conversational agent-input introspection in the real dev TUI - **Linear:** unassigned — create in FE / brunch when the frontier starts (do not parent under FE-531; sibling of FE-825, not a child). - **Kind:** structural / dev-substrate (capability expansion over `dx-feedback-loops`) -- **Status:** next (DX substrate; not POC-ship-critical) +- **Status:** done - **Certainty:** proving - **Retires:** A26-L — proof that conversational introspection is buildable as a read-only dev session-query-back tool without weakening D39-L sealing. - **Lights up:** Running `BRUNCH_DEV=1 npm run dev -- --cwd .fixtures/workbenches/` boots the *real* Brunch TUI against a chosen fixture workspace with the introspection extension live and the model able to query exact prior session-log values back into chat for discussion — a loop that did not exist before this frontier (the extension was built but dormant, and dev runs polluted the operating cwd). @@ -582,7 +671,7 @@ The near-term spine has two tracks. 
The **context-pipeline coverage trio** remai - **Topology materialization:** `.fixtures/scratch/` (gitignored) has joined `seeds/`/`workbenches/`/`runs/`; `--cwd` parsing lands in `src/app/brunch.ts` / `runBrunchCli`; `BRUNCH_DEV` gating and the introspection `{ enabled }` wire-up land in `src/app/brunch-tui.ts`; the provider-payload tap remains in `src/.pi/extensions/introspection/`; conversational query planes live in `src/.pi/extensions/session-query/` and `src/.pi/extensions/introspect-query/`, sharing projection/truncation helpers from `src/.pi/extensions/shared/query-projection.ts`; `.gitignore`, `.fixtures/README.md`, `src/dev/README.md`, and `src/.pi/extensions/README.md` reconcile to the new topology and gate. - **Traceability:** D39-L, D58-L, D67-L, D68-L, D69-L, D70-L, D71-L; A26-L; I38-L, I42-L. - **Design docs:** `memory/SPEC.md` §Development Feedback Loops and D69-L–D71-L, A26-L, I42-L; `.fixtures/README.md`; `src/dev/README.md`; `src/.pi/extensions/introspection/README.md`; `src/.pi/extensions/session-query/README.md`; `src/.pi/extensions/introspect-query/README.md`. -- **Current execution pointer:** Slice 1 done 2026-06-09. Slice 2 done 2026-06-09: `brunch_session_query` is registered only under the slice-1 `introspection.enabled` gate, queries the current session branch read-only, returns one-or-many verbatim projections, truncates/spills large output, and is covered by unit, registration-gating, and faux-turn tests. 2026-06-10 follow-on: the dev query-tool set is explicitly unioned into the D40-L active-tool allow-list under the factory's introspection gate, so `setActiveTools` and composed prompt active-tool lists advertise registered dev query tools while product/default sessions stay on the product set; `brunch_introspect_query` adds read-only projection over captured `before_provider_request` payloads plus base prompt options, sharing projection/truncation helpers with `brunch_session_query`. 
2026-06-10 boot-seam smoke oracle landed in `src/app/brunch-tui.test.ts`: real `runBrunchTui` orchestration reaches `createAgentSessionRuntime`, binds the Pi header/cwd/session context, and proves dev query tools are present+active only under `BRUNCH_DEV`. Remaining debt is outer-loop live-model compliance: in a `BRUNCH_DEV=1` real TUI session, ask the agent to pull exact prior/session and provider-payload values through the dev query tools, echo them in fenced blocks, and discuss them. +- **Current execution pointer:** Done 2026-06-11. Slices 1-2, the dev-query active-tool follow-on, and the workspace debug-cache chain are done: `BRUNCH_DEV` real TUI launches can mirror the latest final system prompt and append explicit Brunch-owned text tool-result content into launch-cwd `.brunch/debug/` while repo-root `.fixtures/scratch/` remains the durable paired-run artifact path. `tool-renders` flattening remains explicitly deferred until a concrete renderer-debugging need appears. ### dev-seed-fixtures @@ -675,6 +764,9 @@ nodes: topology-readmes-and-boundaries [parallel] attach-to-frontier topology hardening dev-seed-fixtures [parallel] rich seed data substrate for dev/observer testing web-design-system-port [done · earned] ported prior-trunk tokens + card primitives into src/web; retired invented warm aesthetic; read-only, no spine deps + dx-tier-2-harness [active · proving] FE-847 Tier-2 DX chassis (real boot + faux turn + payload/transcript oracle + fixture resume) + coverage-first scaffold (skipped I45-I47) + topology stubs + turn-boundary-reconciliation [next · proving] M7 product write-side: watermark projection (S1) + prepareNextTurn reconciler/worldUpdate/own-write stamping (S2) + submit-time mention ledger/staleness (S3) + kick-and-context-seeding [next · proving] distinct planning unit on the same FE-847 branch (last slice group): honest kick via triggerExchange + boot/resume context seeding (S4); pre-reconcile-tail policy; boot idempotence (S5 share) edges: graph-tool-resilience -[hard]-> 
capture-response-to-graph @@ -700,6 +792,10 @@ edges: dx-feedback-loops -[optional]-> role-safe-graph-mutations (version-bump+alias is a shared unblocker; land before concurrent pi-facing churn) dx-feedback-loops -[optional]-> projection-shape-coverage (same shared unblocker; soft, not a hard gate — buildable independently) dx-feedback-loops -[hard]-> dx-introspection-live (built the dormant introspection machinery this frontier wires live + makes conversational) + dx-feedback-loops -[hard]-> dx-tier-2-harness (Tier-2 chassis reuses the src/dev faux harness + real-boot front door) + dx-tier-2-harness -[hard]-> turn-boundary-reconciliation (S1-S3 mechanics are proven through the Tier-2 chassis + flip its skipped scaffold tests live) + dx-tier-2-harness -[hard]-> kick-and-context-seeding (S4 origination is proven through the Tier-2 chassis; same FE-847 branch, last slice group) + turn-boundary-reconciliation -[hard]-> kick-and-context-seeding (seed must advance the watermark (S1) and the kick decision interacts with reconciler-inserted notices (S2)) parallel obligations: probes-and-transcripts-evolution -[evidence]-> every P0/P1 frontier @@ -707,7 +803,6 @@ parallel obligations: dev-seed-fixtures -[data]-> capture-response-to-graph, poc-live-ship-gate (real multi-spec graphs to exercise observer/capture; semantic curation waits on role-safe-graph-mutations) horizon: - turn-boundary-reconciliation coherence-first-class compaction-and-conflict-widening subagents-for-proposal-diversity @@ -730,5 +825,7 @@ notes: - `project-graph-review-cycle` is complete evidence for the optional batch proposal/review story; keep future review-quality work as follow-up, not FE-809 completion debt. - `topology-readmes-and-boundaries` is not a license for abstract cleanup; it rides with concrete delivery seams. 
- **Readiness / elicitation-gaps remodel (2026-06-09 ln-plan, post-`ln-spec`).** The SPEC pass (D45-L, D57-L, D64-L, D65-L, D73-L, D74-L; A24-L, A27-L; I25-L, I30-L, I31-L) promotes a hard chain `elicitation-gaps-remodel` → `capability-readiness` plus the parallel `runtime-vocab-leaf`. `elicitation_backlog` is remodeled into the D65-L `elicitation_gaps` obligation register (name + rationale, band, `presence|field|coverage|manual` predicate, importance + derived coverage, disposition; seeded from the grounding typology catalog). Capability-readiness becomes a JIT `capability → relevant gaps` judgment that retires the stored `readiness_grade` / `updateReadinessGrade` / `READINESS_GRADES` / `MIN_GRADE` proxies, adds a soft UI-only `readiness estimate`, and removes `chrome.phase` / `chrome.chatMode`. **These are upstream of the trio's readiness/chrome-touching locks** (`capability-readiness` mutates `workspace/workspace-state` + `session/runtime-state` shapes that `projection-shape-coverage` would freeze): land the chain before trio stage 1, or have the trio explicitly bracket the grade/phase/chatMode fields until the remodel lands. None are POC-ship-critical. `elicitation-driver` now depends on `elicitation-gaps-remodel`, not the FE-823 backlog shape. `runtime-vocab-leaf` is the decision-3 follow-on (session/runtime enum source-of-truth leaf) and does **not** relocate the retired `READINESS_GRADES`. Decision-2 (readiness-grade vs band term overlap → `capture_band`/`readiness_gate`) was explicitly **left alone**. + - **Turn-boundary choreography (Tier-2 layer, 2026-06-10).** Promoted from the `turn-boundary-reconciliation` horizon stub into three frontiers after a SPEC pass locked D76-L–D78-L / I45-L–I47-L. 
`dx-tier-2-harness` (FE-847) is the thin DX chassis + coverage-first scaffold (skipped tests + topology stubs); `turn-boundary-reconciliation` (M7) owns the watermark/reconciler/mention write-side (S1–S3); `kick-and-context-seeding` is the honest-origination + seeding grouping (S4). S5 (boot idempotence + carrier discipline, I47-L) is a cross-cutting obligation on both product groupings, not its own frontier. **All of S0–S5 build on the single `ln/fe-847-dx-introspection-tier-2` branch under FE-847 (user decision 2026-06-10)** — distinct planning units, sequential slices, no separate issues/branches. The scaffold encodes three edge cases: seed/full-overview snapshots advance the watermark while narrow reads do not; no redundant `worldUpdate` after a seed naming the current snapshot LSN; the resume kick decision is taken on the pre-reconcile tail. Each grouping flips its own scaffold tests live (no slice lands green leaving its tests skipped). None POC-ship-critical; the S0 chassis is buildable now. + - **Oracle pre-build review (2026-06-10).** Endorsed the architecture (projected watermark + one reconciler writer + honest origination) and surfaced four pre-build hazards, all folded into SPEC: (1) **same-session capture** — `worldUpdate` now covers any write not already assistant-visible via a carrier, incl. submit-time/freestyle capture (D18-L/D66-L), not just foreign writes (D76-L/I45-L); (2) **kick = conversational-debt classification** ignoring trailing continuity-only entries, so reboot-after-notice stays idempotent (D78-L/I46-L); (3) **compaction must preserve the watermark carrier** so projection never regresses (I47-L); (4) **guard-as-retry** — `before_provider_request` re-runs prepare once on drift, never writes; reconciler runs before prompt composition (D77-L). Also: keep S1 a separate watermark projection, not an overload of `runtimeState.world.latestLsn`. 
**Optional S2 split** if it grows too wide: S2a = watermark + core reconciler + `worldUpdate`; S2b = adapter stamping + side-task/reviewer drains. Defer to `ln-scope`. - Multi-spec workspace discipline applies throughout: target the selected/current spec explicitly; no workspace-global graph truth in the POC. ``` diff --git a/memory/SPEC.md b/memory/SPEC.md index 97df7b62..9b87ed9d 100644 --- a/memory/SPEC.md +++ b/memory/SPEC.md @@ -257,11 +257,17 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c - **D42-L — Session naming is Pi `session_info` presentation metadata, not spec identity.** Brunch-created sessions should be named at creation with neutral workspace-global defaults (`Untitled Session 1`, `Untitled Session 2`, …) so pickers/chrome never show an unnamed Brunch session and unchanged defaults do not collide across specs in the same cwd. These defaults are immediate lifecycle metadata, not LLM-generated summaries and not derived from the selected spec title. Brunch may later use Pi session lifecycle hooks to opportunistically replace a default with a short human-readable name that characterizes what happened in the transcript. The preferred generation trigger is `session_shutdown` for `quit`, `new`, and `resume` replacements because it sees the just-finished transcript and can name it before later picker lists need to distinguish sessions; `session_before_compact` or post-compaction (`session_compact`) may be used to refresh names after major summarization, and a manual/user rename command can force or override naming. The generation call should mirror the model-selection pattern in the local `summarize.ts` extension example: choose a cheap/fast authorized model, extract user/assistant text plus salient tool calls from the current branch, ask for a concise title, and append a Pi `session_info` entry through `SessionManager.appendSessionInfo`. 
Naming must be best-effort and non-blocking with a tight budget: failures, missing auth, empty transcripts, or shutdown aborts preserve the existing default/user label rather than blocking session replacement or exit. Session display names label sessions in pickers and chrome, but do not affect spec ids, session bindings, graph truth, or replay semantics. Depends on: D6-L, D17-L, D21-L, D35-L. Supersedes: using spec title or session UUID alone as the only durable display label once transcripts have meaningful content, leaving Brunch-created sessions unnamed, spec-local default numbering, or treating generated session names as canonical spec identity. - **D58-L — Brunch prompt composition is a thin runtime header plus a gated prompt-resource manifest, not eager selection of every objective pack.** `.pi/agents/compose(agentId, sessionState, spec, workspace, context)` runs before Pi provider requests through Brunch's prompt extension and emits: **(1) agent control header** — keyed agent identity, model/thinking expectation, foreground role derived from `op_mode`, and mode/tool-authority summary; **(2) runtime-state header** — current pinned/AUTO `goal`, `strategy`, and `lens`, the readiness estimate (D45-L), and workspace posture; **(3) resource manifests** — XML-style ``, ``, ``, and `` entries filtered by `.pi/agents/state.ts` legal tuples, grade, `op_mode`, and the agent allow-list, each carrying `{name, description, location}` for a Brunch-owned markdown resource under `src/.pi/{agents,skills}/`; the `{name, description, location}` triples are code-owned in `.pi/agents/state.ts`, not filesystem-discovered, honoring D39-L sealing; **(4) compact pushed context** — only the minimal context handles and rendered context needed to orient the turn, with deeper context access still governed by D60-L. 
Detailed goal/strategy/lens/method instructions live in Brunch prompt resources and are loaded by the agent with `read` when needed, following the same simple mechanism Pi uses for skills. Method resources are the prompt-level home for Brunch tool-routing and sequencing guidance; tool definitions remain boundary schemas/execution hooks, not the whole Brunch guide to when or how tools should be composed. `AUTO` means the axis is unpinned: the manifest lists legal choices and router instructions tell the agent to choose only from the current manifest, reading the selected resource before applying it when detail matters. Pinned axes point to the pinned resource; code enforces legality and tool gating but does not choose or concatenate large semantic packs on the agent's behalf. Pi-native skills may still carry startup-scoped capabilities, but runtime-state-gated availability is Brunch's manifest, not ambient Pi discovery. `.pi/agents/` is the keyed agent prompt assembly layer (`definitions/`, `contexts/`); `.pi/skills/` carries goal/strategy/lens/method resources; `.pi/agents/contexts/` is the D60-L agent-context orchestration layer (code), not a manifest resource family or general renderer bucket. Reusable text renderers may migrate to `renderers/` under D52-L. Composition is projection, not a behavioral state machine. Depends on: D23-L, D25-L, D39-L, D40-L, D52-L, D59-L, D60-L. Supersedes: the flat "base + mode + role + strategy + lens + grade + …" layering; the fixed all-packs concatenation in `compose-brunch-prompt.ts`; "role preset / runtime bundle" as the composition unit; direct Layer-2 eager prompt-pack injection as the default mechanism; top-level `src/agents/` for Pi-only agents; and `capability` as a parallel name for `method` / ``. 
+#### Continuity & origination (turn-boundary choreography) + +- **D76-L — Session continuity state is a projected assistant-visible watermark carried by transcript custom entries, never stored mutable state.** The staleness reference `lastSeenLsn` means the **assistant-visible watermark**: the highest spec-local LSN (I1-L) this session has actually been *shown* in its transcript, projected per D43-L from the session's own **watermark carriers** — boot/context seed and whole-spec overview snapshots that name a snapshot LSN, `worldUpdate`, and the session's own graph-mutation `toolResult`s — never a stored field. It is a `{specId, lsn}` watermark (I4-L); bare LSNs are not comparable across sibling specs. Distinguish it from the **runtime-observed** `current_lsn` (the spec's `graph_clock`, sampled cheaply at the turn boundary): the gap `current_lsn > watermark` is the only `worldUpdate` trigger. The watermark is initialized or advanced by any continuity entry naming a spec-local snapshot LSN: (a) a **boot/context seed or whole-spec overview snapshot** — so a freshly seeded session does not immediately synthesize a redundant `worldUpdate`; (b) a `worldUpdate` inserted for any write **not already assistant-visible through another carrier** — *foreign* writes (other sessions on the same spec, web clients, reviewer side-tasks) *and* same-session writes that did not ride an own-mutation `toolResult`, notably submit-time / freestyle capture (D18-L, D66-L) committed at `session.submitMessage` rather than through an assistant tool call; and (c) the session's *own* mutation `toolResult`s, which are already visible and therefore never warrant a `worldUpdate` (re-announcing them would violate I4-L's strict-greater property). **Narrow graph reads (`getNodes` / `queryNodes`) do not advance the global watermark** — they update per-entity read ledgers only (D77-L), so a narrow read cannot suppress unrelated staleness. 
The watermark is its own projection over the carrier set, distinct from `runtimeState.world.latestLsn` (a `worldUpdate`-only projection); do not overload that field to carry it. All continuity facts (seed/refresh, `worldUpdate`, `brunch.mention*`, `brunch.session_lifecycle`) ride **Brunch custom transcript entries** (the D37-L non-exchange-fact carrier), not synthetic `toolCall`s (which would dishonestly imply a tool ran) and not D58-L prompt-manifest injection (ephemeral, invisible to the D43-L projection). Any process-local cache of the watermark is an implementation optimization, not product state. Depends on: D14-L, D17-L, D37-L, D43-L, I1-L, I4-L. Supersedes: a stored/mutable `agent_visible_lsn` / `lastSeenLsn` field; defining the watermark as "the app sampled the graph clock" (which would let staleness vanish before the agent is shown the change); carrying continuity via synthetic tool calls or prompt-only injection. +- **D77-L — Turn-boundary reconciliation is one writer seam plus two auxiliary seams and a guard, not four co-equal insertion points.** The write-side of continuity is owned by a single **pre-assistant-turn reconciler** (canonically `prepareNextTurn`; `before_agent_start` is the temporary adapter until that seam is wired): it computes the projected watermark, samples `current_lsn`, and inserts `worldUpdate` (naming only items with LSN strictly greater than the pre-update watermark, I4-L), mention-staleness hints, side-task/reviewer drains (D15-L), and any boot/resume seed or kick decision (D78-L). 
Two auxiliary seams write continuity outside that reconciler: **submit-time mention resolution** at user-message ingestion (`session.submitMessage`, D49-L) resolves `#` handles to stable graph ids and appends `brunch.mention` ledger facts — independent of autocomplete freshness, which is advisory UI only; and **tool-result watermark stamping** at the graph read/mutation adapters records the LSN at which a graph fact became assistant-visible — but only the session's own mutations and **whole-spec snapshot reads** (full graph overview) advance the **global** assistant-visible watermark (D76-L), while narrow `getNodes` / `queryNodes` reads update **per-entity read ledgers** (the D14-L mention ledger now; an optional direct-read ledger if later built) and must not touch the global watermark, so a narrow read cannot mask unrelated staleness. `before_provider_request` is a **guard only** (assert no stale unresolved continuity remains), never the normal writer, because writing there risks double-writes against the reconciler; on detecting post-prepare drift (a write landed between `prepareNextTurn` and the provider call) it **re-runs turn preparation once** (abort/retry) rather than patching the transcript itself. The reconciler must run **before prompt composition** so its inserted continuity is visible to the same turn. Depends on: D14-L, D15-L, D17-L, D49-L, D76-L. Supersedes: four co-equal insertion points each owning overlapping continuity writes; tying mention resolution to autocomplete-time state; using `before_provider_request` as a primary continuity writer. +- **D78-L — Session origination ("kick" + context seeding) is honest assistant-origination behind `session.triggerExchange`, gated by transcript-tail policy, never a fabricated user turn.** At idle a structured-strategy session must present a system/assistant-originated offer (R16, D12-L, I13-L); the missing write-side is origination. 
The kick is an internal product primitive (`startAssistantTurn({ origin })`) living in the session orchestration layer and surfaced publicly through `session.triggerExchange` (D49-L); it never writes a fabricated user transcript entry and never injects a "user said begin" prompt. A **new** session first seeds context (workspace/spec overview as custom continuity entries per D76-L), then kicks an assistant-originated `present_*` exchange. A **resumed** session decides the kick from the **latest unresolved conversational debt**, computed by ignoring *trailing continuity-only entries* — any reconciler-inserted notice that itself owes no assistant continuation (seed / `worldUpdate` / `brunch.mention*` / `brunch.session_lifecycle` / side-task & reviewer drains, D15-L), whether the reconciler inserted it this boot or a *previous* boot persisted it before dying. The session originates a turn iff that debt owed assistant continuation (a user message, or an incomplete exchange-tuple awaiting the assistant), and otherwise rests at an assistant/system-originated leaf (a pending `request_*` awaiting the user). Classifying debt rather than the literal last entry makes the kick **idempotent across crash/reboot**: trailing continuity notices never mask an older unanswered user/assistant debt, and never manufacture a kick over a satisfied leaf. The "kick unless freestyle" gate maps onto D66-L: because AUTO never selects `freestyle`, AUTO always originates offer-first, and only an explicit `freestyle` pin yields a wait-for-user idle. This is **product behavior on the non-D39-L-seal side**, not a `BRUNCH_DEV` affordance. Context seeding for new specs may draw on the `elicitation_gaps` grounding floor (D75-L) to shape the opening offer, but the seeded overview itself is read context, not graph truth. Depends on: D12-L, D37-L, D49-L, D66-L, D75-L, D76-L; R16. 
Supersedes: faking a user message to start the agent; treating "originate the first turn" as a dev-harness concern; an unconditional resume-kick that re-asks when the tail already awaits the user. + #### Development experience (DX) - **D67-L — Brunch tracks the latest pi release; dev iterates against pi source via a gated runtime alias.** Brunch keeps `@earendil-works/pi-*` current with upstream rather than pinning to an old line; version bumps are routine adaptation work, not deferred migrations. Local vite/vitest development aliases `@earendil-works/pi-ai`, `@earendil-works/pi-agent-core`, `@earendil-works/pi-tui`, and `@earendil-works/pi-coding-agent` to the sibling `pi-mono` `src/` checkout via an explicit `PI_SOURCE` runtime flag so cross-package iteration needs no rebuild in those loops; published builds, TypeScript, editors, and default runtime resolve the normal installed `dist`. Base `tsconfig.json` deliberately carries no pi source `paths` because paths cannot be env-gated; if a `tsx` real-provider loop later needs no-rebuild pi source, add an opt-in `tsconfig.dev.json` rather than weakening the default. Inaugural bump: `^0.75.5 → 0.79.0`. Depends on: A25-L, D39-L. Supersedes: pinning Brunch to a fixed older pi line, treating pi upgrades as discrete migration projects, or making a personal source checkout the unconditional type/default resolution path. - **D68-L — Development feedback loops are first-class DX, consolidated behind one front door, distinct from product-verification probes.** Brunch maintains three named developer loops: (1) **faux loop** — deterministic, in-process `AgentSession` over the pi faux provider + `.inMemory()` services, the inner/middle-loop substrate for wrapper logic and regressions; (2) **real-provider TUI/CLI loop** — `tsx`-run Brunch source against a live model for interactive use, with pi-source resolution opt-in per D67-L only when needed; (3) **introspection loop** — real provider plus payload/manifest capture (D69-L). 
These loops live behind a single consolidated dev front door (`src/dev/`) that owns the dev launchers and the shared faux-harness factory; ad hoc per-file faux setup is absorbed into that factory. The dev loops are the *means of building and iterating on* Brunch and are distinct from `src/probes/` **probe runs**, which are durable *product-verification* artifacts (`.fixtures/runs/`, `docs/architecture/probes-and-transcripts.md`); where a dev loop produces durable evidence it does so as a probe run rather than a parallel artifact path. Depends on: D39-L, D67-L; the probe/transcript model. Supersedes: scattered, unnamed dev-iteration scripts and ad hoc faux-provider wiring as the wrapper's test substrate. -- **D69-L — Agent-input introspection is one read-only, dev-gated Brunch extension; mechanical and conversational modes are separate planes.** A single Brunch-owned extension family, wired through `brunch-pi-extensions.ts` (never ambient discovery), provides **mechanical** introspection two ways: (a) a passive `before_provider_request`/`before_agent_start` tap that records *exactly the final payload the model receives* (system prompt, tool JSON schemas, D58-L prompt-resource manifest), and (b) an on-demand `/introspect` command that reports the **base** system-prompt inputs via `ctx.getSystemPromptOptions()` (base inputs only — `getSystemPromptOptions` returns pi's `_baseSystemPromptOptions`, so it does *not* reflect later `before_agent_start`/`before_provider_request` mutations) and the latest passive capture. The extension returns every payload unchanged so it observes but never shapes product behavior (D39-L sealing); because `before_provider_request` is a registration-ordered transformation chain in pi, the introspection tap must be registered *after* all Brunch prompt/tool/policy mutators to record the post-mutation payload. 
**Conversational** introspection is the sibling read-only query-tool plane: under the same `BRUNCH_DEV`/`introspection.enabled` gate, `brunch_session_query` reads `ctx.sessionManager.getBranch()` and `brunch_introspect_query` reads the captured provider payload plus base prompt options. Both tools project exact values with the shared capped dot/`[n]`/`[*]` grammar, truncate/spill large output, and rely on the agent's normal chat reply to echo/discuss the returned bytes. The D40-L active-tool allow-list explicitly unions this dev query-tool set only when the factory's dev gate is on, subtracts blocked tools, and intersects registered tools; registration alone is not advertisement. Tool-description nudges are the only prompt surface; no product prompt resource or fixed self-report schema is added. Subjective live interrogation remains an injected turn driven from the dev front-door launcher (`session.prompt`) or typed interactively, not a separate slash command. Captured scratch runs still write under `.fixtures/scratch/introspection//` (D70-L) so "what was sent" and "how the model read it" stay correlated. The launcher performs no global environment mutation; real TUI launches keep Pi startup update suppression scoped at the session-construction site with save/restore scoping (D71-L). Direct diagnostic for the "Prompt-resource discretionary loading" blind spot (I38-L). Depends on: D39-L, D40-L, D58-L, D68-L, D70-L; I38-L. Supersedes: treating "how the model sees our tools/skills" as an outer-loop-only, non-instrumented concern, and the fixed structured self-report schema as the default conversational surface. 
+- **D69-L — Agent-input introspection is one read-only, dev-gated Brunch extension; mechanical and conversational modes are separate planes.** A single Brunch-owned extension family, wired through `brunch-pi-extensions.ts` (never ambient discovery), provides **mechanical** introspection two ways: (a) a passive `before_provider_request`/`before_agent_start` tap that records *exactly the final payload the model receives* (system prompt, tool JSON schemas, D58-L prompt-resource manifest), and (b) an on-demand `/introspect` command that reports the **base** system-prompt inputs via `ctx.getSystemPromptOptions()` (base inputs only — `getSystemPromptOptions` returns pi's `_baseSystemPromptOptions`, so it does *not* reflect later `before_agent_start`/`before_provider_request` mutations) and the latest passive capture. The extension returns every payload unchanged so it observes but never shapes product behavior (D39-L sealing); because `before_provider_request` is a registration-ordered transformation chain in pi, the introspection tap must be registered *after* all Brunch prompt/tool/policy mutators to record the post-mutation payload. **Conversational** introspection is the sibling read-only query-tool plane: under the same `BRUNCH_DEV`/`introspection.enabled` gate, `brunch_session_query` reads `ctx.sessionManager.getBranch()` and `brunch_introspect_query` reads the captured provider payload plus base prompt options. Both tools project exact values with the shared capped dot/`[n]`/`[*]` grammar, truncate/spill large output, and rely on the agent's normal chat reply to echo/discuss the returned bytes. The D40-L active-tool allow-list explicitly unions this dev query-tool set only when the factory's dev gate is on, subtracts blocked tools, and intersects registered tools; registration alone is not advertisement. Tool-description nudges are the only prompt surface; no product prompt resource or fixed self-report schema is added. 
Subjective live interrogation remains an injected turn driven from the dev front-door launcher (`session.prompt`) or typed interactively, not a separate slash command. Captured scratch runs still write under `.fixtures/scratch/introspection//` (D70-L) so "what was sent" and "how the model read it" stay correlated. In `BRUNCH_DEV` real TUI launches, the same passive capture may also mirror the latest final system prompt bytes into the launch workspace's ephemeral `.brunch/debug/system-prompt.md`, and explicit Brunch-owned text `tool_result` content appends to `.brunch/debug/tool-contents.md`; this cache is not probe evidence, never reconstructs or shapes prompt state, and does not attempt `renderResult()` flattening. The launcher performs no global environment mutation; real TUI launches keep Pi startup update suppression scoped at the session-construction site with save/restore scoping (D71-L). Direct diagnostic for the "Prompt-resource discretionary loading" blind spot (I38-L). Depends on: D39-L, D40-L, D58-L, D68-L, D70-L; I38-L. Supersedes: treating "how the model sees our tools/skills" as an outer-loop-only, non-instrumented concern, and the fixed structured self-report schema as the default conversational surface. - **D70-L — `.fixtures/` is a four-role tree (seeds / workbenches / runs / scratch); dev-loop artifacts decouple operating-cwd from artifact-root.** `.fixtures/` separates four lifecycles, each with its own git policy: **`seeds/`** — tracked, reusable explicit-basis starting truth loaded via `npm run seed` (INPUT); **`workbenches/`** — launchable Brunch workspaces whose `.brunch/` is gitignored local state (the directories a dev `--cwd` targets, D71-L); **`runs/`** — tracked, *curated/promoted* probe evidence under `//`, probe-first per D68-L (EVIDENCE); **`scratch/`** — gitignored, ephemeral live dev-loop output under `//` (SCRATCH). 
Dev launchers (faux/introspection) must resolve their artifact root to the package-relative repo `.fixtures/scratch/`, **not** to the operating `cwd` — the same operating-cwd-vs-`fixtureRoot` decoupling the probe layer already uses (`mkdtemp` ephemeral cwd + repo-resolved `fixtureRoot`). This removes the `join(cwd, '.fixtures', …)` nesting defect where launching against a workbench would write `/.fixtures/…`. An exploratory scratch run becomes durable evidence only by explicit promotion (move `scratch///` → `runs///`, then track it), keeping curated `runs/` clean. `.fixtures/scratch/` is the chosen scratch home (over reusing `tmp/`) so promotion is a move within one tree. Depends on: D52-L, D68-L; the probe/transcript model. Supersedes: pinning dev-run artifacts to the operating cwd; treating all `.fixtures/runs/` output as tracked evidence; leaving the `workbenches/` role undocumented. - **D71-L — One `BRUNCH_DEV` switch gates all dev affordances; the main CLI accepts `--cwd`; introspection is present-but-dead in prod.** The over-specific `BRUNCH_DEV_RPC` env var is generalized to a single `BRUNCH_DEV` switch that, when set, enables dev affordances together: dev RPC methods (`dev.*`), registration of the read-only introspection extension (D69-L), and routing of dev-loop artifacts to `.fixtures/scratch/` (D70-L). `runBrunchCli` parses a `--cwd ` flag (defaulting to `process.cwd()`) so a dev session can target a `.fixtures/workbenches/` workspace without `cd`. Two independent prod-safety gates hold: (1) `src/dev/**` is build-excluded by `tsconfig.build.json`, so launchers/harness/alias never ship; (2) the introspection extension, though compiled into `dist` under `src/.pi/`, only *registers* when `createBrunchPiExtensions(..., { introspection: { enabled } })` opts in — and the TUI call site sets `enabled` from `BRUNCH_DEV` only, so absent the switch it is present-but-dead, never wired, honoring D39-L explicit-opt-in sealing (no ambient discovery). 
Brunch-launched TUI sessions keep Pi startup update suppression on in both product and `BRUNCH_DEV` runs by scoping `PI_OFFLINE=1` through `InteractiveMode.run()` unless the user already set a value; prior `PI_OFFLINE` / `PI_SKIP_VERSION_CHECK` state is restored in `finally`, never as a leaked global `process.env` mutation. Depends on: D39-L, D67-L, D68-L, D69-L, D70-L. Supersedes: the `BRUNCH_DEV_RPC`-only dev gate; relying on the operating cwd to choose the dev workspace; the assumption that the introspection extension needs build-exclusion (runtime opt-in suffices); lifting Pi offline mode in `BRUNCH_DEV` TUI sessions merely to enable live-provider behavior. - **D59-L — `goal` is a readiness-derived, AUTO-able objective axis, distinct from strategy.** A *goal* is what the session agent currently pursues; a *strategy* is the reusable interaction shape used to pursue it — a goal is pursued *via* a strategy *through* a lens (three orthogonal axes). The goal set is derived from readiness-band coverage (D64-L) rather than a stored grade: `grounding-advance` (fill grounding gaps and raise grounding coverage), `elicit-expand` (expand the elicited specification graph while ambiguity remains productive), `commit-converge` (reduce / lock down reviewable commitments), plus an always-on `capture-posture` (capture or confirm dev `posture`, D45-L). `goal` defaults to the readiness-derived objective (e.g. while grounding coverage is thin, `grounding-advance`), may be pinned, or left `AUTO`; in either case D58-L manifests advertise the legal resource(s) rather than injecting the whole goal body. For now `goal` is **internal/readiness-derived and not part of the user posture-change surface** (it is too contingent to expose as a user-mutable axis); the pin affordance is reserved for system/internal logic, and unlike `strategy`/`lens` the user does not switch it (D40-L, Q4). 
`elicit-expand` and `commit-converge` intentionally form the diverge/converge pair for the elicitation diamond; `elicit-I` / `elicit-II` are retired because they were phase-like labels, not objectives. "Advance grounding" is a goal, not a strategy — though the `grounding-advance` goal may carry a dedicated default interaction pattern. Depends on: D45-L, D57-L, D58-L, D64-L. Supersedes: conflating the elicit-lifecycle objective with strategy selection, and deriving the goal set from a stored readiness grade. @@ -280,7 +286,7 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c | I6-L | Every reconciliation need has `created_at_lsn ≤` current LSN for its owning spec; its target is exactly one of `{kind:'edge', edgeId}` or `{kind:'node_pair', aId, bId}` per [`docs/design/GRAPH_MODEL.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/GRAPH_MODEL.md#reconciliationneed--separate-substrate-not-a-graph-edge); resolved needs carry a strictly later spec-local `resolved_at_lsn`. | partially covered (`CommandExecutor` reconciliation-need tests prove target-spec allocation and resolve ordering) | D8-L, D51-L, I1-L | | I7-L | ~~Every `framing_as` value belongs to the allowed matrix for that node's base kind.~~ **Retired.** `framing_as` absorbed by D54-L/D56-L node kinds; no node carries a `framing_as` field. | — | D7-L (retired) | | I8-L | Spec selection persists across pi `switchSession` (i.e. `/new`); the selected session file is reopened consistently by headless projection/capture paths; each session has exactly one `brunch.session_binding`, and a session's bound spec never changes. 
| partially covered (M0 coordinator/TUI boot integration tests + store-only probe checker; M1 no-injected-coordinator capture regression; M2 coordinator-created JSONL reload tests; manual TUI smoke still planned) | D11-L, D21-L | -| I9-L | Every `brunch.mention` payload resolves a transcript `#` handle to a stable graph entity id; the ledger never stores title-anchored references or relies on autocomplete popup metadata. | planned (M7 invariant) | D14-L | +| I9-L | Every `brunch.mention` payload resolves a transcript `#` handle to a stable graph entity id; resolution to a stable id happens at user-message **submit time** (D77-L), not autocomplete time (which is advisory UI); the ledger stores `(entity_id, seen_lsn)` pairs and never title-anchored references or autocomplete popup metadata; ledger staleness compares stored `seen_lsn` against the current spec LSN to drive discretionary `brunch.mention_staleness_hint` entries in the turn-boundary reconciler. | planned (turn-boundary-reconciliation slice) | D14-L, D76-L, D77-L | | I10-L | Structured elicitation prompts/responses live in the Pi transcript when structure is needed; Brunch-supported session exchanges are projected only from linear coordinator-bound sessions, and no parallel canonical chat/turn table carries elicitation state. | covered for projection shape and current read surfaces (M1 exchange projection tests, M2 JSONL/RPC projection tests, M3 canonical Brunch session-envelope validation and explicit custom-entry classifiers) | D12-L, D13-L, D18-L, D24-L | | I11-L | No durable graph mutation path — including migrations, maintenance scripts, elicitor-capture writes, deferred observer/auditor writes, or side-task-attributed writes — may bypass the `CommandExecutor` path that performs authority/result classification, version checks, structural validation, transaction execution, LSN allocation, and change-log append. 
| planned (M4 architectural + migration invariants; M5 caller-boundary tests) | D4-L, D15-L, D16-L, D20-L | | I12-L | Side-task results are delivered only at turn boundaries; no side-task result may steer or mutate the active turn outside the next-turn delivery path. | planned (M7 side-task delivery invariant) | D15-L | @@ -313,9 +319,12 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c | I39-L | Every graph node in a spec has exactly one stable projected human reference code derived from `kind` + `kind_ordinal`; `(spec_id, plane, kind, kind_ordinal)` is unique; ordinals are monotonic per `(spec_id, plane, kind)` and are not reused after deletion or supersession. | partially covered (`graph-tool-resilience` added `nodes.kind_ordinal`, `node_kind_counters`, DB uniqueness, CommandExecutor allocation for single-node/batch writes, rollback protection, `GraphNode.kindOrdinal` row mapping, globally unique 1–3 letter labels with readiness-band metadata, projected-code parsing, selected-spec adapter resolution before `CommandExecutor`, code-only `mutate_graph` / `read_graph` schemas, and code-primary prompt/tool rendering; remaining slice still needs deletion/supersession no-reuse coverage) | D54-L, D62-L; I1-L, I11-L | | I40-L | Accepted graph nodes and edges use only `basis ∈ explicit | implicit`; review-set approval and direct user statements produce `explicit`, `propose-graph` concept-level materialization produces `implicit`, and the mutation path is recoverable from `change_log` rather than from a persisted basis enum value such as `accepted_review_set`. 
| covered (`graph-tool-resilience` replaced the persisted basis enum with `explicit | implicit`, made `mutateGraph` apply one batch create-basis to all created nodes/edges, made single-node `createNode` reject retired basis values before LSN/counter/node/change-log allocation, made `propose-graph` adapter commits implicit, made review-set translation explicit, rejected retired `accepted_review_set`, and records `change_log.operation` independently; `capture-response-to-graph` proves direct structured text responses commit explicit-basis graph nodes through `CommandExecutor`; `.fixtures/runs/project-graph-review-cycle/2026-06-06-project-graph-review-cycle/` proves full review-cycle approval creates explicit-basis graph truth) | D26-L, D27-L, D53-L, D63-L | | I41-L | Same-spec `supersession` edges form an acyclic directed graph; every edge-creation path validates proposed supersession edges together with existing supersession edges before committing. | covered (`command-executor/commit-graph-batch.test.ts` rejects existing-cycle closure, intra-batch cycles, and mixed existing+batch cycles through the shared dry-run/commit planner before batch writes; rejected cycles roll back or avoid batch nodes/edges/change_log; acyclic supersession commits remain covered by query/CommandExecutor success paths) | D51-L, D53-L; I34-L | -| I42-L | Dev-only substrate never affects product/prod behavior: `src/dev/**` is build-excluded from `dist`; the introspection extension registers and advertises its query tools only when `BRUNCH_DEV` opts it in (default product sessions never register or advertise the tap, `/introspect`, `brunch_session_query`, `brunch_introspect_query`, or any `before_provider_request` observer); dev-loop artifacts land only under gitignored `.fixtures/scratch/`, never tracked `runs/` or the operating cwd; and Pi startup update suppression / any offline-default lift is save/restore-scoped through TUI launch, never a leaked global `process.env` mutation. 
| covered for the current DX substrate (`src/.pi/__tests__/introspection.test.ts` proves default-off registration + last-position ordering when enabled, including absence/presence and active-tool advertisement of `brunch_session_query` / `brunch_introspect_query`; `src/.pi/agents/state.test.ts` proves the injected dev tool set is unioned only before blocked-tool subtraction and registered-tool intersection; `src/.pi/extensions/session-query/index.test.ts` and `src/.pi/extensions/introspect-query/index.test.ts` cover read-only find/project/truncation behavior; `src/app/brunch-tui.test.ts` proves the real TUI launch path threads `BRUNCH_DEV` into introspection registration, keeps the registrar last, asserts `tsconfig.build.json` excludes `src/dev`, and proves `PI_OFFLINE` startup update suppression plus prior `PI_OFFLINE` / `PI_SKIP_VERSION_CHECK` values are save/restore-scoped through `finally`; `src/dev/introspection-launcher.test.ts` proves scratch artifact routing is repo-rooted and independent of workspace cwd; `.fixtures/README.md` + `.gitignore` document/guard scratch). | D39-L, D40-L, D68-L, D69-L, D70-L, D71-L | +| I42-L | Dev-only substrate never affects product/prod behavior: `src/dev/**` is build-excluded from `dist`; the introspection extension registers and advertises its query tools only when `BRUNCH_DEV` opts it in (default product sessions never register or advertise the tap, `/introspect`, `brunch_session_query`, `brunch_introspect_query`, or any `before_provider_request` observer); durable dev-loop artifacts land only under gitignored `.fixtures/scratch/`, never tracked `runs/` or the operating cwd; the only workspace-local dev cache is ephemeral `.brunch/debug/` output derived from the same passive capture / explicit Brunch-owned text `tool_result` events in `BRUNCH_DEV` real TUI launches; and Pi startup update suppression / any offline-default lift is save/restore-scoped through TUI launch, never a leaked global `process.env` mutation. 
| covered for the current DX substrate (`src/.pi/__tests__/introspection.test.ts` proves default-off registration + last-position ordering when enabled, active-tool advertisement of `brunch_session_query` / `brunch_introspect_query`, debug-cache mirroring from passive final-prompt capture, and Brunch-owned tool-result filtering/append formatting; `src/.pi/agents/state.test.ts` proves the injected dev tool set is unioned only before blocked-tool subtraction and registered-tool intersection; `src/.pi/extensions/session-query/index.test.ts` and `src/.pi/extensions/introspect-query/index.test.ts` cover read-only find/project/truncation behavior; `src/app/brunch-tui.test.ts` proves the real TUI launch path threads `BRUNCH_DEV` into introspection registration with launch-cwd debug-cache options, keeps the registrar last, asserts `tsconfig.build.json` excludes `src/dev`, and proves `PI_OFFLINE` startup update suppression plus prior `PI_OFFLINE` / `PI_SKIP_VERSION_CHECK` values are save/restore-scoped through `finally`; `src/dev/introspection-launcher.test.ts` proves scratch artifact routing is repo-rooted and independent of workspace cwd; `.fixtures/README.md` + `.gitignore` document/guard scratch). | D39-L, D40-L, D68-L, D69-L, D70-L, D71-L | | I43-L | The web client's accent presentation map is exhaustive over `NodePlane` (intent/oracle/design/plan); every plane renders with a defined accent, and node reference-code labels remain canonical via `NODE_KIND_METADATA` + `kindOrdinal` (no fallthrough default that silently swallows an unmapped plane). 
| met (compile-time `satisfies Record<NodePlane, …>` exhaustiveness check on `PLANE_ACCENT` in `src/web/components/node-card.tsx`; breaks the build when a new `NodePlane` is added without an accent) | D72-L; I36-L | | I44-L | Domain enum taxonomy lives in the drizzle-free leaf `src/graph/schema/kinds.ts` (zero imports), `db/schema.ts` owns no enum `const` array (it imports them from the leaf), and the `web/` build target transitively contains no Drizzle/persistence code. The only sanctioned `db/`→`graph/` import is from `db/schema.ts` to `graph/schema/kinds.ts`. | covered (`src/graph/architecture.test.ts` guards leaf purity, db→graph import confinement, absence of enum const arrays in `db/schema.ts`, and post-`build:web` absence of `drizzle`/`sqliteTable` in the dist-web bundle; `src/db/README.md` and `src/graph/README.md` record the taxonomy leaf topology) | D52-L, D73-L; I26-L | +| I45-L | A session's assistant-visible watermark advances only when a continuity entry naming a strictly higher spec-local LSN is inserted: a boot/context seed or whole-spec overview snapshot, a `worldUpdate` for any write not already assistant-visible through another carrier (naming only items with LSN strictly greater than the pre-update watermark, I4-L), or the session's own graph-mutation `toolResult`. `worldUpdate` covers foreign writes **and** same-session writes that did not ride an own-mutation `toolResult` (e.g. submit-time / freestyle capture); such a same-session capture advances `current_lsn` and is surfaced by the next `worldUpdate`, never silently swallowed. A freshly seeded session whose seed named the current snapshot LSN does not immediately synthesize a redundant `worldUpdate`. Narrow `getNodes`/`queryNodes` reads do not advance the global watermark (they update per-entity read ledgers only). When `current_lsn == watermark` no `worldUpdate` is synthesized, and the session's own already-visible mutations never produce a `worldUpdate`.
The watermark is its own projection over the carrier set (distinct from `runtimeState.world.latestLsn`), projected from transcript continuity entries (D43-L), never a stored field. | planned (turn-boundary-reconciliation slice; coverage-first scaffold) | D43-L, D76-L, D77-L; I1-L, I4-L | +| I46-L | Session origination never writes a fabricated user transcript entry. A new session inserts seed continuity entries and then an assistant-originated exchange before idling; a resumed session decides the kick from the **latest unresolved conversational debt**, computed by ignoring trailing continuity-only entries (any reconciler-inserted notice owing no assistant continuation: seed / `worldUpdate` / `brunch.mention*` / `brunch.session_lifecycle` / side-task & reviewer drains), whether inserted this boot or persisted by a prior boot. It originates a turn iff that debt owed assistant continuation (a user message or an incomplete exchange-tuple awaiting the assistant), and otherwise rests at an assistant/system-originated leaf (I13-L). The kick decision is idempotent across crash/reboot: trailing continuity notices neither mask an older unanswered debt nor manufacture a kick over a satisfied leaf. AUTO never originates a `freestyle` turn (D66-L); only an explicit `freestyle` pin yields a wait-for-user idle. | planned (kick+seeding slice; coverage-first scaffold) | D66-L, D78-L; R16; I13-L | +| I47-L | Continuity facts (seed/refresh, `worldUpdate`, `brunch.mention*`, `brunch.session_lifecycle`) persist only as Brunch custom transcript entries — never synthetic `toolCall`s, never prompt-only injection — so the D43-L projection can reconstruct them; boot/resume seeding is idempotent, deriving dedupe from projected transcript state (a seed/world-update already present is not re-emitted) rather than from hidden flags, and survives real restart/resume.
The watermark must also survive compaction: the preserved-anchor set retains the latest watermark-carrier entry per spec so the projected global watermark never regresses after compaction+resume (which would otherwise spuriously re-emit `worldUpdate`). | planned (kick+seeding + turn-boundary-reconciliation slices; coverage-first scaffold) | D17-L, D37-L, D43-L, D76-L, D78-L | ## Future Direction Register @@ -500,8 +509,9 @@ src/.pi/ | **Auto-compaction extension** | The Brunch-owned `session_before_compact` extension (`src/.pi/extensions/auto-compaction.ts`) that renders the preserved anchor set as a deterministic markdown header and prepends it to an LLM-generated narrative summary. Resolves its summarization model through the active agent definition's model preference; falls through to Pi default compaction on auth/empty-output/unexpected errors. | | **Preserved anchor set** | The configured list of transcript entry kinds and selection rules that must survive compaction byte-stable. Canonical source is [src/.pi/extensions/compaction/index.ts](file:///Users/lunelson/Code/hashintel/brunch-next/src/.pi/extensions/compaction/index.ts); each rule is `{ kind, select, rationale }` where `select ∈ first | latest | active-leaves | all-unresolved`. Externalized so it can be reviewed and updated for correctness without SPEC churn. | | **Anchor contract** | The data inside the preserved-anchor TypeScript contract — distinct from the rendering policy (which lives in code) and the LLM summarization (which is bundle-resolved). | -| **World update** | `worldUpdate` custom message synthesised in `prepareNextTurn` summarising relevant graph changes since the session's `lastSeenLsn`. | -| **Mention ledger** | Per-session `(entity_id, seen_lsn)` record driving discretionary staleness hints when an entity has changed since the agent last saw it. 
| +| **World update** | `worldUpdate` custom message synthesised by the turn-boundary reconciler (D77-L) summarising graph changes not already assistant-visible since the session's assistant-visible watermark — foreign writes and same-session writes that did not ride an own-mutation `toolResult` (e.g. submit-time capture); names only items with LSN strictly greater than that watermark (I4-L, I45-L). | +| **Assistant-visible watermark** | The session's `lastSeenLsn` under D76-L: the highest spec-local LSN the session has actually been *shown* in its transcript, a `{specId, lsn}` value projected (D43-L) from the session's **watermark carriers** (boot/context seed or whole-spec overview snapshot, `worldUpdate`, own graph-mutation `toolResult`) — never stored; narrow `getNodes`/`queryNodes` reads do not advance it (D77-L). Distinct from the runtime-observed `current_lsn` (the spec `graph_clock`); the gap between them triggers a `worldUpdate`. | +| **Mention ledger** | Per-session `(entity_id, seen_lsn)` record driving discretionary staleness hints when an entity has changed since the agent last saw it; resolved at submit time, not autocomplete time (I9-L). | | **Authority** | Source of a node's claim: `stakeholder | technical | external | derived`. | | **Epistemic status** | Confidence basis: `observed | asserted | assumed | inferred`. Like `authority`, this is a context-shaping label for attention, grouping, and compression rather than a complete theory of truth. | | **Framing-as** | ~~Orthogonal modality classifying a node's product role.~~ **Retired.** Absorbed by `thesis`, `term`, `constraint`, and `goal` (D54-L, D56-L). | @@ -610,7 +620,7 @@ Verification oracles prove Brunch's *product* claims; development loops are how The vite/vitest-backed loops can run against pi *source* via the D67-L `PI_SOURCE` alias, so no rebuild is needed there to pick up either Brunch or pi edits. 
`tsx`-run real-provider loops intentionally keep default `dist` resolution until an opt-in dev tsconfig is needed. -Dev-loop artifacts route to gitignored `.fixtures/scratch///`, resolved to the repo root rather than the operating cwd, and decoupled from the `--cwd` workspace a dev session targets (D70-L); a single `BRUNCH_DEV` switch gates dev affordances while Brunch TUI launch keeps Pi startup update checks suppressed (D71-L). The introspection loop's live wiring into the real TUI, the four-role `.fixtures/` topology, and conversational self-report (the agent reporting in chat on tool I/O, understandability, errors, and skill activation — A26-L) are the `dx-introspection-live` follow-on; `dx-feedback-loops` built the capture machinery but left it dormant and writing under `runs/introspection/`. +Dev-loop artifacts route to gitignored `.fixtures/scratch///`, resolved to the repo root rather than the operating cwd, and decoupled from the `--cwd` workspace a dev session targets (D70-L); a single `BRUNCH_DEV` switch gates dev affordances while Brunch TUI launch keeps Pi startup update checks suppressed (D71-L). Workspace-local `.brunch/debug/` files are ephemeral `BRUNCH_DEV` caches of passive introspection bytes and explicit Brunch-owned text tool-result content, not scratch evidence. The introspection loop's live wiring into the real TUI, the four-role `.fixtures/` topology, and conversational self-report (the agent reporting in chat on tool I/O, understandability, errors, and skill activation — A26-L) are the `dx-introspection-live` follow-on; `dx-feedback-loops` built the capture machinery but left it dormant and writing under `runs/introspection/`. 
### Oracle Strategy by Loop Tier @@ -683,6 +693,9 @@ The first required probe is M0: after manual TUI interaction, a checker proves ` | I39-L | `graph-tool-resilience` CommandExecutor/adapter/context tests: counter rows allocate monotonic per-kind ordinals in multi-node batches, rollback does not persist failed ordinals/counter rows, DB constraints reject duplicate `(spec_id, plane, kind, kind_ordinal)`, projected-code metadata is unique and parses by longest prefix, existing-code refs resolve inside the selected spec, and prompt/tool renderers use codes as primary handles. Remaining proof: deletion/supersession no-reuse. | | I40-L | `graph-tool-resilience` CommandExecutor/adapter tests: `mutateGraph` applies one batch create-basis to all created nodes/edges, single-node `createNode` rejects retired basis values before LSN/counter/node/change-log allocation, `propose-graph` adapter commits use `implicit`, review-set translation uses `explicit`, retired `accepted_review_set` is rejected, and `change_log.operation` remains independent of basis. FE-807 adds direct structured text response capture with `basis: explicit`. FE-809 adds real project-graph review-cycle acceptance proof with explicit-basis readback under `.fixtures/runs/project-graph-review-cycle/2026-06-06-project-graph-review-cycle/`. | | I41-L | `graph-tool-resilience` CommandExecutor tests reject supersession cycles across existing edges, intra-batch edges, and mixed existing+batch edges, including rollback of batch nodes/edges/change_log; existing acyclic supersession paths still commit. 
| +| I45-L | Middle — watermark-projection property tests (own-write stamping vs foreign `worldUpdate`; strict-greater item set per I4-L; no-`worldUpdate` when `current==watermark`); **seed/full-overview snapshots advance the watermark while narrow `getNodes`/`queryNodes` reads do not**; **no redundant `worldUpdate` immediately after a seed that named the current snapshot LSN**; **same-session submit/capture write bumps `current_lsn` and is surfaced by the next `worldUpdate` (not swallowed)**; **a foreign write that lands between the snapshot read and seed insertion is not masked by the seed**; change-log-range fixtures driving a foreign writer (a second faux session or a direct `CommandExecutor` write) through the real boot. Inner — projection unit tests over synthetic transcript continuity entries. Authored coverage-first (skipped/`todo`) ahead of the `turn-boundary-reconciliation` slice. | +| I46-L | Middle — Tier-2 faux-turn-through-real-boot assertions: new session seeds-then-kicks before the first provider call; resumed-session kick decision classifies **latest unresolved conversational debt** (ignoring trailing continuity-only entries) and still fires when a user tail is followed by reconciler-inserted seed/staleness notices; **crash-after-notice-before-provider reboot still kicks when the underlying debt is an unanswered user/assistant turn** (idempotent re-boot); resumed-session kick stays silent when the latest debt already rests at a `request_*`/system leaf; no fabricated user entry in any path; AUTO never originates `freestyle`. Outer — manual walkthrough of opening-offer quality. Authored coverage-first (skipped/`todo`) ahead of the `kick+seeding` slice. 
| +| I47-L | Middle — restart/resume idempotence property tests (repeated boot does not duplicate seed/`worldUpdate`; dedupe derived from projection); **compaction+resume preserves the projected watermark and does not spuriously re-emit `worldUpdate`** (preserved-anchor set retains the latest watermark carrier); carrier-discipline source/architecture tests (continuity facts are custom entries, not synthetic `toolCall`s or prompt-only). Authored coverage-first (skipped/`todo`) ahead of the enabling slices. | ### Design Notes @@ -693,6 +706,7 @@ The first required probe is M0: after manual TUI interaction, a checker proves ` - **Projection handlers are oracles, not stores.** Read/subscription tests should prove handlers reconstruct truth from Brunch-supported linear Pi JSONL, `.brunch/workspace.json`, or SQLite graph/change log; they should not introduce a canonical view-store just for testing. - **Behavioral quality boundary.** Inner/middle loops prove structural validity, durable state, invariants, and expected graph/property coverage. “Good interview”, “good question”, and “coherent UX feel” remain outer-loop checklist/generative-fixture judgments until enough examples justify sharper metrics. - **Subscriptions are scoped for the POC.** Initial subscription oracles should prove initial state payload plus ordered live updates by invalidating/refetching canonical projection handlers rather than introducing a view store. Reconnect/resume semantics are acknowledged but deferred unless a frontier explicitly depends on them. +- **Coverage-first scaffold for the turn-boundary-choreography layer (D76-L–D78-L; I45-L–I47-L).** Because this layer's mechanics are conceptually clear but built across several slices, its full invariant suite is authored up front as a single coverage map, then enabled slice by slice. 
Tests for unbuilt slices are committed as `it.todo` / `describe.skip` keyed to the enabling slice (`kick+seeding`, `turn-boundary-reconciliation`) so the skip ledger is itself the layer's live coverage map; each slice's definition of done includes flipping its own scaffold tests live (no slice lands green by leaving its tests skipped). The scaffold imports intentional topology stubs (`export {}` + ownership comment per AGENTS.md) for the not-yet-built modules — the assistant-visible watermark projection, the turn-boundary reconciler, and the origination primitive — so signatures and the dependency direction are legible before behavior exists. This is the inner/middle-loop verification policy for the layer; `ln-oracles` still owns elaborating any outer-loop origination-quality fitness. ### Acknowledged Blind Spots diff --git a/memory/cards/kick-and-context-seeding--honest-origination.md b/memory/cards/kick-and-context-seeding--honest-origination.md new file mode 100644 index 00000000..972b9db9 --- /dev/null +++ b/memory/cards/kick-and-context-seeding--honest-origination.md @@ -0,0 +1,116 @@ +# Honest Origination Closure + +Frontier: kick-and-context-seeding +Status: active +Mode: chain +Created: 2026-06-11 + +## Orientation + +- Seam: FE-847 origination over real boot/resume; the local helper logic exists, but the live proof still sits in skipped Tier-2 I46/I47 rows. +- Frontier: `kick-and-context-seeding`; `startAssistantTurn` and context-seed helpers landed, yet no real boot/resume oracle proves the product launch surfaces honor that logic end to end. +- Volatile state: `src/session/start-assistant-turn.test.ts` already proves local debt classification, AUTO-vs-`freestyle`, and crash-after-notice behavior; the missing closure is real boot/resume ownership. +- Main risk: the current Tier-2 harness drives a manual faux prompt; closing I46 may require a more faithful launch/resume trigger seam rather than more helper-only unit proof. 
+- Cross-cutting obligations: no fabricated user turns, seed entries remain Brunch custom continuity entries, debt classification ignores continuity-only entries including side-task/reviewer drains, and this frontier stays sequenced after the reconciliation closure cards that stabilize watermark carriers and compaction behavior. +- Posture: proving (inherited from `kick-and-context-seeding`) + +## Card 1 - Prove new-session seed-then-kick through the real boot seam + +### Objective + +A real new-session boot seeds context and starts an assistant-originated first turn before the first provider call, without fabricating any user transcript entry. + +### Light-card cold-start reads + +- `memory/SPEC.md` — D76-L, D78-L, I45-L, I46-L, I47-L +- `memory/PLAN.md` — frontier: `kick-and-context-seeding` +- `HANDOFF.md` — FE-847 volatile sequencing and edge-case list +- `src/dev/README.md` — Tier-2 harness ownership ledger +- `src/session/README.md` — origination ownership under `start-assistant-turn.ts` + +### Acceptance Criteria + +✓ A real new-session boot inserts seed continuity entries before the first provider call and then starts an assistant-originated turn with no fabricated user message. + +✓ The seed names the current snapshot LSN, so a redundant immediate `worldUpdate` is still suppressed under the real boot path. + +✓ The corresponding skipped I46 scaffold row is live after this slice. + +### Verification Approach + +- Inner: keep local `start-assistant-turn` helper tests for classification logic. +- Middle: flip the new-session seed-then-kick Tier-2 scaffold row live through the real boot harness. + +### Cross-cutting obligations + +- This is product behavior, not a `BRUNCH_DEV` affordance. +- Keep origination behind assistant/system ownership only; never fake a user opener. + +### Assumption dependency + +None. 
+ +### Expected touched paths (tentative) + +```text +src/dev/ +├── tier-2-harness.ts ~ +└── tier-2-harness.test.ts ~ +src/session/ +├── start-assistant-turn.ts ? +└── start-assistant-turn.test.ts ? +src/rpc/methods/ +└── session.ts ? +src/app/ +└── brunch-tui.ts ? +``` + +## Card 2 - Prove resume-debt classification and idle policy through restart/resume + +### Objective + +Resume boot classifies the pre-reconcile conversational debt correctly across continuity-only tails and reboot-after-notice cases, and only an explicit `freestyle` pin leaves the assistant idle. + +### Light-card cold-start reads + +- `memory/SPEC.md` — D66-L, D78-L, I13-L, I46-L, I47-L +- `memory/PLAN.md` — frontier: `kick-and-context-seeding` +- `memory/cards/turn-boundary-reconciliation--continuity-chain.md` — Cards 1 and 3 establish the watermark and compaction preconditions this slice assumes +- `src/session/README.md` — continuity-only taxonomy and origination seam + +### Acceptance Criteria + +✓ Resume classification ignores trailing continuity-only entries, including seed, `worldUpdate`, `brunch.mention*`, `brunch.session_lifecycle`, side-task drains, and reviewer drains. + +✓ Crash-after-notice-before-provider still kicks when the underlying debt is unresolved, while `request_*` / system leaves remain idle. + +✓ AUTO remains offer-first; only an explicit `freestyle` pin idles the assistant. + +✓ The remaining skipped I46/I47 origination rows are live after this slice. + +### Verification Approach + +- Inner: preserve focused helper tests for debt classification edge cases. +- Middle: real resume/restart fixture assertions through the Tier-2 harness or session-resume seam. + +### Cross-cutting obligations + +- Do not fork the continuity-only taxonomy; reuse the shared classifier owned under `projections/session/`. +- Keep restart idempotence derived from transcript projection, not hidden runtime flags. + +### Assumption dependency + +None. 
+ +### Expected touched paths (tentative) + +```text +src/dev/ +├── tier-2-harness.ts ~ +└── tier-2-harness.test.ts ~ +src/session/ +├── start-assistant-turn.ts ? +└── start-assistant-turn.test.ts ? +src/projections/session/ +└── continuity-entry-classifier.ts ? +``` diff --git a/memory/cards/turn-boundary-reconciliation--continuity-chain.md b/memory/cards/turn-boundary-reconciliation--continuity-chain.md new file mode 100644 index 00000000..f6773c71 --- /dev/null +++ b/memory/cards/turn-boundary-reconciliation--continuity-chain.md @@ -0,0 +1,164 @@ +# Turn-Boundary Reconciliation Closure + +Frontier: turn-boundary-reconciliation +Status: active +Mode: chain +Created: 2026-06-11 + +## Orientation + +- Seam: FE-847 Tier-2 turn-boundary reconciliation over real boot/resume; the domain helpers exist, but the frontier still closes through skipped scaffold rows in `src/dev/tier-2-harness.test.ts`. +- Frontier: `turn-boundary-reconciliation`; assistant-visible watermark projection, `prepareNextTurn`, and mention-ledger mechanics landed, but the frontier is not done until Tier-2 and compaction invariants replace the scaffold. +- Volatile state: unit tests in `src/projections/session/assistant-visible-watermark.test.ts`, `src/session/prepare-next-turn.test.ts`, and `src/session/mention-ledger.test.ts` already prove local logic; the missing proof is end-to-end ownership through the real runtime and resume seams. +- Main risk: closing I45/I47 may require evolving the Tier-2 harness and compaction anchor contract, not merely unskipping tests; keep the one-writer seam intact and do not reintroduce ad hoc continuity insertion points. +- Cross-cutting obligations: `prepareNextTurn` stays the single continuity writer, `before_provider_request` stays a guard only, continuity facts remain Brunch custom entries, watermark comparisons stay `{specId, lsn}` only, and the latest watermark carrier must survive compaction/resume. 
+- Posture: proving (inherited from `turn-boundary-reconciliation`) + +## Card 1 - Flip the I45 watermark/world-update scaffold live through Tier-2 + +### Objective + +The real Tier-2 boot/resume harness proves assistant-visible watermark and `worldUpdate` behavior across seed, overview, foreign-write, and same-session-capture cases by replacing the skipped I45 scaffold rows with live assertions. + +### Light-card cold-start reads + +- `memory/SPEC.md` — D76-L, D77-L, I4-L, I45-L, I47-L +- `memory/PLAN.md` — frontier: `turn-boundary-reconciliation` +- `HANDOFF.md` — FE-847 volatile sequencing and the scaffold edge-case list +- `src/dev/README.md` — Tier-2 harness ownership ledger +- `src/session/README.md` — turn-boundary choreography seam ownership +- `src/projections/README.md` — assistant-visible-watermark row and continuity classifier ownership + +### Acceptance Criteria + +✓ The skipped Tier-2 rows for seed/full-overview carriers vs narrow reads, strict-greater `worldUpdate`, same-session capture surfacing, and foreign-write-during-seed all run live against the real boot/resume harness. + +✓ The proof uses `{specId, lsn}` and set semantics, not payload-order goldens or bare-LSN comparisons. + +✓ Any helper or lower-fidelity test kept after this slice still proves a local derivation unavailable from Tier-2; duplicate wiring-only proof is retired. + +### Verification Approach + +- Inner: retain focused unit/property tests for projection and `prepareNextTurn` local semantics. +- Middle: flip the corresponding `src/dev/tier-2-harness.test.ts` I45 scaffold rows live through real boot/resume fixtures. + +### Cross-cutting obligations + +- Do not move watermark truth into stored mutable state. +- Same-session submit/capture writes must still surface by `worldUpdate` when they were not already assistant-visible. +- If the Tier-2 harness needs new helpers, keep them runtime-facing and delete-oriented rather than adding a parallel faux path. 
+ +### Assumption dependency + +None — this slice is itself the frontier-closing proof for the remaining I45-L uncertainty. + +### Expected touched paths (tentative) + +```text +src/dev/ +├── tier-2-harness.ts ~ +└── tier-2-harness.test.ts ~ +src/session/ +├── prepare-next-turn.ts ? +└── prepare-next-turn.test.ts ? +src/projections/session/ +├── assistant-visible-watermark.ts ? +└── assistant-visible-watermark.test.ts ? +src/.pi/ +├── brunch-pi-extensions.ts ? +└── extensions/session/lifecycle.ts ? +``` + +## Card 2 - Prove mention resolution and staleness through the real submit path + +### Objective + +Submitting a user message through the real session path appends stable-id `brunch.mention` facts at submit time and surfaces only genuinely stale mentions at the next turn boundary. + +### Light-card cold-start reads + +- `memory/SPEC.md` — D14-L, D49-L, D77-L, I9-L, I45-L +- `memory/PLAN.md` — frontier: `turn-boundary-reconciliation` +- `src/session/README.md` — mention-ledger / turn-boundary ownership +- `src/rpc/README.md` — `session.submitMessage` ownership and transcript effects + +### Acceptance Criteria + +✓ A real submit path appends `brunch.mention` facts from stable graph ids at submit time, not autocomplete time or later reconciliation. + +✓ The next turn boundary emits `brunch.mention_staleness_hint` only for entities whose current LSN exceeds the stored `seen_lsn`. + +✓ The mid-level proof owns this behavior; any older mock-only assertion kept after the slice still proves a narrower local helper rather than the same submit-path wiring. + +### Verification Approach + +- Inner: keep local mention-ledger tests for parsing and staleness derivation. +- Middle: add a real submit/resume assertion path (Tier-2 or equivalent selected-spec session harness) that proves the ledger append plus next-turn staleness output. + +### Cross-cutting obligations + +- Mention resolution stays bound to submit-time transcript truth. 
+- Staleness remains advisory continuity output, not hidden session state. + +### Assumption dependency + +None. + +### Expected touched paths (tentative) + +```text +src/dev/ +├── tier-2-harness.ts ~ +└── tier-2-harness.test.ts ~ +src/rpc/methods/ +└── session.ts ? +src/session/ +├── mention-ledger.ts ? +└── mention-ledger.test.ts ? +``` + +## Card 3 - Preserve the latest watermark carrier across compaction and resume + +### Objective + +Compaction and resume preserve the latest watermark-carrying continuity entry per spec so the projected watermark cannot regress and spuriously re-emit `worldUpdate`. + +### Light-card cold-start reads + +- `memory/SPEC.md` — D43-L, D76-L, I47-L +- `memory/PLAN.md` — frontier: `turn-boundary-reconciliation` +- `src/.pi/extensions/compaction/index.ts` — current anchor contract +- `src/session/README.md` — turn-boundary choreography seam + +### Acceptance Criteria + +✓ The compaction anchor contract explicitly preserves the latest watermark carrier family needed for D76-L projection, not just `worldUpdate` alone. + +✓ A compaction-plus-resume proof shows the projected watermark does not regress and no spurious `worldUpdate` is emitted after restart. + +✓ The corresponding skipped I47 scaffold row is live after this slice. + +### Verification Approach + +- Inner: anchor-contract tests or direct unit assertions over carrier selection. +- Middle: resume-through-compaction proof via the Tier-2 harness or a compaction-focused session fixture test. + +### Cross-cutting obligations + +- Preserve continuity as transcript truth; do not add hidden flags or out-of-band watermark persistence. +- Keep the preserved-carrier rule spec-local. + +### Assumption dependency + +None. + +### Expected touched paths (tentative) + +```text +src/.pi/extensions/compaction/ +└── index.ts ~ +src/dev/ +└── tier-2-harness.test.ts ~ +src/session/ +└── jsonl-session-viability.test.ts ? 
+``` diff --git a/src/.pi/README.md b/src/.pi/README.md index cc4c21fa..b81a61cc 100644 --- a/src/.pi/README.md +++ b/src/.pi/README.md @@ -7,7 +7,7 @@ This directory is Brunch's sealed Pi-harness surface. It contains the agent pers ## Owns - Pi-facing agent prompt assembly and runtime prompt resources. -- Pi extension registration: tools, lifecycle hooks, command handlers, autocomplete, TUI chrome, workspace dialogs, and dev-gated read-only introspection. +- Pi extension registration: tools, lifecycle hooks, command handlers, autocomplete, TUI chrome, workspace dialogs, and dev-gated read-only introspection. `extensions/session/lifecycle.ts` adapts Pi session/turn hooks into one ordered Brunch session-boundary pipeline: workspace rebinding first, then continuity preparation steps. `extensions/graph/index.ts` stamps the live watermark carriers for own mutations and full graph-overview reads. - Pi-native skills/resources that the agent reads on demand. - Reusable Pi TUI components used by those extensions. diff --git a/src/.pi/__tests__/extension-registry.test.ts b/src/.pi/__tests__/extension-registry.test.ts index 2f0cf569..38e71b6b 100644 --- a/src/.pi/__tests__/extension-registry.test.ts +++ b/src/.pi/__tests__/extension-registry.test.ts @@ -117,6 +117,56 @@ describe('Brunch explicit Pi extension registry', () => { expect(sessionStartIndexes[0]).toBeLessThan(sessionStartIndexes[1] ?? 
-1); }); + it('wires prepareNextTurn into the live session boundary and leaves provider-request as guard-only', async () => { + let graphLsn = 3; + const appended: Array<{ customType: string; data: unknown }> = []; + const events = new Map Promise | void>>(); + const sessionManager = { + getEntries: () => appended.map((entry) => ({ type: 'custom', ...entry })), + appendCustomEntry(customType: string, data: unknown) { + appended.push({ customType, data }); + }, + }; + + await createBrunchPiExtensions(brunchChromeFixture, undefined, { + coordinator: {} as never, + graph: { + specId: 1, + commandExecutor: {} as never, + reads: { + queryGraph: () => + ({ + lsn: graphLsn, + nodes: [{ id: 10, kind: 'goal', title: 'Live goal', updatedAtLsn: graphLsn }], + edges: [], + }) as never, + getNodes: () => [], + resolveNodeCode: () => undefined, + }, + }, + })(recordingApiWithEvents(events)); + + await events.get('before_agent_start')?.[0]?.({}, { sessionManager }); + + expect(appended).toEqual([ + { + customType: 'worldUpdate', + data: expect.objectContaining({ specId: 1, currentLsn: 3, changedSinceLsn: 0 }), + }, + ]); + + await expect( + events.get('before_provider_request')?.[0]?.({}, { sessionManager }), + ).resolves.toBeUndefined(); + expect(appended).toHaveLength(1); + + graphLsn = 4; + await expect(events.get('before_provider_request')?.[0]?.({}, { sessionManager })).rejects.toThrow( + /prepareNextTurn must run before prompt composition/, + ); + expect(appended).toHaveLength(1); + }); + it('does not retain the filesystem-discovery product-extension protocol', async () => { const shell = await readFile(join(projectRoot(), 'src/.pi/brunch-pi-extensions.ts'), 'utf8'); const discoveryExport = ['discover', 'BrunchProductExtensionEntries'].join(''); @@ -153,6 +203,36 @@ const brunchChromeFixture = { }, }; +function recordingApiWithEvents(events: Map Promise | void>>) { + return { + on(eventName: string, handler: (event: any, ctx: any) => Promise | void) { + 
events.set(eventName, [...(events.get(eventName) ?? []), handler]); + }, + registerTool() {}, + registerCommand() {}, + registerShortcut() {}, + registerMessageRenderer() {}, + sendMessage() {}, + getAllTools: () => + [ + 'read', + 'grep', + 'find', + 'ls', + 'present_alternatives', + PRESENT_QUESTION_TOOL, + PRESENT_OPTIONS_TOOL, + REQUEST_ANSWER_TOOL, + REQUEST_CHOICE_TOOL, + REQUEST_CHOICES_TOOL, + 'bash', + 'edit', + 'write', + ].map((name) => ({ name })), + setActiveTools() {}, + } as never; +} + function createRecordingExtensionApi() { const eventNames: string[] = []; const toolNames: string[] = []; diff --git a/src/.pi/__tests__/graph-tools.test.ts b/src/.pi/__tests__/graph-tools.test.ts index 8b417328..17ad491b 100644 --- a/src/.pi/__tests__/graph-tools.test.ts +++ b/src/.pi/__tests__/graph-tools.test.ts @@ -80,12 +80,21 @@ describe('graph tools end-to-end', () => { const reads = createGraphReads(db, specId); const tools: Array<{ name: string; execute: (toolCallId: string, params: never) => Promise }> = []; + const carriers: Array<{ customType: string; data: unknown }> = []; - registerBrunchGraph({ registerTool: (tool: unknown) => tools.push(tool as never) } as never, { - specId, - commandExecutor: executor, - reads, - }); + registerBrunchGraph( + { + registerTool: (tool: unknown) => tools.push(tool as never), + appendEntry(customType: string, data: unknown) { + carriers.push({ customType, data }); + }, + } as never, + { + specId, + commandExecutor: executor, + reads, + }, + ); const commit = tools.find((tool) => tool.name === 'mutate_graph')!; const read = tools.find((tool) => tool.name === 'read_graph')!; @@ -96,18 +105,34 @@ describe('graph tools end-to-end', () => { { op: 'create_node', ref: 'n2', plane: 'intent', kind: 'requirement', title: 'Expose queryGraph' }, { op: 'create_edge', category: 'dependency', dependency: 'n2', dependent: 'n1' }, ], - } as never)) as { content: readonly { text: string }[]; details: { status: string } }; + } as never)) as 
{ + content: readonly { text: string }[]; + details: { status: string; lsn: number }; + }; expect(commitResult.details.status).toBe('success'); expect(formatMutateGraphResult(commitResult.details as never)).toContain('Graph mutated successfully'); + expect(carriers).toEqual([ + { + customType: 'brunch.own_mutation', + data: { specId, lsn: commitResult.details.lsn, source: 'mutate_graph' }, + }, + ]); const readResult = (await read.execute('tool-2', { mode: 'overview' } as never)) as { content: readonly { text: string }[]; - details: { nodes: readonly unknown[]; edges: readonly unknown[] }; + details: { nodes: readonly unknown[]; edges: readonly unknown[]; lsn: number }; }; expect(readResult.details.nodes).toHaveLength(2); expect(readResult.details.edges).toHaveLength(1); expect(readResult.content[0]!.text).toContain('Build graph API'); + expect(carriers.at(-1)).toEqual({ + customType: 'brunch.graph_overview_snapshot', + data: { specId, snapshotLsn: readResult.details.lsn }, + }); + + await read.execute('tool-3', { mode: 'neighborhood', nodeCode: 'G1' } as never); + expect(carriers).toHaveLength(2); }); }); diff --git a/src/.pi/__tests__/introspection.test.ts b/src/.pi/__tests__/introspection.test.ts index 5b181f3e..5361d838 100644 --- a/src/.pi/__tests__/introspection.test.ts +++ b/src/.pi/__tests__/introspection.test.ts @@ -1,3 +1,7 @@ +import { mkdtemp, readFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + import { describe, expect, it } from 'vitest'; import { createBrunchPiExtensions } from '../brunch-pi-extensions.js'; @@ -70,6 +74,52 @@ describe('Brunch introspection extension', () => { expect(store.latestPassiveCapture()?.payload).toEqual({ wrapped: { original: true } }); }); + it('mirrors the latest captured final system prompt into the workspace debug cache', async () => { + const api = createFakeExtensionApi(); + const store = createInMemoryBrunchIntrospectionStore(); + const cwd = await 
mkdtemp(join(tmpdir(), 'brunch-introspection-debug-')); + + registerBrunchIntrospection(api.api as never, { + store, + clock: fixedClock, + debugCache: { cwd }, + }); + + await api.emitBeforeProviderRequest({ payload: { system: 'first final prompt' } }); + await api.emitBeforeProviderRequest({ payload: { system: 'second final prompt' } }); + + await expect(readFile(join(cwd, '.brunch/debug/system-prompt.md'), 'utf8')).resolves.toBe( + 'second final prompt', + ); + }); + + it('appends only explicit Brunch-owned text tool results to the workspace debug cache', async () => { + const api = createFakeExtensionApi(); + const cwd = await mkdtemp(join(tmpdir(), 'brunch-introspection-tools-')); + + registerBrunchIntrospection(api.api as never, { + clock: fixedClock, + debugCache: { cwd }, + }); + + await api.emitToolResult({ + toolName: 'read_graph', + content: [{ type: 'text', text: 'graph block' }], + }); + await api.emitToolResult({ + toolName: 'read', + content: [{ type: 'text', text: 'built-in block' }], + }); + await api.emitToolResult({ + toolName: 'brunch_session_query', + content: [{ type: 'text', text: 'query block' }], + }); + + await expect(readFile(join(cwd, '.brunch/debug/tool-contents.md'), 'utf8')).resolves.toBe( + 'graph block\n\n---\n\nquery block', + ); + }); + it('is absent by default and registered last when dev introspection is enabled', async () => { const productApi = createFakeExtensionApi(); @@ -90,7 +140,9 @@ describe('Brunch introspection extension', () => { expect(devApi.commandNames.at(-1)).toBe(BRUNCH_INTROSPECTION_COMMAND); expect(devApi.toolNames.slice(-2)).toEqual([BRUNCH_SESSION_QUERY_TOOL, BRUNCH_INTROSPECT_QUERY_TOOL]); - expect(devApi.eventNames.slice(-2)).toEqual(['before_agent_start', 'before_provider_request']); + expect(devApi.eventNames).toEqual( + expect.arrayContaining(['before_agent_start', 'before_provider_request', 'tool_result']), + ); }); it('advertises registered dev query tools only when introspection is enabled', async () 
=> { @@ -177,6 +229,11 @@ function createFakeExtensionApi() { ), ); }, + async emitToolResult(event: unknown): Promise { + return last( + await Promise.all((handlers.get('tool_result') ?? []).map((handler) => handler(event, {}))), + ); + }, async runProviderRequestChain(event: { payload: unknown }): Promise { let currentPayload = event.payload; for (const handler of handlers.get('before_provider_request') ?? []) { diff --git a/src/.pi/__tests__/prompting.test.ts b/src/.pi/__tests__/prompting.test.ts index 6812ba78..05781bc9 100644 --- a/src/.pi/__tests__/prompting.test.ts +++ b/src/.pi/__tests__/prompting.test.ts @@ -78,7 +78,7 @@ function groundingGaps(coverage: Partial> = {}): Elicit } const promptContext = { - spec: { id: 1, name: 'Spec', readinessGrade: 'commitments_ready' as const }, + spec: { id: 1, name: 'Spec' }, workspace: { cwd: '/tmp/brunch', posture: workspacePosture({ @@ -234,7 +234,7 @@ describe('Brunch prompt-pack topology', () => { it('refreshes selected-spec prompt context through the shell session-boundary path before composing', async () => { const events: Record unknown>> = {}; let selected = { - spec: { id: 1, name: 'Launch spec', readinessGrade: 'commitments_ready' as const }, + spec: { id: 1, name: 'Launch spec' }, session: { id: 'launch-session', label: 'Launch session' }, nodeTitles: ['Launch-only node'], }; @@ -247,7 +247,7 @@ describe('Brunch prompt-pack topology', () => { }, async () => { selected = { - spec: { id: 2, name: 'Switched spec', readinessGrade: 'commitments_ready' as const }, + spec: { id: 2, name: 'Switched spec' }, session: { id: 'switched-session', label: 'Switched session' }, nodeTitles: ['Switched current node'], }; diff --git a/src/.pi/brunch-pi-extensions.ts b/src/.pi/brunch-pi-extensions.ts index a66dac09..861b3c82 100644 --- a/src/.pi/brunch-pi-extensions.ts +++ b/src/.pi/brunch-pi-extensions.ts @@ -1,6 +1,15 @@ -import { type ExtensionAPI, type ExtensionFactory } from '@earendil-works/pi-coding-agent'; +import { + 
type ExtensionAPI, + type ExtensionFactory, + type SessionManager, +} from '@earendil-works/pi-coding-agent'; import { formatGraphNodeCode } from '../graph/schema/nodes.js'; +import { + prepareNextTurn, + type GraphChangeItem, + type PrepareNextTurnResult, +} from '../session/prepare-next-turn.js'; import { registerBrunchAlternatives } from './components/alternatives.js'; import { registerBrunchChrome } from './extensions/chrome/index.js'; import { type BrunchChromeState } from './extensions/chrome/index.js'; @@ -22,7 +31,10 @@ import { registerBrunchMentionAutocomplete } from './extensions/mentions/index.j import { registerBrunchOperationalModePolicy } from './extensions/runtime/index.js'; import { BRUNCH_SESSION_QUERY_TOOL, registerBrunchSessionQuery } from './extensions/session-query/index.js'; import { registerBrunchSessionBoundary } from './extensions/session/lifecycle.js'; -import { type BrunchSessionBoundaryHandler } from './extensions/session/lifecycle.js'; +import { + type BrunchSessionBoundaryHandler, + type BrunchSessionBoundaryPipelineStep, +} from './extensions/session/lifecycle.js'; import { registerBrunchPrompting, type BrunchPromptContextProvider, @@ -125,8 +137,14 @@ export function createBrunchPiExtensions( const devAllowedToolNames = introspectionOptions?.enabled ? [BRUNCH_SESSION_QUERY_TOOL, BRUNCH_INTROSPECT_QUERY_TOOL] : undefined; + const continuityStep = options.graph ? createPrepareNextTurnContinuityStep(options.graph) : undefined; const extensions: BrunchProductExtensionRegistrar[] = [ - (api) => registerBrunchSessionBoundary(api, onSessionBoundary), + (api) => { + registerBrunchSessionBoundary(api, onSessionBoundary, { + continuitySteps: continuityStep ? 
[continuityStep] : [], + }); + if (options.graph) registerBrunchContinuityGuard(api, options.graph); + }, (api) => registerBrunchChrome(api, chrome), registerBrunchBranchPolicyHandlers, (api) => registerBrunchOperationalModePolicy(api, { devAllowedToolNames }), @@ -150,10 +168,11 @@ export function createBrunchPiExtensions( ...(introspectionOptions?.enabled ? [ (api: ExtensionAPI) => { - const { store, clock } = introspectionOptions; + const { store, clock, debugCache } = introspectionOptions; const introspectionStore = registerBrunchIntrospection(api, { ...(store ? { store } : {}), ...(clock ? { clock } : {}), + ...(debugCache ? { debugCache } : {}), }); registerBrunchSessionQuery(api); registerBrunchIntrospectQuery(api, { store: introspectionStore }); @@ -167,3 +186,57 @@ export function createBrunchPiExtensions( } }; } + +function createPrepareNextTurnContinuityStep(graph: BrunchGraphDeps): BrunchSessionBoundaryPipelineStep { + return ({ sessionManager }) => { + const result = prepareNextTurnForGraph(graph, sessionManager); + for (const entry of result.entriesToAppend) { + sessionManager.appendCustomEntry(entry.customType, entry.data); + } + }; +} + +function registerBrunchContinuityGuard(pi: ExtensionAPI, graph: BrunchGraphDeps): void { + pi.on('before_provider_request', async (_event, ctx) => { + const result = prepareNextTurnForGraph(graph, ctx.sessionManager as SessionManager); + if (result.entriesToAppend.length > 0) { + throw new Error( + 'Continuity drift remained before provider request; prepareNextTurn must run before prompt composition.', + ); + } + }); +} + +function prepareNextTurnForGraph( + graph: BrunchGraphDeps, + sessionManager: SessionManager, +): PrepareNextTurnResult { + const snapshot = graph.reads.queryGraph(undefined, { visibility: 'all' }); + return prepareNextTurn({ + specId: graph.specId, + currentLsn: snapshot.lsn, + entries: sessionManager.getEntries(), + changes: graphChangesFromSnapshot(graph.specId, snapshot), + }); +} + +function 
graphChangesFromSnapshot( + specId: number, + snapshot: ReturnType, +): readonly GraphChangeItem[] { + return [ + ...snapshot.nodes.map((node) => ({ + specId, + lsn: node.updatedAtLsn, + entityId: node.id, + kind: node.kind, + title: node.title, + })), + ...snapshot.edges.map((edge) => ({ + specId, + lsn: edge.updatedAtLsn, + entityId: edge.id, + kind: edge.category, + })), + ]; +} diff --git a/src/.pi/extensions/graph/index.ts b/src/.pi/extensions/graph/index.ts index 7c266244..0e0132b1 100644 --- a/src/.pi/extensions/graph/index.ts +++ b/src/.pi/extensions/graph/index.ts @@ -17,6 +17,7 @@ import type { } from '../../../graph/index.js'; import { formatNeighborhood } from '../../../renderers/graph/node-neighborhood.js'; import { graphMutationProductUpdates, type ProductUpdatePublisher } from '../../../rpc/product-updates.js'; +import { stampOwnMutationWatermark } from '../../../session/prepare-next-turn.js'; import { translateMutateGraph, formatGraphOverview, @@ -71,6 +72,8 @@ export function registerBrunchGraph(pi: ExtensionAPI, deps: BrunchGraphDeps): vo const text = formatMutateGraphResult(result); if (result.status === 'success') { deps.productUpdates?.publish(graphMutationProductUpdates({ specId, lsn: result.lsn })); + const carrier = stampOwnMutationWatermark({ specId, lsn: result.lsn, source: 'mutate_graph' }); + pi.appendEntry(carrier.customType, carrier.data); } return { content: [{ type: 'text' as const, text }], details: result }; @@ -110,6 +113,10 @@ export function registerBrunchGraph(pi: ExtensionAPI, deps: BrunchGraphDeps): vo const slice = reads.queryGraph(undefined, options); text = formatGraphOverview(slice); details = slice; + pi.appendEntry('brunch.graph_overview_snapshot', { + specId: deps.specId, + snapshotLsn: slice.lsn, + }); } else if (params.mode === 'list_by_kind') { const slice = reads.queryGraph({ kinds: params.kinds as readonly NodeKind[] }, options); text = formatGraphOverview(slice, 'Graph slice by kind'); diff --git 
a/src/.pi/extensions/introspection/README.md b/src/.pi/extensions/introspection/README.md index f8e6a0d2..8aa72c89 100644 --- a/src/.pi/extensions/introspection/README.md +++ b/src/.pi/extensions/introspection/README.md @@ -2,9 +2,9 @@ Owns the dev-only D69-L agent-input introspection tap. -- **Owns:** read-only `before_provider_request` capture of the final provider payload and the dev `/introspect` command that reports base `getSystemPromptOptions()` inputs plus the latest passive capture. +- **Owns:** read-only `before_provider_request` capture of the final provider payload, `tool_result` mirroring for explicit Brunch-owned text results, and the dev `/introspect` command that reports base `getSystemPromptOptions()` inputs plus the latest passive capture. - **Input:** Pi extension events from the explicit Brunch extension bundle. -- **Output:** in-memory capture records consumed by `src/dev/introspection-launcher.ts` and written under repo-root `.fixtures/scratch/introspection//`. +- **Output:** in-memory capture records consumed by `src/dev/introspection-launcher.ts` and written under repo-root `.fixtures/scratch/introspection//`; under `BRUNCH_DEV` real TUI launches, the latest captured final system prompt is also mirrored to `.brunch/debug/system-prompt.md`, and explicit Brunch-owned text tool results append to `.brunch/debug/tool-contents.md`. - **Used by:** developer feedback loops only. Product Brunch sessions omit this extension unless `createBrunchPiExtensions(..., { introspection: { enabled: true } })` is passed explicitly. -The extension observes only: hook handlers return `undefined` and never replace provider payloads or system prompts. It must be registered last in `brunch-pi-extensions.ts` when enabled so the passive tap sees the post-mutation provider payload. +The extension observes only: hook handlers return `undefined` and never replace provider payloads, system prompts, or tool results. 
It must be registered last in `brunch-pi-extensions.ts` when enabled so the passive tap sees the post-mutation provider payload. The `.brunch/debug/` mirror is a dev cache of captured bytes, not scratch evidence and not a prompt or TUI-render reconstruction path. diff --git a/src/.pi/extensions/introspection/debug-cache.ts b/src/.pi/extensions/introspection/debug-cache.ts new file mode 100644 index 00000000..0a1977e2 --- /dev/null +++ b/src/.pi/extensions/introspection/debug-cache.ts @@ -0,0 +1,108 @@ +import { appendFile, mkdir, readFile, writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; + +export interface BrunchDebugCacheOptions { + readonly cwd: string; +} + +const BRUNCH_DEBUG_CONTENT_TOOL_NAMES = new Set([ + 'brunch_introspect_query', + 'brunch_session_query', + 'mutate_graph', + 'present_alternatives', + 'present_options', + 'present_question', + 'present_review_set', + 'read_graph', + 'read_session_context', + 'read_workspace_context', + 'request_answer', + 'request_choice', + 'request_choices', + 'request_review', +]); + +export async function mirrorSystemPromptToDebugCache( + options: BrunchDebugCacheOptions, + payload: unknown, +): Promise { + const systemPrompt = systemPromptFromProviderPayload(payload); + if (systemPrompt === undefined) return; + + const debugDir = join(options.cwd, '.brunch', 'debug'); + await mkdir(debugDir, { recursive: true }); + await writeFile(join(debugDir, 'system-prompt.md'), systemPrompt, 'utf8'); +} + +export async function appendToolContentToDebugCache( + options: BrunchDebugCacheOptions, + event: unknown, +): Promise { + const text = toolContentFromEvent(event); + if (text === undefined) return; + + const debugDir = join(options.cwd, '.brunch', 'debug'); + await mkdir(debugDir, { recursive: true }); + await appendSeparatedBlock(join(debugDir, 'tool-contents.md'), text); +} + +function toolContentFromEvent(event: unknown): string | undefined { + if (!isRecord(event) || typeof event.toolName !== 
'string') return undefined; + if (!BRUNCH_DEBUG_CONTENT_TOOL_NAMES.has(event.toolName)) return undefined; + + const content = event.content; + if (!Array.isArray(content)) return undefined; + + const text = content.flatMap((block) => { + if (isRecord(block) && block.type === 'text' && typeof block.text === 'string') return [block.text]; + return []; + }); + return text.length > 0 ? text.join('\n') : undefined; +} + +async function appendSeparatedBlock(file: string, text: string): Promise { + let existing = ''; + try { + existing = await readFile(file, 'utf8'); + } catch (error) { + if (!isNodeError(error) || error.code !== 'ENOENT') throw error; + } + + await appendFile(file, `${existing.length > 0 ? '\n\n---\n\n' : ''}${text}`, 'utf8'); +} + +function systemPromptFromProviderPayload(payload: unknown): string | undefined { + if (!isRecord(payload)) return undefined; + + if (typeof payload.system === 'string') return payload.system; + if (typeof payload.systemPrompt === 'string') return payload.systemPrompt; + + const messages = payload.messages; + if (!Array.isArray(messages)) return undefined; + + const systemMessage = messages.find( + (message): message is { readonly content: unknown } => + isRecord(message) && message.role === 'system' && 'content' in message, + ); + return textFromMessageContent(systemMessage?.content); +} + +function textFromMessageContent(content: unknown): string | undefined { + if (typeof content === 'string') return content; + if (!Array.isArray(content)) return undefined; + + const parts = content.flatMap((part) => { + if (typeof part === 'string') return [part]; + if (isRecord(part) && typeof part.text === 'string') return [part.text]; + return []; + }); + return parts.length > 0 ? 
parts.join('') : undefined; +} + +function isRecord(value: unknown): value is Record { + return typeof value === 'object' && value !== null; +} + +function isNodeError(error: unknown): error is NodeJS.ErrnoException { + return error instanceof Error && 'code' in error; +} diff --git a/src/.pi/extensions/introspection/index.ts b/src/.pi/extensions/introspection/index.ts index 39fade61..6d1ad947 100644 --- a/src/.pi/extensions/introspection/index.ts +++ b/src/.pi/extensions/introspection/index.ts @@ -1,5 +1,11 @@ import type { ExtensionAPI, ExtensionCommandContext } from '@earendil-works/pi-coding-agent'; +import { + appendToolContentToDebugCache, + mirrorSystemPromptToDebugCache, + type BrunchDebugCacheOptions, +} from './debug-cache.js'; + export const BRUNCH_INTROSPECTION_COMMAND = 'introspect'; export interface BrunchIntrospectionTurnCapture { @@ -29,6 +35,7 @@ export interface BrunchIntrospectionStore { export interface BrunchIntrospectionOptions { readonly store?: BrunchIntrospectionStore; readonly clock?: () => Date; + readonly debugCache?: BrunchDebugCacheOptions; } export interface InMemoryBrunchIntrospectionStore extends BrunchIntrospectionStore { @@ -87,13 +94,20 @@ export function registerBrunchIntrospection( nextTurnOrdinal += 1; }); - pi.on('before_provider_request', (event) => { + pi.on('before_provider_request', async (event) => { + const payload = isRecord(event) && 'payload' in event ? event.payload : undefined; store.recordPassiveCapture({ turnId: activeTurnId, capturedAt: now(), event: 'before_provider_request', - payload: isRecord(event) && 'payload' in event ? 
event.payload : undefined, + payload, }); + if (options.debugCache) await mirrorSystemPromptToDebugCache(options.debugCache, payload); + return undefined; + }); + + pi.on('tool_result', async (event) => { + if (options.debugCache) await appendToolContentToDebugCache(options.debugCache, event); return undefined; }); diff --git a/src/.pi/extensions/session/lifecycle.test.ts b/src/.pi/extensions/session/lifecycle.test.ts new file mode 100644 index 00000000..de1b5845 --- /dev/null +++ b/src/.pi/extensions/session/lifecycle.test.ts @@ -0,0 +1,68 @@ +import { describe, expect, it } from 'vitest'; + +import { + bindBrunchSessionBoundary, + registerBrunchSessionBoundary, + runBrunchSessionBoundaryPipeline, + type BrunchSessionBoundaryPipelineStep, +} from './lifecycle.js'; + +describe('Brunch session-boundary lifecycle', () => { + it('runs workspace rebinding and continuity steps through one ordered boundary pipeline', async () => { + const events: string[] = []; + const sessionManager = { id: 'session-manager' }; + const continuity: BrunchSessionBoundaryPipelineStep = ({ phase, sessionManager }) => { + events.push(`continuity:${phase}:${(sessionManager as unknown as { id: string }).id}`); + }; + + await runBrunchSessionBoundaryPipeline(sessionManager as never, { + phase: 'before_agent_start', + refreshWorkspaceBinding: async (manager) => { + events.push(`binding:${(manager as unknown as { id: string }).id}`); + }, + continuitySteps: [continuity], + }); + + expect(events).toEqual(['binding:session-manager', 'continuity:before_agent_start:session-manager']); + }); + + it('preserves the previous bindBrunchSessionBoundary workspace-refresh behavior', async () => { + const events: string[] = []; + const sessionManager = { id: 'legacy-manager' }; + + await bindBrunchSessionBoundary(sessionManager as never, async (manager) => { + events.push((manager as unknown as { id: string }).id); + }); + + expect(events).toEqual(['legacy-manager']); + }); + + it('registers session_start, 
before_agent_start, and assistant message boundaries onto the same pipeline', async () => { + const handlers = new Map Promise | void>>(); + const phases: string[] = []; + const sessionManager = { id: 'registered-manager' }; + + registerBrunchSessionBoundary( + { + on(event: string, handler: (event: any, ctx: any) => Promise | void) { + handlers.set(event, [...(handlers.get(event) ?? []), handler]); + }, + } as never, + async () => {}, + { + continuitySteps: [ + ({ phase }) => { + phases.push(phase); + }, + ], + }, + ); + + await handlers.get('session_start')?.[0]?.({}, { sessionManager }); + await handlers.get('before_agent_start')?.[0]?.({}, { sessionManager }); + await handlers.get('message_start')?.[0]?.({ message: { role: 'user' } }, { sessionManager }); + await handlers.get('message_start')?.[0]?.({ message: { role: 'assistant' } }, { sessionManager }); + + expect(phases).toEqual(['session_start', 'before_agent_start', 'assistant_message_start']); + }); +}); diff --git a/src/.pi/extensions/session/lifecycle.ts b/src/.pi/extensions/session/lifecycle.ts index b264ea70..4fddae3f 100644 --- a/src/.pi/extensions/session/lifecycle.ts +++ b/src/.pi/extensions/session/lifecycle.ts @@ -1,24 +1,60 @@ import { SessionManager, type ExtensionAPI } from '@earendil-works/pi-coding-agent'; +export type BrunchSessionBoundaryPhase = 'session_start' | 'before_agent_start' | 'assistant_message_start'; + +export interface BrunchSessionBoundaryPipelineContext { + readonly sessionManager: SessionManager; + readonly phase: BrunchSessionBoundaryPhase; +} + export type BrunchSessionBoundaryHandler = (sessionManager: SessionManager) => Promise | void; +export type BrunchSessionBoundaryPipelineStep = ( + context: BrunchSessionBoundaryPipelineContext, +) => Promise | void; + +export interface BrunchSessionBoundaryPipelineOptions { + readonly phase?: BrunchSessionBoundaryPhase; + readonly refreshWorkspaceBinding?: BrunchSessionBoundaryHandler | undefined; + readonly continuitySteps?: 
readonly BrunchSessionBoundaryPipelineStep[]; +} + +export async function runBrunchSessionBoundaryPipeline( + sessionManager: SessionManager, + options: BrunchSessionBoundaryPipelineOptions = {}, +): Promise { + await options.refreshWorkspaceBinding?.(sessionManager); + const context = { sessionManager, phase: options.phase ?? 'before_agent_start' }; + for (const step of options.continuitySteps ?? []) { + await step(context); + } +} export async function bindBrunchSessionBoundary( sessionManager: SessionManager, onSessionBoundary?: BrunchSessionBoundaryHandler, ): Promise { - await onSessionBoundary?.(sessionManager); + await runBrunchSessionBoundaryPipeline(sessionManager, { refreshWorkspaceBinding: onSessionBoundary }); } export function registerBrunchSessionBoundaryRefreshHandlers( pi: ExtensionAPI, onSessionBoundary?: BrunchSessionBoundaryHandler, + options: Omit = {}, ): void { pi.on('before_agent_start', async (_event, ctx) => { - await bindBrunchSessionBoundary(ctx.sessionManager as SessionManager, onSessionBoundary); + await runBrunchSessionBoundaryPipeline(ctx.sessionManager as SessionManager, { + ...options, + phase: 'before_agent_start', + refreshWorkspaceBinding: onSessionBoundary, + }); }); pi.on('message_start', async (event, ctx) => { if (event.message.role === 'assistant') { - await bindBrunchSessionBoundary(ctx.sessionManager as SessionManager, onSessionBoundary); + await runBrunchSessionBoundaryPipeline(ctx.sessionManager as SessionManager, { + ...options, + phase: 'assistant_message_start', + refreshWorkspaceBinding: onSessionBoundary, + }); } }); } @@ -26,9 +62,14 @@ export function registerBrunchSessionBoundaryRefreshHandlers( export function registerBrunchSessionBoundary( pi: ExtensionAPI, onSessionBoundary?: BrunchSessionBoundaryHandler, + options: Omit = {}, ): void { pi.on('session_start', async (_event, ctx) => { - await bindBrunchSessionBoundary(ctx.sessionManager as SessionManager, onSessionBoundary); + await 
runBrunchSessionBoundaryPipeline(ctx.sessionManager as SessionManager, { + ...options, + phase: 'session_start', + refreshWorkspaceBinding: onSessionBoundary, + }); }); - registerBrunchSessionBoundaryRefreshHandlers(pi, onSessionBoundary); + registerBrunchSessionBoundaryRefreshHandlers(pi, onSessionBoundary, options); } diff --git a/src/app/brunch-tui.test.ts b/src/app/brunch-tui.test.ts index 060a5ac3..3a0db0d9 100644 --- a/src/app/brunch-tui.test.ts +++ b/src/app/brunch-tui.test.ts @@ -3,13 +3,11 @@ import { tmpdir } from 'node:os'; import { basename, join } from 'node:path'; import { - createAgentSessionRuntime, SessionManager, type ExtensionCommandContext, type ExtensionContext, type ExtensionUIContext, type RegisteredCommand, - type ToolDefinition, } from '@earendil-works/pi-coding-agent'; import { describe, expect, it } from 'vitest'; @@ -79,50 +77,6 @@ describe('Brunch TUI boot', () => { } }); - it('boots the real runtime seam with ready context and BRUNCH_DEV-gated query tools', async () => { - const productBoot = await bootRuntimeThroughRunBrunchTui({ dev: false }); - try { - expect(productBoot.runtime.session.sessionManager.getHeader()).toMatchObject({ - cwd: productBoot.cwd, - id: expect.any(String), - type: 'session', - }); - await expect(readSessionContextDetails(productBoot.runtime.session)).resolves.toMatchObject({ - status: 'ready', - specId: expect.any(Number), - }); - await expect(readWorkspaceContextMarkdownFiles(productBoot.runtime.session)).resolves.toContain( - 'boot-seam.md', - ); - expect(productBoot.runtime.session.getAllTools().map((tool) => tool.name)).not.toEqual( - expect.arrayContaining(['brunch_session_query', 'brunch_introspect_query']), - ); - expect(productBoot.runtime.session.getActiveToolNames()).not.toEqual( - expect.arrayContaining(['brunch_session_query', 'brunch_introspect_query']), - ); - } finally { - await productBoot.runtime.dispose(); - productBoot.restoreEnv(); - } - - const devBoot = await 
bootRuntimeThroughRunBrunchTui({ dev: true }); - try { - expect(devBoot.runtime.session.sessionManager.getHeader()).toMatchObject({ cwd: devBoot.cwd }); - await expect(readSessionContextDetails(devBoot.runtime.session)).resolves.toMatchObject({ - status: 'ready', - }); - expect(devBoot.runtime.session.getAllTools().map((tool) => tool.name)).toEqual( - expect.arrayContaining(['brunch_session_query', 'brunch_introspect_query']), - ); - expect(devBoot.runtime.session.getActiveToolNames()).toEqual( - expect.arrayContaining(['brunch_session_query', 'brunch_introspect_query']), - ); - } finally { - await devBoot.runtime.dispose(); - devBoot.restoreEnv(); - } - }); - it('registers graph tools on the default product runtime path', async () => { const cwd = await mkdtemp(join(tmpdir(), 'brunch-tui-graph-runtime-')); const agentDir = await mkdtemp(join(tmpdir(), 'brunch-agent-dir-')); @@ -386,6 +340,7 @@ describe('Brunch TUI boot', () => { launchInteractive: async ({ dev }) => { observed.push(dev?.introspection.enabled); expect(dev?.introspection.store).toBeDefined(); + expect(dev?.introspection.debugCache).toEqual({ cwd: '/tmp/project' }); }, }); @@ -450,7 +405,7 @@ describe('Brunch TUI boot', () => { } as never); expect(commands.at(-1)).toBe(BRUNCH_INTROSPECTION_COMMAND); - expect(events.at(-1)).toBe('before_provider_request'); + expect(events).toEqual(expect.arrayContaining(['before_provider_request', 'tool_result'])); }); it('scopes Pi startup update suppression and restores update-check env in finally', async () => { @@ -1265,6 +1220,7 @@ describe('Brunch TUI boot', () => { const ctx: FakeExtensionContext = { sessionManager: { getEntries: () => [], + appendCustomEntry: (_customType: string, _data: unknown) => {}, } as unknown as FakeExtensionContext['sessionManager'], ui: { setHeader: (_factory) => {}, @@ -1327,6 +1283,7 @@ describe('Brunch TUI boot', () => { const ctx: FakeExtensionContext = { sessionManager: { getEntries: () => [], + appendCustomEntry: (_customType: 
string, _data: unknown) => {}, } as unknown as FakeExtensionContext['sessionManager'], ui: { setHeader: (_factory) => {}, @@ -1583,83 +1540,6 @@ describe('Brunch TUI boot', () => { }); }); -async function bootRuntimeThroughRunBrunchTui(options: { dev: boolean }) { - const cwd = await mkdtemp(join(tmpdir(), `brunch-boot-seam-${options.dev ? 'dev' : 'prod'}-`)); - const agentDir = await mkdtemp(join(tmpdir(), 'brunch-agent-dir-')); - await writeFile(join(cwd, 'boot-seam.md'), '# Boot seam\n'); - - const previousDev = process.env.BRUNCH_DEV; - const hadPreviousDev = Object.hasOwn(process.env, 'BRUNCH_DEV'); - if (options.dev) { - process.env.BRUNCH_DEV = '1'; - } else { - delete process.env.BRUNCH_DEV; - } - - const restoreEnv = () => { - if (hadPreviousDev && previousDev !== undefined) { - process.env.BRUNCH_DEV = previousDev; - } else { - delete process.env.BRUNCH_DEV; - } - }; - - let runtime: Awaited> | undefined; - try { - await runBrunchTui({ - cwd, - autoOpen: false, - runWorkspaceDialogPreflight: async () => ({ action: 'newSpec', title: 'Boot seam smoke' }), - webSidecarRunner: async () => null, - launchInteractive: async (context) => { - runtime = await createAgentSessionRuntime(createBrunchAgentSessionRuntimeFactory(context), { - cwd, - agentDir, - sessionManager: context.workspace.session.manager, - }); - }, - }); - } catch (error) { - restoreEnv(); - throw error; - } - - if (!runtime) { - restoreEnv(); - throw new Error('runBrunchTui did not reach launchInteractive'); - } - - return { cwd, runtime, restoreEnv }; -} - -async function readSessionContextDetails(session: { - getToolDefinition(name: string): ToolDefinition | undefined; - sessionManager: unknown; -}) { - const tool = session.getToolDefinition('read_session_context'); - if (!tool) throw new Error('read_session_context tool is not registered'); - const result = await tool.execute('boot-session-context', {}, undefined, undefined, { - sessionManager: session.sessionManager, - } as never); - return 
result.details; -} - -async function readWorkspaceContextMarkdownFiles(session: { - getToolDefinition(name: string): ToolDefinition | undefined; - sessionManager: unknown; -}): Promise { - const tool = session.getToolDefinition('read_workspace_context'); - if (!tool) throw new Error('read_workspace_context tool is not registered'); - const result = (await tool.execute( - 'boot-workspace-context', - { mode: 'cwd_inventory' }, - undefined, - undefined, - { sessionManager: session.sessionManager } as never, - )) as { details: { data: { markdownFiles: Array<{ path: string }> } } }; - return result.details.data.markdownFiles.map((file) => file.path); -} - async function writeHostilePiSettings(cwd: string, agentDir: string): Promise { const hostileSettings = { lastChangelogVersion: '999.0.0-hostile', diff --git a/src/app/brunch-tui.ts b/src/app/brunch-tui.ts index f6d4430f..1ef55da0 100644 --- a/src/app/brunch-tui.ts +++ b/src/app/brunch-tui.ts @@ -79,6 +79,7 @@ export interface BrunchTuiDevOptions { readonly introspection: { readonly enabled: true; readonly store: BrunchIntrospectionStore; + readonly debugCache: { readonly cwd: string }; }; } @@ -104,7 +105,7 @@ export async function runBrunchTui(options: BrunchTuiOptions = {}): Promise/`, independent of the workspace cwd it targets: - `mechanical.json` — latest passive provider-payload capture plus optional `/introspect` base-prompt report - `subjective.json` — assistant answer text from the subjective prompt - `manifest.json` — paired summary keyed by the same captured turn id -The `/introspect` command reports `ctx.getSystemPromptOptions()` base inputs plus the latest passive capture; it deliberately does not claim to reconstruct exact model input. Exactness belongs to the passive provider-payload tap registered last in the Brunch extension bundle. 
+The `/introspect` command reports `ctx.getSystemPromptOptions()` base inputs plus the latest passive capture; it deliberately does not claim to reconstruct exact model input. Exactness belongs to the passive provider-payload tap registered last in the Brunch extension bundle. In `BRUNCH_DEV` real TUI launches, that same passive capture mirrors the latest final system prompt bytes into `.brunch/debug/system-prompt.md`; Brunch-owned text `tool_result` content appends to `.brunch/debug/tool-contents.md`. This is an ephemeral workspace debug cache, separate from repo-root `.fixtures/scratch/` evidence, and does not attempt `renderResult()` flattening. diff --git a/src/dev/faux-harness.test.ts b/src/dev/faux-harness.test.ts index d5f49754..9c63d5a9 100644 --- a/src/dev/faux-harness.test.ts +++ b/src/dev/faux-harness.test.ts @@ -1,6 +1,19 @@ +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + import { fauxAssistantMessage } from '@earendil-works/pi-ai'; +import { DefaultResourceLoader } from '@earendil-works/pi-coding-agent'; import { describe, expect, it } from 'vitest'; +import { + BRUNCH_INTROSPECT_QUERY_TOOL, + BRUNCH_SESSION_QUERY_TOOL, + chromeStateForWorkspace, + createBrunchPiExtensions, + createInMemoryBrunchIntrospectionStore, +} from '../.pi/brunch-pi-extensions.js'; +import { createBrunchPiSettings } from '../.pi/brunch-pi-settings.js'; import { BRUNCH_FAUX_HARNESS_API_KEY, BRUNCH_FAUX_HARNESS_ENV_API_KEY, @@ -23,6 +36,7 @@ describe('createBrunchFauxHarness', () => { expect(harness.session.sessionFile).toBeUndefined(); expect(harness.session.getActiveToolNames()).toEqual([]); expect(harness.provider.getPendingResponseCount()).toBe(1); + expect(harness.providerContexts).toEqual([]); expect(process.env.BRUNCH_FAUX_HARNESS_API_KEY).toBeUndefined(); } finally { harness.dispose(); @@ -34,6 +48,119 @@ describe('createBrunchFauxHarness', () => { } }); + it('captures provider contexts and active 
tools as a Tier-1 faux-session oracle', async () => { + const harness = await createBrunchFauxHarness({ + responses: [fauxAssistantMessage('captured')], + customTools: [ + { + name: 'probe_tool', + label: 'Probe tool', + description: 'Probe tool', + parameters: { type: 'object', properties: {}, additionalProperties: false }, + execute: async () => ({ + content: [{ type: 'text', text: 'ok' }], + details: { ok: true }, + isError: false, + }), + }, + ], + }); + + try { + await harness.session.prompt('capture this', { expandPromptTemplates: false, source: 'rpc' }); + + expect(harness.providerContexts).toHaveLength(1); + expect(harness.providerContexts[0]?.activeToolNames).toEqual(['probe_tool']); + expect(JSON.stringify(harness.providerContexts[0]?.messages)).toContain('capture this'); + } finally { + harness.dispose(); + } + }); + + it('captures Brunch-composed provider payloads through the Tier-1 faux-session seam', async () => { + const cwd = await mkdtemp(join(tmpdir(), 'brunch-tier-1-faux-')); + const agentDir = await mkdtemp(join(tmpdir(), 'brunch-tier-1-agent-')); + const store = createInMemoryBrunchIntrospectionStore(); + const profile = createBrunchPiSettings({ + cwd, + agentDir, + extensionFactories: [ + createBrunchPiExtensions( + chromeStateForWorkspace( + { + status: 'ready', + cwd, + spec: { id: 1, title: 'Tier-1 faux spec' }, + session: { id: 'session-1', file: join(cwd, 'session.jsonl'), manager: {} as never }, + chrome: { + cwd, + spec: { id: 1, title: 'Tier-1 faux spec' }, + phase: 'elicitation', + chatMode: 'responding-to-elicitation', + }, + }, + {}, + ), + undefined, + { + coordinator: {} as never, + graphMentionSource: { listMentionCandidates: () => [] }, + promptContext: () => ({ + spec: { id: 1, name: 'Tier-1 faux spec', readinessGrade: 'commitments_ready' }, + workspace: { cwd }, + session: { id: 'session-1', label: 'Tier-1 session' }, + }), + introspection: { enabled: true, store }, + }, + ), + ], + }); + const resourceLoader = new 
DefaultResourceLoader({ + cwd, + agentDir, + settingsManager: profile.settingsManager, + ...profile.resourceLoaderOptions, + }); + await resourceLoader.reload(); + const harness = await createBrunchFauxHarness({ + cwd, + responses: [fauxAssistantMessage('captured brunch payload')], + resourceLoader, + settingsManager: profile.settingsManager, + }); + + try { + await harness.session.prompt('capture the real Brunch payload', { + expandPromptTemplates: false, + source: 'rpc', + }); + + const systemPrompt = harness.providerContexts[0]?.systemPrompt; + const activeToolsLine = systemPrompt?.split('\n').find((line) => line.startsWith('- active tools:')); + expect(harness.providerContexts).toHaveLength(1); + expect(systemPrompt).toContain('[Brunch agent control]'); + expect(systemPrompt).toContain(''); + expect(activeToolsLine).toContain('read'); + expect(activeToolsLine).toContain('grep'); + expect(activeToolsLine).toContain('find'); + expect(activeToolsLine).toContain('ls'); + expect(activeToolsLine).toContain(BRUNCH_SESSION_QUERY_TOOL); + expect(activeToolsLine).toContain(BRUNCH_INTROSPECT_QUERY_TOOL); + expect(activeToolsLine).not.toContain('bash'); + expect(activeToolsLine).not.toContain('edit'); + expect(activeToolsLine).not.toContain('write'); + expect(JSON.stringify(harness.providerContexts[0]?.messages)).toContain( + 'capture the real Brunch payload', + ); + } finally { + harness.dispose(); + await Promise.all([ + rm(cwd, { recursive: true, force: true }), + rm(agentDir, { recursive: true, force: true }), + ]); + } + }); + it('uses the literal dev key for the in-process provider config by default', () => { expect(brunchFauxProviderConfig(defaultBrunchFauxModel()).apiKey).toBe(BRUNCH_FAUX_HARNESS_API_KEY); }); diff --git a/src/dev/faux-harness.ts b/src/dev/faux-harness.ts index 79609bf7..a8a292c0 100644 --- a/src/dev/faux-harness.ts +++ b/src/dev/faux-harness.ts @@ -1,12 +1,17 @@ import { registerFauxProvider, + type AssistantMessage, + type Context, type 
FauxProviderRegistration, + type FauxResponseFactory, type FauxResponseStep, + type StreamOptions, } from '@earendil-works/pi-ai'; import { AuthStorage, createAgentSession, ModelRegistry, + type ResourceLoader, SessionManager, SettingsManager, type AgentSession, @@ -33,12 +38,22 @@ export interface BrunchFauxHarnessOptions { readonly responses?: readonly FauxResponseStep[]; readonly model?: Partial; readonly customTools?: readonly ToolDefinition[]; + readonly resourceLoader?: ResourceLoader; + readonly settingsManager?: SettingsManager; +} + +export interface ProviderContextSnapshot { + readonly systemPrompt?: string; + readonly messages: Context['messages']; + readonly tools: NonNullable; + readonly activeToolNames: readonly string[]; } export interface BrunchFauxHarness { readonly session: AgentSession; readonly provider: FauxProviderRegistration; readonly model: BrunchFauxModelOptions; + readonly providerContexts: readonly ProviderContextSnapshot[]; dispose(): void; } @@ -51,7 +66,10 @@ export async function createBrunchFauxHarness( api: `${model.api}-faux-source`, models: [{ id: model.modelId, name: model.modelName, input: ['text'] }], }); - provider.setResponses([...(options.responses ?? [])]); + const providerContexts: ProviderContextSnapshot[] = []; + provider.setResponses( + (options.responses ?? []).map((response) => captureFauxResponse(response, providerContexts)), + ); const authStorage = AuthStorage.inMemory({ [model.provider]: { type: 'api_key', key: BRUNCH_FAUX_HARNESS_API_KEY }, @@ -70,20 +88,46 @@ export async function createBrunchFauxHarness( authStorage, modelRegistry, model: registeredModel, + ...(options.resourceLoader ? { resourceLoader: options.resourceLoader } : {}), sessionManager: SessionManager.inMemory(options.cwd), - settingsManager: SettingsManager.inMemory({ quietStartup: true }), + settingsManager: options.settingsManager ?? SettingsManager.inMemory({ quietStartup: true }), ...(options.customTools?.length ? 
{ tools: options.customTools.map((tool) => tool.name), customTools: [...options.customTools] } - : { noTools: 'all' as const }), + : options.resourceLoader + ? {} + : { noTools: 'all' as const }), }); return { session, provider, model, + providerContexts, dispose() { session.dispose(); provider.unregister(); }, }; } + +function captureFauxResponse( + response: FauxResponseStep, + providerContexts: ProviderContextSnapshot[], +): FauxResponseFactory { + return async (context: Context, options: StreamOptions | undefined, state, model) => { + providerContexts.push(snapshotProviderContext(context)); + return typeof response === 'function' + ? response(context, options, state, model) + : (response as AssistantMessage); + }; +} + +export function snapshotProviderContext(context: Context): ProviderContextSnapshot { + const tools = [...(context.tools ?? [])]; + return { + ...(context.systemPrompt === undefined ? {} : { systemPrompt: context.systemPrompt }), + messages: [...context.messages], + tools, + activeToolNames: tools.map((tool) => tool.name), + }; +} diff --git a/src/dev/index.ts b/src/dev/index.ts index 696f8a17..ef52679c 100644 --- a/src/dev/index.ts +++ b/src/dev/index.ts @@ -21,4 +21,9 @@ export { type BrunchIntrospectionRunArtifact, type BrunchIntrospectionSession, } from './introspection-launcher.js'; +export { + resumeTier2Fixture, + runTier2RealBootFauxTurn, + type Tier2RealBootTurnResult, +} from './tier-2-harness.js'; export * as workspaceRpc from './workspace-rpc.js'; diff --git a/src/dev/tier-2-harness.test.ts b/src/dev/tier-2-harness.test.ts new file mode 100644 index 00000000..70de8a8c --- /dev/null +++ b/src/dev/tier-2-harness.test.ts @@ -0,0 +1,153 @@ +import { type ToolDefinition } from '@earendil-works/pi-coding-agent'; +import { describe, expect, it } from 'vitest'; + +import { assistantMessage, userMessage } from '../probes/test-helpers.js'; +import { + bootTier2RuntimeThroughRunBrunchTui, + resumeTier2Fixture, + runTier2RealBootFauxTurn, +} from 
'./tier-2-harness.js'; + +describe('FE-847 Tier-2 real boot harness', () => { + it('owns real runtime boot proof for ready context and BRUNCH_DEV-gated query tools', async () => { + const productBoot = await bootTier2RuntimeThroughRunBrunchTui({ dev: false }); + try { + expect(productBoot.runtime.session.sessionManager.getHeader()).toMatchObject({ + cwd: productBoot.cwd, + id: expect.any(String), + type: 'session', + }); + await expect(readSessionContextDetails(productBoot.runtime.session)).resolves.toMatchObject({ + status: 'ready', + specId: expect.any(Number), + }); + await expect(readWorkspaceContextMarkdownFiles(productBoot.runtime.session)).resolves.toContain( + 'boot-seam.md', + ); + expect(productBoot.runtime.session.getAllTools().map((tool) => tool.name)).not.toEqual( + expect.arrayContaining(['brunch_session_query', 'brunch_introspect_query']), + ); + expect(productBoot.runtime.session.getActiveToolNames()).not.toEqual( + expect.arrayContaining(['brunch_session_query', 'brunch_introspect_query']), + ); + } finally { + await productBoot.runtime.dispose(); + productBoot.restoreEnv(); + } + + const devBoot = await bootTier2RuntimeThroughRunBrunchTui({ dev: true }); + try { + expect(devBoot.runtime.session.sessionManager.getHeader()).toMatchObject({ cwd: devBoot.cwd }); + await expect(readSessionContextDetails(devBoot.runtime.session)).resolves.toMatchObject({ + status: 'ready', + }); + expect(devBoot.runtime.session.getAllTools().map((tool) => tool.name)).toEqual( + expect.arrayContaining(['brunch_session_query', 'brunch_introspect_query']), + ); + expect(devBoot.runtime.session.getActiveToolNames()).toEqual( + expect.arrayContaining(['brunch_session_query', 'brunch_introspect_query']), + ); + } finally { + await devBoot.runtime.dispose(); + devBoot.restoreEnv(); + } + }); + + it('boots runBrunchTui, drives one faux-provider turn, captures payload, and inspects transcript entries', async () => { + const result = await runTier2RealBootFauxTurn({ + prompt: 
'Tier-2 oracle prompt', + responseText: 'Tier-2 oracle response', + }); + + expect(result.providerPayload).toBeDefined(); + expect(result.providerContexts).toHaveLength(1); + expect(result.activeToolNames).toEqual(result.providerPayload?.activeToolNames); + expect(result.assistantText).toBe('Tier-2 oracle response'); + expect(result.transcriptEntries).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + type: 'message', + message: expect.objectContaining({ role: 'user' }), + }), + expect.objectContaining({ + type: 'message', + message: expect.objectContaining({ role: 'assistant' }), + }), + ]), + ); + expect(JSON.stringify(result.transcriptEntries)).toContain('Tier-2 oracle prompt'); + expect(result.renderedTranscript).toContain('Tier-2 oracle response'); + }); + + it('resumes from a fixture transcript and exposes transcript state', async () => { + const fixtureJsonl = [ + JSON.stringify(userMessage('Fixture question')), + JSON.stringify(assistantMessage('Fixture answer')), + ].join('\n'); + + const result = await resumeTier2Fixture({ fixtureJsonl }); + + expect(result.resumedSameSessionFile).toBe(true); + expect(result.originalSessionFile).toBe(result.sessionFile); + expect(result.transcriptEntries).toEqual( + expect.arrayContaining([ + expect.objectContaining({ type: 'message', message: expect.objectContaining({ role: 'user' }) }), + expect.objectContaining({ type: 'message', message: expect.objectContaining({ role: 'assistant' }) }), + ]), + ); + expect(JSON.stringify(result.transcriptEntries)).toContain('Fixture question'); + }); +}); + +describe.skip('FE-847 coverage-first scaffold — I45-L assistant-visible watermark', () => { + it('seed and full-overview snapshots advance the watermark while narrow getNodes/queryNodes reads do not'); + it( + 'worldUpdate emits only the strict-greater set when current_lsn exceeds the assistant-visible watermark', + ); + it('bare LSNs are never compared across specs; watermark comparisons use {specId, lsn}'); + 
it('a foreign write between snapshot read and seed insertion is not masked by the seed'); + it('same-session capture is surfaced by the next worldUpdate rather than swallowed as already visible'); +}); + +describe.skip('FE-847 coverage-first scaffold — I46-L honest origination', () => { + it('a new session seeds context and kicks an assistant-originated turn with no fabricated user entry'); + it('resume kick uses the pre-reconcile tail so a user tail still earns a kick after continuity notices'); + it('request_* and system leaves stay idle on resume'); + it('crash-after-notice-before-provider still kicks when the underlying debt is unanswered'); + it('trailing side-task or reviewer drains are continuity-only and do not manufacture or mask debt'); +}); + +describe.skip('FE-847 coverage-first scaffold — I47-L carrier discipline and idempotence', () => { + it('no redundant worldUpdate is emitted immediately after a seed naming the current snapshot LSN'); + it('compaction and resume preserve the latest watermark carrier so projection cannot regress'); + it('boot/resume seeding derives dedupe from transcript projection rather than hidden flags'); + it('continuity assertions use sets and {specId, lsn} properties rather than payload-order goldens'); +}); + +async function readSessionContextDetails(session: { + getToolDefinition(name: string): ToolDefinition | undefined; + sessionManager: unknown; +}) { + const tool = session.getToolDefinition('read_session_context'); + if (!tool) throw new Error('read_session_context tool is not registered'); + const result = await tool.execute('boot-session-context', {}, undefined, undefined, { + sessionManager: session.sessionManager, + } as never); + return result.details; +} + +async function readWorkspaceContextMarkdownFiles(session: { + getToolDefinition(name: string): ToolDefinition | undefined; + sessionManager: unknown; +}): Promise { + const tool = session.getToolDefinition('read_workspace_context'); + if (!tool) throw new 
Error('read_workspace_context tool is not registered'); + const result = (await tool.execute( + 'boot-workspace-context', + { mode: 'cwd_inventory' }, + undefined, + undefined, + { sessionManager: session.sessionManager } as never, + )) as { details: { data: { markdownFiles: Array<{ path: string }> } } }; + return result.details.data.markdownFiles.map((file) => file.path); +} diff --git a/src/dev/tier-2-harness.ts b/src/dev/tier-2-harness.ts new file mode 100644 index 00000000..f1873e3c --- /dev/null +++ b/src/dev/tier-2-harness.ts @@ -0,0 +1,194 @@ +import { mkdtemp, readFile, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; + +import { fauxAssistantMessage, type Context } from '@earendil-works/pi-ai'; +import { createAgentSessionRuntime } from '@earendil-works/pi-coding-agent'; + +import { createBrunchAgentSessionRuntimeFactory, runBrunchTui } from '../app/brunch-tui.js'; +import { renderSessionTranscriptFile } from '../session/session-transcript.js'; +import { createWorkspaceSessionCoordinator } from '../session/workspace-session-coordinator.js'; +import { latestAssistantText } from './agent-messages.js'; +import { + createBrunchFauxHarness, + snapshotProviderContext, + type ProviderContextSnapshot, +} from './faux-harness.js'; + +export interface Tier2RealBootTurnResult { + readonly cwd: string; + readonly sessionFile: string; + readonly prompt: string; + readonly assistantText: string; + readonly providerPayload: ProviderContextSnapshot | undefined; + readonly providerContexts: readonly ProviderContextSnapshot[]; + readonly activeToolNames: readonly string[]; + readonly transcriptEntries: readonly unknown[]; + readonly renderedTranscript: string; +} + +export async function runTier2RealBootFauxTurn( + options: { + readonly cwd?: string; + readonly specTitle?: string; + readonly prompt?: string; + readonly responseText?: string; + } = {}, +): Promise { + const cwd = options.cwd ?? 
(await mkdtemp(join(tmpdir(), 'brunch-tier-2-'))); + const specTitle = options.specTitle ?? 'Tier 2 scaffold spec'; + const prompt = options.prompt ?? 'Run the FE-847 Tier-2 faux turn.'; + const responseText = options.responseText ?? 'FE-847 faux turn complete.'; + const coordinator = createWorkspaceSessionCoordinator({ cwd }); + let sessionFile: string | undefined; + let providerPayload: ProviderContextSnapshot | undefined; + let assistantText = ''; + + await runBrunchTui({ + cwd, + autoOpen: false, + coordinator, + selectSpecTitle: async () => specTitle, + webSidecarRunner: async () => null, + launchInteractive: async ({ workspace }) => { + sessionFile = workspace.session.file; + const harness = await createBrunchFauxHarness({ + cwd, + responses: [ + (context: Context) => { + providerPayload = snapshotProviderContext(context); + return fauxAssistantMessage(responseText); + }, + ], + }); + try { + await harness.session.prompt(prompt, { expandPromptTemplates: false, source: 'rpc' }); + assistantText = latestAssistantText(harness.session.messages); + for (const message of harness.session.messages) { + workspace.session.manager.appendMessage(message as never); + } + flushSessionEntries(workspace.session.manager, workspace.session.file); + } finally { + harness.dispose(); + } + }, + }); + + if (!sessionFile) throw new Error('Tier-2 real boot did not activate a session file.'); + const transcriptEntries = parseJsonl(await readFile(sessionFile, 'utf8')); + return { + cwd, + sessionFile, + prompt, + assistantText, + providerPayload, + providerContexts: providerPayload === undefined ? [] : [providerPayload], + activeToolNames: providerPayload?.activeToolNames ?? [], + transcriptEntries, + renderedTranscript: await renderSessionTranscriptFile(sessionFile), + }; +} + +export async function bootTier2RuntimeThroughRunBrunchTui(options: { readonly dev: boolean }) { + const cwd = await mkdtemp(join(tmpdir(), `brunch-boot-seam-${options.dev ? 
'dev' : 'prod'}-`)); + const agentDir = await mkdtemp(join(tmpdir(), 'brunch-agent-dir-')); + await writeFile(join(cwd, 'boot-seam.md'), '# Boot seam\n'); + + const previousDev = process.env.BRUNCH_DEV; + const hadPreviousDev = Object.hasOwn(process.env, 'BRUNCH_DEV'); + if (options.dev) { + process.env.BRUNCH_DEV = '1'; + } else { + delete process.env.BRUNCH_DEV; + } + + const restoreEnv = () => { + if (hadPreviousDev && previousDev !== undefined) { + process.env.BRUNCH_DEV = previousDev; + } else { + delete process.env.BRUNCH_DEV; + } + }; + + let runtime: Awaited<ReturnType<typeof createAgentSessionRuntime>> | undefined; + try { + await runBrunchTui({ + cwd, + autoOpen: false, + runWorkspaceDialogPreflight: async () => ({ action: 'newSpec', title: 'Boot seam smoke' }), + webSidecarRunner: async () => null, + launchInteractive: async (context) => { + runtime = await createAgentSessionRuntime(createBrunchAgentSessionRuntimeFactory(context), { + cwd, + agentDir, + sessionManager: context.workspace.session.manager, + }); + }, + }); + } catch (error) { + restoreEnv(); + throw error; + } + + if (!runtime) { + restoreEnv(); + throw new Error('runBrunchTui did not reach launchInteractive'); + } + + return { cwd, runtime, restoreEnv }; +} + +export async function resumeTier2Fixture(options: { + readonly cwd?: string; + readonly fixtureJsonl: string; + readonly specTitle?: string; +}): Promise<{ + readonly cwd: string; + readonly originalSessionFile: string; + readonly sessionFile: string; + readonly resumedSameSessionFile: boolean; + readonly transcriptEntries: readonly unknown[]; +}> { + const cwd = options.cwd ?? (await mkdtemp(join(tmpdir(), 'brunch-tier-2-resume-'))); + const coordinator = createWorkspaceSessionCoordinator({ cwd }); + const workspace = await coordinator.createSetupSession({ + specTitle: options.specTitle ?? 
'Tier 2 fixture spec', + createNewSpec: true, + }); + for (const entry of parseJsonl(options.fixtureJsonl)) { + workspace.session.manager.appendMessage(entry as never); + } + flushSessionEntries(workspace.session.manager, workspace.session.file); + const resumed = await coordinator.activateWorkspace({ + action: 'openSession', + specId: workspace.spec.id, + sessionFile: workspace.session.file, + }); + if (resumed.status !== 'ready') throw new Error('Tier-2 fixture resume did not return a ready session.'); + return { + cwd, + originalSessionFile: workspace.session.file, + sessionFile: resumed.session.file, + resumedSameSessionFile: resumed.session.file === workspace.session.file, + transcriptEntries: parseJsonl(await readFile(resumed.session.file, 'utf8')), + }; +} + +function parseJsonl(jsonl: string): readonly unknown[] { + return jsonl + .split('\n') + .map((line) => line.trim()) + .filter(Boolean) + .map((line) => JSON.parse(line) as unknown); +} + +interface FlushableSessionManager { + _rewriteFile(): void; + setSessionFile(file: string): void; +} + +function flushSessionEntries(manager: unknown, sessionFile: string): void { + const flushable = manager as FlushableSessionManager; + flushable._rewriteFile(); + flushable.setSessionFile(sessionFile); +} diff --git a/src/projections/README.md b/src/projections/README.md index 086de9b7..dc7ed986 100644 --- a/src/projections/README.md +++ b/src/projections/README.md @@ -30,7 +30,9 @@ Disposition: `✓` locked · `●` keep + lock (earns place, needs invariant) · | `session/affordances` | 1 | ✓ | `affordances.test.ts` — gap-driven legality + default-on-switch derivation tested directly. Legal options are a menu projection over capability-readiness; omitted options are not capability refusals (I31-L). | | `session/capability-readiness` | 1 | ✓ | D74-L/D75-L tracer gate, not a reusable DTO. 
`capability-readiness.test.ts` locks the explicit capability→node-kind map, proceed / low-epistemic / negotiate outcomes, no-refusal invariant, loud failure when the gap register lacks a required kind, same-kind discrimination through `question`, and live presence-coverage flip. `session/affordances` now consumes it for axis-option legality. | | `session/readiness-estimate` | — | ✓ | D45-L soft per-band coverage rollup over `ElicitationGap[]`; UI-only and gates nothing. `readiness-estimate.test.ts` locks every-band shape, empty-band zero, importance-weighted mean, honest regression, no grade imports, and no legality-path imports. | -| `session/runtime-policy` | 4 | ○ | Policy/definitions data, not a DTO transform. Affordance legality is guarded via `affordances.test.ts`; dormant prompt manifest grade tables are temporarily local to `.pi/agents/state.ts` until the method/manifest follow-on. | +| `session/runtime-policy` | 4 | ○ | Policy/definitions data, not a DTO transform. Gap-driven legality is guarded via `affordances.test.ts`; no runtime grade table remains. | +| `session/assistant-visible-watermark` | 2 | ✓ | Carrier projection over the authoritative `continuity-entry-classifier` watermark set. Unit tests guard seed/overview/own-mutation/`worldUpdate` carriers, narrow-read exclusion, and cross-spec failure. | +| `session/continuity-entry-classifier` | 2 | ✓ | Shared FE-847 taxonomy for watermark-carrier vs continuity-only-non-debt vs debt-bearing entries; consumed by watermark projection and origination tail classification. | | `workspace/workspace-context` | 1 | ✗ | Pure `{ mode, data }` tag wrapper — zero transform, single consumer (`.pi/extensions/context/get-cwd.ts`). Source `session/workspace-context.ts` already exports the shapes + `inspect*` and can feed the consumer directly. Delete / inline. | | `workspace/workspace-state` | 4 | ● | Real flatten of the `WorkspaceSessionState` union to a narrow DTO. 
Shape invariant across status variants (`ready` / `needs_human` / base). | | `exchanges/request-choice` | 6 | ✓ | `request-choice.test.ts` (direct). | diff --git a/src/projections/session/assistant-visible-watermark.test.ts b/src/projections/session/assistant-visible-watermark.test.ts new file mode 100644 index 00000000..cd9d5f60 --- /dev/null +++ b/src/projections/session/assistant-visible-watermark.test.ts @@ -0,0 +1,89 @@ +import { describe, expect, it } from 'vitest'; + +import { compareWatermarks, projectAssistantVisibleWatermark } from './assistant-visible-watermark.js'; +import { + CONTINUITY_ONLY_NON_DEBT_CUSTOM_TYPES, + WATERMARK_CARRIER_CUSTOM_TYPES, + classifyContinuityEntry, + isContinuityOnlyNonDebtEntry, +} from './continuity-entry-classifier.js'; +import { projectSessionRuntimeState } from './runtime-state.js'; + +const specId = 7; + +function custom(customType: string, data: Record<string, unknown>) { + return { type: 'custom', customType, data }; +} + +function message(role: 'user' | 'assistant', content: string) { + return { type: 'message', message: { role, content, timestamp: 0 } }; +} + +describe('assistant-visible watermark projection', () => { + it('advances from seed, full-overview snapshots, worldUpdate, and own mutations but not narrow reads', () => { + const entries = [ + custom('brunch.context_seed', { specId, snapshotLsn: 2 }), + custom('brunch.narrow_graph_read', { specId, lsn: 9 }), + custom('brunch.graph_overview_snapshot', { watermark: { specId, lsn: 4 } }), + custom('worldUpdate', { specId, currentLsn: 6, items: [{ id: 1 }] }), + custom('brunch.own_mutation', { specId, lsn: 8 }), + ]; + + expect(projectAssistantVisibleWatermark(entries, { specId })).toEqual({ specId, lsn: 8 }); + }); + + it('never compares bare LSNs across specs and fails loud on cross-spec misuse', () => { + expect(() => projectAssistantVisibleWatermark([custom('worldUpdate', { currentLsn: 2 })])).toThrow( + /bare LSN/, + ); + expect(() => + projectAssistantVisibleWatermark([ + 
custom('worldUpdate', { specId: 1, currentLsn: 2 }), + custom('worldUpdate', { specId: 2, currentLsn: 3 }), + ]), + ).toThrow(/multiple specs/); + expect(() => compareWatermarks({ specId: 1, lsn: 10 }, { specId: 2, lsn: 11 })).toThrow( + /different specs/, + ); + }); + + it('classifies shared carrier, continuity-only, and debt-bearing entries for FE-847 consumers', () => { + expect(WATERMARK_CARRIER_CUSTOM_TYPES).toEqual([ + 'brunch.context_seed', + 'brunch.graph_overview_snapshot', + 'brunch.own_mutation', + 'worldUpdate', + ]); + expect(CONTINUITY_ONLY_NON_DEBT_CUSTOM_TYPES).toContain('brunch.mention_staleness_hint'); + expect(classifyContinuityEntry(custom('worldUpdate', { specId, currentLsn: 3 }))).toBe( + 'watermark_carrier', + ); + expect(isContinuityOnlyNonDebtEntry(custom('brunch.side_task_result', { delivered: true }))).toBe(true); + expect(isContinuityOnlyNonDebtEntry(custom('brunch.mention_staleness_hint', { entityId: 'n1' }))).toBe( + true, + ); + expect(classifyContinuityEntry(message('user', 'Please continue'))).toBe('debt_bearing'); + }); + + it('keeps runtimeState.world.latestLsn as worldUpdate-only, not the broader watermark', () => { + const projection = projectSessionRuntimeState({ + header: { type: 'session', version: 3, id: 's1', cwd: '/tmp/workspace', timestamp: 'now' }, + binding: { schemaVersion: 1, specId }, + entries: [ + custom('brunch.context_seed', { specId, snapshotLsn: 10 }), + custom('brunch.own_mutation', { specId, lsn: 11 }), + ], + } as never); + + expect(projection.world.graph.latestLsn).toBeNull(); + expect( + projectAssistantVisibleWatermark( + [ + custom('brunch.context_seed', { specId, snapshotLsn: 10 }), + custom('brunch.own_mutation', { specId, lsn: 11 }), + ], + { specId }, + ), + ).toEqual({ specId, lsn: 11 }); + }); +}); diff --git a/src/projections/session/assistant-visible-watermark.ts b/src/projections/session/assistant-visible-watermark.ts new file mode 100644 index 00000000..3b9a2b28 --- /dev/null +++ 
b/src/projections/session/assistant-visible-watermark.ts @@ -0,0 +1,38 @@ +import { + watermarkFromEntry, + type ContinuityWatermark, + type TranscriptEntryLike, +} from './continuity-entry-classifier.js'; + +export type AssistantVisibleWatermark = ContinuityWatermark; + +export function projectAssistantVisibleWatermark( + entries: readonly TranscriptEntryLike[], + options: { readonly specId?: number } = {}, +): AssistantVisibleWatermark | null { + let latest: AssistantVisibleWatermark | null = null; + const seenSpecs = new Set(); + + for (const entry of entries) { + const watermark = watermarkFromEntry(entry); + if (!watermark) continue; + seenSpecs.add(watermark.specId); + if (options.specId !== undefined && watermark.specId !== options.specId) continue; + if (latest === null || watermark.lsn > latest.lsn) { + latest = watermark; + } + } + + if (options.specId === undefined && seenSpecs.size > 1) { + throw new Error('Cannot project assistant-visible watermark across multiple specs without specId.'); + } + + return latest; +} + +export function compareWatermarks(a: AssistantVisibleWatermark, b: AssistantVisibleWatermark): number { + if (a.specId !== b.specId) { + throw new Error('Cannot compare continuity watermarks from different specs.'); + } + return a.lsn - b.lsn; +} diff --git a/src/projections/session/continuity-entry-classifier.ts b/src/projections/session/continuity-entry-classifier.ts new file mode 100644 index 00000000..226a4640 --- /dev/null +++ b/src/projections/session/continuity-entry-classifier.ts @@ -0,0 +1,106 @@ +export interface ContinuityWatermark { + readonly specId: number; + readonly lsn: number; +} + +export type ContinuityEntryKind = 'watermark_carrier' | 'continuity_only_non_debt' | 'debt_bearing'; + +export const WATERMARK_CARRIER_CUSTOM_TYPES = [ + 'brunch.context_seed', + 'brunch.graph_overview_snapshot', + 'brunch.own_mutation', + 'worldUpdate', +] as const; + +export const CONTINUITY_ONLY_NON_DEBT_CUSTOM_TYPES = [ + 
'brunch.context_seed', + 'brunch.graph_overview_snapshot', + 'brunch.mention', + 'brunch.mention_staleness_hint', + 'brunch.session_lifecycle', + 'brunch.side_task_result', + 'brunch.reviewer_drain', + 'worldUpdate', +] as const; + +const WATERMARK_CARRIER_TYPES = new Set<string>(WATERMARK_CARRIER_CUSTOM_TYPES); +const CONTINUITY_ONLY_NON_DEBT_TYPES = new Set<string>(CONTINUITY_ONLY_NON_DEBT_CUSTOM_TYPES); + +export interface TranscriptEntryLike { + readonly type?: unknown; + readonly customType?: unknown; + readonly data?: unknown; + readonly details?: unknown; + readonly message?: unknown; +} + +export function classifyContinuityEntry(entry: TranscriptEntryLike): ContinuityEntryKind { + if (isWatermarkCarrier(entry)) return 'watermark_carrier'; + if (isContinuityOnlyNonDebtEntry(entry)) return 'continuity_only_non_debt'; + return 'debt_bearing'; +} + +export function isWatermarkCarrier(entry: TranscriptEntryLike): boolean { + const customType = customEntryType(entry); + return ( + customType !== undefined && WATERMARK_CARRIER_TYPES.has(customType) && watermarkFromEntry(entry) !== null + ); +} + +export function isContinuityOnlyNonDebtEntry(entry: TranscriptEntryLike): boolean { + const customType = customEntryType(entry); + if (customType !== undefined && CONTINUITY_ONLY_NON_DEBT_TYPES.has(customType)) return true; + + const message = messageRecord(entry); + if (message?.role === 'toolResult') { + const toolName = typeof message.toolName === 'string' ? 
message.toolName : undefined; + return toolName === 'read_graph'; + } + return false; +} + +export function watermarkFromEntry(entry: TranscriptEntryLike): ContinuityWatermark | null { + const customType = customEntryType(entry); + if (customType === undefined || !WATERMARK_CARRIER_TYPES.has(customType)) return null; + const payload = payloadRecord(entry); + if (!payload) return null; + return readWatermark(payload); +} + +export function customEntryType(entry: TranscriptEntryLike): string | undefined { + if (typeof entry.customType === 'string') return entry.customType; + const message = messageRecord(entry); + return typeof message?.customType === 'string' ? message.customType : undefined; +} + +function payloadRecord(entry: TranscriptEntryLike): Record<string, unknown> | undefined { + if (isRecord(entry.data)) return entry.data; + if (isRecord(entry.details)) return entry.details; + const message = messageRecord(entry); + if (isRecord(message?.data)) return message.data; + if (isRecord(message?.details)) return message.details; + return undefined; +} + +function readWatermark(payload: Record<string, unknown>): ContinuityWatermark | null { + const nested = isRecord(payload.watermark) ? payload.watermark : payload; + const lsn = integerField(nested.lsn) ?? integerField(nested.currentLsn) ?? integerField(nested.snapshotLsn); + if (lsn === undefined) return null; + const specId = integerField(nested.specId); + if (specId === undefined) { + throw new Error('Continuity watermark carrier must include specId; bare LSN comparison is invalid.'); + } + return { specId, lsn }; +} + +function messageRecord(entry: TranscriptEntryLike): Record<string, unknown> | undefined { + return isRecord(entry.message) ? entry.message : undefined; +} + +function isRecord(value: unknown): value is Record<string, unknown> { + return typeof value === 'object' && value !== null; +} + +function integerField(value: unknown): number | undefined { + return typeof value === 'number' && Number.isInteger(value) && value >= 0 ? 
value : undefined; +} diff --git a/src/rpc/methods/session.ts b/src/rpc/methods/session.ts index 7be7f3a1..c51a8eb0 100644 --- a/src/rpc/methods/session.ts +++ b/src/rpc/methods/session.ts @@ -15,11 +15,13 @@ import { type BrunchSessionEnvelope, } from '../../session/brunch-session-envelope.js'; import { projectLinearSessionExchangeProjection } from '../../session/exchange-projection.js'; +import { mentionEntry, resolveMentionFacts } from '../../session/mention-ledger.js'; import { resolveExplicitSessionProjectionTarget, type ExplicitSessionProjectionParams, type SessionProjectionTarget, } from '../../session/session-projection-reader.js'; +import { startAssistantTurn } from '../../session/start-assistant-turn.js'; import { acceptedResponseFromParams, nextDeterministicStructuredExchange, @@ -496,6 +498,7 @@ async function handleTriggerExchange( coordinator: DefaultWorkspaceCoordinator; cwd: string; productUpdates?: ProductUpdatePublisher; + getGraphRuntime: () => Promise; }, ): Promise { const state = await options.coordinator.openDefaultWorkspace(); @@ -516,10 +519,22 @@ async function handleTriggerExchange( }); } + const currentLsn = (await options.getGraphRuntime()) + .forSpec(existingTarget.envelope.binding.specId) + .queryGraph().lsn; + const origination = startAssistantTurn({ + specId: existingTarget.envelope.binding.specId, + currentLsn, + entries: existingTarget.envelope.entries, + origin: existingTarget.envelope.entries.length <= 3 ? 
'new_session' : 'manual_trigger', + }); const exchange = nextDeterministicStructuredExchange( projectLinearSessionExchangeProjection(existingTarget.envelope).exchanges.length, ); const manager = state.session.manager; + for (const entry of origination.seedEntries) { + manager.appendCustomEntry(entry.customType, entry.data); + } manager.appendMessage(presentToolResultMessage(exchange)); flushSessionEntries(manager, state.session.file); @@ -588,6 +603,7 @@ async function handleSubmitMessage( : state.session.manager.appendMessage(ordinaryUserMessage(params.text)); flushSessionEntries(state.session.manager, state.session.file); + const graph = await options.getGraphRuntime(); const capture = params.interruption === true ? ({ @@ -598,9 +614,20 @@ async function handleSubmitMessage( specId: target.envelope.binding.specId, text: params.text, source: `session_message:${messageId}`, - commandExecutor: (await options.getGraphRuntime()).commandExecutor, + commandExecutor: graph.commandExecutor, }); + if (params.interruption !== true) { + for (const fact of resolveMentionFacts({ + text: params.text, + specId: target.envelope.binding.specId, + graph, + })) { + state.session.manager.appendCustomEntry('brunch.mention', mentionEntry(fact).data); + } + flushSessionEntries(state.session.manager, state.session.file); + } + const result: SubmitMessageResult = { status: 'accepted', messageId, diff --git a/src/session/README.md b/src/session/README.md index 6288c3fd..1880e6f7 100644 --- a/src/session/README.md +++ b/src/session/README.md @@ -1,6 +1,6 @@ # session/ — Session domain layer -SPEC decisions: D6-L, D11-L, D12-L, D13-L, D21-L, D40-L, D52-L +SPEC decisions: D6-L, D11-L, D12-L, D13-L, D21-L, D40-L, D52-L, D76-L, D77-L, D78-L ## Owns @@ -38,9 +38,12 @@ plus the coordination logic for workspace/spec/session lifecycle. - **Session envelope** — canonical session envelope reader (spec/session pair). 
-- **LSN staleness tracking** — Pi extension records current LSN at session - start, checks at `prepareNextTurn`, injects `worldUpdate` with optional - context refresh when stale. +- **Turn-boundary choreography** — write-side seam for the assistant-visible + watermark, `worldUpdate`, mention staleness, and honest assistant origination. + `prepare-next-turn.ts` owns the single pre-turn continuity writer; Pi lifecycle + hooks adapt it through `.pi/extensions/session/lifecycle.ts`, and + `before_provider_request` is a guard-only check. `start-assistant-turn.ts` + owns the origination decision and context seed entries. ## Runtime affordance coverage ledger diff --git a/src/session/mention-ledger.test.ts b/src/session/mention-ledger.test.ts new file mode 100644 index 00000000..533d06fb --- /dev/null +++ b/src/session/mention-ledger.test.ts @@ -0,0 +1,62 @@ +import { describe, expect, it } from 'vitest'; + +import { + graphHandlesInText, + mentionEntry, + resolveMentionFacts, + stalenessEntriesForMentions, +} from './mention-ledger.js'; + +describe('mention ledger', () => { + it('extracts stable graph handles from submitted transcript text only', () => { + expect(graphHandlesInText('Compare #G1 with #R22, then revisit #G1.')).toEqual(['G1', 'R22']); + }); + + it('resolves #CODE handles to stable entity ids and seen_lsn at submit time', () => { + const graph = { + forSpec: () => ({ + resolveNodeCode: (code: string) => (code === 'G1' ? 
101 : undefined), + getNodes: () => [ + { + status: 'found', + node: { id: 101, title: 'Goal node', updatedAtLsn: 4 }, + related: [], + edges: [], + }, + ], + }), + }; + + expect( + resolveMentionFacts({ text: 'Please re-read #G1; ignore #BAD.', specId: 1, graph: graph as never }), + ).toEqual([{ entityId: '101', handle: 'G1', title: 'Goal node', seenLsn: 4 }]); + expect(mentionEntry({ entityId: '101', handle: 'G1', seenLsn: 4 })).toEqual({ + type: 'custom', + customType: 'brunch.mention', + data: { entityId: '101', handle: 'G1', seenLsn: 4 }, + }); + }); + + it('emits staleness only when the entity changed since it was last seen', () => { + const current = new Map([ + ['101', 7], + ['102', 5], + ]); + + expect( + stalenessEntriesForMentions({ + mentions: [ + { entityId: '101', handle: 'G1', seenLsn: 4 }, + { entityId: '102', handle: 'G2', seenLsn: 5 }, + ], + currentByEntityId: current, + }), + ).toEqual([ + { + type: 'custom', + customType: 'brunch.mention_staleness_hint', + data: { entityId: '101', handle: 'G1', seenLsn: 4, currentLsn: 7 }, + }, + ]); + }); +}); diff --git a/src/session/mention-ledger.ts b/src/session/mention-ledger.ts new file mode 100644 index 00000000..4ed70add --- /dev/null +++ b/src/session/mention-ledger.ts @@ -0,0 +1,79 @@ +import type { WorkspaceGraphRuntime } from '../graph/workspace-store.js'; + +export interface MentionFact { + readonly entityId: string; + readonly handle: string; + readonly title?: string; + readonly seenLsn: number; +} + +export interface MentionEntry { + readonly type: 'custom'; + readonly customType: 'brunch.mention'; + readonly data: MentionFact; +} + +export const MENTION_STALENESS_HINT_ENTRY_TYPE = 'brunch.mention_staleness_hint' as const; + +export interface MentionStalenessEntry { + readonly type: 'custom'; + readonly customType: typeof MENTION_STALENESS_HINT_ENTRY_TYPE; + readonly data: { + readonly entityId: string; + readonly handle?: string; + readonly seenLsn: number; + readonly currentLsn: number; + }; 
+} + +export function graphHandlesInText(text: string): readonly string[] { + return [...new Set([...text.matchAll(/#([A-Z]+\d+)/g)].map((match) => match[1]!))]; +} + +export function resolveMentionFacts(options: { + readonly text: string; + readonly specId: number; + readonly graph: WorkspaceGraphRuntime; +}): readonly MentionFact[] { + const readers = options.graph.forSpec(options.specId); + return graphHandlesInText(options.text).flatMap((handle) => { + const nodeId = readers.resolveNodeCode(handle); + if (nodeId === undefined) return []; + const [neighborhood] = readers.getNodes([{ id: nodeId }]); + if (!neighborhood || neighborhood.status !== 'found') return []; + return [ + { + entityId: String(neighborhood.node.id), + handle, + title: neighborhood.node.title, + seenLsn: neighborhood.node.updatedAtLsn, + }, + ]; + }); +} + +export function mentionEntry(fact: MentionFact): MentionEntry { + return { type: 'custom', customType: 'brunch.mention', data: fact }; +} + +export function stalenessEntriesForMentions(options: { + readonly mentions: readonly MentionFact[]; + readonly currentByEntityId: ReadonlyMap<string, number>; +}): readonly MentionStalenessEntry[] { + return options.mentions.flatMap((mention) => { + const currentLsn = options.currentByEntityId.get(mention.entityId); + if (currentLsn === undefined || currentLsn <= mention.seenLsn) return []; + return [ + { + type: 'custom' as const, + customType: MENTION_STALENESS_HINT_ENTRY_TYPE, + data: { + entityId: mention.entityId, + handle: mention.handle, + seenLsn: mention.seenLsn, + currentLsn, + }, + }, + ]; + }); +} diff --git a/src/session/prepare-next-turn.test.ts b/src/session/prepare-next-turn.test.ts new file mode 100644 index 00000000..fffebd5b --- /dev/null +++ b/src/session/prepare-next-turn.test.ts @@ -0,0 +1,152 @@ +import { describe, expect, it } from 'vitest'; + +import { isContinuityOnlyNonDebtEntry } from '../projections/session/continuity-entry-classifier.js'; +import { + guardBeforeProviderRequest, + 
prepareNextTurn, + stampOwnMutationWatermark, +} from './prepare-next-turn.js'; + +const specId = 3; + +function seed(lsn: number) { + return { type: 'custom', customType: 'brunch.context_seed', data: { specId, snapshotLsn: lsn } }; +} + +describe('prepareNextTurn', () => { + it('emits no worldUpdate when current_lsn equals watermark and emits the strict-greater set otherwise', () => { + expect( + prepareNextTurn({ specId, currentLsn: 2, entries: [seed(2)], changes: [{ specId, lsn: 2 }] }) + .entriesToAppend, + ).toEqual([]); + + const prepared = prepareNextTurn({ + specId, + currentLsn: 5, + entries: [seed(2)], + changes: [ + { specId, lsn: 1, entityId: 'old' }, + { specId, lsn: 3, entityId: 'new-a' }, + { specId: 99, lsn: 4, entityId: 'sibling-spec' }, + { specId, lsn: 5, entityId: 'new-b' }, + ], + }); + + expect(prepared.entriesToAppend).toEqual([ + { + type: 'custom', + customType: 'worldUpdate', + data: { + specId, + currentLsn: 5, + changedSinceLsn: 2, + items: [ + { specId, lsn: 3, entityId: 'new-a' }, + { specId, lsn: 5, entityId: 'new-b' }, + ], + }, + }, + ]); + }); + + it('dedupes a seed naming the current snapshot LSN', () => { + expect( + prepareNextTurn({ + specId, + currentLsn: 10, + entries: [seed(10)], + changes: [{ specId, lsn: 10, entityId: 'snapshot-node' }], + }).entriesToAppend, + ).toEqual([]); + }); + + it('surfaces same-session submit/capture writes that were not assistant-visible yet', () => { + expect( + prepareNextTurn({ + specId, + currentLsn: 8, + entries: [seed(5)], + changes: [{ specId, lsn: 8, entityId: 'captured-from-submit', kind: 'goal' }], + }).entriesToAppend[0]?.data.items, + ).toEqual([{ specId, lsn: 8, entityId: 'captured-from-submit', kind: 'goal' }]); + }); + + it('stamps own mutations as watermark carriers without treating them as worldUpdate-only runtime state', () => { + expect(stampOwnMutationWatermark({ specId, lsn: 12, source: 'mutate_graph' })).toEqual({ + type: 'custom', + customType: 'brunch.own_mutation', + 
data: { specId, lsn: 12, source: 'mutate_graph' }, + }); + }); + + it('emits mention staleness hints only for changed mentioned entities', () => { + expect( + prepareNextTurn({ + specId, + currentLsn: 9, + entries: [seed(5)], + changes: [ + { specId, lsn: 9, entityId: '101' }, + { specId, lsn: 5, entityId: '102' }, + ], + mentions: [ + { entityId: '101', handle: 'G1', seenLsn: 6 }, + { entityId: '102', handle: 'G2', seenLsn: 5 }, + ], + }).entriesToAppend, + ).toEqual( + expect.arrayContaining([ + { + type: 'custom', + customType: 'brunch.mention_staleness_hint', + data: { entityId: '101', handle: 'G1', seenLsn: 6, currentLsn: 9 }, + }, + ]), + ); + }); + + it('emits side-task and reviewer drains through the reconciler as continuity-only non-debt entries', () => { + const prepared = prepareNextTurn({ + specId, + currentLsn: 1, + entries: [seed(1)], + changes: [], + drains: [ + { kind: 'side_task', id: 'side-1', summary: 'Side task done' }, + { kind: 'reviewer', id: 'review-1', summary: 'Reviewer done' }, + ], + }); + + expect(prepared.entriesToAppend.map((entry) => entry.customType)).toEqual([ + 'brunch.side_task_result', + 'brunch.reviewer_drain', + ]); + expect(prepared.entriesToAppend.every(isContinuityOnlyNonDebtEntry)).toBe(true); + }); + + it('guard re-runs preparation once and never appends continuity directly outside prepare output', async () => { + const appended: unknown[] = []; + const results = [ + { + watermarkLsn: 1, + currentLsn: 2, + entriesToAppend: [ + { type: 'custom' as const, customType: 'worldUpdate', data: { specId, currentLsn: 2 } }, + ], + }, + { watermarkLsn: 2, currentLsn: 2, entriesToAppend: [] }, + ]; + + await expect( + guardBeforeProviderRequest({ + prepare: () => results.shift()!, + append: (entry) => { + appended.push(entry); + }, + }), + ).resolves.toEqual({ watermarkLsn: 2, currentLsn: 2, entriesToAppend: [] }); + expect(appended).toEqual([ + { type: 'custom', customType: 'worldUpdate', data: { specId, currentLsn: 2 } }, + ]); + 
}); +}); diff --git a/src/session/prepare-next-turn.ts b/src/session/prepare-next-turn.ts new file mode 100644 index 00000000..87f057ff --- /dev/null +++ b/src/session/prepare-next-turn.ts @@ -0,0 +1,113 @@ +import { + compareWatermarks, + projectAssistantVisibleWatermark, +} from '../projections/session/assistant-visible-watermark.js'; +import type { TranscriptEntryLike } from '../projections/session/continuity-entry-classifier.js'; +import { stalenessEntriesForMentions, type MentionFact } from './mention-ledger.js'; + +export interface GraphChangeItem { + readonly specId: number; + readonly lsn: number; + readonly entityId?: string | number; + readonly kind?: string; + readonly title?: string; +} + +export interface ContinuityDrain { + readonly kind: 'side_task' | 'reviewer'; + readonly id: string; + readonly summary: string; +} + +export interface PreparedContinuityEntry { + readonly type: 'custom'; + readonly customType: string; + readonly data: Record<string, unknown>; +} + +export interface PrepareNextTurnInput { + readonly specId: number; + readonly currentLsn: number; + readonly entries: readonly TranscriptEntryLike[]; + readonly changes: readonly GraphChangeItem[]; + readonly drains?: readonly ContinuityDrain[]; + readonly mentions?: readonly MentionFact[]; +} + +export interface PrepareNextTurnResult { + readonly watermarkLsn: number; + readonly currentLsn: number; + readonly entriesToAppend: readonly PreparedContinuityEntry[]; +} + +export function prepareNextTurn(input: PrepareNextTurnInput): PrepareNextTurnResult { + const projected = projectAssistantVisibleWatermark(input.entries, { specId: input.specId }); + const watermark = projected ?? 
{ specId: input.specId, lsn: 0 }; + compareWatermarks(watermark, { specId: input.specId, lsn: input.currentLsn }); + + const entriesToAppend: PreparedContinuityEntry[] = []; + const strictGreater = input.changes + .filter( + (change) => + change.specId === input.specId && change.lsn > watermark.lsn && change.lsn <= input.currentLsn, + ) + .sort((a, b) => a.lsn - b.lsn || String(a.entityId ?? '').localeCompare(String(b.entityId ?? ''))); + + if (input.currentLsn > watermark.lsn && strictGreater.length > 0) { + entriesToAppend.push({ + type: 'custom', + customType: 'worldUpdate', + data: { + specId: input.specId, + currentLsn: input.currentLsn, + changedSinceLsn: watermark.lsn, + items: strictGreater.map((change) => ({ ...change })), + }, + }); + } + + const currentByEntityId = new Map( + strictGreater.flatMap((change) => + change.entityId === undefined ? [] : ([[String(change.entityId), change.lsn]] as const), + ), + ); + entriesToAppend.push(...stalenessEntriesForMentions({ mentions: input.mentions ?? [], currentByEntityId })); + + for (const drain of input.drains ?? []) { + entriesToAppend.push({ + type: 'custom', + customType: drain.kind === 'side_task' ? 
'brunch.side_task_result' : 'brunch.reviewer_drain', + data: { id: drain.id, summary: drain.summary }, + }); + } + + return { watermarkLsn: watermark.lsn, currentLsn: input.currentLsn, entriesToAppend }; +} + +export function stampOwnMutationWatermark(options: { + readonly specId: number; + readonly lsn: number; + readonly source: string; +}): PreparedContinuityEntry { + return { + type: 'custom', + customType: 'brunch.own_mutation', + data: { specId: options.specId, lsn: options.lsn, source: options.source }, + }; +} + +export async function guardBeforeProviderRequest(options: { + readonly prepare: () => PrepareNextTurnResult | Promise; + readonly append: (entry: PreparedContinuityEntry) => void | Promise; +}): Promise { + const first = await options.prepare(); + if (first.entriesToAppend.length === 0) return first; + for (const entry of first.entriesToAppend) { + await options.append(entry); + } + const second = await options.prepare(); + if (second.entriesToAppend.length > 0) { + throw new Error('Continuity drift remained after one prepareNextTurn retry.'); + } + return second; +} diff --git a/src/session/start-assistant-turn.test.ts b/src/session/start-assistant-turn.test.ts new file mode 100644 index 00000000..9a2c207c --- /dev/null +++ b/src/session/start-assistant-turn.test.ts @@ -0,0 +1,94 @@ +import { describe, expect, it } from 'vitest'; + +import { startAssistantTurn, latestTailOwesAssistant } from './start-assistant-turn.js'; + +const specId = 5; + +function custom(customType: string, data: Record = {}) { + return { type: 'custom', customType, data }; +} + +function message(role: 'user' | 'assistant', content: string) { + return { type: 'message', message: { role, content, timestamp: 0 } }; +} + +describe('startAssistantTurn', () => { + it('seeds and starts a new assistant-originated session without fabricating a user turn', () => { + const decision = startAssistantTurn({ + specId, + currentLsn: 3, + entries: [], + origin: 'new_session', + strategy: 
'auto', + }); + + expect(decision).toEqual({ + action: 'start', + origin: 'new_session', + seedEntries: [{ type: 'custom', customType: 'brunch.context_seed', data: { specId, snapshotLsn: 3 } }], + }); + expect(JSON.stringify(decision)).not.toContain('"role":"user"'); + }); + + it('kicks resumed user-tail debt even after reconciler-inserted continuity notices', () => { + const entries = [ + message('assistant', 'Question'), + message('user', 'Answer that still needs assistant continuation'), + custom('worldUpdate', { specId, currentLsn: 4 }), + custom('brunch.side_task_result', { id: 'side-1' }), + custom('brunch.reviewer_drain', { id: 'review-1' }), + ]; + + expect(latestTailOwesAssistant(entries)).toBe(true); + expect( + startAssistantTurn({ specId, currentLsn: 4, entries, origin: 'resume_debt', strategy: 'auto' }).action, + ).toBe('start'); + }); + + it('stays idle for request/system leaves and for explicit freestyle while AUTO remains offer-first', () => { + expect( + startAssistantTurn({ + specId, + currentLsn: 4, + entries: [message('assistant', 'Already answered'), custom('worldUpdate', { specId, currentLsn: 4 })], + origin: 'resume_debt', + strategy: 'auto', + }), + ).toEqual({ action: 'idle', reason: 'no_unresolved_debt', seedEntries: [] }); + + expect( + startAssistantTurn({ + specId, + currentLsn: 4, + entries: [message('user', 'Ambient')], + origin: 'resume_debt', + }), + ).toMatchObject({ action: 'start' }); + + expect( + startAssistantTurn({ + specId, + currentLsn: 4, + entries: [message('user', 'Ambient')], + origin: 'resume_debt', + strategy: 'freestyle', + }), + ).toMatchObject({ action: 'idle', reason: 'explicit_freestyle' }); + }); + + it('is idempotent across reboot and crash-after-notice-before-provider', () => { + const seeded = [custom('brunch.context_seed', { specId, snapshotLsn: 9 })]; + expect( + startAssistantTurn({ specId, currentLsn: 9, entries: seeded, origin: 'new_session' }).seedEntries, + ).toEqual([]); + + const crashAfterNotice = [ 
/** Why an assistant-originated turn is being considered. */
export type AssistantTurnOrigin = 'new_session' | 'resume_debt' | 'manual_trigger';

/** Input to `startAssistantTurn`. `strategy` is optional; only `'freestyle'` changes the outcome. */
export interface StartAssistantTurnInput {
  readonly specId: number;
  readonly currentLsn: number;
  readonly entries: readonly TranscriptEntryLike[];
  readonly origin: AssistantTurnOrigin;
  readonly strategy?: 'auto' | 'freestyle';
}

/** Either start a turn (echoing the origin) or stay idle with a reason; both carry seed entries. */
export type StartAssistantTurnDecision =
  | {
      readonly action: 'start';
      readonly origin: AssistantTurnOrigin;
      readonly seedEntries: readonly PreparedContinuityEntry[];
    }
  | {
      readonly action: 'idle';
      readonly reason: 'explicit_freestyle' | 'no_unresolved_debt';
      readonly seedEntries: readonly PreparedContinuityEntry[];
    };

/**
 * Decides whether the assistant should originate a turn.
 *
 * - `strategy === 'freestyle'` always yields `idle` / `explicit_freestyle`.
 * - Origins `new_session` and `manual_trigger` always yield `start`.
 * - Origin `resume_debt` yields `start` only when `latestTailOwesAssistant`
 *   reports debt; otherwise `idle` / `no_unresolved_debt`.
 *
 * Seed entries are computed by `contextSeedEntries` up front and returned with
 * every decision, including idle ones.
 */
export function startAssistantTurn(input: StartAssistantTurnInput): StartAssistantTurnDecision {
  const seedEntries = contextSeedEntries(input);

  if (input.strategy === 'freestyle') {
    return { action: 'idle', reason: 'explicit_freestyle', seedEntries };
  }

  const alwaysStarts = input.origin === 'new_session' || input.origin === 'manual_trigger';
  // Short-circuit keeps the transcript scan limited to the resume_debt origin.
  if (alwaysStarts || latestTailOwesAssistant(input.entries)) {
    return { action: 'start', origin: input.origin, seedEntries };
  }

  return { action: 'idle', reason: 'no_unresolved_debt', seedEntries };
}

/**
 * Returns the context-seed entries owed for the given transcript.
 *
 * Yields an empty list when a watermark can be projected from `entries` and
 * its LSN is at or beyond `currentLsn`; otherwise yields a single
 * `brunch.context_seed` entry whose `snapshotLsn` is `currentLsn`.
 */
export function contextSeedEntries(input: {
  readonly specId: number;
  readonly currentLsn: number;
  readonly entries: readonly TranscriptEntryLike[];
}): readonly PreparedContinuityEntry[] {
  const watermark = projectAssistantVisibleWatermark(input.entries, { specId: input.specId });
  if (watermark && watermark.lsn >= input.currentLsn) return [];

  return [
    {
      type: 'custom',
      customType: 'brunch.context_seed',
      data: { specId: input.specId, snapshotLsn: input.currentLsn },
    },
  ];
}

/**
 * Reports whether the transcript tail still owes an assistant continuation.
 *
 * Walks backwards, skipping missing slots and entries that
 * `isContinuityOnlyNonDebtEntry` accepts. The first remaining entry decides:
 * - a message with role `user` → `true`;
 * - a message with role `toolResult` → `true` only when its `toolName` starts
 *   with `request_` and its status is not `'answered'`;
 * - anything else → `false`.
 *
 * An empty transcript, or one made only of skipped entries, yields `false`.
 */
export function latestTailOwesAssistant(entries: readonly TranscriptEntryLike[]): boolean {
  for (let index = entries.length - 1; index >= 0; index--) {
    const entry = entries[index];
    if (!entry || isContinuityOnlyNonDebtEntry(entry)) continue;

    const message = messageRecord(entry);
    if (message?.role === 'user') return true;
    if (message?.role === 'toolResult') {
      const toolName = typeof message.toolName === 'string' ? message.toolName : '';
      return toolName.startsWith('request_') && responseStatus(message) !== 'answered';
    }
    // First non-skipped entry is neither a user message nor a tool result.
    return false;
  }
  return false;
}

/** Reads a string `status` from `message.details`, falling back to `message.data`. */
function responseStatus(message: Record<string, unknown>): string | undefined {
  let details: Record<string, unknown> | undefined;
  if (isRecord(message.details)) {
    details = message.details;
  } else if (isRecord(message.data)) {
    details = message.data;
  }
  return typeof details?.status === 'string' ? details.status : undefined;
}

/** Returns `entry.message` when it is a non-null object, otherwise `undefined`. */
function messageRecord(entry: TranscriptEntryLike): Record<string, unknown> | undefined {
  return isRecord(entry.message) ? entry.message : undefined;
}

/** Narrows any non-null `object` value (arrays included) to a string-keyed record. */
function isRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null;
}