diff --git a/drizzle/0004_gaps_node_kind_reference.sql b/drizzle/0004_gaps_node_kind_reference.sql index 8aff8032..afc7a3a2 100644 --- a/drizzle/0004_gaps_node_kind_reference.sql +++ b/drizzle/0004_gaps_node_kind_reference.sql @@ -1,4 +1,10 @@ -CREATE TABLE `elicitation_gaps_new` ( +DROP TABLE IF EXISTS `elicitation_gaps_new`; +--> statement-breakpoint +DROP TABLE IF EXISTS `elicitation_gaps`; +--> statement-breakpoint +DROP TABLE IF EXISTS `elicitation_backlog`; +--> statement-breakpoint +CREATE TABLE `elicitation_gaps` ( `id` integer PRIMARY KEY AUTOINCREMENT NOT NULL, `spec_id` integer NOT NULL, `refers_to` text NOT NULL, @@ -17,69 +23,6 @@ CREATE TABLE `elicitation_gaps_new` ( `created_at_lsn` integer NOT NULL, `disposition_set_at_lsn` integer, FOREIGN KEY (`spec_id`) REFERENCES `specs`(`id`) ON UPDATE no action ON DELETE no action, - FOREIGN KEY (`arose_from_gap_id`) REFERENCES `elicitation_gaps_new`(`id`) ON UPDATE no action ON DELETE no action, + FOREIGN KEY (`arose_from_gap_id`) REFERENCES `elicitation_gaps`(`id`) ON UPDATE no action ON DELETE no action, FOREIGN KEY (`resolved_by_node_id`) REFERENCES `nodes`(`id`) ON UPDATE no action ON DELETE no action ); ---> statement-breakpoint -INSERT INTO `elicitation_gaps_new` ( - `id`, - `spec_id`, - `refers_to`, - `question`, - `rationale`, - `disposition`, - `basis`, - `readiness_band`, - `predicate_kind`, - `predicate`, - `importance`, - `plane_affinity`, - `lens_affinity`, - `arose_from_gap_id`, - `resolved_by_node_id`, - `created_at_lsn`, - `disposition_set_at_lsn` -) -SELECT - `id`, - `spec_id`, - CASE - WHEN `name` = 'domain' THEN 'context' - WHEN `name` = 'protagonist' THEN 'thesis' - WHEN `name` = 'pain_pull' THEN 'thesis' - WHEN `name` = 'constraint' THEN 'constraint' - WHEN `name` = 'value' THEN 'goal' - WHEN `name` = 'context_of_use' THEN 'context' - WHEN `name` = 'success_sketch' THEN 'criterion' - WHEN `name` = 'solution_boundary' THEN 'constraint' - ELSE COALESCE(json_extract(`predicate`, 
'$.nodeKind'), json_extract(`predicate`, '$.subjectKind'), 'context') - END, - CASE - WHEN `name` = 'domain' THEN 'What kind of thing is this, and what domain or environment does it live in?' - WHEN `name` = 'protagonist' THEN 'Who is this for?' - WHEN `name` = 'pain_pull' THEN 'What pull or pain makes this worth doing?' - WHEN `name` = 'constraint' THEN 'What binding constraints, non-goals, or boundaries already shape the work?' - WHEN `name` = 'value' THEN 'What outcome or value should this create?' - WHEN `name` = 'context_of_use' THEN 'When, where, or under what conditions will it be used?' - WHEN `name` = 'success_sketch' THEN 'How will we recognize success or good enough?' - WHEN `name` = 'solution_boundary' THEN 'What is explicitly out of scope or off the table?' - ELSE CASE WHEN trim(`name`) = '' THEN 'What needs to be clarified here?' ELSE trim(`name`) END - END, - `rationale`, - `disposition`, - `basis`, - `readiness_band`, - `predicate_kind`, - `predicate`, - `importance`, - `plane_affinity`, - `lens_affinity`, - `arose_from_gap_id`, - `resolved_by_node_id`, - `created_at_lsn`, - `disposition_set_at_lsn` -FROM `elicitation_gaps`; ---> statement-breakpoint -DROP TABLE `elicitation_gaps`; ---> statement-breakpoint -ALTER TABLE `elicitation_gaps_new` RENAME TO `elicitation_gaps`; diff --git a/memory/PLAN.md b/memory/PLAN.md index 5115b8b8..5c49648c 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -84,12 +84,13 @@ per ledger row: ### Active -- (none) — the FE-847 turn-boundary closure completed 2026-06-11 (see Turn-boundary choreography below). One handed-off residue: migration `0004_gaps_node_kind_reference` coherence (the in-place journal-tag rewrite + the derive-with-'context'-fallback that the new read-side `predicate_kind` throw would reject) is being fixed by another agent on the stacked successor branch — on reintegration, verify that fix actually covers the concern before considering it closed; do not touch `drizzle/` on this branch meanwhile. 
+- (none) — the FE-847 turn-boundary closure and `projection-shape-coverage` both completed 2026-06-11. Next sequenced work is `renderer-golden-coverage` via `memory/cards/renderer-golden-coverage--render-stage-chain.md`. The handed-off `0004_gaps_node_kind_reference` migration coherence concern is addressed by this branch's migration rewrite, which now drops and recreates `elicitation_gaps` (and drops `elicitation_backlog`) instead of copying rows through the derive-with-'context'-fallback `INSERT … SELECT`, so existing gap rows are not carried forward; verify `src/db/connection.test.ts` before treating that residue as closed. ### Recently Completed - 2026-06-11 **Turn-boundary choreography (Tier-2 layer) complete** — `turn-boundary-reconciliation` and `kick-and-context-seeding` both done on FE-847 (`ln/fe-847-turn-boundary-closure`); every I45/I46/I47 scaffold row runs live through real boot/restart; full definitions retained below for re-entry. The same branch carried the review-fix remediation (PR-comment defects fixed at top of stack, user-routed) and the typing-collapse refactor (canonical editor envelope, projected outcome union, shared grounding-gap fixture builder). -- 2026-06-11 `capability-readiness` — done after the grade-deletion sweep plus the remediation's live-wiring closure (required `getElicitationGaps`, conservative-fallback deletion, Tier-2 legality oracle); definition retained below. +- 2026-06-11 `projection-shape-coverage` — done on the top-of-stack coverage branch; the session PULL ledger and PROJECT invariants are closed, and the prepared renderer chain is the next trio move. +- 2026-06-11 `capability-readiness` — done after the grade-deletion sweep plus the remediation's live-wiring closure and prompt-authority follow-on (required `getElicitationGaps`, conservative-fallback deletion, Tier-2 legality oracle; role/mode-legal pins remain visible while gated methods/tools carry readiness negotiation); definition retained below. - 2026-06-10 `gaps-node-kind-reference` — done; gaps reference node kinds per D75-L (definition archived). - Older completed frontiers: `docs/archive/PLAN_HISTORY.md` (12 definitions archived 2026-06-11). 
@@ -97,9 +98,9 @@ per ledger row: The near-term spine has two tracks. The **context-pipeline coverage trio** remains the elevated product-coverage spine, sequenced in strict dependency order (lock upstream shape before downstream output). `role-safe-graph-mutations` is a graph-mutation grammar frontier that can run before or alongside the trio, and must land before relation-bearing generalized capture or semantic fixture curation rely on the new mutation surface. The `dx-feedback-loops` DX substrate, its `dx-introspection-live` follow-on, and the FE-847 turn-boundary closure are all complete and no longer gate this list. -1. `projection-shape-coverage` — **PROJECT stage** (`#project`); invariant / no-loss kind. Ledger authored in `src/projections/README.md`. Two sub-steps: (a) **PULL-session prerequisite** — ledger the session read surface (`session/workspace-context`, `workspace-session-coordinator`, `runtime-state`) the session/workspace projections lock against; (b) **earns-its-place audit then lock** — delete/inline the `✗` indirection (`workspace/workspace-context`: single-consumer tag wrapper), resolve the `◐` exchange family (direct-lock vs keep-transitive), and add a shape/no-loss invariant to each `●` survivor (`graph/neighborhood`, `session/transcript-context`, `session/runtime-state`, `workspace/workspace-state`). The graph projection stubs (`overview`, `commit-result`, `reconciliation-needs`) are `export {}` topology stubs, **not** dark implementations — leave them. Upstream of everything else in the trio; do this first so renderer goldens lock against stable shapes. +1. `projection-shape-coverage` — **done 2026-06-11.** PROJECT stage (`#project`); invariant / no-loss kind. Ledger authored in `src/projections/README.md`. 
The session PULL read surface (`session/workspace-context`, `workspace-session-coordinator`, `runtime-state`) is inventoried in `src/session/README.md`; the false `workspace/workspace-context` wrapper is deleted/inlined; every surviving direct projection row now carries a co-located invariant (`session/transcript-context`, `workspace/workspace-state`, `session/runtime-state`); and the exchange family is explicitly resolved as **keep-transitive** via existing `.pi` / session / probe proofs rather than new symmetry tests. `graph/neighborhood` is demoted to a direct graph PULL read/topology stub rather than a PROJECT survivor. The graph projection stubs (`neighborhood`, `overview`, `commit-result`, `reconciliation-needs`) are `export {}` topology stubs, **not** dark implementations. 2. `renderer-golden-coverage` — **RENDER stage** (`#render`); golden + invariant kind. **Depends on `projection-shape-coverage`.** Create the renderer ledger (README claims one that does not exist), extend the preview harness past `graph-neighborhood`, and golden-lock every durable renderer (only `graph/neighborhood` + `session/runtime-frame` are locked; the rest are dark or only transitively covered via the `.pi` adapter). -3. `prompt-composition-golden-coverage` — **COMPOSE stage** (`#compose`); golden + invariant kind. **Depends on `renderer-golden-coverage`.** Add a composed-prompt preview harness, golden-lock partial bodies and a representative composed-prompt matrix (axis × grade × pin) on top of the existing invariants. `elicitation-driver` rides on this stage's locked oracle, so it follows. +3. `prompt-composition-golden-coverage` — **COMPOSE stage** (`#compose`); golden + invariant kind. **Depends on `renderer-golden-coverage`.** Add a composed-prompt preview harness, golden-lock partial bodies and a representative composed-prompt matrix (axis × readiness × pin) on top of the existing invariants. `elicitation-driver` rides on this stage's locked oracle, so it follows. 
### After the trio @@ -208,7 +209,8 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - **Cross-cutting obligations:** Readiness never bars graph truth or work (I31-L); `CommandExecutor` must not reject a node for a later-band kind (D64-L). The deferred milestone gate for export/plan/execute op-modes stays deferred (D45-L). Replace grade-gate tests across `compose.test.ts` / `prompting.test.ts` and createSpec/getSpec rather than preserving them. - **Traceability:** D25-L, D30-L, D32-L, D45-L, D57-L, D58-L, D59-L, D64-L, D65-L, D73-L, D74-L, D75-L / A27-L / I25-L, I31-L. Supersedes stored-grade gating and the `chrome.phase` / `chrome.chatMode` fields. - **Design docs:** `memory/SPEC.md` D45-L / D74-L; `src/projections/session/runtime-policy.ts`; `src/projections/workspace/workspace-state.ts`. -- **Current execution pointer:** Done 2026-06-11. Slices 1–5 moved all legality and display consumers from the old grade/phase-era fields to selected-spec `ElicitationGap[]` / derived readiness estimates. The final grade-deletion sweep removed `specs.readiness_grade`, `updateReadinessGrade`, `READINESS_GRADES`, `ReadinessGrade`, and `AgentPromptSpecContext.readinessGrade`; regenerated migration metadata; stripped readiness grade from seed/export fixture contracts and JSON seed files; and removed probe setup calls that only advanced the legacy grade. `createSpec` / `getSpec` now carry only spec identity (`id`, `name`, `slug`), and readiness remains gap-derived at the consumers. The 2026-06-11 live-gap legality follow-on made `GraphReaders.getElicitationGaps` required, wired the live TUI composition root to the selected-spec reader, and deleted the silent conservative prompt fallback so missing legality reads are type-visible instead of floor-locking live sessions. +- **Current execution pointer:** Done 2026-06-11. 
Slices 1–5 moved all legality and display consumers from the old grade/phase-era fields to selected-spec `ElicitationGap[]` / derived readiness estimates. The final grade-deletion sweep removed `specs.readiness_grade`, `updateReadinessGrade`, `READINESS_GRADES`, `ReadinessGrade`, and `AgentPromptSpecContext.readinessGrade`; regenerated migration metadata; stripped readiness grade from seed/export fixture contracts and JSON seed files; and removed probe setup calls that only advanced the legacy grade. `createSpec` / `getSpec` now carry only spec identity (`id`, `name`, `slug`), and readiness remains gap-derived at the consumers. The 2026-06-11 live-gap legality follow-on made `GraphReaders.getElicitationGaps` required, wired the live TUI composition root to the selected-spec reader, and deleted the silent conservative prompt fallback so missing legality reads are type-visible instead of floor-locking live sessions. The 2026-06-11 prompt-authority follow-on kept pinned goal/strategy/lens selections visible in manifests when role/mode-legal and moved readiness pressure onto gated methods/tool routes, so negotiation no longer crashes prompt composition. +- **Residual risks / follow-ons:** The current `capability → NodeKind[]` map still uses the coarse shared grounding floor (`context` / `thesis` / `goal` / `constraint`) for multiple capabilities, so finer per-capability obligation maps remain future work. `manual` gap satisficiency still lacks a real evaluator path, so D57-L is only structurally/tracer-proven today. ### runtime-vocab-leaf @@ -283,24 +285,25 @@ The near-term spine has two tracks. 
The **context-pipeline coverage trio** remai - **Name:** Close the projections ledger with no-loss / shape invariants (PROJECT stage) - **Linear:** unassigned - **Kind:** coverage (buildable-now) / hardening -- **Status:** next — trio stage 1 (`#project`) +- **Status:** complete — trio stage 1 (`#project`) closed 2026-06-11 on the top-of-stack coverage branch; FE-847 closure continues below - **Certainty:** proving -- **Pipeline position:** PROJECT — the info-preserving DTO stage between PULL and RENDER (`renderers/`). PULL has two halves: the *graph* read surface is locked/ledgered (`graph/queries.ts` + `src/graph/README.md`), but the *session* read surface the session/workspace projections lock against is tested-but-un-ledgered, so this frontier carries a PULL-session prerequisite. Upstream of `renderer-golden-coverage`; lock projection shapes before renderer goldens so the goldens do not churn against moving DTOs. -- **Coverage-gate verdict (2026-06-08 deep per-plane pass; refined at design checkpoint):** **Passes the admission gate, and is the genuinely-new finding.** Named load-bearing layer (`src/projections/`), closeable inventory. The ledger is now authored in `src/projections/README.md`. Direct-coverage today: only `request-choice` (`✓`) and `affordances` (`✓`) plus the `topology-boundaries` import guard. The enumeration **corrected the plan's dark-zone claim**: `graph/{overview,commit-result,reconciliation-needs}` and `exchanges/present-candidates` are `export {}` **topology stubs**, not dark implementations (nothing to lock — `○`). The real `●` survivors needing invariants are `graph/neighborhood`, `session/transcript-context`, `session/runtime-state`, and `workspace/workspace-state`. The enumeration also found one `✗` indirection: `workspace/workspace-context` is a single-consumer `{ mode, data }` tag wrapper with zero transform — **delete/inline**, feed its consumer from the source read. 
The exchange family (`present-*`, `request-answer/choices/review`, `review-set-payload`) is `◐`: covered transitively via `.pi` tests, direct-lock optional. +- **Pipeline position:** PROJECT — the info-preserving DTO stage between PULL and RENDER (`renderers/`). PULL has two halves: the *graph* read surface is locked/ledgered (`graph/queries.ts` + `src/graph/README.md`), and the *session* read surface the session/workspace projections lock against is now ledgered in `src/session/README.md`. Upstream of `renderer-golden-coverage`; lock projection shapes before renderer goldens so the goldens do not churn against moving DTOs. +- **Coverage-gate verdict (2026-06-08 deep per-plane pass; refined again on 2026-06-11 restack/scope):** **Passes the admission gate, and remains the genuinely-new finding.** Named load-bearing layer (`src/projections/`), closeable inventory. The ledger is now authored in `src/projections/README.md`, and the session PULL half is now inventoried in `src/session/README.md`. Direct-coverage today: `request-choice` (`✓`), `affordances` (`✓`), `transcript-context` (`✓`), `workspace-state` (`✓`), `runtime-state` (`✓`), plus the `topology-boundaries` import guard. The enumeration **corrected the plan's dark-zone claim**: `graph/{overview,commit-result,reconciliation-needs}` and `exchanges/present-candidates` are `export {}` **topology stubs**, not dark implementations (nothing to lock — `○`). The 2026-06-11 graph-neighborhood disposition checkpoint demoted `graph/neighborhood` as well: current consumers already read `NodeNeighborhood` directly from `graph/queries.ts`, and a projection would be a pass-through layer for symmetry rather than earned PROJECT work. The false `workspace/workspace-context` indirection is deleted/inlined, feeding its consumer from the source read. 
The exchange family is now explicitly `✓ keep-transitive`: `.pi` structured-exchange tests prove emitted `toolResult.details`, `session/exchange-projection.test.ts` proves tuple reconstruction, and the review-set path is covered through `session/structured-exchange-loop.test.ts` plus `project-graph-review-cycle-proof.test.ts`. - **Oracle kind:** **invariant / no-loss / shape — NOT golden.** Projections are info-preserving (D60-L); a golden would be brittle and could not catch the failure that matters (a projection dropping a field the renderer also hides). Lock with shape assertions (required fields present, types correct) and round-trip / no-loss properties where a projection re-shapes a typed read. An **earns-its-place gate runs before the oracle gate**: a single-consumer pass-through is deleted, not locked. -- **Boundary:** In — the `●` DTO transforms (`graph/neighborhood`, `session/transcript-context`, `session/runtime-state`, `workspace/workspace-state`), the `✗` delete (`workspace/workspace-context`), the `◐` exchange-family decision, and the PULL-session read-shape ledger. Out — `○` topology stubs (`graph/{overview,commit-result,reconciliation-needs}`, `exchanges/present-candidates`), `session/runtime-policy` (policy data, not a transform), `topology-boundaries` (already an import guard), and the already-locked `✓` rows. -- **Aggregate DoD:** Every `●` projection carries a shape/no-loss invariant; every `✗` row is deleted/inlined with its consumer fed from source; `◐` rows resolved by explicit decision; `○` rows untouched. The session-PULL read-shape ledger exists. Every `projections/` module appears in `src/projections/README.md` with a disposition (`✓`/`●`/`◐`/`✗`/`○`) + owner + oracle. +- **Boundary:** In — the direct invariants on `session/transcript-context`, `session/runtime-state`, and `workspace/workspace-state`; the `✗` delete (`workspace/workspace-context`); the exchange-family disposition checkpoint; and the PULL-session read-shape ledger. 
Out — `○` topology stubs (`graph/{neighborhood,overview,commit-result,reconciliation-needs}`, `exchanges/present-candidates`), direct-read graph neighborhood consumers, `session/runtime-policy` (policy data, not a transform), `topology-boundaries` (already an import guard), and the already-resolved `✓` rows. +- **Aggregate DoD:** Every surviving `●` projection carries a shape/no-loss invariant; every `✗` row is deleted/inlined with its consumer fed from source; `◐` rows are resolved by explicit decision; `○` rows untouched. The session-PULL read-shape ledger exists. The `graph/neighborhood` row is no longer ambiguous: it is explicitly demoted to direct-read/stub status, with consumers kept on `NodeNeighborhood` from `graph/queries.ts`. Every `projections/` module appears in `src/projections/README.md` with a disposition (`✓`/`●`/`◐`/`✗`/`○`) + owner + oracle. - **Inventory authority:** the closed ledger lives in `src/projections/README.md` (authored 2026-06-08), mirroring the `src/graph/README.md` read-shape ledger form (module × consumers × disposition × oracle). The PULL-session half gets a sibling read-shape ledger in `src/session/README.md`. - **Why now / unlocks:** It is the missing middle of the pipeline and the prerequisite for stable renderer goldens. Closing it makes the info-preserving half of the context pipeline (PULL+PROJECT) fully oracle-backed, matching the graph PULL template. - **Human-in-the-loop:** per-row design checkpoint = user reviews "what must be preserved" for each load-bearing DTO (and approves each `✗` delete) before the invariant is locked (see Context §design→lock rhythm). The enumeration/ledger pass itself was the first design checkpoint. - **Acceptance:** - `src/projections/README.md` carries the full projections ledger (done) and `src/session/README.md` carries the session-PULL read-shape ledger. 
- - Each `●` DTO carries a shape/no-loss invariant; `workspace/workspace-context` is deleted/inlined; the `◐` exchange family is dispositioned; `○` stubs are left untouched. + - Each `●` DTO carries a shape/no-loss invariant; `workspace/workspace-context` is deleted/inlined; the exchange family is explicitly resolved keep-transitive; `○` stubs are left untouched. - No golden snapshots are introduced for projections (wrong tool); `projections/` stays free of adapter/transport imports (D52-L, enforced by `topology-boundaries.test.ts`). -- **Verification:** vitest shape/round-trip asserts co-located with each projection (or a `projections//*.test.ts`); the existing `topology-boundaries.test.ts` continues to guard imports. +- **Verification:** vitest shape/round-trip asserts co-located with each direct projection lock (or a `projections//*.test.ts`); the existing `topology-boundaries.test.ts` continues to guard imports; exchange-family rows cite their owning `.pi` / session / probe proofs instead of adding symmetry tests. - **Cross-cutting obligations:** Keep projections info-preserving (no lossy text — that is RENDER's job); do not duplicate a typed read as a projection just to fill a ledger row (D60-L: many callers consume the typed read directly). - **Traceability:** D52-L, D60-L. - **Design docs:** `src/projections/README.md`; `src/graph/README.md` (ledger form to mirror). +- **Current execution pointer:** Closed on the top-of-stack coverage branch. The scope card is retired; next sequenced trio work is `renderer-golden-coverage`. ### renderer-golden-coverage @@ -310,22 +313,23 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - **Status:** next — trio stage 2 (`#render`); **depends on `projection-shape-coverage`** - **Certainty:** proving - **Pipeline position:** RENDER — the first lossy stage, consuming PROJECT outputs. 
Locks only after projection shapes are stable; upstream of `prompt-composition-golden-coverage` (composed prompts embed rendered context). -- **Coverage-gate verdict (2026-06-08 ln-plan):** **Passes the admission gate** — an open coverage frontier. Named load-bearing layer (`src/renderers/`), closeable inventory, honest ●/○ marking, owner+oracle per row, explicit ledger authority. Classified **buildable-now**, and framed as **partial-oracle completion, not greenfield adoption**: the preview→lock→formalize loop already exists and is adopted unevenly. `toMatchFileSnapshot` goldens are live for `graph/neighborhood` and `session/runtime-frame` (`src/renderers/**/__previews__/`); what remains is closing the gaps — `workspace-state` is still invariant-only, `renderers/exchanges` has no goldens, and `src/scripts/render-preview.ts` (`npm run render`) only supports the `graph-neighborhood` renderer. +- **Coverage-gate verdict (2026-06-08 ln-plan):** **Passes the admission gate** — an open coverage frontier. Named load-bearing layer (`src/renderers/`), closeable inventory, honest ●/○ marking, owner+oracle per row, explicit ledger authority. Classified **buildable-now**, and framed as **partial-oracle completion, not greenfield adoption**: the preview→lock→formalize loop is adopted unevenly. `toMatchFileSnapshot` goldens are live for `graph/neighborhood` and `session/runtime-frame` (`src/renderers/**/__previews__/`); what remains is closing the gaps — `workspace-state` is still invariant-only, `renderers/exchanges` has no goldens, `renderers/session/transcript.ts` is only locked indirectly through `session/session-transcript.test.ts`, and the planned sketch harness (`src/scripts/render-preview.ts` / `npm run render`) is not yet in tree. - **Boundary:** In — the durable LLM-facing renderers under `src/renderers/{graph,workspace,session,exchanges}` (per `src/renderers/README.md`). 
Out — format helpers/primitives (`markdown.ts`, `toon.ts`), trivial JSON serializers (`○`), non-renderer projection DTOs, intentional topology stubs not yet owning a renderer (e.g. `present-candidates`), and any new renderer not already built (no symmetry regrowth). -- **Aggregate DoD:** No required (`●`) durable renderer remains without a locked golden (`toMatchFileSnapshot`) plus targeted invariant asserts (e.g. "renders projected code, never raw id"; "active-context omits superseded nodes"; "no dangling edge endpoints"). Extend `render-preview.ts` to the renderers being locked. +- **Aggregate DoD:** No required (`●`) durable renderer remains without a locked golden (`toMatchFileSnapshot`) plus targeted invariant asserts (e.g. "renders projected code, never raw id"; "active-context omits superseded nodes"; "no dangling edge endpoints"). Establish one honest sketch path for the renderers being locked before snapshotting them. - **Inventory authority:** the closed ledger lives in `src/renderers/README.md`; golden artifacts co-locate with the renderer test (`src/renderers//__previews__/.md`), not under `.fixtures/`. - **Why now / unlocks:** The cross-cut named the preview→lock→formalize loop a prerequisite oracle; it shipped for two renderers but not the rest, so the un-locked renderers can drift silently. Closing the gaps makes every durable renderer-bearing surface drift-protected. - **Sequencing:** trio stage 2 — starts once `projection-shape-coverage` has stabilized the DTO shapes it renders. Renderer text quality is **fitness evidence**, so it is still **never a ship gate** and does not block `poc-live-ship-gate`; but per the 2026-06-08 elevation it is near-term spine work, not background discretionary hardening. -- **Human-in-the-loop:** per-row design checkpoint = user eyeballs the `npm run render` preview and approves the wording/shape before the golden is written (see Context §design→lock rhythm). 
+- **Human-in-the-loop:** per-row design checkpoint = user eyeballs the chosen sketch output and approves the wording/shape before the golden is written (see Context §design→lock rhythm). - **Acceptance:** - Each `●` durable renderer has a golden lock that writes on first run and diffs after (matching the existing `graph/neighborhood` + `session/runtime-frame` pattern). - Each `●` renderer carries at least one semantic invariant assert beyond the snapshot. - `src/renderers/README.md` carries the closed ledger (renderer × required/deferred × golden-present). - - `render-preview.ts` covers each newly-locked renderer; no new renderer is introduced merely to fill a symmetric cell. -- **Verification:** `npm run render` for sketch; vitest `toMatchFileSnapshot` for lock; existing invariant-style asserts for formalize. All in the renderer's co-located test file. + - A deterministic sketch path exists for each newly-locked renderer; if `npm run render` is adopted, it covers those rows explicitly. No new renderer is introduced merely to fill a symmetric cell. +- **Verification:** sketch through the chosen preview path; vitest `toMatchFileSnapshot` for lock; existing invariant-style asserts for formalize. All in the renderer's co-located test file. - **Cross-cutting obligations:** Goldens co-locate with renderer tests (not `.fixtures/`); keep `renderers/` free of adapter/transport imports (D52-L); do not promote a renderer shape to a new consumer just to fill the ledger (consumer bleed-through); leave intentional topology stubs (`present-candidates`) alone until they own a real renderer. - **Traceability:** D52-L, D60-L, D62-L. - **Design docs:** `src/renderers/README.md`; `memory/CROSS_CUT_PLAN.md` §Renderer feedback loops. +- **Current execution pointer:** Scope with `memory/cards/renderer-golden-coverage--render-stage-chain.md`; start with Card 1 (`renderer ledger + preview-loop authority`). 
### prompt-composition-golden-coverage @@ -335,15 +339,15 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - **Status:** next — trio stage 3 (`#compose`); **depends on `renderer-golden-coverage`** - **Certainty:** proving - **Pipeline position:** COMPOSE — the last lossy stage; composed prompts embed rendered context strings, so lock only after RENDER goldens are stable. `elicitation-driver` rides on this stage's locked oracle and follows it. -- **Coverage-gate verdict (2026-06-08 ln-plan):** **Passes the admission gate** — an open coverage frontier of the same golden-locking kind as `renderer-golden-coverage`, surfaced from manual feedback-loop work. Named load-bearing layer (`src/.pi/skills/**` partials + `src/.pi/agents/compose.ts` composition), closeable inventory, owner+oracle per row, explicit ledger authority. Classified **buildable-now** and framed as **partial-oracle completion, not greenfield**: composition is already **invariant-rich** — `compose.test.ts` and `prompting.test.ts` assert structure, manifest legality, grade filtering, pinned/AUTO axis behavior, illegal-pin rejection, plus a `≥700`-char depth floor and a readable-resource check on every partial. What is missing is the **lock** stage: there is **no golden** of either the partial bodies or the composed-prompt output (no `__previews__`, no `toMatchFileSnapshot` for prompts; the only `.pi` inline snapshots are tool-output, not prompts), and there is **no preview harness** for composed prompts (`npm run render` only supports `graph-neighborhood`). -- **Boundary:** In — the agent prompt partials under `src/.pi/skills/{goals,strategies,lenses,methods}` and `src/.pi/agents/definitions/{elicitor,reviewer}.md`, and the `composeAgentPrompt` output for a representative matrix of axis/grade/pin combinations. 
Out — tool-output snapshots (already inline-locked where useful), `state.ts` legality source (guarded elsewhere), and any new partial/axis introduced merely to fill a symmetric cell (no symmetry regrowth). +- **Coverage-gate verdict (2026-06-08 ln-plan):** **Passes the admission gate** — an open coverage frontier of the same golden-locking kind as `renderer-golden-coverage`, surfaced from manual feedback-loop work. Named load-bearing layer (`src/.pi/skills/**` partials + `src/.pi/agents/compose.ts` composition), closeable inventory, owner+oracle per row, explicit ledger authority. Classified **buildable-now** and framed as **partial-oracle completion, not greenfield**: composition is already **invariant-rich** — `compose.test.ts` and `prompting.test.ts` assert structure, manifest legality, capability-readiness filtering, pinned/AUTO axis behavior, readiness-thin pin retention, role/mode-illegal pin rejection, plus a `≥700`-char depth floor and a readable-resource check on every partial. What is missing is the **lock** stage: there is **no golden** of either the partial bodies or the composed-prompt output (no `__previews__`, no `toMatchFileSnapshot` for prompts; the only `.pi` inline snapshots are tool-output, not prompts), and there is **no preview harness** for composed prompts (the planned `src/scripts/render-preview.ts` / `npm run render` sketch harness is not yet in tree). +- **Boundary:** In — the agent prompt partials under `src/.pi/skills/{goals,strategies,lenses,methods}` and `src/.pi/agents/definitions/{elicitor,reviewer}.md`, and the `composeAgentPrompt` output for a representative matrix of axis/readiness/pin combinations. Out — tool-output snapshots (already inline-locked where useful), `state.ts` legality source (guarded elsewhere), and any new partial/axis introduced merely to fill a symmetric cell (no symmetry regrowth). 
- **Aggregate DoD:** No required (`●`) prompt partial body or representative composed-prompt output remains without a locked golden plus the existing structural/legality invariants. Add a composed-prompt preview path (extend `render-preview.ts` or a sibling script) so goldens can be regenerated deterministically. - **Inventory authority:** the closed ledgers live in `src/.pi/skills/README.md` (partials) and `src/.pi/agents/README.md` (composition); golden artifacts co-locate with the owning test (`src/.pi/agents/__previews__/*.md`), not under `.fixtures/`. - **Why now / unlocks:** Prompt partials and composition shape every agent turn; today they can drift in wording/depth/order while invariants stay green, because the lock stage was never adopted for prompts. Locking them makes the manual feedback loop (eyeball → lock → diff) durable instead of re-eyeballed each change. - **Sequencing:** trio stage 3 — starts once `renderer-golden-coverage` has stabilized the rendered context strings the composed prompt embeds. Still **never a ship gate**; `elicitation-driver` follows it (it adds per-turn behavior over the composition oracle locked here), so the two pair naturally. - **Human-in-the-loop:** per-row design checkpoint = user eyeballs the composed-prompt preview (new harness) and approves partial body / composed wording before each golden is written (see Context §design→lock rhythm). - **Acceptance:** - - A representative composed-prompt matrix (axis/grade/pin) has golden locks that write on first run and diff after. + - A representative composed-prompt matrix (axis/readiness/pin) has golden locks that write on first run and diff after. - Each `●` partial body has at least the existing depth/readability invariant plus a body golden where wording is load-bearing. - `src/.pi/skills/README.md` + `src/.pi/agents/README.md` carry the closed ledger (partial/composition-case × required/deferred × golden-present). 
- A composed-prompt preview path exists for deterministic golden regeneration; no new partial/axis is introduced merely to fill a symmetric cell. @@ -466,7 +470,7 @@ nodes: prompt-composition-golden-coverage [next · coverage] TRIO stage 3 (#compose, COMPOSE): composed-prompt preview + golden-lock partials/composition matrix; depends on renderer-golden-coverage elicitation-gaps-remodel [done · proving] remodeled elicitation_gaps obligation register; live presence derivation (grounding typology catalog superseded by gaps-node-kind-reference, D75-L) gaps-node-kind-reference [done · proving] D75-L node-kind gap reference landed; typology name/RelevantGapName retired; same-kind discrimination probe covered - capability-readiness [done · proving] JIT capability->relevant-gaps gate + readiness estimate (UI-only); stored grade / MIN_GRADE / chrome.phase+chatMode retired + capability-readiness [done · proving] JIT capability->relevant-gaps gate + readiness estimate (UI-only); stored grade / MIN_GRADE / chrome.phase+chatMode retired; residue = manual satisficiency + capability-map refinement runtime-vocab-leaf [parallel · proving] src/session/schema/kinds.ts source-of-truth leaf for op_mode/strategy/lens/goal (D73-L direction); decision-3 follow-on elicitation-driver [after-trio · proving] live per-turn what-to-ask-next driver on remodeled elicitation_gaps; rides COMPOSE oracle; closes cross-cut Seam 3a exchanges-and-generalized-capture [after-trio · proving] bounded feature (NOT coverage): narrow extractive capture + false-commit guard + exchange symmetry audit @@ -529,8 +533,8 @@ notes: - `graph-observed-shapes` is intentionally consumer-specific: do not assume every agent read shape belongs on the web observer. - `role-safe-graph-mutations` folds the prior role-named edge-surface card and semantic graph-mutation curation card into one frontier. 
The canonical authored graph command becomes `mutateGraph` / `mutate_graph`; role-named endpoint fields are normalized through `EDGE_CATEGORY_METADATA`; exposed `commitGraph` / `commit_graph` is retired by break-and-repair rather than kept as a weaker parallel API. Downstream capture and dev curation must not reintroduce `{category, source, target}` at authored boundaries. - `exchanges-and-generalized-capture` is a bounded proving feature, not coverage: the remaining load-bearing unknown is capture semantics, not breadth closure. The exchange surface is largely built across the three layers, with some breadth still deferred / topology-stubbed (`present-candidates`). Scope high-confidence extractive capture with a false-commit guard, do not regrow deleted `capture-*` symmetry, and treat the exchange three-layer audit as delete-oriented (drop unjustified `projections/exchanges` / `renderers/exchanges` mirrors), not breadth-building. - - **Context-pipeline coverage trio (the near-term spine, 2026-06-08 deep per-plane pass).** The four LLM-facing context concerns are one pipeline — PULL → PROJECT → RENDER → COMPOSE (D60-L). PULL has **two halves**: the *graph* read surface is the done template (`graph/queries.ts` + `src/graph/README.md`: behavioral oracle for all 8 shapes + drift guard + real ledger), but the *session* read surface (`session/workspace-context`, `workspace-session-coordinator`, `runtime-state`) is tested-but-un-ledgered and must be ledgered before the session/workspace projections lock against it. The trio closes the other three stages **in dependency order**, each completing its plane's **full ledger** via the human-in-the-loop design→lock rhythm. Oracle kind differs by stage: info-preserving stages want **invariant/no-loss** locks, lossy stages want **golden** locks. 
The PROJECT ledger (`src/projections/README.md`, authored 2026-06-08) applies an **earns-its-place gate before the oracle gate** — `workspace/workspace-context` is `✗` delete/inline (single-consumer tag wrapper), and the plan's earlier "dark zone = graph/{overview,commit-result,reconciliation-needs}" was wrong: those are `export {}` topology stubs (`○`), not dark implementations. - - `projection-shape-coverage` (TRIO stage 1, `#project`) is the genuinely-new finding. Ledger authored in `src/projections/README.md`. The real `●` survivors are `graph/neighborhood`, `session/transcript-context`, `session/runtime-state`, `workspace/workspace-state`; `workspace/workspace-context` is `✗` delete/inline; the graph projection stubs (`overview`, `commit-result`, `reconciliation-needs`) are `○` topology stubs, not dark. Also carries the PULL-session read-shape ledger prerequisite. Lock with shape/no-loss invariants — **not goldens** (wrong tool for an info-preserving DTO; can't catch silent field-drop). Do it first; it stabilizes the shapes renderer goldens lock against. + - **Context-pipeline coverage trio (the near-term spine, 2026-06-08 deep per-plane pass).** The four LLM-facing context concerns are one pipeline — PULL → PROJECT → RENDER → COMPOSE (D60-L). PULL has **two halves**: the *graph* read surface is the done template (`graph/queries.ts` + `src/graph/README.md`: behavioral oracle for all 8 shapes + drift guard + real ledger), and the *session* read surface (`session/workspace-context`, `workspace-session-coordinator`, `runtime-state`) is now ledgered in `src/session/README.md` for the session/workspace projection locks. The trio closes the other three stages **in dependency order**, each completing its plane's **full ledger** via the human-in-the-loop design→lock rhythm. Oracle kind differs by stage: info-preserving stages want **invariant/no-loss** locks, lossy stages want **golden** locks. 
The PROJECT ledger (`src/projections/README.md`, authored 2026-06-08) applies an **earns-its-place gate before the oracle gate** — the false `workspace/workspace-context` wrapper has now been deleted/inlined, and the plan's earlier "dark zone = graph/{overview,commit-result,reconciliation-needs}" was wrong: those are `export {}` topology stubs (`○`), not dark implementations. `graph/neighborhood` is now the same class: a direct graph PULL read/topology stub, not a PROJECT survivor. + - `projection-shape-coverage` (TRIO stage 1, `#project`) is complete on the top-of-stack coverage branch. Ledger authored in `src/projections/README.md`. `session/transcript-context`, `workspace/workspace-state`, and `session/runtime-state` are all locked directly; `workspace/workspace-context` is deleted/inlined; the graph projection stubs (`neighborhood`, `overview`, `commit-result`, `reconciliation-needs`) are `○` topology stubs, not dark; and the exchange family is explicitly resolved keep-transitive via existing `.pi` / session / probe proofs. The session PULL read-shape prerequisite is closed in `src/session/README.md`. This stabilizes the shapes renderer goldens lock against. - `renderer-golden-coverage` (TRIO stage 2, `#render`) **depends on stage 1**: only `graph/neighborhood` + `session/runtime-frame` are golden-locked; the rest are dark or only transitively covered via the `.pi` adapter. Create the renderer ledger (README claims one that does not exist), extend the preview harness past `graph-neighborhood`. Bound to durable renderers (exclude `markdown.ts` / `toon.ts` helpers and topology stubs). Never a ship gate. - `prompt-composition-golden-coverage` (TRIO stage 3, `#compose`) **depends on stage 2**: `compose.test.ts` / `prompting.test.ts` are invariant-rich but no golden of partial bodies or composed output exists and there is no composed-prompt preview harness. Add the preview, golden-lock partials + a composed-prompt matrix. 
`elicitation-driver` rides on this stage's locked oracle and follows it. Never a ship gate. - `project-graph-review-cycle` is complete evidence for the optional batch proposal/review story; keep future review-quality work as follow-up, not FE-809 completion debt. diff --git a/memory/SPEC.md b/memory/SPEC.md index 3b56da2b..f9c7861e 100644 --- a/memory/SPEC.md +++ b/memory/SPEC.md @@ -117,11 +117,11 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c | A19-L | Pi's current settings/resource lifecycle can be made product-safe through a sealed Brunch Pi Profile without forking Pi: ambient discovery remains disabled, Brunch-owned extension factories may inject explicit resources, and remaining settings/keybinding leakage can be eliminated through programmatic policy or a narrow upstream seam. | medium | open | D39-L | FE-744/profile audit: source-backed resource-loader/settings audit, tests proving no ambient `.pi/` skills/prompts/themes/extensions/context files affect Brunch, and product-owned resources still load when intentionally injected. | | A20-L | The chosen Drizzle line and row-schema derivation path can be settled during the prep envelope without forcing later M4 rework: Brunch can prove migrations, SQLite fidelity, monotonic counter allocation, change-log writes, and runtime-schema derivation on one representative persistence slice before CRUD proper starts. | high | **validated** | D16-L, D41-L | **Validated by A20-L spike (2026-06-01).** Stack: `drizzle-orm@0.45.2` + `drizzle-kit@0.31.10` + `better-sqlite3@12.8.0` + `drizzle-typebox@0.3.3` + `@sinclair/typebox@0.34.14`. Proved: (1) `drizzle-typebox` derives valid TypeBox insert/select schemas from Drizzle tables; `Value.Check` validates/rejects correctly. (2) Batch `mutateGraph`-shaped transaction (multi-node → intra-batch ref resolution → multi-edge → LSN allocation → change-log append) works atomically; full rollback on FK violation or domain-validation throw. 
(3) `update().returning()` works for atomic monotonic counter increment; `insert().returning()` gives auto-increment IDs for ref resolution; JSON detail column round-trips cleanly. (4) Pi tool parameters (`typebox` v1.x) and Drizzle row schemas (`@sinclair/typebox` v0.34 via `drizzle-typebox`) serve different roles and never cross — shared enum `const` arrays bridge both. | | A21-L | The POC can treat coherence as a bounded product verdict over structural legality plus explicitly detected contradictions, gaps, and unresolved reconciliation needs, without solving a general theory of “spec coherence.” | low | open | D8-L | M8 must sharpen the coherence rubric before implementation: known-bad adversarial briefs should show what counts as incoherent, what is merely immature/underspecified, and what should become a reconciliation need. | -| A22-L | The elicitor can perform synchronous post-exchange capture well enough for the POC: high-confidence extractive facts can be committed to the graph immediately and gap dispositions updated, while low-confidence implications can be kept out of graph truth and used as disambiguation material. | medium | partially validated | D18-L, D26-L, D45-L, D65-L, I30-L | 2026-06-05 `capture-response-to-graph` validated the product wiring for narrow labeled text facts (`Goal:`, `Context:`, `Constraint:`, `Criterion:`) on `session.submitExchangeResponse`. 2026-06-07 generalized the same explicit-text capture core onto `session.submitMessage`: ordinary labeled user text now appends to transcript truth, commits through `graph/capture` → `CommandExecutor.mutateGraph({createBasis: explicit, ops})`, targets the transcript binding's spec, and publishes graph invalidations; explicit interruptions are transcript-visible but do not capture or silently answer a pending exchange. 
2026-06-08 `capture-quality-spike` added a fixed scenario measurement over free prose, file/ref-bearing prose, and implication-heavy prose; the sample extraction report reached precision 1.0 / recall 1.0 with zero false commits, moving generalized capture from parked evidence-gate to a narrow graduate recommendation with an explicit false-commit guard. Readiness-grade capture remains open fitness evidence. | +| A22-L | The elicitor can perform synchronous post-exchange capture well enough for the POC: high-confidence extractive facts can be committed to the graph immediately and gap dispositions updated, while low-confidence implications can be kept out of graph truth and used as disambiguation material. | medium | partially validated | D18-L, D26-L, D45-L, D65-L, I30-L | 2026-06-05 `capture-response-to-graph` validated the product wiring for narrow labeled text facts (`Goal:`, `Context:`, `Constraint:`, `Criterion:`) on `session.submitExchangeResponse`. 2026-06-07 generalized the same explicit-text capture core onto `session.submitMessage`: ordinary labeled user text now appends to transcript truth, commits through `graph/capture` → `CommandExecutor.mutateGraph({createBasis: explicit, ops})`, targets the transcript binding's spec, and publishes graph invalidations; explicit interruptions are transcript-visible but do not capture or silently answer a pending exchange. 2026-06-08 `capture-quality-spike` added a fixed scenario measurement over free prose, file/ref-bearing prose, and implication-heavy prose; the sample extraction report reached precision 1.0 / recall 1.0 with zero false commits, moving generalized capture from parked evidence-gate to a narrow graduate recommendation with an explicit false-commit guard. Gap-disposition and readiness-negotiation capture remain open fitness evidence. 
| | A24-L | A flat `elicitation_gaps` table (prospective memory) is sufficient to drive elicitor questioning, seed grounding, and feed capability-readiness without graph structure — gaps are typed coverage obligations (typologies), not graph nodes; apparent dependency among gaps is mediated by the claims their resolution produces. | medium | validated | D65-L, D74-L, D75-L | 2026-06-08 FE-823 materialized the flat table (built as `elicitation_backlog`) on the real LSN/change-log seam. 2026-06-10 `elicitation-gaps-remodel` replaced that question-instance shape with the typed obligation register, regenerated the table as `elicitation_gaps`, seeded the grounding typology catalog, and proved live presence-derived coverage/answered read-back without stored structural answers; `gaps-node-kind-reference` then retired the catalog/name vocabulary in favor of `refersTo: NodeKind` + free-form `question`. Remaining downstream proof is capture-reflection spawning; if genuine gap→gap dependency or rich traversal emerges, promote the table to a plane (rows→nodes, FK pointers→edges). | | A25-L | Tracking the latest `pi-coding-agent` release continuously (via source-alias in dev + package dependency bumps) keeps Brunch adaptable without routinely destabilizing it, because Brunch's pi product-behavior surface is concentrated in a few sealed integration seams (the `src/.pi/` extension bundle and the session/runtime adapters) behind the D39-L profile — even though pi *types* are imported across ~25 files, those are mostly type-only and pass through that small set of seams. | medium | partially validated | D67-L | 2026-06-09 FE-825 bumped Brunch to pi 0.79, kept type/default resolution on installed `dist`, added a `PI_SOURCE`-gated vite/vitest runtime alias to sibling `pi-mono` source, preserved product default sealed-profile/offline behavior, and passed `npm run verify`. 
Each later pi bump that lands without product-behavior regressions raises confidence; a bump that silently breaks sealed-profile assumptions falsifies it. | | A26-L | The refined "conversational introspection" goal can be built as a *read-only session-query-back tool*: under `BRUNCH_DEV`, the agent can call `brunch_session_query` over `ctx.sessionManager.getBranch()`, find entries by predicate, project capped dot/`[n]`/`[*]` paths, and surface exact returned values in chat without weakening D39-L sealing or turning self-reporting into product behavior. | medium | validated | D69-L, D71-L | 2026-06-09 `dx-introspection-live` slice 2 replaced the earlier fixed structured self-report/schema idea with `src/.pi/extensions/session-query/`: a dev-gated read-only tool registered only through `createBrunchPiExtensions(..., { introspection: { enabled } })`, covered by find/project/truncation unit tests, default-off/default-on registration tests, and a faux turn that returns verbatim projected session values. Live-model compliance with "call then echo verbatim" remains outer-loop fitness, not a merge gate. | -| A27-L | Gap satisfaction is expressible band-by-band at acceptable LLM cost: **commitment** typologies are structural `presence`/`field`/`coverage` predicates over the graph; **grounding** typologies are a `presence` floor plus `manual` LLM satisficiency (D57-L); **elicitation** typologies are generatively spawned. The explicit `capability → relevant gaps` map (D74-L) carries enough signal to drive proceed / negotiate without a standing grade. | medium | partially validated | D65-L, D74-L, D75-L | 2026-06-10 `elicitation-gaps-remodel` validated the structural `presence` case: a seeded grounding gap's derived coverage/answered state flips from graph truth with no stored structural answer and sibling-spec isolation holds. 
2026-06-10 the `capability-readiness` D74-L gate tracer validated the grounding floor: the explicit capability→gap map drives proceed / proceed_low_epistemic / negotiate, live presence coverage flips a generative capability negotiate→proceed, and the gate imports no grade symbols. 2026-06-10 `gaps-node-kind-reference` collapsed that map onto `NodeKind` (`context`/`thesis`/`goal`/`constraint`), proved required-kind absence fails loud, and proved same-kind gaps discriminate by question+satisfier rather than typology name. 2026-06-10 the `capability-readiness` affordance-legality slice validated the affordance-path consumer: the runtime affordance projection (`affordances` / `axisOptionsForRuntimeState`) derives goal/strategy/lens menu legality from `evaluateCapabilityReadiness` over gap coverage with no grade symbols, a coverage flip moves a gated option legal, and a required kind absent from the register fails loud (config bug ≠ uncovered) — retiring the affordance-path uncertainty. 2026-06-10 the method/manifest legality slice validated the turn-boundary consumer: `before_agent_start` reads selected-spec gaps through the graph read seam, prompt manifests and active tool names derive gated methods from gap coverage, floor methods/tools remain available at zero coverage, and the `state.ts` grade tables are gone. 2026-06-10 the agent-prompt display slice validated the display consumer: `compose.ts` and `contexts/cwd.ts` render the selected-spec soft per-band estimate from gaps with stable band order/fixed decimals, and `before_agent_start` threads the same selected-spec gaps into the pushed cwd context. 
2026-06-11 the review-fix remediation hardened the predicate substrate: `gapPredicateSupport` (in the union's owning schema module) is the single never-checked owner of per-arm semantics — `field`/`coverage` now **reject loudly at the CommandExecutor boundary** until derivation exists (a structural arm without derivation also fails loud at read), open presence gaps dedupe by `(specId, nodeKind)` (presence is a kind-floor obligation; situated same-kind gaps use `manual` until `field`/`coverage` land), and gap hydration fails on `predicate_kind`/JSON divergence. Remaining proof: `field`/`coverage` predicate derivation, `manual` LLM satisficiency, elicitation/commitment fixtures. Falsified if grounding readiness cannot decompose into per-typology presence+manual judgments, or if commitment obligations need logic the predicate union can't express. | +| A27-L | Gap satisfaction is expressible band-by-band at acceptable LLM cost: **commitment** typologies are structural `presence`/`field`/`coverage` predicates over the graph; **grounding** typologies are a `presence` floor plus `manual` LLM satisficiency (D57-L); **elicitation** typologies are generatively spawned. The explicit `capability → relevant gaps` map (D74-L) carries enough signal to drive proceed / negotiate without a standing grade. | medium | partially validated | D65-L, D74-L, D75-L | 2026-06-10 `elicitation-gaps-remodel` validated the structural `presence` case: a seeded grounding gap's derived coverage/answered state flips from graph truth with no stored structural answer and sibling-spec isolation holds. 2026-06-10 the `capability-readiness` D74-L gate tracer validated the grounding floor: the explicit capability→gap map drives proceed / proceed_low_epistemic / negotiate, live presence coverage flips a generative capability negotiate→proceed, and the gate imports no grade symbols. 
2026-06-10 `gaps-node-kind-reference` collapsed that map onto `NodeKind` (`context`/`thesis`/`goal`/`constraint`), proved required-kind absence fails loud, and proved same-kind gaps discriminate by question+satisfier rather than typology name. 2026-06-10 the `capability-readiness` affordance-legality slice validated the affordance-path consumer: the runtime affordance projection (`affordances` / `axisOptionsForRuntimeState`) derives goal/strategy/lens menu legality from `evaluateCapabilityReadiness` over gap coverage with no grade symbols, a coverage flip moves a gated option legal, and a required kind absent from the register fails loud (config bug ≠ uncovered) — retiring the affordance-path uncertainty. 2026-06-10 the method/manifest legality slice validated the turn-boundary consumer: `before_agent_start` reads selected-spec gaps through the graph read seam, prompt manifests and active tool names derive gated methods from gap coverage, floor methods/tools remain available at zero coverage, and the `state.ts` grade tables are gone. 2026-06-10 the agent-prompt display slice validated the display consumer: `compose.ts` and `contexts/cwd.ts` render the selected-spec soft per-band estimate from gaps with stable band order/fixed decimals, and `before_agent_start` threads the same selected-spec gaps into the pushed cwd context. 2026-06-11 the review-fix remediation hardened the predicate substrate: `gapPredicateSupport` (in the union's owning schema module) is the single never-checked owner of per-arm semantics — `field`/`coverage` now **reject loudly at the CommandExecutor boundary** until derivation exists (a structural arm without derivation also fails loud at read), open presence gaps dedupe by `(specId, nodeKind)` (presence is a kind-floor obligation; situated same-kind gaps use `manual` until `field`/`coverage` land), and gap hydration fails on `predicate_kind`/JSON divergence. 
2026-06-11 the prompt-authority follow-on validated the negotiation line: readiness-thin pinned goal/strategy/lens selections remain visible in manifests, while gated methods stay withheld and prompt composition no longer throws on a readiness negotiation. Remaining proof: `field`/`coverage` predicate derivation, `manual` LLM satisficiency, elicitation/commitment fixtures, and capability-map refinement beyond the shared grounding floor. Falsified if grounding readiness cannot decompose into per-typology presence+manual judgments, or if commitment obligations need logic the predicate union can't express. | ### Active Decisions @@ -153,7 +153,7 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c - **D63-L — Graph `basis` records item-level approval strength, not the mutation pathway.** Accepted nodes and edges use `basis ∈ explicit | implicit`. `explicit` means the user directly stated the graph item or approved the exact node/edge in a review set; `implicit` means the user accepted a concept/proposal and the agent materialized specific graph items to match it without per-item review (the `propose-graph` direct-commit path). The mutation pathway lives in `change_log.operation` and payload (`mutate_graph`, `accept_review_set`, post-exchange capture, etc.), while epistemic attribution lives in `Node.source` and proposal UI metadata may still carry `epistemic_status`. Low-confidence inferred material is still not graph truth; it remains in preface/capture analysis/review drafts/reconciliation needs until clarified or accepted. More abstractly, `basis` is a *provenance-directness* marker — directly from the user (`explicit`) versus agent-materialized from user input (`implicit`) — of which item-level approval strength is the claim-flavored reading; this lets the same `explicit | implicit` distinction apply to non-claim registers such as `elicitation_gaps` (user-raised vs agent-inferred, D65-L). 
Depends on: D26-L, D27-L, D53-L, D54-L, D55-L. Supersedes: `basis = accepted_review_set` as a persisted graph enum value and any interpretation of `basis` as a provenance/path field. - **D64-L — Readiness bands are the coarse level of one coverage axis; gap typologies (D65-L) are its finer members. Bands are non-exclusive derived node-kind groupings, not structural legality gates.** Bands are `grounding`, `elicitation`, and `commitment`; each `elicitation_gaps` typology carries exactly one band — band and typology are **one axis at two granularities**, so "bands becoming more differentiated over time" means the typology taxonomy growing, not new bands. A node kind may belong to multiple bands (e.g. `constraint` contributes to grounding as the constraint anchor and to elicitation when it bounds solution space). Bands guide what the elicitor is trying to complete, what graph filters and rendered context show, the per-band **readiness estimate** rollup (D45-L), and which gaps a capability-readiness judgment weighs (D74-L). The band's gate-character differs by band: **grounding** is mostly LLM-judged satisficiency with a count floor (D57-L), **elicitation** is generatively spawned (no fixed typology set), **commitment** is more structurally derivable. The `CommandExecutor` must not reject a clear later-band kind merely because of band; readiness controls objectives and capability-judgment, not what graph truth may contain. Depends on: D45-L, D56-L, D57-L, D59-L, D60-L, D65-L. Supersedes: treating the intent `basic | structural | reasoning` category as the readiness taxonomy, treating readiness as a per-kind creation whitelist, or treating bands as a grade rubric for a stored grade. - **D65-L — `elicitation_gaps` are typed coverage *obligations* (typologies) — the elicitor's prospective-memory agenda and the substrate of capability-readiness judgment; they guide and modulate, they never hard-gate.** Renamed and reconceived from `elicitation_backlog`. 
A gap is a **typology of coverage that must be addressed** (e.g. "the spec must anchor its primary constraint(s)"), **not** a literal queued question and **not** a specific point of unclarity — that would shadow the intent graph, which already owns the content (decisions, assumptions, constraints, …). The original `unknown`/process-vs-domain split still holds: `elicitation_` scopes the term to *process* gaps (knowable by asking), as opposed to the deferred domain-gap `risk` node (Future Direction §Vocabulary evolution). Each gap carries **both** a stable **name** (its typology key — machine identity used for seeding, dedup, and the `capability → relevant gaps` map (D74-L), and a short display label) **and** a **rationale** (the *meta* prose: what coverage this obligation represents, why it matters, and what counts as satisfying it — read by the elicitor to phrase the next question and to make a `manual` satisficiency judgment, D57-L). The two are not redundant: the name is for machine identity/reference, the rationale is for agent reasoning and cannot be compressed into a terse key. In addition each gap carries: a **band** (D64-L — its coarse level, one band per typology); a **predicate shape** — a tagged union of `presence` (≥N nodes of a kind/band present), `field` (a `detail` key present), `coverage` (D60-L `lacksEdge` per-member absence), or `manual` (LLM-judged, the D57-L satisficiency residue) — which routes structural-vs-JIT checking (D74-L); an **importance** (driver-weight / count-floor membership / priority — *not* a hard gate); and a derived **coverage** strength (how well addressed). Importance and coverage are deliberately **two fields, not one ambiguous `rating`**: importance is the pre-answer weight, coverage the post-answer derived strength. 
**Disposition** (`open | answered | not_applicable | irrelevant | reopened`) is stored *only where it is non-derivable* — scope judgments (`not_applicable` / `irrelevant`, which the agent may set in bulk) and `manual` satisficiency — while `answered` for a structural predicate is derived **live** from the graph and never hand-set; this is the anti-shadowing line: the table holds obligation/disposition/meta only, never domain content. `reopened` is a legitimate disposition (new ambiguity can reopen a typology). Gaps serve three roles: **agenda** (what to ask / propose next), **judgment drivers** for capability-readiness (D74-L), and a **density signal** that scales generative-output epistemic status (D30-L) — the candidate-proposal / disambiguation UX is precisely how open grounding gaps fill progressively, so an open gap must never wall that UX. Seeding is band-correlated. The **grounding** band has a seeded fixed catalog of typologies collated from the D30-L anchor bundle, the D57-L Walter drivers, [`docs/design/ELICITATION_LENSES.md`](../docs/design/ELICITATION_LENSES.md) §grounding bundle, and the shaping kickoff/framing material — a **floor** of `domain` (what kind of thing is being built), `protagonist` (who it is for / most affected), `pain_pull` (what problem/pain/pull drives it), and `constraint` (what binding non-negotiables already shape it) — the anchor bundle that gates generative capabilities (D30-L) — plus softer **progressive drivers** that enrich and focus elicitation but are *never* floor (the no-moving-the-goalpost line): `value` (what value/benefit), `context_of_use` (when/where used), `success_sketch` (how success is measured / what good looks like), and `solution_boundary` (non-goals / what it is explicitly not). **elicitation** gaps are generatively spawned by capture-reflection as preceding answers raise new coverage obligations (no fixed catalog). 
**commitment** gaps are derived structural predicates over the graph (e.g. "every requirement has a criterion", "every decision records its rejected options", "every invariant has a proof or check"). It remains a **flat table, not a graph plane/node** — its only relations are filter attributes plus FK pointers (`arose_from`, `resolved_by`), a degenerate bipartite graph promotable later only if genuine gap→gap structure emerges; it is the *prospective* sibling of the *retrospective* `reconciliation_need` register (D8-L). `basis` applies via provenance-directness (D63-L): user-raised `explicit`, agent-inferred `implicit`. The flat-table substrate, `createSpec` seeding, `CommandExecutor`-routed mutations, and shared spec-local LSN + `change_log` boundary are settled from FE-823 (built as `elicitation_backlog`); the obligation/predicate/disposition remodel and the rename are what this decision now locks. Still open: whether the register eventually thins the `goal` axis (D59-L), and live per-turn ranking. Depends on: D8-L, D30-L, D45-L, D57-L, D59-L, D60-L, D63-L, D64-L, D74-L. Refined by: D75-L (gaps reference graph node kinds via `refersTo: NodeKind`; the parallel grounding-typology catalog and the closed gap-`name` enum are retired — substrate, predicate union, disposition, and anti-shadowing line are unchanged). Supersedes: the `elicitation_backlog` name and its question-instance / `open | closed`-status model, treating `unknown` as a graph node kind, and any readiness-grade-projection-over-open-counts as authority. -- **D74-L — Capability-readiness is a just-in-time, capability-relative judgment over relevant gaps — it replaces the standing grade gate.** When a capability is requested (a generative lens, `propose-graph`, `project-graph`, commitment review, eventual export), the agent evaluates readiness *for that capability* against the `elicitation_gaps` (D65-L) declared relevant to it. 
The `capability → relevant gaps` map is **explicit** and subsumes the retired `STRATEGY_MIN_GRADE` / `GOAL_MIN_GRADE` / `LENS_MIN_GRADE` thresholds in `runtime-policy.ts` plus the retired prompt-manifest/tool `METHOD_MIN_GRADE` thresholds in `.pi/agents/state.ts`, which were lossy grade-proxies for "enough grounding". Structurally-obvious relevant gaps (`presence` / `field` / `coverage`) are checked **mechanically** (cheap, no LLM); non-obvious (`manual`) ones consume an **LLM satisficiency judgment** (D57-L). The outcome is one of **proceed**, **proceed at low epistemic status** (density-scaled, D30-L), or **negotiate** — surface an `establishment_offer` ("I can, but answer X and Y first", D32-L). Capability-readiness fires **on request, reactive-primary** (proactive nudges are a separate later concern) and is the **only readiness gate**: it never bars attempting work, it scales/negotiates. This resolves the prior "lens is never gated" (`ELICITATION_LENSES.md`) vs `LENS_MIN_GRADE` contradiction (lenses are not grade-gated; readiness is JIT-judged) and dissolves the grade-ratchet / two-value problem (the soft `readiness estimate`, D45-L, gates nothing and may regress honestly). A future structural milestone gate for export/plan/execute op-modes is deferred (D45-L) until such an op-mode exists. Depends on: D25-L, D26-L, D30-L, D32-L, D45-L, D57-L, D59-L, D65-L. Refined by: D75-L (the `capability → relevant gaps` map references node kinds, not a closed typology-name enum). Supersedes: `GRADE_RANK`-based `MIN_GRADE` hard gating of goal/strategy/lens/method prompt resources and method-coupled tools, and a standing readiness scalar as the authority for capability availability. 
+- **D74-L — Capability-readiness is a just-in-time, capability-relative judgment over relevant gaps — it replaces the standing grade gate.** When a capability is requested (a generative lens, `propose-graph`, `project-graph`, commitment review, eventual export), the agent evaluates readiness *for that capability* against the `elicitation_gaps` (D65-L) declared relevant to it. The `capability → relevant gaps` map is **explicit** and subsumes the retired `STRATEGY_MIN_GRADE` / `GOAL_MIN_GRADE` / `LENS_MIN_GRADE` thresholds in `runtime-policy.ts` plus the retired prompt-manifest/tool `METHOD_MIN_GRADE` thresholds in `.pi/agents/state.ts`, which were lossy grade-proxies for "enough grounding". Structurally-obvious relevant gaps (`presence` / `field` / `coverage`) are checked **mechanically** (cheap, no LLM); non-obvious (`manual`) ones consume an **LLM satisficiency judgment** (D57-L). The outcome is one of **proceed**, **proceed at low epistemic status** (density-scaled, D30-L), or **negotiate** — surface an `establishment_offer` ("I can, but answer X and Y first", D32-L). Explicit user/system pins stay legible when they are role/mode-legal; readiness negotiation narrows AUTO choices and gated methods/tools rather than erasing the pin or crashing prompt composition. Capability-readiness fires **on request, reactive-primary** (proactive nudges are a separate later concern) and is the **only readiness gate**: it never bars attempting work, it scales/negotiates. This resolves the prior "lens is never gated" (`ELICITATION_LENSES.md`) vs `LENS_MIN_GRADE` contradiction (lenses are not grade-gated; readiness is JIT-judged) and dissolves the grade-ratchet / two-value problem (the soft `readiness estimate`, D45-L, gates nothing and may regress honestly). A future structural milestone gate for export/plan/execute op-modes is deferred (D45-L) until such an op-mode exists. Depends on: D25-L, D26-L, D30-L, D32-L, D45-L, D57-L, D59-L, D65-L. 
Refined by: D75-L (the `capability → relevant gaps` map references node kinds, not a closed typology-name enum). Supersedes: `GRADE_RANK`-based `MIN_GRADE` hard gating of goal/strategy/lens/method prompt resources and method-coupled tools, and a standing readiness scalar as the authority for capability availability. - **D75-L — `elicitation_gaps` reference graph node kinds; the parallel grounding-typology vocabulary is retired.** A gap is a **situated question that refers to a graph node kind** (`refersTo: NodeKind`), not an entry in a separate closed "typology" vocabulary. The grounding typology catalog of D65-L (`GROUNDING_GAP_TYPOLOGIES`: floor `domain` / `protagonist` / `pain_pull` / `constraint` + progressive `value` / `context_of_use` / `success_sketch` / `solution_boundary`) was a denormalized, drift-prone copy of the per-kind **source-question rubric** the intent ontology already owns (D56-L; [`docs/design/GRAPH_MODEL.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/GRAPH_MODEL.md#per-plane-node-kinds) §Per-plane node kinds — *"the abstract driver, not a literal question to parrot"*): `domain` / `context_of_use` are facets of `context`; `protagonist` / `pain_pull` of `thesis`; `value` of `goal`; `constraint` / `solution_boundary` of `constraint`; `success_sketch` of `criterion`. Collapsing onto the kind layer yields **one ontology, not two** — the only closed set is `NodeKind` (D54-L/D56-L), already owned by the drizzle-free taxonomy leaf (D73-L). Consequences: (1) the closed gap-`name` typology enum and the `RelevantGapName` union (D74-L) are replaced by `refersTo: NodeKind`; the `capability → relevant gaps` map references node kinds — the grounding floor is grounded `context` + `thesis` + `goal` + `constraint`, a graph query rather than a typology lookup, matching how GRAPH_MODEL already frames the grounding gate ("basic intent nodes are central evidence"). 
(2) Question text stays **free-form and situated**, projected general→specific by the elicitor per active lens/strategy and grounding density; the presence-aliasing limitation (distinct typologies aliasing one node-kind signal, the deferred finding in the now-retired refactor plan) **dissolves**, because discrimination now lives in the free-form question plus the `manual` / `coverage` satisfier (D57-L), not in a blunt `presence` count or a closed name enum. (3) Coverage extends for free to grounding-band kinds the catalog ignored — `term` (the ubiquitous-language anchor) and `assumption`. The flat-table substrate, `disposition`, `predicate` union, `importance` vs derived `coverage`, the anti-shadowing line (the table holds obligation / disposition / meta only, never domain content), `basis` provenance-directness, and band correlation (D64-L) are all **unchanged** — this decision changes how a gap *names its obligation* (by referring to a kind), not the register substrate. The example phrasings per kind are catalogued in [`docs/design/ELICITATION_QUESTIONS.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/ELICITATION_QUESTIONS.md) as a **priming / example layer for the elicitor, not a schema**: brainstorming more questions adds facets/phrasings for existing kinds and never adds ontology. The code remodel landed 2026-06-10: `ElicitationGap` and the table now carry `refersTo: NodeKind` + free-form `question`, `createSpec` seeds grounding gaps by node kind (`context`, `thesis`, `goal`, `constraint`, plus `term`/`assumption`), and capability-readiness points at a `capability → NodeKind[]` map with loud failure for a missing required kind. Depends on: D54-L, D56-L, D57-L, D64-L, D65-L, D73-L, D74-L; A24-L, A27-L. Refines: D30-L, D65-L, D74-L. 
Supersedes: the grounding typology catalog as a parallel closed gap vocabulary; the closed gap-`name` typology enum and the `RelevantGapName` union; and the retired refactor plan to enshrine `GROUNDING_GAP_TYPOLOGIES` as a canonical const. #### Authority & mutation @@ -255,7 +255,7 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c This division mirrors the batch-proposal flow in D26-L: `propose-graph` and `project-graph` strategies can delegate variant generation to fan-out `proposer` invocations while `intent` / `design` / `oracle` lenses frame the proposal subject; purely extractive single-exchange work may stay main-agent-only. Worker-style write-capable subagents are deferred until an execute operational mode lands. Cross-extension agent registration (Amos's `globalThis.__pi_subagents` bridge) is deferred because it conflicts with profile sealing; the POC registry is Brunch-owned only. NDJSON stream events from the subprocess drive TUI tool-progress UI; a `subagent.progress` RPC subscription for headless/web is deferred. Subagents are an optional enhancement to candidate-proposal diversity, not a load-bearing M0–M9 substrate: they enhance R20/D27-L proposal generation when bandwidth permits. Depends on: D2-L, D26-L, D27-L, D30-L, D31-L, D39-L, D41-L. Distinct from: D15-L Side task (non-blocking, status-via-custom-message), the deferred Side chat (user-invoked overlay; see Future Direction Register). Supersedes: —. - **D36-L — Spec/session selection is a reusable hierarchical decision model with transport-specific presentations.** Brunch owns a pure spec/session selection model that renders cwd-scoped inventory under the discovered project name without calling the user-created object a “workspace”. 
In TUI mode, the model may present a fast “continue last session” affordance when `.brunch/workspace.json` points to a valid spec+session; otherwise, or after “other spec/session”, the durable tree is: `create new spec → provide spec name → session created automatically`; `resume existing spec → choose existing spec → create a new session OR resume existing session → choose existing session`. The UI should not list every spec as a top-level action label; “resume existing spec” is the top-level intent, and the spec list is the next screen/scrollable selector. The model returns a product decision (`new spec`, `new session for spec`, `open session`, `continue selected session`, `cancel/quit`) without opening Pi sessions or mutating `.brunch/workspace.json` itself. The `WorkspaceSessionCoordinator` activates that decision and owns all persistence/session-binding effects. TUI startup and in-session paths share branded `pi-tui` components and colocated logo assets under `src/.pi/components/workspace-dialog`; adapters differ only in terminal lifecycle and Pi session-replacement mechanics (`ProcessTerminal`/`TUI.showOverlay` before Pi starts, `ctx.ui.custom(..., { overlay: true })` inside Pi), not in product semantics. RPC/headless transports must not invoke the TUI picker; they expose the same initial-selection requirement and activation decisions as JSON-RPC/product results so CLI JSON-RPC clients can select or create spec/session correctly. Depends on: D11-L, D21-L, D24-L, D33-L. Supersedes: implicit resume of `.brunch/workspace.json` on TUI launch, Pi `/resume`/`/new` as Brunch's product session chooser, one-off startup-only picker implementations, a flat action list that says “workspace” for specs, top-level `resume spec X` labels, and a separate intermediate action chooser for switching. 
- **D42-L — Session naming is Pi `session_info` presentation metadata, not spec identity.** Brunch-created sessions should be named at creation with neutral workspace-global defaults (`Untitled Session 1`, `Untitled Session 2`, …) so pickers/chrome never show an unnamed Brunch session and unchanged defaults do not collide across specs in the same cwd. These defaults are immediate lifecycle metadata, not LLM-generated summaries and not derived from the selected spec title. Brunch may later use Pi session lifecycle hooks to opportunistically replace a default with a short human-readable name that characterizes what happened in the transcript. The preferred generation trigger is `session_shutdown` for `quit`, `new`, and `resume` replacements because it sees the just-finished transcript and can name it before later picker lists need to distinguish sessions; `session_before_compact` or post-compaction (`session_compact`) may be used to refresh names after major summarization, and a manual/user rename command can force or override naming. The generation call should mirror the model-selection pattern in the local `summarize.ts` extension example: choose a cheap/fast authorized model, extract user/assistant text plus salient tool calls from the current branch, ask for a concise title, and append a Pi `session_info` entry through `SessionManager.appendSessionInfo`. Naming must be best-effort and non-blocking with a tight budget: failures, missing auth, empty transcripts, or shutdown aborts preserve the existing default/user label rather than blocking session replacement or exit. Session display names label sessions in pickers and chrome, but do not affect spec ids, session bindings, graph truth, or replay semantics. Depends on: D6-L, D17-L, D21-L, D35-L. 
Supersedes: using spec title or session UUID alone as the only durable display label once transcripts have meaningful content, leaving Brunch-created sessions unnamed, spec-local default numbering, or treating generated session names as canonical spec identity. -- **D58-L — Brunch prompt composition is a thin runtime header plus a gated prompt-resource manifest, not eager selection of every objective pack.** `.pi/agents/compose(agentId, sessionState, spec, workspace, context)` runs before Pi provider requests through Brunch's prompt extension and emits: **(1) agent control header** — keyed agent identity, model/thinking expectation, foreground role derived from `op_mode`, and mode/tool-authority summary; **(2) runtime-state header** — current pinned/AUTO `goal`, `strategy`, and `lens`, the readiness estimate (D45-L), and workspace posture; **(3) resource manifests** — XML-style ``, ``, ``, and `` entries filtered by `.pi/agents/state.ts` legal tuples, grade, `op_mode`, and the agent allow-list, each carrying `{name, description, location}` for a Brunch-owned markdown resource under `src/.pi/{agents,skills}/`; the `{name, description, location}` triples are code-owned in `.pi/agents/state.ts`, not filesystem-discovered, honoring D39-L sealing; **(4) compact pushed context** — only the minimal context handles and rendered context needed to orient the turn, with deeper context access still governed by D60-L. Detailed goal/strategy/lens/method instructions live in Brunch prompt resources and are loaded by the agent with `read` when needed, following the same simple mechanism Pi uses for skills. Method resources are the prompt-level home for Brunch tool-routing and sequencing guidance; tool definitions remain boundary schemas/execution hooks, not the whole Brunch guide to when or how tools should be composed. 
`AUTO` means the axis is unpinned: the manifest lists legal choices and router instructions tell the agent to choose only from the current manifest, reading the selected resource before applying it when detail matters. Pinned axes point to the pinned resource; code enforces legality and tool gating but does not choose or concatenate large semantic packs on the agent's behalf. Pi-native skills may still carry startup-scoped capabilities, but runtime-state-gated availability is Brunch's manifest, not ambient Pi discovery. `.pi/agents/` is the keyed agent prompt assembly layer (`definitions/`, `contexts/`); `.pi/skills/` carries goal/strategy/lens/method resources; `.pi/agents/contexts/` is the D60-L agent-context orchestration layer (code), not a manifest resource family or general renderer bucket. Reusable text renderers may migrate to `renderers/` under D52-L. Composition is projection, not a behavioral state machine. Depends on: D23-L, D25-L, D39-L, D40-L, D52-L, D59-L, D60-L. Supersedes: the flat "base + mode + role + strategy + lens + grade + …" layering; the fixed all-packs concatenation in `compose-brunch-prompt.ts`; "role preset / runtime bundle" as the composition unit; direct Layer-2 eager prompt-pack injection as the default mechanism; top-level `src/agents/` for Pi-only agents; and `capability` as a parallel name for `method` / ``. 
+- **D58-L — Brunch prompt composition is a thin runtime header plus a gated prompt-resource manifest, not eager selection of every objective pack.** `.pi/agents/compose(agentId, sessionState, spec, workspace, context)` runs before Pi provider requests through Brunch's prompt extension and emits: **(1) agent control header** — keyed agent identity, model/thinking expectation, foreground role derived from `op_mode`, and mode/tool-authority summary; **(2) runtime-state header** — current pinned/AUTO `goal`, `strategy`, and `lens`, the readiness estimate (D45-L), and workspace posture; **(3) resource manifests** — XML-style `<goal>`, `<strategy>`, `<lens>`, and `<method>` entries filtered by `.pi/agents/state.ts` legal tuples, selected-spec capability-readiness over `elicitation_gaps`, `op_mode`, and the agent allow-list, each carrying `{name, description, location}` for a Brunch-owned markdown resource under `src/.pi/{agents,skills}/`; the `{name, description, location}` triples are code-owned in `.pi/agents/state.ts`, not filesystem-discovered, honoring D39-L sealing; **(4) compact pushed context** — only the minimal context handles and rendered context needed to orient the turn, with deeper context access still governed by D60-L. Detailed goal/strategy/lens/method instructions live in Brunch prompt resources and are loaded by the agent with `read` when needed, following the same simple mechanism Pi uses for skills. Method resources are the prompt-level home for Brunch tool-routing and sequencing guidance; tool definitions remain boundary schemas/execution hooks, not the whole Brunch guide to when or how tools should be composed. `AUTO` means the axis is unpinned: the manifest lists only currently legal choices and router instructions tell the agent to choose only from that manifest, reading the selected resource before applying it when detail matters. 
Pinned axes point to the pinned resource whenever the tuple is role/mode-legal, even if readiness negotiates; capability-readiness instead constrains AUTO menus and gated methods/tools. Pi-native skills may still carry startup-scoped capabilities, but runtime-state-gated availability is Brunch's manifest, not ambient Pi discovery. `.pi/agents/` is the keyed agent prompt assembly layer (`definitions/`, `contexts/`); `.pi/skills/` carries goal/strategy/lens/method resources; `.pi/agents/contexts/` is the D60-L agent-context orchestration layer (code), not a manifest resource family or general renderer bucket. Reusable text renderers may migrate to `renderers/` under D52-L. Composition is projection, not a behavioral state machine. Depends on: D23-L, D25-L, D39-L, D40-L, D52-L, D59-L, D60-L. Supersedes: the flat "base + mode + role + strategy + lens + grade + …" layering; the fixed all-packs concatenation in `compose-brunch-prompt.ts`; "role preset / runtime bundle" as the composition unit; direct Layer-2 eager prompt-pack injection as the default mechanism; top-level `src/agents/` for Pi-only agents; and `capability` as a parallel name for `method` / `<method>`. #### Continuity & origination (turn-boundary choreography) @@ -309,14 +309,14 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c | I28-L | Auto-compaction output preserves the configured anchor set byte-stable: every entry kind listed in [src/.pi/extensions/compaction/index.ts](file:///Users/lunelson/Code/hashintel/brunch-next/src/.pi/extensions/compaction/index.ts) is reconstructable post-compaction according to its `select` rule (`first | latest | active-leaves | all-unresolved`); LLM-generated narrative summary never replaces or rephrases preserved-anchor content; extension failure falls through to Pi default compaction rather than dropping anchors silently. 
| planned (compaction round-trip property tests at M9 plus inner-loop anchor-rendering unit tests and TypeBox schema validation of the anchor contract) | D43-L; R15, R13; I3-L, I4-L, I8-L, I12-L | | I29-L | Subagent subprocesses inherit Brunch Pi Profile sealing: every `subagent` tool invocation spawns `pi --mode json -p --no-session --no-skills --no-extensions` with an explicit per-agent tool allowlist and per-agent model; subagents never load ambient user/project `.pi/` skills, prompts, themes, extensions, context files, or behavior-shaping settings; subagents never gain direct access to the parent's `CommandExecutor`, Brunch RPC handlers, or graph persistence; subagent results return to the main agent only as tool result content (no side-effect transcript writes). | planned (subagent subprocess argv tests; isolation audit asserting absent ambient-resource leakage; tool-allowlist conformance test per starter agent) | D2-L, D39-L, D44-L; I2-L, I11-L, I24-L | | I30-L | Elicitor post-exchange capture only commits high-confidence extractive facts, concrete reconciliation needs, and justified `elicitation_gaps` disposition updates (D65-L); low-confidence implications remain in structured-exchange preface/question material and do not become graph truth until clarified, accepted, or explicitly escalated. 
| partially covered (`src/graph/capture/structured-response.test.ts` accepts only directly labeled text facts for the current tracer, rejects implication-only prose as `no_capture`, preserves structural diagnostics, `src/probes/capture-response-to-graph-proof.test.ts` proves public RPC response capture into selected-spec graph truth, and `src/probes/submit-message-capture-proof.test.ts` proves the same explicit-text capture path for ordinary `session.submitMessage` turns; reconciliation-needs and gap-disposition capture remain planned) | D18-L, D47-L, D65-L; A22-L | -| I31-L | Readiness never bars graph truth or work; it is just-in-time capability-readiness over relevant gaps, not a stored grade or kind whitelist. There is no `readiness_grade` scalar; capability availability is judged on request against the relevant `elicitation_gaps` (D74-L) and may proceed, proceed at low epistemic status, or negotiate — it never refuses outright. The `CommandExecutor` must not reject a graph node solely because its kind belongs to a later readiness band (D64-L). The soft `readiness estimate` (D45-L) is UI-only and gates nothing. 
| partially covered (`src/projections/session/capability-readiness.test.ts` covers the D74-L tracer gate, including proceed / proceed_low_epistemic / negotiate, no-refusal, no grade-symbol import, and a live `presence` coverage flip; `src/projections/session/affordances.test.ts` covers the first consumer rewire: menu legality omits gated options while relevant gaps negotiate and includes them when coverage rises, with no grade symbols in `runtime-policy.ts` / `affordances.ts`, and a required `NodeKind` absent from the gap register fails loud (config bug ≠ uncovered — readiness omission never masks a seeding error); `src/projections/session/readiness-estimate.test.ts` covers the soft D45-L estimate shape, empty-band zero, importance-weighted per-band coverage, honest regression, no grade imports, and no legality-path imports; `src/.pi/agents/compose.test.ts`, `src/.pi/agents/contexts/cwd.test.ts`, and `src/.pi/__tests__/prompting.test.ts` cover the agent-prompt display swap: selected-spec gaps render as the soft per-band estimate with deterministic formatting, `readiness_grade=` is absent from prompt display, and the turn boundary threads the same gaps into cwd context; `src/session/workspace-session-coordinator.test.ts`, `src/renderers/workspace/workspace-state.test.ts`, `src/session/workspace-context.test.ts`, `src/.pi/__tests__/context-tools.test.ts`, `src/rpc/handlers.test.ts`, and `src/web/app.test.tsx` cover the workspace/chrome display retirement: `chrome.phase` / `chrome.chatMode` no longer project through coordinator/RPC/web/chrome fixtures, and workspace overview session inventory no longer carries or renders `readinessGrade`; `createSpec` / `getSpec` persistence, seed/export fixture contracts, probes, and selected-spec prompt carriers no longer persist or transport a readiness grade) | D20-L, D45-L, D64-L, D74-L | +| I31-L | Readiness never bars graph truth or work; it is just-in-time capability-readiness over relevant gaps, not a stored grade or kind 
whitelist. There is no `readiness_grade` scalar; capability availability is judged on request against the relevant `elicitation_gaps` (D74-L) and may proceed, proceed at low epistemic status, or negotiate — it never refuses outright. The `CommandExecutor` must not reject a graph node solely because its kind belongs to a later readiness band (D64-L). The soft `readiness estimate` (D45-L) is UI-only and gates nothing. | partially covered (`src/projections/session/capability-readiness.test.ts` covers the D74-L tracer gate, including proceed / proceed_low_epistemic / negotiate, no-refusal, no grade-symbol import, and a live `presence` coverage flip; `src/projections/session/affordances.test.ts` covers the first consumer rewire: menu legality omits gated options while relevant gaps negotiate and includes them when coverage rises, with no grade symbols in `runtime-policy.ts` / `affordances.ts`, and a required `NodeKind` absent from the gap register fails loud (config bug ≠ uncovered — readiness omission never masks a seeding error); `src/projections/session/readiness-estimate.test.ts` covers the soft D45-L estimate shape, empty-band zero, importance-weighted per-band coverage, honest regression, and no legality-path imports; `src/.pi/agents/state.test.ts`, `src/.pi/agents/compose.test.ts`, `src/.pi/agents/contexts/cwd.test.ts`, and `src/.pi/__tests__/prompting.test.ts` cover the prompt consumer path: selected-spec gaps render as the soft per-band estimate, readiness-thin pinned axes remain visible, gated methods stay withheld, `readiness_grade=` is absent from prompt display, and the turn boundary threads the same gaps into cwd context without prompt-assembly failure; `src/session/workspace-session-coordinator.test.ts`, `src/renderers/workspace/workspace-state.test.ts`, `src/session/workspace-context.test.ts`, `src/.pi/__tests__/context-tools.test.ts`, `src/rpc/handlers.test.ts`, and `src/web/app.test.tsx` cover the workspace/chrome display retirement: `chrome.phase` / 
`chrome.chatMode` no longer project through coordinator/RPC/web/chrome fixtures, and workspace overview session inventory no longer carries or renders `readinessGrade`; `createSpec` / `getSpec` persistence, seed/export fixture contracts, probes, and selected-spec prompt carriers no longer persist or transport a readiness grade) | D20-L, D45-L, D64-L, D74-L | | I32-L | Public RPC structured-exchange driving never requires a client to speak raw Pi RPC: after Brunch method discovery and workspace/spec/session activation, each pending assistant-originated exchange is answered exactly once through `session.submitExchangeResponse`, and the deterministic permutation run produces linear Pi JSONL whose structured exchange projection preserves the same prompt/answer/status/comment artifacts as the equivalent TUI structured-exchange path. | covered for deterministic FE-744 parity under canonical session method names (`session.triggerExchange`, `session.pendingExchange`, `session.submitExchangeResponse`, `session.exchanges`): `rpc.discover` contract tests, pending/respond lifecycle tests, current public-RPC structured-exchange permutations, terminal non-answered status handling, option content/rationale parity, no repeated deterministic prompts, and transcript/exchange parity assertions. | D5-L, D48-L, D49-L; I10-L, I13-L, I21-L, I23-L | | I33-L | `capture_*` analysis entries are transcript evidence only: they persist as Brunch structured-exchange `toolResult` rows, are included by Brunch-semantic transcript renderers, are hidden or collapsed in TUI display, and never mutate graph truth or bypass `CommandExecutor`. 
| partially covered (minimum capture details schemas parse/export and reject graph payload fields; future runtime capture-analysis schema/rendering tests plus transcript renderer fixtures still need to prove persisted result rendering and TUI hide/collapse behavior; later graph-capture fixtures compare analysis candidates against committed graph mutations) | D17-L, D18-L, D37-L, D47-L, D50-L; I2-L, I11-L, I23-L, I30-L | | I34-L | `mutateGraph` batch validation is all-or-nothing: if any node or edge in the batch is structurally illegal, the entire batch is rejected and no partial state is persisted; the agent receives diagnostics sufficient for bounded self-correction retry. | covered (`command-executor/commit-graph-batch.test.ts` and graph-tool adapter tests cover dry-run/commit diagnostic parity for invalid basis, missing refs/codes, invalid category/stance, self-loop, invalid node kind/detail shape, rollback of nodes/edges/change_log/counters, transaction-local planning before LSN allocation/writes, and structured adapter diagnostics without thrown projected-code errors or fake endpoint refs) | D53-L; I1-L, I11-L | -| I35-L | Graph context reads support multiple detail levels: a cursory/compact full-graph overview for orientation, and detailed node-neighborhood context with configurable hop depth for focused work. Context builders in `.pi/agents/contexts/` orchestrate which level to inject or advertise based on mode/goal/strategy/lens/grade. | covered for current POC push path (`getGraphOverview` + `getNodeNeighborhood` in `queries.ts` with 10 tests; `src/.pi/agents/contexts/{graph,node,cwd}.test.ts` covers lens-shaped overview rendering, bounded node-neighborhood rendering, and selected-spec cwd/session/posture context; `src/.pi/__tests__/prompting.test.ts` proves the explicit shell/product prompt path supplies selected-spec-bound graph context to `composeAgentPrompt()`). Pulled context tools are part of the live read surface. 
| D52-L, D53-L, D58-L | +| I35-L | Graph context reads support multiple detail levels: a cursory/compact full-graph overview for orientation, and detailed node-neighborhood context with configurable hop depth for focused work. Context builders in `.pi/agents/contexts/` orchestrate which level to inject or advertise based on mode/goal/strategy/lens/readiness. | covered for current POC push path (`getGraphOverview` + `getNodeNeighborhood` in `queries.ts` with 10 tests; `src/.pi/agents/contexts/{graph,node,cwd}.test.ts` covers lens-shaped overview rendering, bounded node-neighborhood rendering, and selected-spec cwd/session/posture context; `src/.pi/__tests__/prompting.test.ts` proves the explicit shell/product prompt path supplies selected-spec-bound graph context to `composeAgentPrompt()`). Pulled context tools are part of the live read surface. | D52-L, D53-L, D58-L | | I36-L | Node `kind` is drawn from a per-plane closed enum structurally validated by the `CommandExecutor`; the intent kind category (basic / structural / reasoning) is a pure function of `kind` and is never stored on the node. | covered (CommandExecutor rejects invalid kind-for-plane; `intentKindCategory` is pure derivation with exhaustive switch; tests in `command-executor.test.ts`) | D54-L, D56-L | | I37-L | `detail` is per-kind validated by the `CommandExecutor`: `decision` and `term` nodes REQUIRE `detail` with their respective sub-schemas; all other kinds must omit `detail`; unknown fields in `detail` are rejected. | covered (detail-required/prohibited/shape tests in `command-executor.test.ts`) | D54-L | -| I38-L | Every Brunch prompt-resource manifest injected for an agent turn is generated from projected runtime state and spec/workspace gates: listed resources are Brunch-owned, readable under the active tool policy, legal for the current `(op_mode × goal × strategy × lens)` / grade / agent allow-list, and off-list resources are not advertised as available. 
AUTO axes never list illegal choices; pinned axes point to the pinned resource. The shared affordance derivation and prompt manifest filtering use the same grade/AUTO legality source. | covered for current P0 manifest families (`src/.pi/agents/compose.test.ts` covers default header/context/manifest output, AUTO grade/allow-list filtering, pinned singleton resources, illegal pinned grade rejection, and readable `src/.pi/` locations; `src/.pi/__tests__/prompting.test.ts` covers the explicit shell `before_agent_start` product path appending `agents/compose()` output from transcript-projected runtime state and no legacy composer import/resource discovery; `src/.pi/agents/state.test.ts` plus `src/projections/session/affordances.test.ts` cover shared legality/default behavior, including AUTO excluding `freestyle`). FE-825 added a dev-gated introspection loop (`src/.pi/extensions/introspection/` + `src/dev/introspection-launcher.ts`) that records final provider payloads and pairs them with subjective model answers under `.fixtures/scratch/introspection//`; `brunch_introspect_query` now makes the captured provider payload/tool schemas/base options queryable in-chat for the same diagnostic plane. Probe fitness may still track whether the agent reads selected resources before use. | D39-L, D40-L, D58-L, D59-L, D66-L, D69-L | +| I38-L | Every Brunch prompt-resource manifest injected for an agent turn is generated from projected runtime state and spec/workspace gates: listed resources are Brunch-owned, readable under the active tool policy, legal for the current `(op_mode × goal × strategy × lens)` tuple / capability-readiness / agent allow-list, and off-list resources are not advertised as available. AUTO axes never list readiness-illegal choices; pinned axes point to the pinned resource whenever the tuple is role/mode-legal, even if readiness negotiates. The shared affordance derivation and prompt manifest filtering use the same capability-readiness/AUTO legality source. 
| covered for current P0 manifest families (`src/.pi/agents/compose.test.ts` covers default header/context/manifest output, AUTO capability-readiness filtering, pinned singleton resources, readiness-thin pin retention, role/mode-illegal pin rejection, and readable `src/.pi/` locations; `src/.pi/__tests__/prompting.test.ts` covers the explicit shell `before_agent_start` product path appending `agents/compose()` output from transcript-projected runtime state and no legacy composer import/resource discovery; `src/.pi/agents/state.test.ts` plus `src/projections/session/affordances.test.ts` cover shared legality/default behavior, including AUTO excluding `freestyle` and gated methods staying withheld during negotiation). FE-825 added a dev-gated introspection loop (`src/.pi/extensions/introspection/` + `src/dev/introspection-launcher.ts`) that records final provider payloads and pairs them with subjective model answers under `.fixtures/scratch/introspection//`; `brunch_introspect_query` now makes the captured provider payload/tool schemas/base options queryable in-chat for the same diagnostic plane. Probe fitness may still track whether the agent reads selected resources before use. | D39-L, D40-L, D58-L, D59-L, D66-L, D69-L | | I39-L | Every graph node in a spec has exactly one stable projected human reference code derived from `kind` + `kind_ordinal`; `(spec_id, plane, kind, kind_ordinal)` is unique; ordinals are monotonic per `(spec_id, plane, kind)` and are not reused after deletion or supersession. 
| partially covered (`graph-tool-resilience` added `nodes.kind_ordinal`, `node_kind_counters`, DB uniqueness, CommandExecutor allocation for single-node/batch writes, rollback protection, `GraphNode.kindOrdinal` row mapping, globally unique 1–3 letter labels with readiness-band metadata, projected-code parsing, selected-spec adapter resolution before `CommandExecutor`, code-only `mutate_graph` / `read_graph` schemas, and code-primary prompt/tool rendering; remaining slice still needs deletion/supersession no-reuse coverage) | D54-L, D62-L; I1-L, I11-L | | I40-L | Accepted graph nodes and edges use only `basis ∈ explicit | implicit`; review-set approval and direct user statements produce `explicit`, `propose-graph` concept-level materialization produces `implicit`, and the mutation path is recoverable from `change_log` rather than from a persisted basis enum value such as `accepted_review_set`. | covered (`graph-tool-resilience` replaced the persisted basis enum with `explicit | implicit`, made `mutateGraph` apply one batch create-basis to all created nodes/edges, made single-node `createNode` reject retired basis values before LSN/counter/node/change-log allocation, made `propose-graph` adapter commits implicit, made review-set translation explicit, rejected retired `accepted_review_set`, and records `change_log.operation` independently; `capture-response-to-graph` proves direct structured text responses commit explicit-basis graph nodes through `CommandExecutor`; `.fixtures/runs/project-graph-review-cycle/2026-06-06-project-graph-review-cycle/` proves full review-cycle approval creates explicit-basis graph truth) | D26-L, D27-L, D53-L, D63-L | | I41-L | Same-spec `supersession` edges form an acyclic directed graph; every edge-creation path validates proposed supersession edges together with existing supersession edges before committing. 
| covered (`command-executor/commit-graph-batch.test.ts` rejects existing-cycle closure, intra-batch cycles, and mixed existing+batch cycles through the shared dry-run/commit planner before batch writes; rejected cycles roll back or avoid batch nodes/edges/change_log; acyclic supersession commits remain covered by query/CommandExecutor success paths) | D51-L, D53-L; I34-L | @@ -373,8 +373,8 @@ src/.pi/ - Manifest metadata is code-owned, not filesystem-discovered: `.pi/agents/state.ts` binds each legal axis value to its `{name, description, location}`, and `compose()` emits that binding; the agent `read`s the `.md` body at the listed `location` only when detail matters. This keeps the legal set and its labels in one tested place and honors D39-L sealing (no runtime resource discovery). Frontmatter-sourced manifest metadata is a deferred ergonomics option, not the POC mechanism. - `.pi/agents/contexts/` is the D60-L agent-context orchestration layer (TypeScript), surfaced as the header's compact pushed context or via the read tools; reusable text renderers may migrate to `renderers/`, and contexts are not part of the `read`-on-demand resource manifest and carry no `` family. - Workspace **posture** is workspace-scoped product state persisted in `.brunch/workspace.json`, not spec state, session state, or graph truth. D57-L keeps it off the spec row and graph; D58-L composition injects known posture values into the runtime header as an axis of agent influence, and the `capture-posture` goal (D59-L) can confirm or refine those values conversationally. -- Readiness is judged just-in-time per requested capability, not as a user-facing workflow stepper, a stored grade, a session-local phase, or a graph-node-kind whitelist. 
There is no `readiness_grade` on the spec row (D45-L); capability-readiness (D74-L) is evaluated over the relevant `elicitation_gaps`, and D64-L readiness bands describe non-exclusive evidence groupings feeding the readiness-estimate rollup, goal selection, and context filtering. The soft readiness estimate may surface in UI but gates nothing. A future structural milestone gate for export/plan/execute op-modes is deferred until such an op-mode exists; before readiness drives hard tool/agent authority beyond the POC, Brunch needs explicit `capability → relevant gaps` mappings and per-typology predicates (A27-L). -- Prompt resources and Pi skills are both progressive-disclosure mechanisms, but they are not authority. Brunch code owns runtime-state projection, legal tuple filtering, grade/allow-list gating, tool activation, and tool-call blocking. Pi-native skills may be used for startup-scoped capabilities; runtime-state-specific objective/method availability is advertised through Brunch's per-turn manifest so ambient user/project resources cannot leak into product behavior. +- Readiness is judged just-in-time per requested capability, not as a user-facing workflow stepper, a stored grade, a session-local phase, or a graph-node-kind whitelist. There is no `readiness_grade` on the spec row (D45-L); capability-readiness (D74-L) is evaluated over the relevant `elicitation_gaps`, and D64-L readiness bands describe non-exclusive evidence groupings feeding the readiness-estimate rollup, goal selection, and context filtering. The soft readiness estimate may surface in UI but gates nothing. A future structural milestone gate for export/plan/execute op-modes is deferred until such an op-mode exists; before readiness grows beyond the current tracer, Brunch still needs a real evaluator path for `manual` gaps and a more differentiated per-capability map than the shared grounding floor (A27-L). 
+- Prompt resources and Pi skills are both progressive-disclosure mechanisms, but they are not authority. Brunch code owns runtime-state projection, legal tuple filtering, capability-readiness/allow-list gating, tool activation, and tool-call blocking. Explicit user/system pins remain visible when readiness negotiates; negotiation changes AUTO choices, method/tool availability, and response posture rather than authority. Pi-native skills may be used for startup-scoped capabilities; runtime-state-specific objective/method availability is advertised through Brunch's per-turn manifest so ambient user/project resources cannot leak into product behavior. ### Coherence and readiness semantics @@ -438,7 +438,7 @@ src/.pi/ | **AUTO** | The unpinned state of an objective axis (`goal` / `strategy` / `lens`): composition advertises the legal choices in the current prompt-resource manifest and instructs the agent to self-select from that manifest only, reading the selected resource when detail matters (D58-L). | | **Brunch Pi Profile** | The sealed programmatic wrapper around embedded Pi: settings policy, resource-loader policy, extension factories, keybinding/command policy, tool policy, and prompt policy. It allows Brunch-owned resources while suppressing ambient `.pi/` behavior. | | **Prompt resource** | A Brunch-owned markdown file under `src/.pi/` containing detailed goal, strategy, lens, method, or agent-definition guidance. Prompt resources are loaded by the agent with `read` when needed; they are product control-plane assets, not ambient Pi prompt templates. | -| **Prompt-resource manifest** | The small per-turn D58-L manifest injected into the system prompt, listing only runtime-legal Brunch resources with `name`, `description`, and `location`. The `name`/`description`/`location` for each entry are code-owned in `.pi/agents/state.ts` (not filesystem-discovered), honoring D39-L sealing; `.pi/agents/contexts/` context renderers are not manifest resources. 
It mirrors Pi's skill-list pattern but is filtered by Brunch runtime state, grade, and allow-lists. | +| **Prompt-resource manifest** | The small per-turn D58-L manifest injected into the system prompt, listing only runtime-legal Brunch resources with `name`, `description`, and `location`. The `name`/`description`/`location` for each entry are code-owned in `.pi/agents/state.ts` (not filesystem-discovered), honoring D39-L sealing; `.pi/agents/contexts/` context renderers are not manifest resources. It mirrors Pi's skill-list pattern but is filtered by Brunch runtime state, capability-readiness, and allow-lists. | | **Method** | A tool-usage or workflow competence advertised as a Brunch prompt resource (`.pi/skills/methods/*.md`): run structured exchanges, infer-and-capture (D50-L), generate proposals/projections, read context, mutate the graph, review for gaps. Method resources explain when to use a tool family and how to sequence it with other tools; executable tool definitions should stay focused on schemas, authority, and runtime behavior. A method may also be backed by a Pi-native skill, but actual tool authority remains code-owned through `op_mode` policy and active-tool gating. `capability` is retired as a synonym — use `method` and ``. | | **Agent context** | The content the agent reasons over — `cwd`, `graph`, or `node` (D60-L): pulled (typed, read-only) from `graph/`/`session/`, optionally projected when a reusable DTO helps, rendered to LLM-string or JSON, surfaced pushed (compose) or pulled (`read_graph` / `read_workspace_context` / `read_session_context`). Graph context explicitly chooses graph-truth vs active-context reads and may filter by node kind, readiness band, edge category/direction, or absence of an edge category (gap query). Distinct from the **workspace projection** (`workspace.state`), which is product/UI state, not agent content. 
| | **Readiness estimate** | A soft, derived, live per-band coverage projection over `elicitation_gaps`, for UI surfacing only (D45-L). It is *not* stored, *not* authority, and gates nothing — it may regress honestly. Replaces the retired stored `readiness_grade`. | @@ -635,7 +635,7 @@ Dev-loop artifacts route to gitignored `.fixtures/scratch///`, res | Middle | Round-trip tests | JSONL reload, linear transcript validation, session exchange projection, compaction, graph export/import, command result serialization, `supersedes`-chain reconstruction across regeneration. | D6-L, D13-L, D24-L, D28-L; I3-L, I8-L, I10-L, I19-L. | | Middle | Property-based / model-based tests | Spec-local LSN monotonicity, change-log replay, reconciliation-need invariants, stable kind-ordinal allocation/no-reuse, mention staleness, interest-set recomputation, side-task delivery ordering, **batch-acceptance atomicity (one selected-spec LSN / one change-log entry, partial-batch impossible under mid-batch validation failure)**, **`supersedes` / `supersession` acyclicity and unique-leaf-per-thread**, **lens-routing correctness (generated elicitor entries route to the right consumer)**, **reviewer-finding turn-boundary delivery ordering**. | A4-L, A8-L, A9-L, A11-L; I1-L, I4-L, I5-L, I6-L, I9-L, I12-L, I15-L, I16-L, I18-L, I39-L, I41-L. | | Middle | Contract tests | Named RPC method families and transport adapters share handler semantics; `rpc.discover` describes public methods with usable schemas/examples; `session.triggerExchange` / `session.pendingExchange` / `session.submitExchangeResponse` / `session.exchanges` preserve transcript truth; subscriptions deliver initial state payload plus ordered updates; `CommandExecutor` hides policy/transaction details; `acceptReviewSet` returns expected structured discriminants; only prevalidated proposals become reviewable review sets. | D5-L, D19-L, D20-L, D27-L, D48-L, D49-L; R11, R12, R27, R28. 
| -| Middle | Architectural boundary tests | No direct ORM/SQLite mutation outside `CommandExecutor`; no canonical chat/turn store; TUI/RPC/fixture code does not write `brunch.session_binding`; spec/session picker UI returns decisions rather than opening/mutating sessions; RPC/headless boot exposes structured initial-selection state instead of invoking TUI picker code; Brunch wrappers do not expose Pi branch creation/navigation as product behavior; spec readiness-grade mutations route through commands rather than session-local memory; reviewer-attributed writes target only `reconciliation_need`; Brunch-launched Pi runtimes do not load ambient `.pi/` resources or behavior-shaping settings outside the Brunch Pi Profile; Brunch product extensions load through the explicit static shell list rather than filesystem discovery or a runtime extension-metadata protocol. | D4-L, D6-L, D18-L, D21-L, D24-L, D29-L, D36-L, D39-L, D45-L; I2-L, I10-L, I11-L, I16-L, I19-L, I22-L, I24-L, I31-L. | +| Middle | Architectural boundary tests | No direct ORM/SQLite mutation outside `CommandExecutor`; no canonical chat/turn store; TUI/RPC/fixture code does not write `brunch.session_binding`; spec/session picker UI returns decisions rather than opening/mutating sessions; RPC/headless boot exposes structured initial-selection state instead of invoking TUI picker code; Brunch wrappers do not expose Pi branch creation/navigation as product behavior; readiness authority remains gap-derived rather than spec-row or session-local mutable state; reviewer-attributed writes target only `reconciliation_need`; Brunch-launched Pi runtimes do not load ambient `.pi/` resources or behavior-shaping settings outside the Brunch Pi Profile; Brunch product extensions load through the explicit static shell list rather than filesystem discovery or a runtime extension-metadata protocol. | D4-L, D6-L, D18-L, D21-L, D24-L, D29-L, D36-L, D39-L, D45-L; I2-L, I10-L, I11-L, I16-L, I19-L, I22-L, I24-L, I31-L. 
| | Middle | **Differential testing** | Dry-run validation at proposal time matches real-run validation at acceptance time (no drift between modes); free-form-generation vs constrained-generation legality rates (informs whether fallback path is needed per A14-L). | D27-L; A14-L. | | Middle | Probe transcript replay and property assertions | Probe runs preserve transcript evidence that can be replayed, rendered, and compared against current Brunch projections. Future brief-driven sessions, if revived, must produce the same probe-run artifact shape. For batch proposals/review sets: **structural-legality rate of LLM proposals tracked per-run in probe metadata as POC-phase fitness, not a merge gate**; first-attempt vs retry-with-feedback rates surfaced for human review. | A5-L, A6-L, A7-L, A14-L; I7-L; R20, R21, R22, R23. | | Middle | Deterministic public-RPC parity proof | A scripted agent-as-user discovers Brunch methods, activates workspace/spec/session, drives the current structured-exchange permutations through Brunch JSON-RPC only, compares Pi JSONL plus `session.exchanges` projections against TUI-shaped structured-exchange expectations, rejects repeated deterministic prompts, and can persist a `.fixtures/runs/public-rpc-parity//` review bundle containing source `session.jsonl`, Brunch-semantic `transcript.md`, and `report.json`. The landed FE-744 proof has been reconciled to the canonical D49-L session method names. | A5-L; D5-L, D48-L, D49-L; I23-L, I32-L; R24, R27, R28. | @@ -686,13 +686,13 @@ The first required probe is M0: after manual TUI interaction, a checker proves ` | I26-L | Structured-exchange schema tests prove the acknowledged Zod seam parses and exports JSON Schema; future M4 architectural tests should grep/import-audit schema libraries and Drizzle row-schema derivation boundaries. 
| | I28-L | Inner — TypeBox schema validation of [src/.pi/extensions/compaction/index.ts](file:///Users/lunelson/Code/hashintel/brunch-next/src/.pi/extensions/compaction/index.ts) shape; deterministic anchor-rendering unit tests (same branch + same config → same header bytes). Middle (M9) — compaction round-trip property tests across all configured anchors and selection rules; fallback-to-Pi-default behavior under simulated auth failure, empty LLM output, and thrown error. Outer (M9) — long-horizon adversarial fixture confirms session binding, latest runtime state, latest establishment offer, in-flight side-task results, and unresolved staleness hints remain agent-intelligible post-compaction. | | I29-L | Inner — argv-shape tests for the `subagent` tool prove every spawned subprocess includes `--no-session --no-skills --no-extensions` plus an explicit per-agent `--tools`/`--extension`/`--models`/`--append-system-prompt` set; TypeBox schema validation of `src/.pi/extensions/subagents/agents/*.md` frontmatter and `src/.pi/extensions/subagents/config.json`. Middle — isolation audit (no ambient `.pi/` resources reachable inside the subprocess; tool-allowlist conformance per starter agent; parent `CommandExecutor`/Brunch RPC handlers absent from subprocess environment). Outer — probe-driven proposal-generation runs invoking scout/researcher/graph-reader confirm grounding inputs flow through subagent outputs into review-set proposals without bypassing primary authority. | -| I30-L | FE-807 covers the current labeled-text response tracer: committed graph facts are compared against transcript evidence and implication-only prose returns `no_capture`. Future capture fixtures still need reconciliation-need and readiness-grade cases plus broader LLM-quality comparisons against preface-only interpretations. 
| -| I31-L | Spec-row command tests for grade updates plus prompt/tool-policy tests proving grade gates unlock later actions without disabling gathering/refinement; graph write tests proving later-band node kinds are not rejected solely because the current spec grade is lower. Card 1 covers the CommandExecutor grade-write path; prompt/tool-policy tests remain with M5. | +| I30-L | FE-807 covers the current labeled-text response tracer: committed graph facts are compared against transcript evidence and implication-only prose returns `no_capture`. Future capture fixtures still need reconciliation-need, gap-disposition, and readiness-negotiation cases plus broader LLM-quality comparisons against preface-only interpretations. | +| I31-L | Capability-readiness tests proving live gap coverage negotiates/unlocks later actions without disabling gathering/refinement; prompt/tool-policy tests proving readiness-thin pinned axes still compose while gated methods stay withheld; graph write tests proving later-band node kinds are not rejected solely because grounding is thin. | | I32-L | FE-744 public-RPC structured-exchange parity proof: `rpc.discover` contract tests, pending/respond lifecycle tests, deterministic permutation run over Brunch JSON-RPC only, no repeated deterministic prompts, and parity assertions over the resulting Pi JSONL, transcript display, and session exchange projections. | | I33-L | Current schema tests cover minimum no-graph `capture_*` details and reject graph payload fields. Future capture-analysis runtime tests must still cover persisted result rendering, no graph-write side effects, Brunch-semantic transcript inclusion, and hidden/collapsed TUI rendering fallback. | | I36-L | M4 per-plane kind enum validation tests in CommandExecutor; kind-to-category derivation unit tests proving pure function parity with GRAPH_MODEL.md table. 
| | I37-L | M4 node-creation tests: decision/term rejected without detail; constraint accepted with or without detail; other kinds rejected with detail; unknown detail fields rejected. | -| I38-L | `agents-composition-layer` inner tests: given projected runtime states and spec grades, compose emits manifests whose goal/strategy/lens/method resources are legal, Brunch-owned, readable, and filtered by the agent allow-list; AUTO axes list only legal choices and pinned axes point to their selected resource. Middle/outer probes may track whether the model actually reads the selected resource before applying it as fitness, not as an inner-loop gate. | +| I38-L | `agents-composition-layer` inner tests: given projected runtime states and selected-spec gaps/readiness states, compose emits manifests whose goal/strategy/lens/method resources are legal, Brunch-owned, readable, and filtered by capability-readiness plus the agent allow-list; AUTO axes list only legal choices and pinned axes stay visible whenever the tuple is role/mode-legal. Middle/outer probes may track, as a fitness signal rather than an inner-loop gate, whether the model actually reads the selected resource before applying it. | | I39-L | `graph-tool-resilience` CommandExecutor/adapter/context tests: counter rows allocate monotonic per-kind ordinals in multi-node batches, rollback does not persist failed ordinals/counter rows, DB constraints reject duplicate `(spec_id, plane, kind, kind_ordinal)`, projected-code metadata is unique and parses by longest prefix, existing-code refs resolve inside the selected spec, and prompt/tool renderers use codes as primary handles. Remaining proof: deletion/supersession no-reuse.
| | I40-L | `graph-tool-resilience` CommandExecutor/adapter tests: `mutateGraph` applies one batch create-basis to all created nodes/edges, single-node `createNode` rejects retired basis values before LSN/counter/node/change-log allocation, `propose-graph` adapter commits use `implicit`, review-set translation uses `explicit`, retired `accepted_review_set` is rejected, and `change_log.operation` remains independent of basis. FE-807 adds direct structured text response capture with `basis: explicit`. FE-809 adds real project-graph review-cycle acceptance proof with explicit-basis readback under `.fixtures/runs/project-graph-review-cycle/2026-06-06-project-graph-review-cycle/`. | | I41-L | `graph-tool-resilience` CommandExecutor tests reject supersession cycles across existing edges, intra-batch edges, and mixed existing+batch edges, including rollback of batch nodes/edges/change_log; existing acyclic supersession paths still commit. | diff --git a/memory/cards/renderer-golden-coverage--render-stage-chain.md b/memory/cards/renderer-golden-coverage--render-stage-chain.md new file mode 100644 index 00000000..def4611f --- /dev/null +++ b/memory/cards/renderer-golden-coverage--render-stage-chain.md @@ -0,0 +1,249 @@ +# Renderer Golden Coverage Chain + +Frontier: renderer-golden-coverage +Status: active +Mode: chain +Created: 2026-06-11 + +## Orientation + +- Seam: RENDER-stage reusable lossy text under `src/renderers/` plus the sketch → lock → formalize oracle loop that keeps LLM-facing wording from drifting silently. +- Frontier: `renderer-golden-coverage`, now the next sequenced trio work after `projection-shape-coverage` closed on this branch. 
+- Current truth in-tree: + - already locked: `graph/graph-slice`, `graph/node-neighborhood`, `session/runtime-frame` + - uncovered likely `●`: `session/transcript`, `workspace/workspace-state`, `exchanges/request-*`, `exchanges/present-question`, `exchanges/present-options`, `exchanges/present-review-set` + - disposition still needs an honest call: `workspace/workspace-context` is named as a consumer seam in `src/session/README.md`, but current code-path grep finds no live caller + - explicit `○` / topology stubs that should stay out unless their seam activates: `graph/commit-result.ts`, `graph/reconciliation-needs.ts`, `exchanges/present-candidates.ts` +- Known doc drift to retire first: + - `memory/PLAN.md` previously spoke as if `src/scripts/render-preview.ts` / `npm run render` already existed; they do not. + - `memory/CROSS_CUT_PLAN.md` still describes file-snapshot locks as net-new even though `graph/` and `session/runtime-frame` now use `toMatchFileSnapshot`. +- Main risk: snapshotting dead or single-owner rows for symmetry, or overbuilding a preview framework before the ledger names exactly which renderers still matter. +- Cross-cutting obligations: preserve D52-L (`renderers/` stays free of adapter/transport imports); keep goldens co-located with renderer tests under `src/renderers/**/__previews__/`; keep `○` stubs untouched; preserve the human eyeball step before lock. + +Posture: proving. Card 1 retires authority drift; Cards 2-5 cash in the now-legible coverage closures.
+ +## Dependency Sketch + +```text +Card 1 renderer ledger + preview-loop authority + ├─ unlocks Cards 2-5 by making the required rows and sketch path explicit + └─ may delete/demote workspace-context instead of snapshotting it + +Cards 2-5 are independent after Card 1 + ├─ Card 2 workspace rows + ├─ Card 3 session transcript row + ├─ Card 4 exchange request-family rows + └─ Card 5 exchange present-family rows + frontier close +``` + +## Card 1 - Close the renderer ledger and preview-loop authority + +### Objective + +Before adding any more goldens, close the authority gap around what counts as a required renderer row and what the actual sketch path is. This card should make later cards mechanical rather than rediscovery-heavy. + +### Light-card cold-start reads + +- `memory/PLAN.md` — frontier: `renderer-golden-coverage` +- `memory/CROSS_CUT_PLAN.md` — §Renderer feedback loops +- `src/renderers/README.md` +- existing locked tests: `src/renderers/graph/previews.test.ts`, `src/renderers/session/runtime-frame.test.ts` +- `src/session/README.md` — current `workspace-context` consumer claims +- `package.json` + +### Acceptance Criteria + +✓ `src/renderers/README.md` carries a closed renderer ledger with one row per current renderer, including required/deferred/stub disposition and current oracle status. + +✓ The ledger makes an explicit call on `workspace/workspace-context`: keep-and-cover only if it still owns a real consumer seam; otherwise demote or retire it. + +✓ `memory/PLAN.md` and any touched cross-cut notes stop claiming a preview harness already exists when it does not. + +✓ This card chooses one honest sketch path for the rest of the frontier: either materialize a minimal shared preview harness (`src/scripts/render-preview.ts` + `npm run render`) or explicitly narrow the frontier to test-local preview generation. Later cards should not reopen that choice. + +### Out of scope / guardrails + +- Do not snapshot every uncovered renderer in this card. 
+- Do not keep `workspace-context` merely to preserve directory symmetry. +- Do not invent a preview DSL or generalized renderer framework. + +### Expected touched paths (tentative) + +```text +memory/ +├── PLAN.md ~ +└── CROSS_CUT_PLAN.md ~? +package.json ~? +src/renderers/ +└── README.md ~ +src/scripts/ +└── render-preview.ts +? +``` + +## Card 2 - Close the workspace renderer rows honestly + +### Objective + +Close the `workspace/` rows without pretending both existing files are equally real. `workspace-state` is clearly load-bearing; `workspace-context` needs an explicit keep-or-retire decision before it gets any snapshot surface. + +### Light-card cold-start reads + +- Card 1 output in `src/renderers/README.md` +- `src/renderers/workspace/workspace-state.ts` +- `src/renderers/workspace/workspace-state.test.ts` +- `src/renderers/workspace/workspace-context.ts` +- `src/session/README.md` +- active callers such as `src/app/brunch.ts` + +### Acceptance Criteria + +✓ `workspace-state` has co-located preview/golden coverage plus semantic invariants over live status variants and the absence of retired chrome/readiness fields. + +✓ If `workspace-context` survives Card 1 as a required row, it gets its own co-located preview/golden over both `cwd_inventory` and `workspace_overview`; if not, it is deleted or demoted and the docs agree. + +✓ The renderer ledger marks the workspace rows closed after this card. + +### Out of scope / guardrails + +- No new workspace projection or adapter layer. +- No reintroduction of retired `phase`, `chatMode`, or persisted readiness concepts. +- Do not widen session read shapes merely to make renderer tests easier. + +### Expected touched paths (tentative) + +```text +src/renderers/workspace/ +├── workspace-state.ts ~? +├── workspace-state.test.ts ~ +├── workspace-context.ts ~|- +└── __previews__/ +? +src/renderers/README.md ~ +src/session/README.md ~? +src/app/brunch.ts ? 
+``` + +## Card 3 - Move transcript markdown locking into renderer home + +### Objective + +Give `renderers/session/transcript.ts` its own co-located golden and invariants so the renderer home owns transcript wording, while `session/session-transcript.ts` keeps only the wrapper/parsing proof it actually owns. + +### Light-card cold-start reads + +- Card 1 output in `src/renderers/README.md` +- `src/renderers/session/transcript.ts` +- `src/session/session-transcript.ts` +- `src/session/session-transcript.test.ts` +- `src/projections/session/transcript-context.ts` + +### Acceptance Criteria + +✓ `src/renderers/session/transcript.ts` has a co-located preview/golden built from a mixed transcript fixture covering user content, assistant text, generic tool results, structured exchange tool results, and omitted non-text blocks. + +✓ The renderer-level test owns the text-shape lock; higher-level `session/session-transcript.*` tests keep only wrapper behavior that the renderer test does not prove. + +✓ The ledger marks `session/transcript` covered while leaving `session/runtime-frame` as already covered. + +### Out of scope / guardrails + +- No new transcript projection fields or session-manager behavior changes. +- Do not widen transcript rendering to include thinking/toolCall/image blocks. +- Do not move transcript parsing ownership out of `session/`. + +### Expected touched paths (tentative) + +```text +src/renderers/session/ +├── transcript.ts ~? +├── transcript.test.ts + +└── __previews__/ + +src/session/ +├── session-transcript.ts ? +└── session-transcript.test.ts ~ +src/renderers/README.md ~ +``` + +## Card 4 - Lock the request-side exchange renderer family + +### Objective + +Lock the request-response renderers as one family because they all render the same terminal union shape (`cancelled | unavailable | answered`) and share the same failure modes: comment quoting, markdown escaping, and branch-specific copy drift. 
+ +### Light-card cold-start reads + +- Card 1 output in `src/renderers/README.md` +- `src/renderers/exchanges/request-answer.ts` +- `src/renderers/exchanges/request-choice.ts` +- `src/renderers/exchanges/request-choices.ts` +- `src/renderers/exchanges/request-review.ts` +- corresponding `.pi/extensions/exchanges/request-*.ts` callers + +### Acceptance Criteria + +✓ `request-answer`, `request-choice`, `request-choices`, and `request-review` each have preview/golden coverage for answered and non-answered branches. + +✓ Invariants cover the semantic edges snapshots can hide: cancel/unavailable copy, markdown escaping of labels, and quote-block handling for optional comments. + +✓ Tests reuse existing domain DTO shapes and do not become a second schema/persistence test suite. + +### Out of scope / guardrails + +- No exchange-schema or editor-fallback redesign. +- No `present_candidates` work. +- No candidate-capture or generalized-capture symmetry expansion. + +### Expected touched paths (tentative) + +```text +src/renderers/exchanges/ +├── request-answer.ts ~? +├── request-choice.ts ~? +├── request-choices.ts ~? +├── request-review.ts ~? +├── request-family.test.ts +? +└── __previews__/ + +src/renderers/README.md ~ +``` + +## Card 5 - Lock the present-side exchange renderers and close the frontier + +### Objective + +Close the remaining exchange prompt-side rows, then reconcile the ledger so `renderer-golden-coverage` can hand off cleanly to `prompt-composition-golden-coverage`. 
+ +### Light-card cold-start reads + +- Card 1 output in `src/renderers/README.md` +- `src/renderers/exchanges/present-question.ts` +- `src/renderers/exchanges/present-options.ts` +- `src/renderers/exchanges/present-review-set.ts` +- corresponding `src/projections/exchanges/*.ts` sources and current structured-exchange tests + +### Acceptance Criteria + +✓ `present-question`, `present-options`, and `present-review-set` each have co-located preview/golden coverage plus at least one semantic invariant. + +✓ The invariants guard the real failure modes: heading/body composition for question; hidden option-id comment retention plus escaping for options; stable entity/edge draft narration for review-set without raw internal refs bleeding through. + +✓ `present-candidates` remains explicit `○` / topology stub unless the active codebase now gives it a real consumer. + +✓ Final docs mark every `●` row in `src/renderers/README.md` as covered and every `○` row explicit, so the frontier can advance to `prompt-composition-golden-coverage`. + +### Out of scope / guardrails + +- No new exchange renderer families. +- No prompt-composition work yet. +- Do not reopen the sketch-path choice settled in Card 1. + +### Expected touched paths (tentative) + +```text +src/renderers/exchanges/ +├── present-question.ts ~? +├── present-options.ts ~? +├── present-review-set.ts ~? +├── present-family.test.ts +? +└── __previews__/ + +src/renderers/README.md ~ +memory/PLAN.md ~? 
+``` diff --git a/src/.pi/__tests__/context-tools.test.ts b/src/.pi/__tests__/context-tools.test.ts index 0637e3e4..e4ef9c06 100644 --- a/src/.pi/__tests__/context-tools.test.ts +++ b/src/.pi/__tests__/context-tools.test.ts @@ -60,17 +60,15 @@ describe('context tools', () => { }, })) as { content: Array<{ type: 'text'; text: string }>; - details: { mode: 'cwd_inventory'; data: { markdownFiles: Array<{ path: string }> } }; + details: { markdownFiles: Array<{ path: string }> }; }; expect(result.content[0]?.text).toContain('[Workspace cwd inventory]'); expect(result.content[0]?.text).toContain('existing .brunch state detected'); expect(result.content[0]?.text).toContain('session-1.jsonl'); - expect(result.details.mode).toBe('cwd_inventory'); - expect(result.details.data.markdownFiles.map((file) => file.path)).toEqual([ - 'README.md', - 'visible/guide.md', - ]); + expect(result.details).not.toHaveProperty('mode'); + expect(result.details).not.toHaveProperty('data'); + expect(result.details.markdownFiles.map((file) => file.path)).toEqual(['README.md', 'visible/guide.md']); }); it('read_session_context returns runtime-frame markdown plus typed details', async () => { @@ -234,22 +232,17 @@ describe('context tools', () => { }, })) as { content: Array<{ type: 'text'; text: string }>; - details: { - mode: 'workspace_overview'; - data: { specs: Array<{ title: string }>; sessions: Array<{ turnCount: number }> }; - }; + details: { specs: Array<{ title: string }>; sessions: Array<{ turnCount: number }> }; }; expect(result.content[0]?.text).toContain('[Workspace overview]'); expect(result.content[0]?.text).toContain('Alpha Grounding'); expect(result.content[0]?.text).toContain('Beta Commitments'); expect(result.content[0]?.text).not.toContain('readiness_grade='); - expect(result.details.mode).toBe('workspace_overview'); - expect(result.details.data.specs.map((spec) => spec.title)).toEqual([ - 'Alpha Grounding', - 'Beta Commitments', - ]); - 
expect(result.details.data.sessions.map((session) => session.turnCount)).toEqual([1, 2]); + expect(result.details).not.toHaveProperty('mode'); + expect(result.details).not.toHaveProperty('data'); + expect(result.details.specs.map((spec) => spec.title)).toEqual(['Alpha Grounding', 'Beta Commitments']); + expect(result.details.sessions.map((session) => session.turnCount)).toEqual([1, 2]); }); // Authentic oracle: drive the context tools against the faux harness's REAL @@ -290,12 +283,10 @@ describe('context tools', () => { const workspaceResult = (await tools .get('read_workspace_context')! .execute('faux-workspace', { mode: 'cwd_inventory' }, undefined, undefined, ctx)) as { - details: { data: { markdownFiles: Array<{ path: string }> } }; + details: { markdownFiles: Array<{ path: string }> }; }; // cwd came from the header (the temp workbench), not process.cwd(). - expect(workspaceResult.details.data.markdownFiles.map((file) => file.path)).toContain( - 'faux-guard-doc.md', - ); + expect(workspaceResult.details.markdownFiles.map((file) => file.path)).toContain('faux-guard-doc.md'); } finally { harness.dispose(); } diff --git a/src/.pi/agents/README.md b/src/.pi/agents/README.md index ad6a01cd..8dcb10e6 100644 --- a/src/.pi/agents/README.md +++ b/src/.pi/agents/README.md @@ -48,7 +48,7 @@ agents/ 1. agent control header — identity, model/thinking expectation, role derived from `op_mode`, tool authority; 2. runtime-state header — current pinned/AUTO `goal`/`strategy`/`lens`, current spec line with the soft per-band readiness estimate, posture; -3. resource manifests — ``, ``, ``, `` entries, filtered by `op_mode`/allow-list plus capability-readiness over selected-spec elicitation gaps; +3. 
resource manifests — ``, ``, ``, `` entries, filtered by `op_mode`/allow-list plus capability-readiness over selected-spec elicitation gaps; AUTO axes list only currently legal choices, while role/mode-legal pinned axes remain visible even when readiness negotiates and gated methods/tools stay withheld; 4. compact pushed context — minimal context handles and rendered context blocks. Detailed goal/strategy/lens/method bodies are markdown resources under `.pi/skills/` and are loaded with `read` when detail matters. Manifest metadata is code-owned in `state.ts`, not filesystem-discovered. diff --git a/src/.pi/agents/compose.test.ts b/src/.pi/agents/compose.test.ts index 1cdcb42b..2d034b5e 100644 --- a/src/.pi/agents/compose.test.ts +++ b/src/.pi/agents/compose.test.ts @@ -245,33 +245,40 @@ describe('composeAgentPrompt', () => { expect(pinnedFreestyle.prompt).toContain('name="freestyle"'); }); - it('rejects illegal pinned gap-gated selections loudly', () => { - expect(() => - composeAgentPrompt({ - agentId: 'elicitor', - sessionState: projectBrunchAgentState([ - { - type: 'custom', - customType: 'brunch.agent_runtime_state', - data: { - schemaVersion: 1, - reason: 'switch', - source: 'user', - state: { - ...DEFAULT_BRUNCH_AGENT_STATE, - agentGoal: 'commit-converge', - }, + it('keeps pinned readiness-thin selections in the prompt while gated methods remain filtered out', () => { + const result = composeAgentPrompt({ + agentId: 'elicitor', + sessionState: projectBrunchAgentState([ + { + type: 'custom', + customType: 'brunch.agent_runtime_state', + data: { + schemaVersion: 1, + reason: 'switch', + source: 'user', + state: { + ...DEFAULT_BRUNCH_AGENT_STATE, + agentStrategy: 'project-graph', + agentGoal: 'commit-converge', }, }, - ]), - spec: groundingSpec, - workspace, - activeTools: ['read'], - gaps: zeroCoverageGaps, - }), - ).toThrow( - 'Pinned goal "commit-converge" is not legal for elicitor in elicit; capability-readiness returned negotiate for current elicitation 
gaps.', - ); + }, + ]), + spec: groundingSpec, + workspace, + activeTools: ['read'], + gaps: zeroCoverageGaps, + }); + + expect(result.prompt).toContain('- goal: commit-converge'); + expect(result.prompt).toContain('- strategy: project-graph'); + expect(result.manifests.goals.map((entry) => entry.name)).toEqual(['commit-converge']); + expect(result.manifests.strategies.map((entry) => entry.name)).toEqual(['project-graph']); + expect(result.manifests.methods.map((entry) => entry.name)).toEqual([ + 'run-structured-exchange', + 'infer-and-capture', + 'read-context', + ]); }); it('advertises only readable .pi prompt resources without filesystem discovery', async () => { diff --git a/src/.pi/agents/state.test.ts b/src/.pi/agents/state.test.ts index a0fabf34..d0095136 100644 --- a/src/.pi/agents/state.test.ts +++ b/src/.pi/agents/state.test.ts @@ -158,7 +158,7 @@ describe('agent posture policy', () => { ); }); - it('throws on an illegal pinned axis with a negotiate outcome message, not a grade', () => { + it('keeps a pinned goal visible when capability-readiness negotiates, while gated methods stay absent', () => { const state = projectBrunchAgentState([ { type: 'custom', @@ -178,9 +178,10 @@ describe('agent posture policy', () => { }, ]); - expect(() => manifestsForState(state, groundingFloorGaps({ coverage: { thesis: 0 } }))).toThrow( - 'Pinned goal "commit-converge" is not legal for elicitor in elicit; capability-readiness returned negotiate for current elicitation gaps.', - ); + const manifests = manifestsForState(state, groundingFloorGaps({ coverage: { thesis: 0 } })); + + expect(manifests.goals.map((entry) => entry.name)).toEqual(['commit-converge']); + expect(manifests.methods.map((entry) => entry.name)).not.toContain('review-for-gaps'); }); it('fails loud on an empty gap register instead of returning empty manifests', () => { diff --git a/src/.pi/agents/state.ts b/src/.pi/agents/state.ts index f1972b7a..bcec1e5c 100644 --- a/src/.pi/agents/state.ts +++ 
b/src/.pi/agents/state.ts @@ -301,11 +301,14 @@ function selectAxisResources({ if (selection === 'auto') { return legal.filter((id) => !autoExcluded?.has(id)).map((id) => resources[id]); } - if (!legal.includes(selection)) { + if (!allowed.includes(selection)) { throw new Error( - `Pinned ${label} "${selection}" is not legal for ${state.agentRole} in ${state.operationalMode}; capability-readiness returned negotiate for current elicitation gaps.`, + `Pinned ${label} "${selection}" is not allowed for ${state.agentRole} in ${state.operationalMode}.`, ); } + // User/system pins are authority signals. When readiness negotiates, keep the + // pinned axis visible and let method/tool legality carry the negotiation + // boundary instead of crashing prompt assembly. return [resources[selection]]; } diff --git a/src/.pi/extensions/context/get-cwd.ts b/src/.pi/extensions/context/get-cwd.ts index 36235b2e..afe9dcfb 100644 --- a/src/.pi/extensions/context/get-cwd.ts +++ b/src/.pi/extensions/context/get-cwd.ts @@ -2,15 +2,12 @@ import { resolve } from 'node:path'; import type { SessionHeader } from '@earendil-works/pi-coding-agent'; -import { - projectWorkspaceCwdContext, - projectWorkspaceOverviewContext, - type WorkspaceContextProjection, -} from '../../../projections/workspace/workspace-context.js'; import { renderWorkspaceContext } from '../../../renderers/workspace/workspace-context.js'; import { inspectWorkspaceCwdInventory, inspectWorkspaceOverview, + type WorkspaceCwdInventory, + type WorkspaceOverview, } from '../../../session/workspace-context.js'; // The session cwd lives on the Pi header, which is reachable only via @@ -24,12 +21,12 @@ interface SessionManagerLike { export async function readWorkspaceContext( mode: 'cwd_inventory' | 'workspace_overview', sessionManager?: SessionManagerLike, -): Promise<{ readonly text: string; readonly details: WorkspaceContextProjection }> { +): Promise<{ readonly text: string; readonly details: WorkspaceCwdInventory | 
WorkspaceOverview }> { const cwd = resolveWorkspaceCwd(sessionManager); const details = mode === 'workspace_overview' - ? projectWorkspaceOverviewContext(await inspectWorkspaceOverview(cwd)) - : projectWorkspaceCwdContext(await inspectWorkspaceCwdInventory(cwd)); + ? await inspectWorkspaceOverview(cwd) + : await inspectWorkspaceCwdInventory(cwd); return { text: renderWorkspaceContext(details), details, diff --git a/src/.pi/extensions/runtime/authority-matrix.test.ts b/src/.pi/extensions/runtime/authority-matrix.test.ts index c7d2feb7..66a4fd08 100644 --- a/src/.pi/extensions/runtime/authority-matrix.test.ts +++ b/src/.pi/extensions/runtime/authority-matrix.test.ts @@ -84,6 +84,19 @@ describe('minimal authority matrix', () => { ).toEqual(['read', 'grep', 'find', 'ls', 'present_question', 'request_answer']); }); + it('falls back to conservative uncovered gaps when no selected-spec gap read is available', () => { + const state = projectBrunchAgentState([{ data: { state: DEFAULT_BRUNCH_AGENT_STATE } }]); + + expect(activeToolNamesForBrunchAgentState(piWithRegisteredTools(REGISTERED_POC_TOOLS), state)).toEqual([ + 'read', + 'grep', + 'find', + 'ls', + 'present_question', + 'request_answer', + ]); + }); + it('represents needs_human as structured data instead of a TUI-only dialog', () => { const result = { status: 'needs_human' } satisfies CommandResult; diff --git a/src/.pi/extensions/runtime/index.ts b/src/.pi/extensions/runtime/index.ts index e3f15af9..57015aa3 100644 --- a/src/.pi/extensions/runtime/index.ts +++ b/src/.pi/extensions/runtime/index.ts @@ -82,13 +82,13 @@ function supportsBrunchAgentStateEntries( export function activeToolNamesForBrunchAgentState( pi: ExtensionAPI, state: ResolvedBrunchAgentState, - gaps: readonly ElicitationGap[] = [], + gaps?: readonly ElicitationGap[], devAllowedToolNames?: readonly string[], ): string[] { return activeToolNamesForPosture({ registeredToolNames: pi.getAllTools().map((tool) => tool.name), state, - gaps, + gaps: gaps ?? 
conservativeUncoveredFloorGaps(), devAllowedToolNames, }); } diff --git a/src/.pi/skills/methods/read-context.md b/src/.pi/skills/methods/read-context.md index 8d4367e2..fa758dd9 100644 --- a/src/.pi/skills/methods/read-context.md +++ b/src/.pi/skills/methods/read-context.md @@ -2,7 +2,7 @@ Use this method when pushed prompt context is insufficient for the next elicitation move. It tells you how to sequence selected-spec reads without turning context gathering into a separate research project. -Start from the handles in the runtime prompt: selected spec, readiness grade, active goal/strategy/lens, workspace posture, and any graph overview. Pull more context only when it will change the next question, proposal, capture decision, or graph write. Prefer compact overview for orientation and focused node neighborhoods for a specific claim or projected code. +Start from the handles in the runtime prompt: selected spec, soft readiness estimate, active goal/strategy/lens, workspace posture, and any graph overview. Pull more context only when it will change the next question, proposal, capture decision, or graph write. Prefer compact overview for orientation and focused node neighborhoods for a specific claim or projected code. Use read-only context tools such as `read_graph` and `read_session_context` where available. Keep graph truth distinct from active-context projections: accepted records are truth, while rendered summaries are orientation. If the user mentions a node code, resolve it through the product read path rather than guessing from memory. diff --git a/src/.pi/skills/strategies/freestyle.md b/src/.pi/skills/strategies/freestyle.md index f641d842..63f50812 100644 --- a/src/.pi/skills/strategies/freestyle.md +++ b/src/.pi/skills/strategies/freestyle.md @@ -6,4 +6,4 @@ Turn structure is light: respond to the user's immediate intent, read context wh Commitment mechanism is ordinary-turn capture. 
Directly stated, high-confidence facts may be captured with explicit basis through the same generalized capture path as structured responses. Low-confidence implications, guesses, and broad summaries stay out of graph truth unless the user confirms them. -Available graph operations are context reads and legal capture/commit paths that the current goal and grade permit. Do not treat freestyle as higher authority, a new operational mode, or a bypass around review-set/direct-commit distinctions. It changes interaction style only; goal and lens still decide what the work is about. +Available graph operations are context reads and legal capture/commit paths that the current goal and capability-readiness permit. Do not treat freestyle as higher authority, a new operational mode, or a bypass around review-set/direct-commit distinctions. It changes interaction style only; goal and lens still decide what the work is about. diff --git a/src/app/brunch-tui.ts b/src/app/brunch-tui.ts index 2de6f436..a5ecec84 100644 --- a/src/app/brunch-tui.ts +++ b/src/app/brunch-tui.ts @@ -128,7 +128,7 @@ export async function runBrunchTui(options: BrunchTuiOptions = {}): Promise { const command = process.platform === 'darwin' ? 
'open' : 'xdg-open'; const child = spawn(command, [url], { diff --git a/src/db/connection.test.ts b/src/db/connection.test.ts index 77ba5aa9..5b197d71 100644 --- a/src/db/connection.test.ts +++ b/src/db/connection.test.ts @@ -99,47 +99,16 @@ describe('createDb', () => { } }); - it('migrates a populated pre-node-kind elicitation gap table', async () => { + it('recreates elicitation gaps when a backlog-era table survives into a 0003-ledger DB', async () => { const dir = await mkdtemp(join(tmpdir(), 'brunch-db-legacy-gaps-')); const dbPath = join(dir, 'legacy.db'); try { - await createLegacy0003ElicitationGapDatabase(dbPath); + await createLegacy0003BacklogDriftDatabase(dbPath); const db = createDb(dbPath); - expect( - db - .select({ - refersTo: elicitationGaps.refers_to, - question: elicitationGaps.question, - rationale: elicitationGaps.rationale, - }) - .from(elicitationGaps) - .orderBy(asc(elicitationGaps.id)) - .all(), - ).toEqual([ - { - refersTo: 'context', - question: 'What kind of thing is this, and what domain or environment does it live in?', - rationale: 'Anchors the domain.', - }, - { - refersTo: 'thesis', - question: 'Who is this for?', - rationale: 'Identifies the main actor.', - }, - { - refersTo: 'criterion', - question: 'How will we recognize success or good enough?', - rationale: 'Sketches what success looks like.', - }, - { - refersTo: 'goal', - question: 'custom_goal_gap', - rationale: 'Custom legacy row should still migrate.', - }, - ]); + expect(db.select().from(elicitationGaps).orderBy(asc(elicitationGaps.id)).all()).toEqual([]); } finally { await rm(dir, { recursive: true, force: true }); } @@ -238,7 +207,7 @@ async function createLegacy0000EmptySpecDatabase(dbPath: string): Promise } } -async function createLegacy0003ElicitationGapDatabase(dbPath: string): Promise { +async function createLegacy0003BacklogDriftDatabase(dbPath: string): Promise { const migrations = await Promise.all([ readMigration('0000_deep_maria_hill.sql'), 
readMigration('0001_aspiring_orphan.sql'), @@ -248,37 +217,48 @@ async function createLegacy0003ElicitationGapDatabase(dbPath: string): Promise } } }; - return result.details.data.markdownFiles.map((file) => file.path); + )) as { details: { markdownFiles: Array<{ path: string }> } }; + return result.details.markdownFiles.map((file) => file.path); } diff --git a/src/graph/README.md b/src/graph/README.md index 0e1c8a0f..c2122356 100644 --- a/src/graph/README.md +++ b/src/graph/README.md @@ -102,7 +102,7 @@ not compare bare LSN values across sibling specs. - `.pi/extensions/graph/` — Pi tool adapters for `mutate_graph` and `read_graph`. - `rpc/` — graph projection handlers and synchronous response-capture wiring. -- `projections/graph/` — reusable DTO projection over graph reader/command outputs. +- `projections/graph/` — topology stubs for deferred graph PROJECT seams; node-neighborhood consumers read `NodeNeighborhood` directly from `queries.ts`. - `renderers/graph/` — reusable lossy markdown/text rendering over projected graph DTOs. - `.pi/agents/contexts/` — future prompt context renderers. - `probes/` — graph proof drivers. diff --git a/src/projections/README.md b/src/projections/README.md index dc7ed986..7d669ddc 100644 --- a/src/projections/README.md +++ b/src/projections/README.md @@ -17,37 +17,37 @@ PROJECT is the info-preserving stage of the context pipeline (D60-L: PULL → ** This ledger is the closed inventory; every implemented module appears once. Domain folders stay split only while each owns at least one earned projection (e.g. `workspace/` is kept by `workspace-state`, not by `workspace-context`). -Disposition: `✓` locked · `●` keep + lock (earns place, needs invariant) · `◐` keep, decide direct-vs-transitive · `✗` delete / inline (fails earns-its-place) · `○` leave (topology stub / policy data). Consumers = importing modules outside this file. 
+Disposition: `✓` resolved (direct lock or accepted transitive proof) · `●` keep + lock (earns place, needs invariant) · `◐` keep, decide direct-vs-transitive · `✗` delete / inline (fails earns-its-place) · `○` leave (topology stub / policy data). Consumers = importing modules outside this file. | Module | Consumers | Disposition | Oracle / reason | | --- | --- | --- | --- | -| `graph/neighborhood` | 4 | ● | Real `projectNeighborhood` (tagged not-found/success). Invariant: success preserves projected node code + every edge endpoint; not-found exhaustive. | +| `graph/neighborhood` | — | ○ | Deprecated topology stub. Node-local graph facts stay as `NodeNeighborhood` from `graph/queries.ts`; renderers/RPC/web consume that typed PULL shape directly, so no projection layer is materialized for symmetry. | | `graph/overview` | — | ○ | `export {}` topology stub (Input/Output/Used-by named); no implementation to lock. | | `graph/commit-result` | — | ○ | `export {}` topology stub. | | `graph/reconciliation-needs` | — | ○ | `export {}` topology stub. | -| `session/transcript-context` | 2 | ● | Real transform: filters session entries + Pi-SDK convert. Invariant: no non-empty transcript entry dropped. Consumes the Pi SDK (external trust boundary), not a PULL surface we own. | -| `session/runtime-state` | 13 | ● | Most-consumed projection; flattens runtime state. Direct flattened-shape invariant guards the field set every consumer relies on. | +| `session/transcript-context` | 2 | ✓ | `transcript-context.test.ts` — no non-empty markdown-bearing message disappears across the Pi `buildSessionContext()` + `convertToLlm()` seam; non-renderable entries drop at the projection boundary. | +| `session/runtime-state` | 13 | ✓ | `runtime-state.test.ts` — direct flattened-shape invariant for defaults, last-writer-wins runtime posture, mentions/world/lifecycle slots, and non-linear transcript rejection. 
| | `session/affordances` | 1 | ✓ | `affordances.test.ts` — gap-driven legality + default-on-switch derivation tested directly. Legal options are a menu projection over capability-readiness; omitted options are not capability refusals (I31-L). | | `session/capability-readiness` | 1 | ✓ | D74-L/D75-L tracer gate, not a reusable DTO. `capability-readiness.test.ts` locks the explicit capability→node-kind map, proceed / low-epistemic / negotiate outcomes, no-refusal invariant, loud failure when the gap register lacks a required kind, same-kind discrimination through `question`, and live presence-coverage flip. `session/affordances` now consumes it for axis-option legality. | | `session/readiness-estimate` | — | ✓ | D45-L soft per-band coverage rollup over `ElicitationGap[]`; UI-only and gates nothing. `readiness-estimate.test.ts` locks every-band shape, empty-band zero, importance-weighted mean, honest regression, no grade imports, and no legality-path imports. | | `session/runtime-policy` | 4 | ○ | Policy/definitions data, not a DTO transform. Gap-driven legality is guarded via `affordances.test.ts`; no runtime grade table remains. | | `session/assistant-visible-watermark` | 2 | ✓ | Carrier projection over the authoritative `continuity-entry-classifier` watermark set. Unit tests guard seed/overview/own-mutation/`worldUpdate` carriers, narrow-read exclusion, and cross-spec failure. | | `session/continuity-entry-classifier` | 2 | ✓ | Shared FE-847 taxonomy for watermark-carrier vs continuity-only-non-debt vs debt-bearing entries; consumed by watermark projection and origination tail classification. | -| `workspace/workspace-context` | 1 | ✗ | Pure `{ mode, data }` tag wrapper — zero transform, single consumer (`.pi/extensions/context/get-cwd.ts`). Source `session/workspace-context.ts` already exports the shapes + `inspect*` and can feed the consumer directly. Delete / inline. 
| -| `workspace/workspace-state` | 4 | ● | Real flatten of the `WorkspaceSessionState` union to a narrow DTO. Shape invariant across status variants (`ready` / `needs_human` / base). | +| `workspace/workspace-context` | — | ✗ | Deleted/inlined. `read_workspace_context` and `renderers/workspace/workspace-context.ts` now consume `session/workspace-context.ts` source shapes directly; no replacement wrapper survives. | +| `workspace/workspace-state` | 4 | ✓ | `workspace-state.test.ts` — direct variant-shape invariant over `ready`, `needs_human`, and base `select_spec`; chrome/session-manager internals and retired phase/chat fields stay out of the DTO. | | `exchanges/request-choice` | 6 | ✓ | `request-choice.test.ts` (direct). | -| `exchanges/present-options` | 5 | ◐ | Builds `toolResult.details`; covered transitively via `.pi` structured-exchange tests. Decide direct-lock vs keep-transitive at design checkpoint. | -| `exchanges/present-question` | 5 | ◐ | As above. | -| `exchanges/present-review-set` | 5 | ◐ | As above. | -| `exchanges/request-answer` | 5 | ◐ | As above. | -| `exchanges/request-choices` | 6 | ◐ | As above. | -| `exchanges/request-review` | 5 | ◐ | As above. | -| `exchanges/review-set-payload` | 1 | ◐ | Covered transitively via the graph review-set path. | +| `exchanges/present-options` | 5 | ✓ | Keep-transitive — `.pi/__tests__/structured-exchange-present-request.test.ts` proves emitted `toolResult.details`, and `session/exchange-projection.test.ts` proves tuple reconstruction from persisted details. | +| `exchanges/present-question` | 5 | ✓ | Keep-transitive — `.pi/__tests__/structured-exchange-present-request.test.ts` proves question/body projection, and `session/exchange-projection.test.ts` proves the same details survive session reconstruction. 
| +| `exchanges/present-review-set` | 5 | ✓ | Keep-transitive — `.pi/__tests__/structured-exchange-present-request.test.ts` proves proposal payload projection, while `session/structured-exchange-loop.test.ts` and `probes/project-graph-review-cycle-proof.test.ts` prove review-mode reconstruction and downstream use. | +| `exchanges/request-answer` | 5 | ✓ | Keep-transitive — `session/exchange-projection.test.ts` proves prompt/response pairing over persisted `toolResult.details`, and the structured-exchange `.pi` tests prove submit-time materialization. | +| `exchanges/request-choices` | 6 | ✓ | Keep-transitive — `.pi/__tests__/structured-exchange-present-request.test.ts` proves multi-select persistence and comment rules, and `session/exchange-projection.test.ts` proves the terminal tuple reconstruction. | +| `exchanges/request-review` | 5 | ✓ | Keep-transitive — `.pi/__tests__/structured-exchange-present-request.test.ts` proves approve/request-changes/reject persistence, and `probes/project-graph-review-cycle-proof.test.ts` proves the review cycle at the owning seam. | +| `exchanges/review-set-payload` | 1 | ✓ | Keep-transitive — `session/structured-exchange-loop.test.ts` reconstructs `reviewSet` from persisted details, and `probes/project-graph-review-cycle-proof.test.ts` proves the downstream review path. | | `exchanges/present-candidates` | 1 | ○ | `export {}` topology stub (candidate-family, all three layers); leave until the tool lands. | -Aggregate DoD for the PROJECT stage: every `●` row carries a direct shape/no-loss invariant (co-located `*.test.ts`); every `✗` row is deleted/inlined with its consumer fed from the source read; `◐` rows are resolved by an explicit keep-transitive or add-direct decision; `○` rows stay untouched. `topology-boundaries.test.ts` continues to guard that `projections/` imports no adapter/transport layer. 
+Aggregate DoD for the PROJECT stage: every `●` row carries a direct shape/no-loss invariant (co-located `*.test.ts`); every `✗` row is deleted/inlined with its consumer fed from the source read; `◐` rows are resolved by an explicit keep-transitive or add-direct decision; `○` rows stay untouched. `topology-boundaries.test.ts` continues to guard that `projections/` imports no adapter/transport layer, and that direct-read graph neighborhood consumers do not accidentally adopt the deprecated projection stub. This frontier is now closed: no `●` or `◐` rows remain. -Upstream note (PULL): `●` projections lock against their read sources, so those must be stable first. `graph/neighborhood` sits on the locked, ledgered graph read surface (`graph/queries.ts` + `src/graph/README.md`). The session-domain projections sit on session read sources (`session/workspace-context.ts`, `session/workspace-session-coordinator.ts`, `session/runtime-state.ts`) which are behaviorally tested but not yet inventoried as a closed read-shape ledger — ledger that PULL half before freezing the session/workspace projection invariants. +Upstream note (PULL): `●` projections lock against their read sources, so those must be stable first. Graph neighborhood remains a direct PULL read from the locked graph surface (`graph/queries.ts` + `src/graph/README.md`) rather than a PROJECT survivor. The session-domain projections sit on session read sources (`session/workspace-context.ts`, `session/workspace-session-coordinator.ts`, `session/runtime-state.ts`) now inventoried in `src/session/README.md`; keep those PULL rows stable while freezing the remaining session/workspace projection invariants. ## Directory layout diff --git a/src/projections/graph/neighborhood.ts b/src/projections/graph/neighborhood.ts index 370e105f..368df577 100644 --- a/src/projections/graph/neighborhood.ts +++ b/src/projections/graph/neighborhood.ts @@ -1,9 +1,11 @@ /** * Deprecated graph-neighborhood projection seam. 
* - * Node-local graph facts now arrive as `NodeNeighborhood` from graph/queries.ts, - * and model-facing flattening lives beside the graph renderer. Keep this module - * as a temporary empty topology marker until the projections ledger is reconciled. + * Node-local graph facts intentionally stay as `NodeNeighborhood` from + * graph/queries.ts. Renderers, RPC, web query helpers, and Pi context adapters + * consume that typed PULL shape directly; model-facing flattening lives beside + * the graph renderer. Keep this empty module as a topology marker so future + * work does not reintroduce a pass-through PROJECT layer for symmetry. */ export {}; diff --git a/src/session/runtime-state.test.ts b/src/projections/session/runtime-state.test.ts similarity index 94% rename from src/session/runtime-state.test.ts rename to src/projections/session/runtime-state.test.ts index 86b05cfc..5fb12279 100644 --- a/src/session/runtime-state.test.ts +++ b/src/projections/session/runtime-state.test.ts @@ -1,14 +1,17 @@ import { describe, expect, it } from 'vitest'; -import { projectSessionRuntimeState } from '../projections/session/runtime-state.js'; -import { NonLinearTranscriptError, type BrunchSessionEnvelope } from './brunch-session-envelope.js'; +import { + NonLinearTranscriptError, + type BrunchSessionEnvelope, +} from '../../session/brunch-session-envelope.js'; import { AGENT_STRATEGY_IDS, BRUNCH_AGENT_RUNTIME_STATE_CUSTOM_TYPE, DEFAULT_BRUNCH_AGENT_STATE, type BrunchAgentState, -} from './runtime-state.js'; -import { createSessionBindingData } from './session-binding.js'; +} from '../../session/runtime-state.js'; +import { createSessionBindingData } from '../../session/session-binding.js'; +import { projectSessionRuntimeState } from './runtime-state.js'; function envelope(entries: BrunchSessionEnvelope['entries'] = []): BrunchSessionEnvelope { return { @@ -43,7 +46,7 @@ function runtimeEntry(id: string, state: BrunchAgentState, parentId = 'binding-1 } as never; } 
-describe('session runtime-state projection', () => { +describe('runtime-state projection', () => { it('accepts freestyle as a real strategy id in runtime state parsing', () => { expect(AGENT_STRATEGY_IDS).toContain('freestyle'); diff --git a/src/projections/session/transcript-context.test.ts b/src/projections/session/transcript-context.test.ts new file mode 100644 index 00000000..e2b3b615 --- /dev/null +++ b/src/projections/session/transcript-context.test.ts @@ -0,0 +1,117 @@ +import type { Message } from '@earendil-works/pi-ai'; +import type { FileEntry } from '@earendil-works/pi-coding-agent'; +import { describe, expect, it } from 'vitest'; + +import { assistantMessage } from '../../probes/test-helpers.js'; +import { projectTranscriptContext } from './transcript-context.js'; + +function toolResultEntry(id: string, parentId: string, toolName: string, text: string, timestamp: number) { + return { + id, + type: 'message', + parentId, + timestamp: `2026-06-04T00:00:0${timestamp}.000Z`, + message: { + role: 'toolResult' as const, + toolCallId: `${toolName}-call-${timestamp}`, + toolName, + content: [{ type: 'text' as const, text }], + details: {}, + isError: false, + timestamp, + }, + }; +} + +function primaryText(message: Message): string | undefined { + if (typeof message.content === 'string') { + return message.content; + } + return message.content.find((block) => block.type === 'text')?.text; +} + +describe('transcript-context projection', () => { + it('keeps every markdown-bearing message in order and drops non-renderable entries', () => { + const entries = [ + { type: 'session', id: 'session-1', cwd: '/tmp/brunch' }, + { + id: 'binding-1', + type: 'custom', + customType: 'brunch.session_binding', + parentId: null, + timestamp: '2026-06-04T00:00:00.000Z', + data: { schemaVersion: 1, specId: 1 }, + }, + { + id: 'custom-message-1', + type: 'custom_message', + parentId: 'binding-1', + timestamp: '2026-06-04T00:00:01.000Z', + customType: 'brunch.note', + content: 
'hello custom', + display: true, + details: { hidden: true }, + }, + toolResultEntry('generic-tool-1', 'custom-message-1', 'read', 'Generic file contents', 2), + toolResultEntry( + 'present-1', + 'generic-tool-1', + 'present_options', + '## Which direction?\n\n### 1. Fast\n\n**Rationale:** validates the seam.', + 3, + ), + toolResultEntry( + 'request-1', + 'present-1', + 'request_choice', + '### Response\n\n- Fast\n\nComment:\n\n> Keep it deterministic.', + 4, + ), + { + id: 'assistant-1', + type: 'message', + parentId: 'request-1', + timestamp: '2026-06-04T00:00:05.000Z', + message: assistantMessage( + [ + { type: 'text', text: 'I will inspect the workspace.' }, + { type: 'thinking', thinking: 'private chain of thought' }, + { type: 'toolCall', id: 'tool-call-1', name: 'read', arguments: { path: 'notes.txt' } }, + ], + 5, + ), + }, + { + id: 'assistant-2', + type: 'message', + parentId: 'assistant-1', + timestamp: '2026-06-04T00:00:06.000Z', + message: assistantMessage( + [ + { type: 'thinking', thinking: 'private chain of thought' }, + { type: 'toolCall', id: 'tool-call-2', name: 'read', arguments: { path: 'notes.txt' } }, + ], + 6, + ), + }, + ]; + + const projected = projectTranscriptContext(entries as FileEntry[]); + + expect(projected.messages).toHaveLength(5); + expect(projected.messages.map((message) => message.role)).toEqual([ + 'user', + 'toolResult', + 'toolResult', + 'toolResult', + 'assistant', + ]); + expect(projected.messages.map(primaryText)).toEqual([ + 'hello custom', + 'Generic file contents', + '## Which direction?\n\n### 1. 
Fast\n\n**Rationale:** validates the seam.', + '### Response\n\n- Fast\n\nComment:\n\n> Keep it deterministic.', + 'I will inspect the workspace.', + ]); + }); +}); diff --git a/src/projections/topology-boundaries.test.ts b/src/projections/topology-boundaries.test.ts index bd443720..a16c9305 100644 --- a/src/projections/topology-boundaries.test.ts +++ b/src/projections/topology-boundaries.test.ts @@ -46,6 +46,10 @@ function importedSourcePaths(file: string): string[] { .filter((path) => path.startsWith(SOURCE_ROOT)); } +function sourceImportersOf(target: string): string[] { + return sourceFilesUnder(SOURCE_ROOT).filter((file) => importedSourcePaths(file).includes(target)); +} + describe('projection and renderer topology boundaries', () => { it('keeps reusable projections out of adapter and transport layers', () => { const offenders = sourceFilesUnder(PROJECTIONS_ROOT).flatMap((file) => { @@ -69,6 +73,11 @@ describe('projection and renderer topology boundaries', () => { expect(offenders).toEqual([]); }); + + it('keeps graph neighborhood as a direct graph read instead of a projection layer', () => { + expect(sourceImportersOf('src/projections/graph/neighborhood.ts')).toEqual([]); + }); + it('keeps runtime-state transcript facts from importing reusable runtime projections', () => { expect(importedSourcePaths('src/session/runtime-state.ts')).not.toContain( 'src/projections/session/runtime-state.ts', diff --git a/src/projections/workspace/workspace-context.ts b/src/projections/workspace/workspace-context.ts deleted file mode 100644 index b3471586..00000000 --- a/src/projections/workspace/workspace-context.ts +++ /dev/null @@ -1,25 +0,0 @@ -import type { WorkspaceCwdInventory, WorkspaceOverview } from '../../session/workspace-context.js'; - -export type WorkspaceContextProjection = - | { - readonly mode: 'cwd_inventory'; - readonly data: WorkspaceCwdInventory; - } - | { - readonly mode: 'workspace_overview'; - readonly data: WorkspaceOverview; - }; - -export function 
projectWorkspaceCwdContext(data: WorkspaceCwdInventory): WorkspaceContextProjection { - return { - mode: 'cwd_inventory', - data, - }; -} - -export function projectWorkspaceOverviewContext(data: WorkspaceOverview): WorkspaceContextProjection { - return { - mode: 'workspace_overview', - data, - }; -} diff --git a/src/projections/workspace/workspace-state.test.ts b/src/projections/workspace/workspace-state.test.ts new file mode 100644 index 00000000..990e8b94 --- /dev/null +++ b/src/projections/workspace/workspace-state.test.ts @@ -0,0 +1,81 @@ +import type { SessionManager } from '@earendil-works/pi-coding-agent'; +import { describe, expect, it } from 'vitest'; + +import type { WorkspaceSessionState } from '../../session/workspace-session-coordinator.js'; +import { projectWorkspaceState } from './workspace-state.js'; + +const cwd = '/tmp/brunch-project'; + +function readyState(): WorkspaceSessionState { + return { + status: 'ready', + cwd, + spec: { id: 1, title: 'Alpha spec' }, + session: { + id: 'session-1', + file: '/tmp/brunch-project/.brunch/sessions/session-1.jsonl', + name: 'Alpha session', + manager: {} as SessionManager, + }, + chrome: { + cwd, + project: { name: 'Brunch', slug: 'brunch' }, + spec: { id: 1, title: 'Alpha spec' }, + }, + }; +} + +describe('workspace-state projection', () => { + it('keeps the ready DTO narrow and strips chrome/session-manager internals', () => { + expect(projectWorkspaceState(readyState())).toEqual({ + status: 'ready', + cwd, + spec: { id: 1, title: 'Alpha spec' }, + session: { + id: 'session-1', + file: '/tmp/brunch-project/.brunch/sessions/session-1.jsonl', + }, + chrome: {}, + }); + }); + + it('projects needs_human without resurrecting chrome detail fields', () => { + expect( + projectWorkspaceState({ + status: 'needs_human', + cwd, + reason: 'Pick a spec first.', + chrome: { + cwd, + project: { name: 'Brunch', slug: 'brunch' }, + spec: { id: 7, title: 'Draft spec' }, + }, + }), + ).toEqual({ + status: 'needs_human', + 
cwd, + spec: { id: 7, title: 'Draft spec' }, + chrome: {}, + reason: 'Pick a spec first.', + }); + }); + + it('projects select_spec as base state with no session or retired chrome fields', () => { + expect( + projectWorkspaceState({ + status: 'select_spec', + cwd, + chrome: { + cwd, + project: { name: 'Brunch', slug: 'brunch' }, + spec: null, + }, + }), + ).toEqual({ + status: 'select_spec', + cwd, + spec: null, + chrome: {}, + }); + }); +}); diff --git a/src/renderers/workspace/workspace-context.ts b/src/renderers/workspace/workspace-context.ts index 39aaa4e5..53f8f643 100644 --- a/src/renderers/workspace/workspace-context.ts +++ b/src/renderers/workspace/workspace-context.ts @@ -1,17 +1,15 @@ -import type { WorkspaceContextProjection } from '../../projections/workspace/workspace-context.js'; +import type { WorkspaceCwdInventory, WorkspaceOverview } from '../../session/workspace-context.js'; -export function renderWorkspaceContext(context: WorkspaceContextProjection): string { - if (context.mode === 'workspace_overview') { +export function renderWorkspaceContext(context: WorkspaceCwdInventory | WorkspaceOverview): string { + if ('specs' in context) { return renderWorkspaceOverview(context); } return renderWorkspaceCwd(context); } -function renderWorkspaceCwd( - context: Extract, -): string { - const { data: inventory } = context; +function renderWorkspaceCwd(context: WorkspaceCwdInventory): string { + const inventory = context; const lines = [ '[Workspace cwd inventory]', `- cwd: ${inventory.cwd}`, @@ -44,10 +42,8 @@ function renderWorkspaceCwd( return `${lines.join('\n')}\n`; } -function renderWorkspaceOverview( - context: Extract, -): string { - const { data: overview } = context; +function renderWorkspaceOverview(context: WorkspaceOverview): string { + const overview = context; const lines = [ '[Workspace overview]', `- cwd: ${overview.cwd}`, diff --git a/src/renderers/workspace/workspace-state.test.ts b/src/renderers/workspace/workspace-state.test.ts index 
eb125e07..d53d3a6f 100644 --- a/src/renderers/workspace/workspace-state.test.ts +++ b/src/renderers/workspace/workspace-state.test.ts @@ -1,13 +1,11 @@ -import type { SessionManager } from '@earendil-works/pi-coding-agent'; import { describe, expect, it } from 'vitest'; -import { projectWorkspaceState } from '../../projections/workspace/workspace-state.js'; -import type { WorkspaceSessionState } from '../../session/workspace-session-coordinator.js'; +import type { WorkspaceState } from '../../projections/workspace/workspace-state.js'; import { renderWorkspaceState } from './workspace-state.js'; const cwd = '/tmp/brunch-project'; -function readyState(): WorkspaceSessionState { +function readyState(): WorkspaceState { return { status: 'ready', cwd, @@ -15,30 +13,15 @@ function readyState(): WorkspaceSessionState { session: { id: 'session-1', file: '/tmp/brunch-project/.brunch/sessions/session-1.jsonl', - manager: {} as SessionManager, - }, - chrome: { - cwd, - spec: { id: 1, title: 'Alpha spec' }, }, + chrome: {}, }; } describe('print state', () => { - it('projects and renders a ready workspace without exposing pi internals', () => { - const state = projectWorkspaceState(readyState()); + it('renders a ready workspace without exposing retired chrome fields', () => { + const rendered = renderWorkspaceState(readyState()); - expect(state).toEqual({ - status: 'ready', - cwd, - spec: { id: 1, title: 'Alpha spec' }, - session: { - id: 'session-1', - file: '/tmp/brunch-project/.brunch/sessions/session-1.jsonl', - }, - chrome: {}, - }); - const rendered = renderWorkspaceState(state); expect(rendered).toContain('Brunch workspace state'); expect(rendered).toContain('status: ready'); expect(rendered).toContain('spec: Alpha spec (1)'); @@ -48,14 +31,12 @@ describe('print state', () => { }); it('renders select-spec as state instead of prompting', () => { - const state = projectWorkspaceState({ + const state: WorkspaceState = { status: 'select_spec', cwd, - chrome: { - cwd, - spec: 
null, - }, - }); + spec: null, + chrome: {}, + }; expect(renderWorkspaceState(state)).toContain('status: select_spec'); expect(renderWorkspaceState(state)).toContain('spec: '); diff --git a/src/session/README.md b/src/session/README.md index 1880e6f7..31cd1e7c 100644 --- a/src/session/README.md +++ b/src/session/README.md @@ -45,6 +45,21 @@ plus the coordination logic for workspace/spec/session lifecycle. `before_provider_request` is a guard-only check. `start-assistant-turn.ts` owns the origination decision and context seed entries. +## Session PULL read-shape ledger + +D60-L read-shape ownership is explicit for the session-domain sources the +PROJECT-stage DTOs lock against. These are source reads/facts, not reusable +projection seams; consumers should expose only the subset they need, and a +consumer that merely tags an existing source shape should read the source +directly instead of growing a wrapper. + +| Shape | Canonical owner | Current consumers | Disposition / reason | +| --- | --- | --- | --- | +| `cwd_inventory` | `inspectWorkspaceCwdInventory` | `read_workspace_context`, `renderers/workspace/workspace-context.ts` | Direct PULL read. The typed inventory already matches the tool/renderer seam, so no `projections/workspace/workspace-context` wrapper survives. | +| `workspace_overview` | `inspectWorkspaceOverview` | `read_workspace_context`, `renderers/workspace/workspace-context.ts` | Direct PULL read. Same rationale as `cwd_inventory`: the source shape is already the consumer shape. | +| `workspace_session_state` | `WorkspaceSessionCoordinator` (`WorkspaceSessionState`) | `projections/workspace/workspace-state.ts`, `chromeStateForWorkspace`, app/rpc/web workspace flows | Source union owned by the coordinator. Downstream code may flatten it, but the coordinator remains the authority for the narrow chrome snapshot and status-variant field set. 
| +| `agent_runtime_state` | `latestValidBrunchAgentStateEntryData` and transcript-backed runtime-state facts in `session/runtime-state.ts` | `projections/session/runtime-state.ts`, `projections/session/affordances.ts`, `.pi/extensions/runtime/` | Transcript-backed source read. Projection/policy layers derive from these facts rather than storing parallel hidden runtime memory. | + ## Runtime affordance coverage ledger Runtime posture affordances are pure derivations over projected runtime state plus @@ -80,8 +95,11 @@ schema, and the product-state-gated rows must stay explicit deferred tripwires. ## Imported by - `.pi/agents/contexts/` — for session/transcript context reads. +- `.pi/extensions/context/` — for direct workspace kickoff inventory / overview reads. - `projections/session/` — for reusable transcript-context DTO projection. +- `projections/workspace/` — for reusable workspace-state DTO projection. - `renderers/session/` — for reusable transcript markdown rendering. +- `renderers/workspace/` — for workspace inventory / overview text rendering over source session read shapes. - `rpc/` — for session.* and workspace.* RPC handlers. - `.pi/extensions/` — for session lifecycle hooks.