From b8f2b4e5cf9946e97976238a6ed403daff140fa4 Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 10 Jun 2026 11:54:22 +0200 Subject: [PATCH 1/4] Remodel elicitation_backlog into elicitation_gaps obligation register Replace the FE-823 question-instance / open|closed backlog with the D65-L typed coverage-obligation model: ElicitationGap carries name + rationale, the presence|field|coverage|manual predicate union, importance + derived coverage, and a disposition enum. Seed the 8-typology grounding catalog at createSpec, add createElicitationGap / setElicitationGapDisposition mutations through CommandExecutor, and derive presence coverage/answered live from the graph at read-back (anti-shadowing: no stored structural state). Regenerate migration + Drizzle snapshot; reconcile SPEC/PLAN and graph/db topology READMEs. field/coverage derivation and manual satisficiency remain downstream. --- drizzle/0003_outstanding_black_bird.sql | 18 +- drizzle/meta/0003_snapshot.json | 75 +++-- memory/PLAN.md | 110 ++++++- memory/SPEC.md | 64 ++-- src/db/README.md | 12 +- src/db/row-schemas.ts | 8 +- src/db/schema.ts | 19 +- src/graph/README.md | 16 +- src/graph/architecture.test.ts | 2 +- src/graph/command-executor.test.ts | 237 +++++++-------- src/graph/command-executor.ts | 332 +++++++++++++-------- src/graph/index.ts | 11 +- src/graph/observed-shapes-coverage.test.ts | 6 +- src/graph/queries.test.ts | 45 ++- src/graph/queries.ts | 88 ++++-- src/graph/schema/elicitation-backlog.ts | 34 --- src/graph/schema/elicitation-gaps.ts | 62 ++++ src/graph/schema/kinds.ts | 4 +- 18 files changed, 700 insertions(+), 443 deletions(-) delete mode 100644 src/graph/schema/elicitation-backlog.ts create mode 100644 src/graph/schema/elicitation-gaps.ts diff --git a/drizzle/0003_outstanding_black_bird.sql b/drizzle/0003_outstanding_black_bird.sql index abdb841b..99cf255c 100644 --- a/drizzle/0003_outstanding_black_bird.sql +++ b/drizzle/0003_outstanding_black_bird.sql @@ -1,19 +1,21 @@ -CREATE TABLE 
`elicitation_backlog` ( +CREATE TABLE `elicitation_gaps` ( `id` integer PRIMARY KEY AUTOINCREMENT NOT NULL, `spec_id` integer NOT NULL, - `kind` text NOT NULL, - `question` text NOT NULL, - `status` text DEFAULT 'open' NOT NULL, + `name` text NOT NULL, + `rationale` text NOT NULL, + `disposition` text DEFAULT 'open' NOT NULL, `basis` text DEFAULT 'explicit' NOT NULL, `readiness_band` text NOT NULL, + `predicate_kind` text NOT NULL, + `predicate` text NOT NULL, + `importance` integer DEFAULT 1 NOT NULL, `plane_affinity` text, `lens_affinity` text, - `arose_from_entry_id` integer, + `arose_from_gap_id` integer, `resolved_by_node_id` integer, - `rationale` text, `created_at_lsn` integer NOT NULL, - `closed_at_lsn` integer, + `disposition_set_at_lsn` integer, FOREIGN KEY (`spec_id`) REFERENCES `specs`(`id`) ON UPDATE no action ON DELETE no action, - FOREIGN KEY (`arose_from_entry_id`) REFERENCES `elicitation_backlog`(`id`) ON UPDATE no action ON DELETE no action, + FOREIGN KEY (`arose_from_gap_id`) REFERENCES `elicitation_gaps`(`id`) ON UPDATE no action ON DELETE no action, FOREIGN KEY (`resolved_by_node_id`) REFERENCES `nodes`(`id`) ON UPDATE no action ON DELETE no action ); diff --git a/drizzle/meta/0003_snapshot.json b/drizzle/meta/0003_snapshot.json index f6bcc8e7..fe1e9811 100644 --- a/drizzle/meta/0003_snapshot.json +++ b/drizzle/meta/0003_snapshot.json @@ -193,8 +193,8 @@ "uniqueConstraints": {}, "checkConstraints": {} }, - "elicitation_backlog": { - "name": "elicitation_backlog", + "elicitation_gaps": { + "name": "elicitation_gaps", "columns": { "id": { "name": "id", @@ -210,22 +210,22 @@ "notNull": true, "autoincrement": false }, - "kind": { - "name": "kind", + "name": { + "name": "name", "type": "text", "primaryKey": false, "notNull": true, "autoincrement": false }, - "question": { - "name": "question", + "rationale": { + "name": "rationale", "type": "text", "primaryKey": false, "notNull": true, "autoincrement": false }, - "status": { - "name": "status", + 
"disposition": { + "name": "disposition", "type": "text", "primaryKey": false, "notNull": true, @@ -247,6 +247,28 @@ "notNull": true, "autoincrement": false }, + "predicate_kind": { + "name": "predicate_kind", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "predicate": { + "name": "predicate", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "importance": { + "name": "importance", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": 1 + }, "plane_affinity": { "name": "plane_affinity", "type": "text", @@ -261,8 +283,8 @@ "notNull": false, "autoincrement": false }, - "arose_from_entry_id": { - "name": "arose_from_entry_id", + "arose_from_gap_id": { + "name": "arose_from_gap_id", "type": "integer", "primaryKey": false, "notNull": false, @@ -275,13 +297,6 @@ "notNull": false, "autoincrement": false }, - "rationale": { - "name": "rationale", - "type": "text", - "primaryKey": false, - "notNull": false, - "autoincrement": false - }, "created_at_lsn": { "name": "created_at_lsn", "type": "integer", @@ -289,8 +304,8 @@ "notNull": true, "autoincrement": false }, - "closed_at_lsn": { - "name": "closed_at_lsn", + "disposition_set_at_lsn": { + "name": "disposition_set_at_lsn", "type": "integer", "primaryKey": false, "notNull": false, @@ -299,9 +314,9 @@ }, "indexes": {}, "foreignKeys": { - "elicitation_backlog_spec_id_specs_id_fk": { - "name": "elicitation_backlog_spec_id_specs_id_fk", - "tableFrom": "elicitation_backlog", + "elicitation_gaps_spec_id_specs_id_fk": { + "name": "elicitation_gaps_spec_id_specs_id_fk", + "tableFrom": "elicitation_gaps", "tableTo": "specs", "columnsFrom": [ "spec_id" @@ -312,12 +327,12 @@ "onDelete": "no action", "onUpdate": "no action" }, - "elicitation_backlog_arose_from_entry_id_elicitation_backlog_id_fk": { - "name": "elicitation_backlog_arose_from_entry_id_elicitation_backlog_id_fk", - "tableFrom": 
"elicitation_backlog", - "tableTo": "elicitation_backlog", + "elicitation_gaps_arose_from_gap_id_elicitation_gaps_id_fk": { + "name": "elicitation_gaps_arose_from_gap_id_elicitation_gaps_id_fk", + "tableFrom": "elicitation_gaps", + "tableTo": "elicitation_gaps", "columnsFrom": [ - "arose_from_entry_id" + "arose_from_gap_id" ], "columnsTo": [ "id" @@ -325,9 +340,9 @@ "onDelete": "no action", "onUpdate": "no action" }, - "elicitation_backlog_resolved_by_node_id_nodes_id_fk": { - "name": "elicitation_backlog_resolved_by_node_id_nodes_id_fk", - "tableFrom": "elicitation_backlog", + "elicitation_gaps_resolved_by_node_id_nodes_id_fk": { + "name": "elicitation_gaps_resolved_by_node_id_nodes_id_fk", + "tableFrom": "elicitation_gaps", "tableTo": "nodes", "columnsFrom": [ "resolved_by_node_id" diff --git a/memory/PLAN.md b/memory/PLAN.md index 050d7673..cb8437ca 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -39,6 +39,8 @@ A new graph-mutation planning result has been promoted into the rolling plan as **Developer experience promoted to a first-class frontier (2026-06-09 ln-plan).** Working over the pi harness has been slow because the only fast path was ad hoc faux wiring scattered across probes; the user has elevated development feedback loops to first-class product DX (SPEC §Development Feedback Loops, D67-L–D69-L, A25-L). Promoted as `dx-feedback-loops`: bump `@earendil-works/pi-*` to latest and add a dev source-alias to the sibling `pi-mono` `src/` checkout (D67-L); consolidate three named loops (faux / real-provider / introspection) behind one `src/dev/` front door with a shared faux-harness factory (D68-L); and add one read-only, dev-gated introspection extension that captures exactly what the model receives, with mechanical and subjective modes sharing one run (D69-L). 
It is a DX substrate that accelerates every later frontier, so it leads the `Next` track; its version-bump+alias slice is a shared unblocker that should land before other frontiers' pi-facing churn. It is **not** POC-ship-critical and must preserve the D39-L sealed-profile boundary (introspection observes, never shapes product behavior; offline-lift and extension inclusion are dev-gated only). The context-pipeline coverage trio remains the elevated product-coverage spine right after. +**Readiness / elicitation-gaps remodel promoted (2026-06-09 ln-plan, post-`ln-spec`).** A SPEC pass reconceived the readiness and prospective-agenda model and must now land in code (D45-L, D57-L, D64-L, D65-L, D73-L, D74-L; A24-L, A27-L; I25-L, I30-L, I31-L). Four coupled implications: (1) **`elicitation_backlog` → `elicitation_gaps`** — the FE-823 question-instance / `open|closed` table is remodeled into typed coverage *obligations* (each gap carries a `name` typology key + meta `rationale`, a band, a `presence|field|coverage|manual` predicate union, an `importance` + derived `coverage`, and a `disposition`), seeded from the collated **grounding typology catalog** (floor `domain`/`protagonist`/`pain_pull`/`constraint` + progressive drivers `value`/`context_of_use`/`success_sketch`/`solution_boundary`) instead of four literal anchor questions; (2) **JIT capability-readiness** replaces the stored grade gate — readiness is judged on a capability request against the relevant gaps (proceed / proceed-at-low-epistemic-status / negotiate), retiring `readiness_grade`, `updateReadinessGrade`, `READINESS_GRADES`, and the `MIN_GRADE` proxy tables in `runtime-policy.ts`; (3) a soft derived **readiness estimate** (UI-only, gates nothing) plus removal of the vestigial `chrome.phase` / `chrome.chatMode` fields; (4) a small follow-on **session/runtime vocabulary leaf** (`src/session/schema/kinds.ts`) mirroring `graph/schema/kinds.ts` for the `op_mode`/`strategy`/`lens`/`goal` axes. 
These are promoted as `elicitation-gaps-remodel` → `capability-readiness` (hard chain) plus the parallel `runtime-vocab-leaf`; none are POC-ship-critical (the delivery cut de-scopes elicitation quality). **Sequencing tension with the trio:** `capability-readiness` mutates exactly the shapes the trio would lock (`workspace/workspace-state` drops phase/chatMode and gains the readiness estimate; `session/runtime-state` + composition drop grade). By the trio's own "lock upstream shape before downstream output" principle, the gaps/readiness remodel is *upstream* of the trio's readiness/chrome-touching locks and should land before stage 1 (`projection-shape-coverage`) freezes those shapes — otherwise the locks churn. Recommended order: `elicitation-gaps-remodel` → `capability-readiness` first, then the trio; or, if the trio leads, it must explicitly bracket the grade/phase/chatMode fields until the remodel lands. `elicitation-driver` now rides the remodeled gaps substrate, not the FE-823 backlog shape. + ### Context-pipeline coverage (the next design/lock spine) The four LLM-facing context concerns are not independent — they are the stages of **one pipeline** (D60-L): **PULL → PROJECT → RENDER → COMPOSE → surface**. Coverage means *each stage carries its appropriate oracle over a complete, ledgered inventory*. The stages must be closed **in dependency order**, because each downstream lock is only stable once its upstream shape is locked (projection invariants churn while read shapes still move; renderer goldens churn while projection shapes still move; prompt goldens churn while renderer output still moves). @@ -82,6 +84,12 @@ per ledger row: - None. +### Readiness & elicitation-gaps remodel (recommended ahead of the trio) + +Post-`ln-spec` implications that are **upstream** of the context-pipeline trio's readiness/chrome-touching locks (see Context §Readiness / elicitation-gaps remodel). 
Land the hard chain before stage 1 freezes `workspace/workspace-state` + `session/runtime-state` shapes, or bracket those fields in the trio. + +1. `capability-readiness` — **depends on `elicitation-gaps-remodel` (done).** Replace the stored-grade gate (`readiness_grade`, `updateReadinessGrade`, `READINESS_GRADES`, `MIN_GRADE` proxies) with JIT capability→relevant-gaps judgment; add the soft derived `readiness estimate` (UI-only); remove `chrome.phase` / `chrome.chatMode`. + ### Next The near-term spine has two tracks. The **context-pipeline coverage trio** remains the elevated product-coverage spine, sequenced in strict dependency order (lock upstream shape before downstream output). `role-safe-graph-mutations` is a graph-mutation grammar frontier that can run before or alongside the trio, and must land before relation-bearing generalized capture or semantic fixture curation rely on the new mutation surface. The `dx-feedback-loops` DX substrate is complete and no longer gates this list. `dx-introspection-live` is its low-conflict follow-on (wire the dormant introspection extension into the real TUI, harden `.fixtures/` topology + `--cwd`, make introspection conversational); it is DX substrate, parallel to the product trio, and not POC-ship-critical. @@ -92,7 +100,7 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai ### After the trio -6. `elicitation-driver` — **bounded feature; cross-cut closing row** (not itself coverage): closes the last open required cross-cut row (Seam 3a `"what to ask next" driver`) and retires the temporary dual-plan state. Buildable-now on the FE-823 substrate; pairs with the COMPOSE stage (it adds per-turn behavior over the composition oracle locked there); not POC-ship-critical. +6. `elicitation-driver` — **bounded feature; cross-cut closing row** (not itself coverage): closes the last open required cross-cut row (Seam 3a `"what to ask next" driver`) and retires the temporary dual-plan state. 
Now rides the **remodeled `elicitation_gaps` substrate** (depends on `elicitation-gaps-remodel`), not the FE-823 backlog shape — read open gaps → rank by importance/coverage/band → select next question; capture-reflection spawns/closes gaps. Pairs with the COMPOSE stage (it adds per-turn behavior over the composition oracle locked there); not POC-ship-critical. 7. `exchanges-and-generalized-capture` — **bounded proving feature** (not coverage): the remaining load-bearing unknown is capture *semantics*, not breadth closure. Narrow high-confidence extractive capture with a false-commit guard; treat any exchange-layer cleanup as delete-oriented audit, not breadth fill. Relation-bearing capture must use the role-named `mutateGraph` grammar from `role-safe-graph-mutations`; do not revive `{category, source, target}` in a capture-local edge dialect. ### Delivery gate (in flight, independent) @@ -105,6 +113,7 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - `topology-readmes-and-boundaries` — small doc/test hardening when a frontier moves files or exposes a boundary; should remain attached to the frontier when possible rather than becoming an abstract cleanup project. - `dev-seed-fixtures` — rich, real seed data for local dev / manual / observer testing: the consolidated seed contract, the `npm run seed` loader, and growing/enhancing fixture sets (Bilal-port + legacy). Its semantic curation mutation slice is folded into / blocked by `role-safe-graph-mutations`; ongoing seed-data maintenance remains low-conflict. - `dx-introspection-live` — DX follow-on to `dx-feedback-loops`: harden the four-role `.fixtures/` topology + `--cwd` launch (D70-L), unify dev gating under `BRUNCH_DEV` and wire the dormant introspection extension into the real TUI (D71-L), and make introspection conversational (A26-L). Three sequenced slices; ready for a scoping thread. 
Low-conflict with the product trio; touches `.fixtures/`, `src/app/`, `src/dev/`, `src/.pi/extensions/introspection/`. +- `runtime-vocab-leaf` — establish `src/session/schema/kinds.ts` as the drizzle-free source-of-truth leaf for the session/runtime axis enums (`op_mode`, `strategy`, `lens`, `goal`, `auto` sentinel), mirroring `graph/schema/kinds.ts` (D73-L ownership direction). The decision-3 follow-on; independent of the remodel chain and the trio. Does **not** relocate `READINESS_GRADES` (retired by `capability-readiness`). ### Horizon @@ -167,6 +176,73 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - **Design docs:** `memory/SPEC.md` D65-L; `docs/design/GRAPH_MODEL.md`. - **Current execution pointer:** Done 2026-06-08 on FE-823. Materialized `elicitation_backlog` as a flat table plus generated migration, seeded grounding questions at `createSpec`, routed create/close mutations through `CommandExecutor` on the shared spec-local LSN/change-log seam, and added graph-owned per-spec read-back. The remaining prompt-resource body pass stays in `memory/CROSS_CUT_PLAN.md` as temporary coverage completion work; the live per-turn driver remains a follow-on, not frontier completion debt. +### elicitation-gaps-remodel + +- **Name:** Elicitation-gaps obligation remodel (backlog → typed coverage gaps) +- **Linear:** unassigned — create in FE / brunch when the frontier starts (sibling, not under FE-531). +- **Kind:** structural / bounded feature +- **Status:** done +- **Certainty:** proving +- **Retires:** A24-L (flat-register sufficiency, now under the obligation model rather than the question-instance model) and A27-L (per-band gap-satisfaction predicate expressibility at acceptable LLM cost). 
+- **Lights up:** the typed coverage-obligation register — each gap carries `name` + `rationale` + `band` + `presence|field|coverage|manual` predicate + `importance` + derived `coverage` + `disposition` — replacing the FE-823 question-instance / `open|closed` backlog. +- **Stabilizes:** D65-L's gap obligation model; I30-L gap-disposition capture; the anti-shadowing line (the table holds obligation/disposition/meta only, never domain content — that lives in the graph). +- **Objective:** Remodel the FE-823 `elicitation_backlog` table/type into `elicitation_gaps`: (a) rename module/type/table (`graph/schema/elicitation-backlog.ts` → `elicitation-gaps.ts`, `ElicitationBacklogEntry` → `ElicitationGap`); (b) replace the literal `question` field with a stable `name` (typology key — machine identity + display label) plus a mandatory meta `rationale`; (c) replace `status` / `ELICITATION_BACKLOG_STATUSES` with a `disposition` enum (`open | answered | not_applicable | irrelevant | reopened`) stored only where non-derivable (scope judgments + `manual` satisficiency); (d) add a `predicate` tagged union (`presence | field | coverage | manual`); (e) split the ambiguous rating into `importance` (pre-answer weight) + derived `coverage` (post-answer strength); (f) seed the grounding band from the collated **grounding typology catalog** (floor `domain` / `protagonist` / `pain_pull` / `constraint`; progressive drivers `value` / `context_of_use` / `success_sketch` / `solution_boundary`) in `command-executor.ts`, replacing the four `*_anchor_question` literals. Pre-release posture: regenerate the migration and seed; do not preserve the backlog row shape. +- **Why now / unlocks:** D65-L reconceived the backlog as typed obligations; both `capability-readiness` and `elicitation-driver` read this remodeled substrate, so its shape must land first. It is also upstream of the context-pipeline trio's readiness/chrome-touching locks (the gaps register surfaces through projections/renderers). 
+- **Acceptance:** + - The table is `elicitation_gaps` with a regenerated migration; no `question` / `status` / `ELICITATION_BACKLOG_STATUSES` residue remains. + - Each gap carries name + rationale + band + predicate + importance + derived coverage + disposition. + - Structural `answered` is derived **live** from the graph (never hand-set); only scope dispositions (`not_applicable` / `irrelevant`) and `manual` satisficiency are stored. + - `createSpec` seeds the grounding typology catalog (floor + progressive drivers), not literal questions; the four `*_anchor_question` literals are gone. + - Mutations still route through `CommandExecutor` on the shared spec-local `{specId, lsn}` / `change_log` boundary; per-spec read-back returns gaps. +- **Verification:** Inner — gaps schema/disposition tests; seed-set test asserting the grounding typology catalog (floor vs progressive); CommandExecutor create / close-disposition tests; live-derived `answered` test (graph presence flips coverage with no hand-set). Middle — per-band predicate expressibility fixtures (A27-L); capture-reflection spawning an elicitation-band gap. Outer — per-spec read-back probe over a seeded spec. +- **Cross-cutting obligations:** Anti-shadowing — the table never holds domain content (which lives in the graph). Gaps commit only through `CommandExecutor` (`basis` via provenance-directness, D63-L: user-raised `explicit`, agent-inferred `implicit`). Multi-spec discipline — each gap belongs to one spec's register. +- **Traceability:** D8-L, D30-L, D57-L, D60-L, D63-L, D64-L, D65-L, D74-L / A24-L, A27-L / I30-L. Supersedes the FE-823 backlog row shape. +- **Design docs:** `memory/SPEC.md` D65-L and §Grounding typology catalog; `src/graph/README.md`; `src/db/README.md`. +- **Current execution pointer:** Done 2026-06-10. 
Replaced FE-823 `elicitation_backlog` with the D65-L `elicitation_gaps` obligation register, regenerated the table/migration metadata, seeded the grounding typology catalog, routed create/disposition mutations through `CommandExecutor`, and proved live `presence` coverage/answered derivation at read-back with sibling-spec isolation. `field`/`coverage` predicate derivation and `manual` LLM satisficiency remain named follow-ons for capability-readiness / later predicate slices. + +### capability-readiness + +- **Name:** JIT capability-readiness over gaps; retire the stored readiness grade +- **Linear:** unassigned — create in FE / brunch when the frontier starts. +- **Kind:** structural +- **Status:** next (recommended ahead of the trio) +- **Certainty:** proving +- **Depends on:** `elicitation-gaps-remodel` (hard — both capability-readiness and the readiness estimate read the remodeled gaps). +- **Retires:** the stored `readiness_grade` scalar and grade-as-authority (D45-L); A27-L (the `capability → relevant gaps` map carries enough signal to drive proceed / negotiate without a standing grade). +- **Lights up:** capability-readiness — on a capability request, evaluate the relevant `elicitation_gaps` → **proceed / proceed-at-low-epistemic-status / negotiate** (`establishment_offer`) — replacing `MIN_GRADE` gating. +- **Stabilizes:** I31-L (readiness never bars work; no grade scalar; no kind whitelist) and I25-L (legal affordances are projections over resolved runtime state plus capability-readiness over gaps). +- **Objective:** Replace the grade gate with JIT capability-readiness. 
(1) Remove `specs.readiness_grade`, `updateReadinessGrade`, and `READINESS_GRADES`; (2) replace `GRADE_RANK` / `GOAL_MIN_GRADE` / `STRATEGY_MIN_GRADE` / `LENS_MIN_GRADE` in `src/projections/session/runtime-policy.ts` with an explicit `capability → relevant gaps` map plus JIT evaluation (structural predicates checked mechanically; `manual` gaps consume an LLM satisficiency judgment, D57-L); (3) add the soft, derived, UI-only `readiness estimate` (per-band coverage rollup over gaps) projection; (4) remove the vestigial `chrome.phase` / `chrome.chatMode` fields from `workspace-session-coordinator.ts` and `workspace-state.ts` (the readiness estimate supersedes `phase`; `chatMode` was a redundant spec-selection restatement). +- **Why now / unlocks:** D45-L/D74-L retired the grade as a conflation of gate/display/milestone; this materializes the replacement so goal derivation, affordance legality, and prompt composition stop reading a grade. It also removes the grade/phase/chatMode fields the trio would otherwise lock prematurely. +- **Acceptance:** + - No `readiness_grade` column, `updateReadinessGrade` mutation, or `READINESS_GRADES` enum remains; affected fixtures/seeds/probes regenerated. + - `runtime-policy.ts` gates capabilities via an explicit `capability → relevant gaps` map; no `MIN_GRADE` proxy tables remain. + - A capability request yields proceed / proceed-at-low-epistemic-status / negotiate; readiness never refuses outright (I31-L). + - The readiness estimate is derived, UI-surfaced, and gates nothing (may regress honestly). + - `chrome.phase` / `chrome.chatMode` are removed from the coordinator and workspace-state projection; the readiness estimate is the only readiness surface. 
+- **Verification:** Inner — capability-readiness unit tests (a structural gap flips readiness with no grade; a `manual` gap routes to satisficiency); readiness-estimate projection test (regresses honestly, gates nothing); affordance legality over gaps (replacing the grade-gate tests). Middle — D74-L tracer: a presence-derived grounding gap flips capability-readiness with no stored grade. Outer — composed-prompt + web observer surface the readiness estimate, not a grade. +- **Cross-cutting obligations:** Readiness never bars graph truth or work (I31-L); `CommandExecutor` must not reject a node for a later-band kind (D64-L). The deferred milestone gate for export/plan/execute op-modes stays deferred (D45-L). Replace grade-gate tests across `compose.test.ts` / `prompting.test.ts` and createSpec/getSpec rather than preserving them. +- **Traceability:** D25-L, D30-L, D32-L, D45-L, D57-L, D58-L, D59-L, D64-L, D65-L, D73-L, D74-L / A27-L / I25-L, I31-L. Supersedes stored-grade gating and the `chrome.phase` / `chrome.chatMode` fields. +- **Design docs:** `memory/SPEC.md` D45-L / D74-L; `src/projections/session/runtime-policy.ts`; `src/projections/workspace/workspace-state.ts`. + +### runtime-vocab-leaf + +- **Name:** Session/runtime vocabulary source-of-truth leaf +- **Linear:** unassigned +- **Kind:** tooling / dev-substrate (small structural) +- **Status:** parallel / low-conflict +- **Certainty:** proving (low blast radius) +- **Stabilizes:** D73-L's ownership direction extended to the runtime/session axes — a drizzle-free `src/session/schema/kinds.ts` leaf owning the closed enum arrays for the runtime axes (`op_mode`, `strategy`, `lens`, `goal`, and the `auto` selection sentinel), mirroring `src/graph/schema/kinds.ts`. +- **Objective:** Establish `src/session/schema/kinds.ts` as the single source of truth for the session/runtime axis vocabulary currently scattered (e.g. `MethodId` in `src/.pi/agents/state.ts`, axis ids in `runtime-policy.ts` / `affordances.ts`). 
Consumers import the closed arrays from the leaf; the leaf imports nothing (no drizzle, no pi). Does **not** relocate `READINESS_GRADES` (retired by `capability-readiness`). +- **Why now / unlocks:** The user asked (decision 3) for a runtime-state source-of-truth file parallel to `graph/schema/kinds.ts` so `op_mode` / `strategy` / `lens` / `goal` enums have one home. Independent of the remodel chain and the trio; low conflict. +- **Acceptance:** + - `src/session/schema/kinds.ts` exists as a pure constants leaf and owns the runtime axis enums; axis-id consumers import from it. + - No runtime axis enum is re-declared in `.pi/agents/state.ts`, `runtime-policy.ts`, or `affordances.ts`. + - The leaf imports nothing runtime-heavy (drizzle-free, pi-free), matching the D73-L graph-leaf posture. +- **Verification:** Inner — import-boundary / architecture test that the leaf imports nothing and that consumers source axis enums from it. +- **Cross-cutting obligations:** Keep the leaf a pure constants module, not a behavior home; do not relocate the retired `READINESS_GRADES`. +- **Traceability:** D58-L, D59-L, D73-L / I25-L. +- **Design docs:** `src/session/README.md`; `src/graph/schema/kinds.ts` (template). + ### elicitation-driver - **Name:** Live per-turn "what to ask next" driver @@ -175,17 +251,18 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - **Status:** next - **Certainty:** proving - **Promoted from:** `memory/CROSS_CUT_PLAN.md` Seam 3a `"what to ask next" driver` row (D65-L), which remained `partial · ●` after the `elicitation-backlog` substrate landed. Per the cross-cut's own DoD a seam stays open while any `●` row is partial, so the row is disposed here as a real frontier rather than residue. -- **Lights up:** open backlog entries → rank → select next question per turn; capture-reflection grows/closes entries. 
-- **Stabilizes:** D65-L's live elicitation behavior on top of the flat `elicitation_backlog` substrate; closes the cross-cut Seam 3a row. -- **Objective:** Add the per-turn driver that reads open backlog entries for the selected spec, ranks them (band/priority), selects the next question to surface, and reconciles entries from capture-reflection (open new, close answered) — all on the existing FE-823 read/write substrate. -- **Why now / unlocks:** This is buildable now (the FE-823 substrate and per-spec read-back exist) and it closes the last required cross-cut row. It is itself a **bounded feature, not coverage**; as the cross-cut's promoted closing row it sequences ahead of fresh coverage breadth, but it is **not** POC-ship-critical (the POC delivery cut de-scopes elicitation quality), so it is not a ship-gate blocker. +- **Depends on:** `elicitation-gaps-remodel` (hard — the driver ranks/selects over the remodeled `elicitation_gaps` obligation shape, not the FE-823 question/`status` backlog). +- **Lights up:** open gaps → rank (importance / coverage / band) → select next question per turn; capture-reflection spawns/closes gaps. +- **Stabilizes:** D65-L's live elicitation behavior on top of the `elicitation_gaps` substrate; closes the cross-cut Seam 3a row. +- **Objective:** Add the per-turn driver that reads open gaps for the selected spec, ranks them (band + importance + derived coverage), selects the next question to surface, and reconciles gaps from capture-reflection (spawn new, set disposition on answered/scope-judged) — all on the remodeled `elicitation_gaps` read/write substrate. +- **Why now / unlocks:** Buildable now that `elicitation-gaps-remodel` has landed (the gaps substrate and per-spec read-back exist); it closes the last required cross-cut row. 
It is itself a **bounded feature, not coverage**; as the cross-cut's promoted closing row it sequences ahead of fresh coverage breadth, but it is **not** POC-ship-critical (the POC delivery cut de-scopes elicitation quality), so it is not a ship-gate blocker. - **Acceptance:** - - A driver reads open entries for the selected spec and produces a deterministic ranked selection of the next question. - - Capture-reflection can open new entries and close answered ones through the existing `CommandExecutor` path; no second mutation clock. + - A driver reads open gaps for the selected spec and produces a deterministic ranked selection of the next question. + - Capture-reflection can spawn new gaps and set dispositions through the existing `CommandExecutor` path; no second mutation clock. - Selection is observable enough for a probe/transcript to prove the loop without inventing a planning plane or pointer. - The cross-cut Seam 3a row flips from `partial · ●` to done when this lands. -- **Verification:** Inner — ranking/selection and reconciliation tests over seeded backlog. Middle — per-turn driver read-back over a real graph boundary; sibling-spec isolation. Outer — probe showing rank → select → capture-reflection close across turns. -- **Cross-cutting obligations:** Preserve the D4-L/D20-L command boundary and the D16-L/A4-L one-`{specId, lsn}` clock; keep the substrate flat (no graph plane, no unknown→unknown edges); no second planning system. +- **Verification:** Inner — ranking/selection and reconciliation tests over seeded gaps. Middle — per-turn driver read-back over a real graph boundary; sibling-spec isolation. Outer — probe showing rank → select → capture-reflection close across turns. +- **Cross-cutting obligations:** Preserve the D4-L/D20-L command boundary and the D16-L/A4-L one-`{specId, lsn}` clock; keep the substrate flat (no graph plane, no gap→gap edges beyond the degenerate `arose_from_gap_id` pointer; `resolved_by_node_id` is a gap→node pointer, not a gap→gap edge); no second planning system. 
- **Traceability:** D16-L, D20-L, D52-L, D63-L, D64-L, D65-L / A24-L. - **Design docs:** `memory/SPEC.md` D65-L; `docs/design/GRAPH_MODEL.md`. @@ -537,7 +614,8 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - 2026-06-08 cross-cut prompt-resource body-depth pass (Seam 3a/3b) — Done (1ca02e38): deepened every thin `src/.pi/skills/{goals,strategies,lenses,methods}` body to carry its per-axis facet guidance (goals→D59-L, strategies/lenses→README+D25-L, methods→D58-L tool-routing role), and added a manifest-wide readability/depth test in `src/.pi/agents/compose.test.ts` asserting every `{GOAL,STRATEGY,LENS,METHOD}_RESOURCES` location resolves and clears a ≥700-char floor. `state.ts` untouched. This closed the prompt-resource body-depth row, but the cross-cut is **not** exhausted: its Seam 3a `"what to ask next" driver` row (`partial · ●`) remains the last required row, now promoted to the `elicitation-driver` frontier. Verified: `npm run verify` (551 tests, build). -- 2026-06-08 `elicitation-backlog` (FE-823) — Done: materialized `elicitation_backlog` as a flat spec-scoped table with generated migration, seeded the grounding agenda at `createSpec`, routed create/close entry mutations through `CommandExecutor` on the shared `{specId, lsn}` / `change_log` boundary, and added graph-owned per-spec open-entry read-back. Reconciled D65-L/A24-L and updated graph/db topology docs. Verified: `src/graph/command-executor.test.ts`, `src/graph/queries.test.ts`, and `npm run verify`. +- 2026-06-10 `elicitation-gaps-remodel` — Done: replaced the FE-823 `elicitation_backlog` question-instance table with the D65-L `elicitation_gaps` typed obligation register; seeded the grounding typology catalog; added create/disposition commands on the shared `{specId, lsn}` / `change_log` boundary; and proved live `presence` coverage/answered derivation from graph truth with sibling-spec isolation. 
Verified: `src/graph/command-executor.test.ts`, `src/graph/queries.test.ts`, `src/graph/architecture.test.ts`, `src/graph/observed-shapes-coverage.test.ts`, full `npm run test`, and `npm run build`. +- 2026-06-08 `elicitation-backlog` (FE-823) — Done: materialized the pre-remodel flat spec-scoped prospective register with generated migration, seeded the grounding agenda at `createSpec`, routed create/close entry mutations through `CommandExecutor` on the shared `{specId, lsn}` / `change_log` boundary, and added graph-owned per-spec open-entry read-back. Superseded by `elicitation-gaps-remodel` on 2026-06-10. Verified: `src/graph/command-executor.test.ts`, `src/graph/queries.test.ts`, and `npm run verify`. Older history (including `project-graph-review-cycle`, `topology-readmes-and-boundaries`, `capture-response-to-graph`, `dev-seed-fixtures` first tracer, `graph-tool-resilience`, spec-scoped graph-clock hardening, `agents-composition-layer`, `live-graph-observer`, `agent-graph-integration`, `spec-persistence-and-startup`, `sealed-pi-profile-runtime-state`, `pi-ui-extension-patterns`, `web-shell`, `jsonl-session-viability`, `mode-shell-and-fixture-driver`, `walking-skeleton`): `docs/archive/PLAN_HISTORY.md` @@ -559,7 +637,10 @@ nodes: projection-shape-coverage [next · coverage] TRIO stage 1 (#project, PROJECT): create projections ledger + no-loss/shape invariants over dark graph/transcript DTOs; invariant-kind, NOT golden renderer-golden-coverage [next · coverage] TRIO stage 2 (#render, RENDER): create renderer ledger + golden-lock every durable renderer; depends on projection-shape-coverage prompt-composition-golden-coverage [next · coverage] TRIO stage 3 (#compose, COMPOSE): composed-prompt preview + golden-lock partials/composition matrix; depends on renderer-golden-coverage - elicitation-driver [after-trio · proving] live per-turn what-to-ask-next driver on FE-823 substrate; rides COMPOSE oracle; closes cross-cut Seam 3a + elicitation-gaps-remodel [done · 
proving] remodeled elicitation_gaps obligation register; seeded grounding typology catalog; live presence derivation + capability-readiness [next · proving] JIT capability->relevant-gaps gate + readiness estimate (UI-only); retire readiness_grade / MIN_GRADE / chrome.phase+chatMode + runtime-vocab-leaf [parallel · proving] src/session/schema/kinds.ts source-of-truth leaf for op_mode/strategy/lens/goal (D73-L direction); decision-3 follow-on + elicitation-driver [after-trio · proving] live per-turn what-to-ask-next driver on remodeled elicitation_gaps; rides COMPOSE oracle; closes cross-cut Seam 3a exchanges-and-generalized-capture [after-trio · proving] bounded feature (NOT coverage): narrow extractive capture + false-commit guard + exchange symmetry audit capture-quality-spike [done · spike] A22-L fitness evidence graduated the narrow exchanges-and-generalized-capture feature probes-and-transcripts-evolution [parallel] continuous evidence substrate @@ -574,7 +655,11 @@ edges: graph-tool-resilience -[hard]-> poc-live-ship-gate project-graph-review-cycle -[optional]-> poc-live-ship-gate minimal-authority-shell -[hard]-> poc-live-ship-gate - elicitation-backlog -[hard]-> elicitation-driver + elicitation-backlog -[superseded-by]-> elicitation-gaps-remodel (FE-823 backlog row shape remodeled into D65-L gaps) + elicitation-gaps-remodel -[hard]-> capability-readiness (capability-readiness + readiness estimate read the remodeled gaps) + elicitation-gaps-remodel -[hard]-> elicitation-driver (driver ranks/selects over the remodeled gaps shape) + capability-readiness -[shape]-> projection-shape-coverage (mutates workspace-state/runtime-state shapes the trio stage 1 would lock; land first or bracket those fields) + elicitation-gaps-remodel -[shape]-> projection-shape-coverage (gaps register surfaces through projections; lock upstream shape first) graph-tool-resilience -[hard]-> role-safe-graph-mutations (current graph tool + edge model exist) project-graph-review-cycle -[hard]-> 
role-safe-graph-mutations (current review-set proposal/accept path exists) role-safe-graph-mutations -[hard]-> exchanges-and-generalized-capture (relation-bearing capture uses mutateGraph grammar) @@ -615,5 +700,6 @@ notes: - `prompt-composition-golden-coverage` (TRIO stage 3, `#compose`) **depends on stage 2**: `compose.test.ts` / `prompting.test.ts` are invariant-rich but no golden of partial bodies or composed output exists and there is no composed-prompt preview harness. Add the preview, golden-lock partials + a composed-prompt matrix. `elicitation-driver` rides on this stage's locked oracle and follows it. Never a ship gate. - `project-graph-review-cycle` is complete evidence for the optional batch proposal/review story; keep future review-quality work as follow-up, not FE-809 completion debt. - `topology-readmes-and-boundaries` is not a license for abstract cleanup; it rides with concrete delivery seams. + - **Readiness / elicitation-gaps remodel (2026-06-09 ln-plan, post-`ln-spec`).** The SPEC pass (D45-L, D57-L, D64-L, D65-L, D73-L, D74-L; A24-L, A27-L; I25-L, I30-L, I31-L) promotes a hard chain `elicitation-gaps-remodel` → `capability-readiness` plus the parallel `runtime-vocab-leaf`. `elicitation_backlog` is remodeled into the D65-L `elicitation_gaps` obligation register (name + rationale, band, `presence|field|coverage|manual` predicate, importance + derived coverage, disposition; seeded from the grounding typology catalog). Capability-readiness becomes a JIT `capability → relevant gaps` judgment that retires the stored `readiness_grade` / `updateReadinessGrade` / `READINESS_GRADES` / `MIN_GRADE` proxies, adds a soft UI-only `readiness estimate`, and removes `chrome.phase` / `chrome.chatMode`. 
**These are upstream of the trio's readiness/chrome-touching locks** (`capability-readiness` mutates `workspace/workspace-state` + `session/runtime-state` shapes that `projection-shape-coverage` would freeze): land the chain before trio stage 1, or have the trio explicitly bracket the grade/phase/chatMode fields until the remodel lands. None are POC-ship-critical. `elicitation-driver` now depends on `elicitation-gaps-remodel`, not the FE-823 backlog shape. `runtime-vocab-leaf` is the decision-3 follow-on (session/runtime enum source-of-truth leaf) and does **not** relocate the retired `READINESS_GRADES`. Decision-2 (readiness-grade vs band term overlap → `capture_band`/`readiness_gate`) was explicitly **left alone**. - Multi-spec workspace discipline applies throughout: target the selected/current spec explicitly; no workspace-global graph truth in the POC. ``` diff --git a/memory/SPEC.md b/memory/SPEC.md index 688c10ed..081d62ec 100644 --- a/memory/SPEC.md +++ b/memory/SPEC.md @@ -78,7 +78,7 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c 19. Brunch must enforce a workspace state hierarchy `workspace(cwd) → spec → session`, where the workspace is only the current working directory invocation root, the user explicitly picks or creates one spec within that workspace before any agent loop runs, and then picks or creates a session within that spec. Spec selection persists across `/new`, and each session binds to exactly one spec. 20. Brunch must support multiple elicitation lenses within the `elicitor` agent role, with the agent owning lens selection and offer through transcript-native structured exchanges; lens metadata is carried on elicitor-emitted structured-exchange payload facets for downstream routing. 21. 
Brunch must distinguish single-exchange elicitation flows from batch-proposal/review-set flows by capture and commitment mechanism: single-exchange answers are captured synchronously by the elicitor at turn boundaries, while batch proposals carry structured entity-draft payloads and are committed only through review-set approval. -22. Brunch must maintain a spec-owned readiness grade as a forward gate inside the `elicit` operational mode. Grounding establishes the frame required for main elicitation; later grades unlock commitment and planning/export/execute posture without forbidding earlier gathering or refinement. +22. Brunch must judge readiness just-in-time, per requested capability, against the `elicitation_gaps` relevant to it — not as a spec-owned stored grade (D45-L, D74-L). Grounding establishes the frame required for generative capabilities, but readiness never forbids earlier gathering or refinement: it proceeds, scales output epistemic status, or negotiates ("I can, but answer X and Y first"). A soft, derived readiness estimate may surface in the UI but gates nothing. 23. Brunch must support a review-cycle acceptance pattern for batch proposals and commitment review sets — approve / request changes (triggering regeneration) / reject — with batch acceptance committed atomically as one CommandExecutor call; partial acceptance is not representable. 28. Brunch must support assistant-first session driving over the public JSON-RPC surface: after workspace/spec/session activation, a client can prompt or resume the agent loop, observe the current pending system/assistant-originated structured exchange, submit a typed exchange response through Brunch product methods, and let Brunch advance the transcript-backed loop without ambient user prompt injection. 
@@ -117,10 +117,11 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c | A19-L | Pi's current settings/resource lifecycle can be made product-safe through a sealed Brunch Pi Profile without forking Pi: ambient discovery remains disabled, Brunch-owned extension factories may inject explicit resources, and remaining settings/keybinding leakage can be eliminated through programmatic policy or a narrow upstream seam. | medium | open | D39-L | FE-744/profile audit: source-backed resource-loader/settings audit, tests proving no ambient `.pi/` skills/prompts/themes/extensions/context files affect Brunch, and product-owned resources still load when intentionally injected. | | A20-L | The chosen Drizzle line and row-schema derivation path can be settled during the prep envelope without forcing later M4 rework: Brunch can prove migrations, SQLite fidelity, monotonic counter allocation, change-log writes, and runtime-schema derivation on one representative persistence slice before CRUD proper starts. | high | **validated** | D16-L, D41-L | **Validated by A20-L spike (2026-06-01).** Stack: `drizzle-orm@0.45.2` + `drizzle-kit@0.31.10` + `better-sqlite3@12.8.0` + `drizzle-typebox@0.3.3` + `@sinclair/typebox@0.34.14`. Proved: (1) `drizzle-typebox` derives valid TypeBox insert/select schemas from Drizzle tables; `Value.Check` validates/rejects correctly. (2) Batch `mutateGraph`-shaped transaction (multi-node → intra-batch ref resolution → multi-edge → LSN allocation → change-log append) works atomically; full rollback on FK violation or domain-validation throw. (3) `update().returning()` works for atomic monotonic counter increment; `insert().returning()` gives auto-increment IDs for ref resolution; JSON detail column round-trips cleanly. (4) Pi tool parameters (`typebox` v1.x) and Drizzle row schemas (`@sinclair/typebox` v0.34 via `drizzle-typebox`) serve different roles and never cross — shared enum `const` arrays bridge both. 
| | A21-L | The POC can treat coherence as a bounded product verdict over structural legality plus explicitly detected contradictions, gaps, and unresolved reconciliation needs, without solving a general theory of “spec coherence.” | low | open | D8-L | M8 must sharpen the coherence rubric before implementation: known-bad adversarial briefs should show what counts as incoherent, what is merely immature/underspecified, and what should become a reconciliation need. | -| A22-L | The elicitor can perform synchronous post-exchange capture well enough for the POC: high-confidence extractive facts and readiness-grade updates can be committed immediately, while low-confidence implications can be kept out of graph truth and used as disambiguation material. | medium | partially validated | D18-L, D26-L, D45-L, I30-L | 2026-06-05 `capture-response-to-graph` validated the product wiring for narrow labeled text facts (`Goal:`, `Context:`, `Constraint:`, `Criterion:`) on `session.submitExchangeResponse`. 2026-06-07 generalized the same explicit-text capture core onto `session.submitMessage`: ordinary labeled user text now appends to transcript truth, commits through `graph/capture` → `CommandExecutor.mutateGraph({createBasis: explicit, ops})`, targets the transcript binding's spec, and publishes graph invalidations; explicit interruptions are transcript-visible but do not capture or silently answer a pending exchange. 2026-06-08 `capture-quality-spike` added a fixed scenario measurement over free prose, file/ref-bearing prose, and implication-heavy prose; the sample extraction report reached precision 1.0 / recall 1.0 with zero false commits, moving generalized capture from parked evidence-gate to a narrow graduate recommendation with an explicit false-commit guard. Readiness-grade capture remains open fitness evidence. 
| -| A24-L | A flat `elicitation_backlog` table (prospective memory) is sufficient to drive elicitor questioning and seed grounding without graph structure — no `unknown` plane/node and no unknown→unknown edges; apparent dependency among open questions is mediated by the claims their resolution produces. | medium | partially validated | D65-L | 2026-06-08 FE-823 materialized the flat table, `createSpec` seed set, `CommandExecutor` create/close mutations, and graph-owned per-spec read-back on the real LSN/change-log seam. Remaining proof is the live per-turn driver plus capture-reflection across elicitation fixtures; if genuine unknown→unknown dependency or rich traversal emerges, promote the table to a plane (rows→nodes, FK pointers→edges). | +| A22-L | The elicitor can perform synchronous post-exchange capture well enough for the POC: high-confidence extractive facts can be committed to the graph immediately and gap dispositions updated, while low-confidence implications can be kept out of graph truth and used as disambiguation material. | medium | partially validated | D18-L, D26-L, D45-L, D65-L, I30-L | 2026-06-05 `capture-response-to-graph` validated the product wiring for narrow labeled text facts (`Goal:`, `Context:`, `Constraint:`, `Criterion:`) on `session.submitExchangeResponse`. 2026-06-07 generalized the same explicit-text capture core onto `session.submitMessage`: ordinary labeled user text now appends to transcript truth, commits through `graph/capture` → `CommandExecutor.mutateGraph({createBasis: explicit, ops})`, targets the transcript binding's spec, and publishes graph invalidations; explicit interruptions are transcript-visible but do not capture or silently answer a pending exchange. 
2026-06-08 `capture-quality-spike` added a fixed scenario measurement over free prose, file/ref-bearing prose, and implication-heavy prose; the sample extraction report reached precision 1.0 / recall 1.0 with zero false commits, moving generalized capture from parked evidence-gate to a narrow graduate recommendation with an explicit false-commit guard. Gap-disposition capture remains open fitness evidence. | +| A24-L | A flat `elicitation_gaps` table (prospective memory) is sufficient to drive elicitor questioning, seed grounding, and feed capability-readiness without graph structure — gaps are typed coverage obligations (typologies), not graph nodes; apparent dependency among gaps is mediated by the claims their resolution produces. | medium | validated | D65-L, D74-L | 2026-06-08 FE-823 materialized the flat table (built as `elicitation_backlog`) on the real LSN/change-log seam. 2026-06-10 `elicitation-gaps-remodel` replaced that question-instance shape with the typed obligation register (`name`/`rationale`/band/predicate/importance/disposition), regenerated the table as `elicitation_gaps`, seeded the grounding typology catalog, and proved live presence-derived coverage/answered read-back without stored structural answers. Remaining downstream proof is capability-readiness over the register (D74-L) and capture-reflection spawning; if genuine gap→gap dependency or rich traversal emerges, promote the table to a plane (rows→nodes, FK pointers→edges). | | A25-L | Tracking the latest `pi-coding-agent` release continuously (via source-alias in dev + package dependency bumps) keeps Brunch adaptable without routinely destabilizing it, because Brunch's pi product-behavior surface is concentrated in a few sealed integration seams (the `src/.pi/` extension bundle and the session/runtime adapters) behind the D39-L profile — even though pi *types* are imported across ~25 files, those are mostly type-only and pass through that small set of seams. 
| medium | partially validated | D67-L | 2026-06-09 FE-825 bumped Brunch to pi 0.79, kept type/default resolution on installed `dist`, added a `PI_SOURCE`-gated vite/vitest runtime alias to sibling `pi-mono` source, preserved product default sealed-profile/offline behavior, and passed `npm run verify`. Each later pi bump that lands without product-behavior regressions raises confidence; a bump that silently breaks sealed-profile assumptions falsifies it. | | A26-L | The refined "conversational introspection" goal can be built as a *read-only session-query-back tool*: under `BRUNCH_DEV`, the agent can call `brunch_session_query` over `ctx.sessionManager.getBranch()`, find entries by predicate, project capped dot/`[n]`/`[*]` paths, and surface exact returned values in chat without weakening D39-L sealing or turning self-reporting into product behavior. | medium | validated | D69-L, D71-L | 2026-06-09 `dx-introspection-live` slice 2 replaced the earlier fixed structured self-report/schema idea with `src/.pi/extensions/session-query/`: a dev-gated read-only tool registered only through `createBrunchPiExtensions(..., { introspection: { enabled } })`, covered by find/project/truncation unit tests, default-off/default-on registration tests, and a faux turn that returns verbatim projected session values. Live-model compliance with "call then echo verbatim" remains outer-loop fitness, not a merge gate. | +| A27-L | Gap satisfaction is expressible band-by-band at acceptable LLM cost: **commitment** typologies are structural `presence`/`field`/`coverage` predicates over the graph; **grounding** typologies are a `presence` floor plus `manual` LLM satisficiency (D57-L); **elicitation** typologies are generatively spawned. The explicit `capability → relevant gaps` map (D74-L) carries enough signal to drive proceed / negotiate without a standing grade. 
| medium | partially validated | D65-L, D74-L | 2026-06-10 `elicitation-gaps-remodel` validated the structural `presence` case: a seeded grounding gap's derived coverage/answered state flips from graph truth with no stored structural answer and sibling-spec isolation holds. Remaining proof: D74-L capability-readiness tracer, `field`/`coverage` predicate derivation, `manual` LLM satisficiency, and elicitation/commitment fixtures. Falsified if grounding readiness cannot decompose into per-typology presence+manual judgments, or if commitment obligations need logic the predicate union can't express. | ### Active Decisions @@ -134,24 +135,25 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c - **D34-L — Command containment separates visibility suppression from effect blocking.** Current Pi extension seams can hide unsupported slash suggestions with autocomplete wrapping and can cancel branch/session effects through lifecycle hooks, but they cannot strictly suppress exact interactive built-in commands before `InteractiveMode` dispatches them. Brunch-owned commands must use product-specific names and route writes through Brunch handlers/`CommandExecutor`; extension command collisions are not an override mechanism. Strict built-in command/keybinding policy is a Pi upstream/API ask, while POC safety relies on hiding generic affordances, blocking dangerous effects (`/fork`, `/clone`, `/tree`, raw session replacement), and failing fast on branched transcripts. Brunch's command-policy code should live in `src/.pi/extensions/commands/policy.ts`, merging branch/session-effect blocking with any product command allow/deny behavior instead of preserving a branch-only module. Depends on: D2-L, D24-L, A18-L. Supersedes: treating extension `input` handlers or command-name collisions as built-in command allowlisting. 
- **D35-L — Dynamic TUI chrome is a Brunch projection wrapper over Pi UI primitives.** Downstream TUI affordances should call a Brunch-owned renderer (`renderBrunchChrome` or its successor) with one activated product-state value rather than scattering raw `ctx.ui.setHeader`, `setFooter`, `setWidget`, title, or working-indicator calls. The wrapper is stateless projection over canonical workspace/session/graph facts, including the discovered project name, selected spec, real activated session id/label, launch activation kind for new-session startup headers, and app-supplied live sidecar URL when present, while its TUI footer compositor may read Pi footer telemetry (`getGitBranch`, foreign `getExtensionStatuses`) at render time. Brunch chrome and startup dialog are project-first shell surfaces with selected-spec context: the project name labels the cwd container, the spec title labels the selected graph, and the session label distinguishes transcript instances. New `newSpec` / `newSession` launches keep Pi `quietStartup` but install a Brunch-owned expandable header through the chrome wrapper; resume/open launches stay quiet. Brunch chrome does not publish a `brunch.chrome` status key; `ctx.ui.setStatus(key, text)` remains a lateral contribution channel for other extensions and future dynamic Brunch state. RPC clients should rely only on surfaces Pi actually emits for the wrapper (currently sidecar/widget-compatible string arrays and title, plus any future explicit status adapter) because header/footer/working-indicator are TUI-only in current Pi RPC mode. Session display names are product projections over Pi session metadata: every Brunch-created session should immediately receive a neutral workspace-global `Untitled Session N` `session_info` label, and later user/generated names may characterize the transcript without replacing spec identity or graph truth. Depends on: D2-L, D21-L, D34-L, A18-L. 
Supersedes: treating Pi UI methods as direct downstream affordance APIs, rendering placeholder session state such as `unbound` after a session is activated, consuming the status-key namespace for chrome's own static summary, using spec title as the default session label, or allowing two unchanged Brunch-created default names to collide in one cwd. - **D52-L — Source topology targets `src/{app, workspace, scripts, .pi, db, graph, session, projections, renderers, rpc, web}` with directed layer dependencies.** Product entrypoints live under `src/app/`, local executable utility ownership is reserved under `src/scripts/`, package/workspace identity tests live under `src/workspace/`, and reusable projection/rendering modules live under top-level `src/projections/` and `src/renderers/` rather than whichever domain or adapter first needed them. `app/` owns product host entrypoints and wiring. `workspace/` owns cwd/package/workspace identity helpers. `scripts/` owns local executable utilities. `.pi/` is the sealed Pi-harness runtime surface: `agents/` owns runtime prompt assembly, role definitions, legal resource manifests, and agent-context orchestration; `skills/` owns goal/strategy/lens/method markdown resources read on demand; `components/` owns reusable Pi TUI/message components; `extensions/` owns Pi registrars for tools, hooks, commands, chrome, context tools, system-prompt append, exchanges, graph tools, workspace dialogs, runtime policy, and session lifecycle. `graph/` is the domain layer: CommandExecutor, readers, policy, validators, query bucketing, change-log replay, reconciliation-need substrate; it imports from `db/` (Drizzle schema, migrations, connection lifecycle) and no other layer imports `db/` directly. `session/` owns transcript projection, exchange extraction, workspace coordination, session binding, runtime-state transcript entries, and LSN staleness tracking over Pi JSONL. 
`projections/` owns structured DTOs derived from graph/session/workspace/tool facts; it must not render lossy text and must not import adapters, transports, app entrypoints, or web code. `renderers/` owns lossy text/markdown/toon/tool-content rendering over domain or projection inputs; it may import input types from `graph/`, `session/`, or `projections/` as needed, but must not import adapters, transports, app entrypoints, or web code. `rpc/` owns Brunch JSON-RPC handlers. `web/` owns the React client. Dependency direction: `.pi/`, `rpc/`, and `app/` may import from `graph/`, `session/`, `projections/`, and `renderers/`; `.pi/agents/` may import from `graph/`, `session/`, `projections/`, and `renderers/` to build agent context; `.pi/extensions/` may import from `.pi/agents/` and `.pi/components/`; `projections/` may import from `graph/`, `session/`, and `workspace/`; `renderers/` may import from `projections/`, `graph/`, and `session/`; `graph/` imports from `db/`, and `db/` may import the drizzle-free taxonomy leaf `graph/schema/kinds.ts` — the single sanctioned `db/`→`graph/` edge (D73-L); `web/` is a standalone build target. Depends on: D2-L, D4-L, D39-L, D40-L. Refined by: D73-L. Supersedes: scattering session domain files at `src/` root; treating Pi-only agents as a host-independent top-level `src/.pi/` layer; nesting prompt composition under `src/.pi/context/`; treating reusable `project` / `format` helpers as owned by whichever adapter first needed them. 
-- **D73-L — Domain enum taxonomy is owned by a drizzle-free `src/graph/schema/kinds.ts` leaf; `db/` is a consumer, not the source.** The closed enum `const` arrays that define graph vocabulary — node kinds (`INTENT_KINDS`, `ORACLE_KINDS`, `DESIGN_KINDS`, `PLAN_KINDS`), `NODE_PLANES` (`intent`/`oracle`/`design`/`plan`), `NODE_BASES`, `EDGE_CATEGORIES`, `EDGE_STANCES`, `READINESS_GRADES`, `READINESS_BANDS`, `LENS_AFFINITIES`, `ELICITATION_BACKLOG_STATUSES` — live in `graph/schema/kinds.ts`, a pure constants leaf that imports nothing (no drizzle, no `graph/atoms`). Both `db/schema.ts` (for `text({ enum })` column constraints, including the previously-inlined `plane` columns) and `graph/` domain modules import the arrays from this leaf; `graph/index.ts` re-exports them from the leaf so non-graph layers still avoid importing `db/` directly (I26-L). Derivations stay where they are read: `NODE_KIND_METADATA`, `formatGraphNodeCode`, `parseGraphNodeCode`, and `intentKindCategory` remain in `graph/schema/nodes.ts` (D62-L). The motivating defect: because `db/schema.ts` eagerly evaluates `sqliteTable(...)` and `verbatimModuleSyntax` emits even type-only imports at runtime, any value-import path from `web/` into the old taxonomy location pulled Drizzle into the browser bundle. Locating taxonomy in a drizzle-free leaf makes the `web/` build target structurally Drizzle-free (I44-L) and corrects the ownership direction so the domain, not the persistence layer, owns its vocabulary. Depends on: D16-L, D52-L, D54-L, D62-L, D63-L, D64-L; I26-L. Supersedes: `db/schema.ts` owning the shared enum `const` arrays and the "enum literals flow outward from `db/schema.ts`" posture; the triplicated inline `['intent','oracle','design','plan']` plane literals. 
+- **D73-L — Domain enum taxonomy is owned by a drizzle-free `src/graph/schema/kinds.ts` leaf; `db/` is a consumer, not the source.** The closed enum `const` arrays that define graph vocabulary — node kinds (`INTENT_KINDS`, `ORACLE_KINDS`, `DESIGN_KINDS`, `PLAN_KINDS`), `NODE_PLANES` (`intent`/`oracle`/`design`/`plan`), `NODE_BASES`, `EDGE_CATEGORIES`, `EDGE_STANCES`, `READINESS_GRADES`, `READINESS_BANDS`, `LENS_AFFINITIES`, `ELICITATION_BACKLOG_STATUSES` — live in `graph/schema/kinds.ts`, a pure constants leaf that imports nothing (no drizzle, no `graph/atoms`). Both `db/schema.ts` (for `text({ enum })` column constraints, including the previously-inlined `plane` columns) and `graph/` domain modules import the arrays from this leaf; `graph/index.ts` re-exports them from the leaf so non-graph layers still avoid importing `db/` directly (I26-L). Derivations stay where they are read: `NODE_KIND_METADATA`, `formatGraphNodeCode`, `parseGraphNodeCode`, and `intentKindCategory` remain in `graph/schema/nodes.ts` (D62-L). The motivating defect: because `db/schema.ts` eagerly evaluates `sqliteTable(...)` and `verbatimModuleSyntax` emits even type-only imports at runtime, any value-import path from `web/` into the old taxonomy location pulled Drizzle into the browser bundle. Locating taxonomy in a drizzle-free leaf makes the `web/` build target structurally Drizzle-free (I44-L) and corrects the ownership direction so the domain, not the persistence layer, owns its vocabulary. Vocabulary migration (pending the D45-L/D65-L sweep): `READINESS_GRADES` is retired (readiness is no longer a stored grade, D45-L), and `ELICITATION_BACKLOG_STATUSES` is replaced by the `elicitation_gaps` disposition + predicate-shape enums (D65-L); `READINESS_BANDS` stays. Depends on: D16-L, D52-L, D54-L, D62-L, D63-L, D64-L; I26-L. 
Supersedes: `db/schema.ts` owning the shared enum `const` arrays and the "enum literals flow outward from `db/schema.ts`" posture; the triplicated inline `['intent','oracle','design','plan']` plane literals. #### Data model & vocabulary - **D3-L — Graph-native, session-native vocabulary; no generic `records.*` surface.** Commands converge on `graph.*` / `session.*` (with per-plane families `intent.*`, `oracle.*`, `design.*`, `plan.*` available when sharper semantics are useful). Depends on: A6-L. Supersedes: —. - **D7-L — ~~`framing_as` modality, not first-class kinds.~~ Retired.** `framing_as` is absorbed by first-class `thesis`, `term`, `constraint`, and `goal` kinds per the Phase 2 node lock. No node carries a `framing_as` field. See [`docs/design/GRAPH_MODEL.md` §framing_as — retired](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/GRAPH_MODEL.md#framing_as--retired). Depends on: A7-L (retired). Superseded by: D54-L, D56-L. -- **D8-L — Reconciliation needs are a first-class substrate alongside graph truth, change log, and a bounded coherence verdict.** Needs (impasses, gaps, contradictions, process debt) share the same spec-local LSN as their owning spec's change log and follow the same mutation invariant. Per [`docs/design/GRAPH_MODEL.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/GRAPH_MODEL.md#reconciliationneed--separate-substrate-not-a-graph-edge), each need targets exactly one of `{kind: 'edge', edgeId}` or `{kind: 'node_pair', aId, bId}` and is not itself a graph edge. For the POC, coherence is not an unbounded aesthetic or philosophical judgment; it is the product-visible verdict produced from structural legality plus surfaced contradictions/gaps/unresolved needs, with the exact rubric still open under A21-L until M8 and the subtype split deferred per A8-L. 
It is the *retrospective* coherence register (backward-looking repair after a mutation, worked async by the reviewer, D29-L); the *prospective* elicitation backlog register is a distinct substrate (D65-L). Depends on: A8-L, A21-L. Refined by: D51-L. Supersedes: any `concerns`-edge wiring from reconciliation needs to graph nodes. +- **D8-L — Reconciliation needs are a first-class substrate alongside graph truth, change log, and a bounded coherence verdict.** Needs (impasses, gaps, contradictions, process debt) share the same spec-local LSN as their owning spec's change log and follow the same mutation invariant. Per [`docs/design/GRAPH_MODEL.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/GRAPH_MODEL.md#reconciliationneed--separate-substrate-not-a-graph-edge), each need targets exactly one of `{kind: 'edge', edgeId}` or `{kind: 'node_pair', aId, bId}` and is not itself a graph edge. For the POC, coherence is not an unbounded aesthetic or philosophical judgment; it is the product-visible verdict produced from structural legality plus surfaced contradictions/gaps/unresolved needs, with the exact rubric still open under A21-L until M8 and the subtype split deferred per A8-L. It is the *retrospective* coherence register (backward-looking repair after a mutation, worked async by the reviewer, D29-L); the *prospective* `elicitation_gaps` register is a distinct substrate (D65-L). Depends on: A8-L, A21-L. Refined by: D51-L. Supersedes: any `concerns`-edge wiring from reconciliation needs to graph nodes. - **D9-L — Reasoning records split by shape.** `decision` is graph-native; `impasse` is a reconciliation need, not a graph node; `justification` stays compact (rendered text on the decision) until forced otherwise. Phase 2 (per `docs/design/GRAPH_MODEL.md`) keeps `decision` as a plain node rather than a hyper-edge / hub-node for the POC. Depends on: D8-L. Supersedes: —. 
- **D54-L — Graph node shape is a common flat interface with `kind_ordinal`, `title`, `body`, `basis`, `source`, and a per-kind `detail` JSON column; canonical contract is [`docs/design/GRAPH_MODEL.md` §GraphNode](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/GRAPH_MODEL.md#graphnode--the-single-shape).** All planes and kinds share one `nodes` table. `id` is the internal SQLite integer/FK identity; `kind_ordinal` is the monotonic per-`(spec, plane, kind)` ordinal used with `kind` to project a stable human reference code (D62-L). The rendered code string is not stored in the database. `plane` determines which closed `kind` enum applies; `kind` is structurally validated. `basis ∈ explicit | implicit` records item-level approval strength per D63-L. `source` is a free-form string for epistemic attribution (e.g. "stakeholder", "regulatory", "derived", "agent synthesis") — convention by prompt, not structural validation; it exists for context-render enrichment and will be rendered back into sparse text, not used for policy or filtering. `detail` is an optional JSON column with per-kind validated sub-structures: `decision` requires `{ chosen_option, rejected, rationale }`, `term` requires `{ definition, aliases? }`; all other kinds must omit `detail`. `provenance` is retired from the node shape — `change_log` at `createdAtLsn` owns the audit trail, while `basis` and `source` carry only local interpretation fields. The intent kind rubric (modality of claim + source question per kind) is agent-facing prompting guidance in GRAPH_MODEL.md §"Prompting guidance for kind discrimination", not structural enforcement. Depends on: D4-L, D16-L, D52-L, D56-L, D62-L, D63-L. Supersedes: D7-L (`framing_as` modality), the deferred Phase 2 node placeholder in prior GRAPH_MODEL.md. 
- **D55-L — `provenance` retired from both edges and nodes; `change_log` owns audit trail and mutation path.** Transcript entry pointers (`sessionId`, `entryId`, `proposalEntryId`) are fragile under compaction and redundant with `change_log` keyed by `createdAtLsn` / `updatedAtLsn`. `basis` does **not** encode the transport or strategy path; per D63-L it records whether the exact graph item was user-approved (`explicit`) or agent-materialized after concept-level approval (`implicit`). `change_log.operation` and payload record the durable mutation context (`create_node`, `mutate_graph`, `accept_review_set`, etc.). Edges retain `basis` and `rationale`; nodes retain `basis` and `source` (epistemic attribution). Depends on: D16-L, D51-L, D54-L, D63-L. Supersedes: `EdgeProvenance` from Phase 1 edge lock, the planned node-side `provenance` symmetry with edges, and the former `accepted_review_set` basis-as-path enum. - **D56-L — Intent node kinds: 11 kinds in 3 derived semantic categories (basic / structural / reasoning); canonical contract is [`docs/design/GRAPH_MODEL.md` §Per-plane node kinds](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/GRAPH_MODEL.md#per-plane-node-kinds).** `basic` (goal, thesis, term, context) carries grounding material; `structural` (requirement, assumption, constraint, invariant) carries core specification; `reasoning` (decision, criterion, example) carries decisions and evidence. This category is a pure function of intent `kind` — not stored on the node — and remains distinct from the cross-plane readiness-band metadata in D64-L. `thesis` carries "what/who/why/for whom" material (La Carte Blanche style). `term` carries canonical naming commitments (ubiquitous language). `invariant` is first-class (not a constraint subtype) because its operational role differs: invariants get `dependency` and `proof` edges, constraints get `boundary` edges. 
Each intent kind has a modality-of-claim and source-question rubric for agent prompting (GRAPH_MODEL.md §"Prompting guidance"). Oracle (check, validation_method, evidence, obligation), design (module, interface), and plan (milestone, frontier, slice) kinds are stable from worked examples and receive prefix/readiness-band metadata through D62-L/D64-L. Depends on: D54-L, D62-L, D64-L. Supersedes: D7-L (`framing_as`), A7-L. -- **D57-L — Spec-grade grounding gate is LLM-judged satisficiency over readiness-band evidence with a count floor, not a hard kind whitelist.** The gate from `grounding_onboarding` toward `elicitation_ready` is not structurally enforced by rubric coverage checks. The agent judges readiness using prompt-embedded abstract drivers (Walter-style: what is it, who is it for, what problem, what value, when used, how measured) plus D64-L readiness-band evidence. The grounding threshold centers on grounding-band nodes such as `goal`, `thesis`, `term`, and `context`, and may also count grounding-relevant constraints because a constraint anchor can be part of the frame. The agent cannot declare grounding complete with zero grounding-band graph evidence, but obvious lower-grade `requirement`, `criterion`, `check`, or design nodes may still be captured when the user clearly gives them; those nodes simply do not by themselves prove the grounding threshold. Grounding elicitation may establish workspace posture, but posture is not a spec-row field or graph node kind in the POC. Depends on: D45-L, D56-L, D64-L. Supersedes: D30-L grounding-bundle anchor vocabulary as the sole readiness gate description. Refines: D30-L, D45-L. 
+- **D57-L — Readiness satisficiency is LLM-judged over gap evidence, not a hard coverage checklist; this judgment is the non-structurally-obvious branch of JIT capability-readiness (D74-L).** Grounding readiness (originally framed as the `grounding_onboarding → elicitation_ready` transition) is not structurally enforced by rubric coverage checks. The agent judges readiness using prompt-embedded abstract drivers (Walter-style: what is it, who is it for, what problem, what value, when used, how measured) plus D64-L readiness-band evidence and the relevant `elicitation_gaps` (D65-L). Those drivers are **judgment drivers and gap typologies, not hard gates** — they enrich the satisficiency call and may seed an establishment offer, but a missing driver never bars work; the candidate-proposal / disambiguation UX is exactly how thin grounding fills progressively, so an open gap must never wall it. The grounding judgment centers on grounding-band nodes (`goal`, `thesis`, `term`, `context`, grounding-relevant `constraint`); the agent cannot declare grounding satisficed with zero grounding-band evidence (a count floor), but obvious later-band nodes may still be captured when clearly given. Generalized to all capabilities: when a gap is structurally checkable (`presence` / `field` / `coverage` predicate, D65-L) the agent need not judge; only non-obvious (`manual`) gaps consume an LLM satisficiency judgment, and the judgment is made **per-requested-capability**, not as a standing grade promotion. Grounding elicitation may establish workspace posture, but posture is not a spec-row field or graph node kind. Depends on: D45-L, D56-L, D64-L, D65-L, D74-L. Supersedes: D30-L grounding-bundle anchor vocabulary as the sole readiness gate, and the standing `grounding_onboarding → elicitation_ready` grade promotion as the gate mechanism. Refines: D30-L, D45-L. 
- **D51-L — Graph edge model is a closed structural-category set with a separate ReconciliationNeed substrate; canonical contract is [`docs/design/GRAPH_MODEL.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/GRAPH_MODEL.md).** Every accepted edge is one of eight closed categories (`dependency`, `proof`, `support`, `realization`, `boundary`, `composition`, `association`, `supersession`); `stance: for | against` is valid only on `proof` and `support`; `basis ∈ explicit | implicit` follows D63-L (no `inferred`, no `accepted_review_set` path value). Accepted edges have no mutable `status` field — `proposed` lives in review-set drafts, `rejected` is absent + change-log audit, `stale` is represented by a `ReconciliationNeed`. Identity fields (`category`, `sourceId`, `targetId`, `stance`) are immutable on an accepted edge; a "category change" is delete + recreate. `supersession` chains are acyclic and the `CommandExecutor` must validate acyclicity against existing same-spec edges plus proposed batch edges. Only `dependency` cascades automatically; other categories surface advisory recon-needs rather than auto-blocking. Cross-plane edges are unrestricted at the POC stage; `realization` subtypes (implementation/establishment/assertion/etc.) may be derived from node-tuple lookup later rather than encoded on the edge. `ReconciliationNeed` is a separate substrate whose target is exactly `{kind:'edge', edgeId}` or `{kind:'node_pair', aId, bId}` — it is not itself a graph edge. Depends on: D4-L, D8-L, D16-L, D27-L, A14-L, D63-L. 
Supersedes: the named-relation catalogue in `docs/architecture/pi-seam-extensions.md` §"Edge types" (`validates`, `instance_of`, `produces`, `discharges`, `depends_on`, `derived_from`, `counterexample_for`, `witnesses`), the per-relation policy registry / lookup, the brainstormed expanded edge taxonomy in `archive/docs/design/GRAPH_EDGE_CATEGORIES.md`, any `concerns`-edge wiring from reconciliation needs to graph nodes, and the former `accepted_review_set` edge-basis value. -- **D61-L — A spec is an initiative answering a problem; its truth-bearing units are claims resolved at node level.** A spec's identity is its problem-answering initiative, not the product areas, seams, or domains it touches; it may reach a done-state while those keep evolving. Its truth-bearing units ("claims") are the existing `structural` and `reasoning` intent node kinds (requirement, assumption, constraint, invariant, decision, criterion, example) under D54-L/D56-L — `claim` is a vocabulary umbrella, not a new node kind — so revision, conflict, and supersession resolve at node level (supersession edges per D51-L), not at whole-spec level. POC scope: each spec owns its own intent graph (no cross-spec claim sharing); the `workspace → spec → session` hierarchy (D11-L) is unchanged and `spec.readiness_grade` (D45-L) remains the only persisted spec-state — no initiative-status column is added. The full initiative/claim model (cross-spec claim survival/adoption, initiative-status lifecycle, spec-to-spec relationships, current-truth-as-projection) is deferred to Future Direction §Spec initiative & claim model; rationale: [`docs/design/SPEC_INITIATIVE_MODEL.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/SPEC_INITIATIVE_MODEL.md). Depends on: D11-L, D45-L, D54-L, D56-L. Supersedes: —. 
+- **D61-L — A spec is an initiative answering a problem; its truth-bearing units are claims resolved at node level.** A spec's identity is its problem-answering initiative, not the product areas, seams, or domains it touches; it may reach a done-state while those keep evolving. Its truth-bearing units ("claims") are the existing `structural` and `reasoning` intent node kinds (requirement, assumption, constraint, invariant, decision, criterion, example) under D54-L/D56-L — `claim` is a vocabulary umbrella, not a new node kind — so revision, conflict, and supersession resolve at node level (supersession edges per D51-L), not at whole-spec level. POC scope: each spec owns its own intent graph (no cross-spec claim sharing); the `workspace → spec → session` hierarchy (D11-L) is unchanged and the spec row carries only identity — no stored readiness grade (D45-L) and no initiative-status column. The full initiative/claim model (cross-spec claim survival/adoption, initiative-status lifecycle, spec-to-spec relationships, current-truth-as-projection) is deferred to Future Direction §Spec initiative & claim model; rationale: [`docs/design/SPEC_INITIATIVE_MODEL.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/SPEC_INITIATIVE_MODEL.md). Depends on: D11-L, D45-L, D54-L, D56-L. Supersedes: —. - **D62-L — Graph nodes have stable spec-scoped human reference codes projected from stored `kind_ordinal`, separate from integer storage IDs.** `NodeId` remains the SQLite integer primary key/FK used internally. The database stores `kind` and `kind_ordinal`; user/agent-facing handles such as `G1`, `CON2`, `R3`, `CR4`, `VM1`, or `SL2` are projection strings formed by a hard-coded presentation lookup from `kind` to a 1–3 capital-letter label plus `kind_ordinal`. The rendered code string is not a graph column. Labels are unique across all node kinds so `#`-mentions can parse by longest-prefix match, then resolve to `(kind, kind_ordinal)` and finally to `NodeId`. 
`kind_ordinal` is monotonic per `(spec_id, plane, kind)`, allocated by the `CommandExecutor` in the same transaction as node creation from a counter row (`node_kind_counters` or equivalent), not by `MAX(kind_ordinal)+1`; ordinals are never reused after deletion or supersession. DB constraints must make `(spec_id, plane, kind, kind_ordinal)` unique; there is no `(spec_id, code)` uniqueness constraint because `code` is not stored. Context renders and prompt contexts should use projected codes as primary handles and reserve raw integer IDs for internal diagnostics/adapters. Depends on: D14-L, D16-L, D20-L, D54-L, D56-L, D61-L. Supersedes: the string-`NodeId` examples in earlier GRAPH_MODEL text and the previous app's application-only `MAX(kind_ordinal)+1` allocation pattern. -- **D63-L — Graph `basis` records item-level approval strength, not the mutation pathway.** Accepted nodes and edges use `basis ∈ explicit | implicit`. `explicit` means the user directly stated the graph item or approved the exact node/edge in a review set; `implicit` means the user accepted a concept/proposal and the agent materialized specific graph items to match it without per-item review (the `propose-graph` direct-commit path). The mutation pathway lives in `change_log.operation` and payload (`mutate_graph`, `accept_review_set`, post-exchange capture, etc.), while epistemic attribution lives in `Node.source` and proposal UI metadata may still carry `epistemic_status`. Low-confidence inferred material is still not graph truth; it remains in preface/capture analysis/review drafts/reconciliation needs until clarified or accepted. 
More abstractly, `basis` is a *provenance-directness* marker — directly from the user (`explicit`) versus agent-materialized from user input (`implicit`) — of which item-level approval strength is the claim-flavored reading; this lets the same `explicit | implicit` distinction apply to non-claim registers such as the elicitation backlog (user-raised vs agent-inferred, D65-L). Depends on: D26-L, D27-L, D53-L, D54-L, D55-L. Supersedes: `basis = accepted_review_set` as a persisted graph enum value and any interpretation of `basis` as a provenance/path field. -- **D64-L — Readiness bands are non-exclusive derived node-kind groupings used for elicitor goals, context filters, and grade rubrics; they are not structural legality gates.** Bands are `grounding`, `elicitation`, and `commitment`. A node kind may belong to multiple bands (for example `constraint` can contribute to grounding when it is the constraint anchor and to elicitation when it bounds solution space). Bands guide what the elicitor is trying to complete at a given `readiness_grade`, what graph filters and rendered context can show, and what evidence a readiness validator considers. The `CommandExecutor` must not reject a clear `requirement`, `criterion`, `check`, design node, or other later-band kind merely because the spec is at an earlier grade; readiness controls objectives and unlocks, not what graph truth may contain. Depends on: D45-L, D56-L, D57-L, D59-L, D60-L. Supersedes: treating the intent `basic | structural | reasoning` category as the readiness taxonomy or treating readiness as a per-kind creation whitelist. 
-- **D65-L — The elicitation backlog is a prospective process-agenda register (the elicitor's "prospective memory"), distinct from both reconciliation needs and graph truth.** The single term `unknown` conflated two concepts with different ontological status and resolution mechanism: (a) a *process gap* — something the user has not answered yet, knowable now by asking — and (b) a *domain gap* — something nobody knows and cannot economically find out now (the deferred `risk` node, Future Direction §Vocabulary evolution). Only (a) drives elicitation, and it is modelled as an **`elicitation_backlog`** entry, not a graph node. The register is forward-looking but **async and unordered** — the name `elicitation_backlog` (chosen over `agenda`/`need`) signals that entries are logged opportunistically and need not drive the next turn: an entry logged now may only become relevant in a later grade or under a different lens. It is seeded at spec creation with grounding-band questions, read by the elicitor every turn to choose what to ask next, and grown by capture-reflection (each round may spawn new entries). Its resolution produces one of: a **claim** (answered → graph node), a **`risk`** (asked but unknowable → durable spec content, deferred), or **more entries**. It is the *prospective* sibling of the *retrospective* `reconciliation_need` coherence register (D8-L) — two registers, two loops; the elicitation register is the elicitor's per-turn agenda, the reconciliation register is the async reviewer's post-mutation repair queue (D29-L). It is a **flat table, not a graph plane/node**, because its only real relations are filter attributes (plane/lens affinity, D64-L grade-band, `open | closed` status) plus foreign-key pointers (`arose_from`, `resolved_by`); apparent unknown→unknown dependency ("answer B before A") is illusory — it is mediated by the claims that resolving a need produces, which already carry `dependency` edges (D51-L). 
A table with those FK pointers is a degenerate bipartite graph, forward-compatible with promotion to a plane only if genuine unknown→unknown structure later emerges; this keeps the locked graph-of-claims (D54-L/D56-L/D51-L) untouched and supplies the missing substrate for the "what to ask next" objective and generalized capture. `basis` applies via its provenance-directness reading (D63-L): a user-raised need is `explicit`, an agent-inferred need is `implicit`. The substrate is now settled: the backlog is seeded at `createSpec`, create/close mutations route through `CommandExecutor`, and those writes share the spec-local LSN + `change_log` boundary. Still open: whether the register merely complements or eventually thins the `goal` axis (D59-L), and how the live per-turn driver plus capture-reflection should rank and close entries. Depends on: D8-L, D45-L, D59-L, D63-L, D64-L. Supersedes: treating `unknown` as a graph node kind or cross-plane node/plane for driving elicitation. +- **D63-L — Graph `basis` records item-level approval strength, not the mutation pathway.** Accepted nodes and edges use `basis ∈ explicit | implicit`. `explicit` means the user directly stated the graph item or approved the exact node/edge in a review set; `implicit` means the user accepted a concept/proposal and the agent materialized specific graph items to match it without per-item review (the `propose-graph` direct-commit path). The mutation pathway lives in `change_log.operation` and payload (`mutate_graph`, `accept_review_set`, post-exchange capture, etc.), while epistemic attribution lives in `Node.source` and proposal UI metadata may still carry `epistemic_status`. Low-confidence inferred material is still not graph truth; it remains in preface/capture analysis/review drafts/reconciliation needs until clarified or accepted. 
More abstractly, `basis` is a *provenance-directness* marker — directly from the user (`explicit`) versus agent-materialized from user input (`implicit`) — of which item-level approval strength is the claim-flavored reading; this lets the same `explicit | implicit` distinction apply to non-claim registers such as `elicitation_gaps` (user-raised vs agent-inferred, D65-L). Depends on: D26-L, D27-L, D53-L, D54-L, D55-L. Supersedes: `basis = accepted_review_set` as a persisted graph enum value and any interpretation of `basis` as a provenance/path field. +- **D64-L — Readiness bands are the coarse level of one coverage axis; gap typologies (D65-L) are its finer members. Bands are non-exclusive derived node-kind groupings, not structural legality gates.** Bands are `grounding`, `elicitation`, and `commitment`; each `elicitation_gaps` typology carries exactly one band — band and typology are **one axis at two granularities**, so "bands becoming more differentiated over time" means the typology taxonomy growing, not new bands. A node kind may belong to multiple bands (e.g. `constraint` contributes to grounding as the constraint anchor and to elicitation when it bounds solution space). Bands guide what the elicitor is trying to complete, what graph filters and rendered context show, the per-band **readiness estimate** rollup (D45-L), and which gaps a capability-readiness judgment weighs (D74-L). The band's gate-character differs by band: **grounding** is mostly LLM-judged satisficiency with a count floor (D57-L), **elicitation** is generatively spawned (no fixed typology set), **commitment** is more structurally derivable. The `CommandExecutor` must not reject a clear later-band kind merely because of band; readiness controls objectives and capability-judgment, not what graph truth may contain. Depends on: D45-L, D56-L, D57-L, D59-L, D60-L, D65-L. 
Supersedes: treating the intent `basic | structural | reasoning` category as the readiness taxonomy, treating readiness as a per-kind creation whitelist, or treating bands as a grade rubric for a stored grade. +- **D65-L — `elicitation_gaps` are typed coverage *obligations* (typologies) — the elicitor's prospective-memory agenda and the substrate of capability-readiness judgment; they guide and modulate, they never hard-gate.** Renamed and reconceived from `elicitation_backlog`. A gap is a **typology of coverage that must be addressed** (e.g. "the spec must anchor its primary constraint(s)"), **not** a literal queued question and **not** a specific point of unclarity — either of those would shadow the intent graph, which already owns the content (decisions, assumptions, constraints, …). The original `unknown`/process-vs-domain split still holds: `elicitation_` scopes the term to *process* gaps (knowable by asking), as opposed to the deferred domain-gap `risk` node (Future Direction §Vocabulary evolution). Each gap carries **both** a stable **name** (its typology key — the machine identity used for seeding, dedup, and the `capability → relevant gaps` map (D74-L), doubling as a short display label) **and** a **rationale** (the *meta* prose: what coverage this obligation represents, why it matters, and what counts as satisfying it — read by the elicitor to phrase the next question and to make a `manual` satisficiency judgment, D57-L). The two are not redundant: the name is for machine identity/reference, the rationale is for agent reasoning and cannot be compressed into a terse key.
In addition each gap carries: a **band** (D64-L — its coarse level, one band per typology); a **predicate shape** — a tagged union of `presence` (≥N nodes of a kind/band present), `field` (a `detail` key present), `coverage` (D60-L `lacksEdge` per-member absence), or `manual` (LLM-judged, the D57-L satisficiency residue) — which routes structural-vs-JIT checking (D74-L); an **importance** (driver-weight / count-floor membership / priority — *not* a hard gate); and a derived **coverage** strength (how well addressed). Importance and coverage are deliberately **two fields, not one ambiguous `rating`**: importance is the pre-answer weight, coverage the post-answer derived strength. **Disposition** (`open | answered | not_applicable | irrelevant | reopened`) is stored *only where it is non-derivable* — scope judgments (`not_applicable` / `irrelevant`, which the agent may set in bulk) and `manual` satisficiency — while `answered` for a structural predicate is derived **live** from the graph and never hand-set; this is the anti-shadowing line: the table holds obligation/disposition/meta only, never domain content. `reopened` is a legitimate disposition (new ambiguity can reopen a typology). Gaps serve three roles: **agenda** (what to ask / propose next), **judgment drivers** for capability-readiness (D74-L), and a **density signal** that scales generative-output epistemic status (D30-L) — the candidate-proposal / disambiguation UX is precisely how open grounding gaps fill progressively, so an open gap must never wall that UX. Seeding is band-correlated. 
The **grounding** band has a seeded fixed catalog of typologies collated from the D30-L anchor bundle, the D57-L Walter drivers, [`docs/design/ELICITATION_LENSES.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/ELICITATION_LENSES.md) §grounding bundle, and the shaping kickoff/framing material — a **floor** of `domain` (what kind of thing is being built), `protagonist` (who it is for / most affected), `pain_pull` (what problem/pain/pull drives it), and `constraint` (what binding non-negotiables already shape it), which together form the anchor bundle that gates generative capabilities (D30-L) — plus softer **progressive drivers** that enrich and focus elicitation but are *never* floor (the no-moving-the-goalpost line): `value` (what value/benefit), `context_of_use` (when/where used), `success_sketch` (how success is measured / what good looks like), and `solution_boundary` (non-goals / what it is explicitly not). **elicitation** gaps are generatively spawned by capture-reflection as preceding answers raise new coverage obligations (no fixed catalog). **commitment** gaps are derived structural predicates over the graph (e.g. "every requirement has a criterion", "every decision records its rejected options", "every invariant has a proof or check"). The register remains a **flat table, not a graph plane/node** — its only relations are filter attributes plus FK pointers (`arose_from`, `resolved_by`), a degenerate bipartite graph promotable later only if genuine gap→gap structure emerges; it is the *prospective* sibling of the *retrospective* `reconciliation_need` register (D8-L). `basis` applies via provenance-directness (D63-L): user-raised `explicit`, agent-inferred `implicit`. The flat-table substrate, `createSpec` seeding, `CommandExecutor`-routed mutations, and shared spec-local LSN + `change_log` boundary are settled from FE-823 (built as `elicitation_backlog`); the obligation/predicate/disposition remodel and the rename are what this decision now locks.
Still open: whether the register eventually thins the `goal` axis (D59-L), and how gaps should be ranked live per turn. Depends on: D8-L, D30-L, D45-L, D57-L, D59-L, D60-L, D63-L, D64-L, D74-L. Supersedes: the `elicitation_backlog` name and its question-instance / `open | closed`-status model, treating `unknown` as a graph node kind, and any readiness-grade-projection-over-open-counts as authority. +- **D74-L — Capability-readiness is a just-in-time, capability-relative judgment over relevant gaps — it replaces the standing grade gate.** When a capability is requested (a generative lens, `propose-graph`, `project-graph`, commitment review, eventual export), the agent evaluates readiness *for that capability* against the `elicitation_gaps` (D65-L) declared relevant to it. The `capability → relevant gaps` map is **explicit** and subsumes the retired `STRATEGY_MIN_GRADE` / `GOAL_MIN_GRADE` / `LENS_MIN_GRADE` thresholds in `runtime-policy.ts`, which were a lossy grade-proxy for "enough grounding". Structurally-obvious relevant gaps (`presence` / `field` / `coverage`) are checked **mechanically** (cheap, no LLM); non-obvious (`manual`) ones consume an **LLM satisficiency judgment** (D57-L). The outcome is one of **proceed**, **proceed at low epistemic status** (density-scaled, D30-L), or **negotiate** — surface an `establishment_offer` ("I can, but answer X and Y first", D32-L). Capability-readiness fires **on request, reactive-primary** (proactive nudges are a separate later concern) and is the **only readiness gate**: it never bars attempting work, it scales/negotiates. This resolves the prior "lens is never gated" (`ELICITATION_LENSES.md`) vs `LENS_MIN_GRADE` contradiction (lenses are not grade-gated; readiness is JIT-judged) and dissolves the grade-ratchet / two-value problem (the soft `readiness estimate`, D45-L, gates nothing and may regress honestly). A future structural milestone gate for export/plan/execute op-modes is deferred (D45-L) until such an op-mode exists.
Depends on: D25-L, D26-L, D30-L, D32-L, D45-L, D57-L, D59-L, D65-L. Supersedes: `GRADE_RANK`-based `MIN_GRADE` hard gating of goal/strategy/lens, and a standing readiness scalar as the authority for capability availability. #### Authority & mutation @@ -239,11 +241,11 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c - **D14-L — `#`-mentions are stable graph-code text references resolved by Brunch, with a session-scoped mention ledger.** Pi autocomplete persists only the inserted `AutocompleteItem.value` as ordinary transcript text; popup labels/descriptions are UI-only. Brunch autocomplete may search by title/description, but insertion must rewrite to a stable graph node code from D62-L (`#G1`, `#CON2`, `#R3`, `#CR4`, etc.) that Brunch can resolve to the graph entity id through a read-only lookup/re-read tool when the agent needs detail. Brunch prompt injection (`before_agent_start`) teaches agents how to interpret these handles; Brunch-owned parsing/indexing, not Pi autocomplete, creates mention-ledger state. The ledger stores internal `(entity_id, seen_lsn)` pairs, not titles or raw code strings alone, and drives discretionary `brunch.mention_staleness_hint` entries in `prepareNextTurn`. Depends on: A9-L, D62-L, I4-L. Supersedes: assuming Pi autocomplete persists hidden mention metadata or using raw DB ids as user-facing handles. - **D25-L — Strategy and lens are two orthogonal session-agent axes within the `elicitor` role, not separate roles or operational modes.** *Strategies* describe interaction shape (`step-wise-decision-tree`, `step-wise-disambiguate`, `propose-graph`, `project-graph`); *lenses* describe topical focus (`intent`, `design`, `oracle`; future execute-mode `plan`, `sync`, `scope`). 
Both are optional, AUTO-able fields of the projected session-agent record (D40-L) and are stamped onto structured-exchange payload facets (for example establishment offers, intent hints, and review/proposal material) when those facets need downstream routing; capture/reviewer/audit routing may filter on lens. Strategy determines the commitment mechanism (D26-L); the catalogue is expected to grow. Depends on: D23-L, D40-L. Supersedes: lens-as-role, strategy-as-mode, and standalone elicitor-intent/establishment/review custom-entry families as the default carrier. - **D26-L — Elicitation flows split by capture and commitment mechanism, not by a hard extractive/generative phase boundary.** Three commitment mechanisms: (1) Single-exchange flows (`step-wise-decision-tree`, `step-wise-disambiguate`, and ordinary structured questions) are captured synchronously by the elicitor post-exchange per D18-L; graph items directly stated by the user are written with `basis: explicit`. (2) Review-set flows (`project-graph` strategy) carry structured entity-draft payloads at proposal time and become durable only through review-set approval (D27-L); accepted exact items are written with `basis: explicit`. (3) Direct-commit flows (`propose-graph` strategy) present a concept to the user via structured exchange with rubric axes, choices, and a recommendation; when the user accepts a concept, the agent autonomously generates and persists the full subgraph through `mutateGraph` (D53-L) without intermediate entity-level user review — the user accepts a concept, not a graph shape — so those materialized nodes/edges are written with `basis: implicit` (D63-L). Design/oracle lenses may appear during ordinary elicitation; commitment (`commit-converge` goal and active review-set state, D59-L) changes what can be pinned, not what topics may be explored. Depends on: D18-L, D25-L, D45-L, D53-L, D63-L. 
Supersedes: a single uniform "agent asks questions" mental model, the observer-owned extractive vs elicitor-owned generative split as the primary architecture, and assuming all batch-graph writes require review-set approval. -- **D30-L — Grounding advances readiness for main elicitation; strategies remain available with honest epistemic signaling.** A minimum grounding bundle — *domain anchor*, *protagonist anchor*, *pain/pull anchor*, *constraint anchor* — establishes the frame required to move the spec from `grounding_onboarding` toward `elicitation_ready`. Lenses and strategies are not refused merely because grounding is thin, but their output resolution and epistemic load must honestly reflect what grounding supports: speculative outputs are visibly hedged and lower-authority, while grounded outputs may drive capture and later review-set projection. Grounding coverage should be explicit in offers/proposals where it affects confidence or gate transitions. Depends on: D26-L, D45-L. Supersedes: gating-by-refusal as a UX move and over-focusing readiness on generative lenses alone. +- **D30-L — Grounding advances readiness for main elicitation; strategies remain available with honest epistemic signaling.** A minimum grounding bundle — *domain anchor*, *protagonist anchor*, *pain/pull anchor*, *constraint anchor* — establishes the frame required before generative capabilities are worth attempting (the relevant grounding `elicitation_gaps`, D65-L). Lenses and strategies are not refused merely because grounding is thin, but their output resolution and epistemic load must honestly reflect what grounding supports: speculative outputs are visibly hedged and lower-authority, while grounded outputs may drive capture and later review-set projection. Grounding coverage should be explicit in offers/proposals where it affects confidence or a capability-readiness negotiation (D74-L). Depends on: D26-L, D45-L, D65-L, D74-L. 
Supersedes: gating-by-refusal as a UX move and over-focusing readiness on generative lenses alone. - **D32-L — Establishment offers are orientation artifacts, not a default next-action menu.** Establishment-offer material records the agent's current offer tree and recommended next move as durable structured-exchange payload state when it is part of an exchange, not as a mandatory standalone transcript entry family. Ambient chrome or web affordances may render the latest establishment-offer facet, and Brunch may expose a user-invoked orientation view summarizing what is established vs open, but Brunch does not surface an exhaustive lens/offer chooser by default; the agent still owns next-move selection unless the user explicitly asks to inspect alternatives. Depends on: D25-L, D30-L, A15-L. Supersedes: UI interpretations that turn establishment offers into a persistent strategy menu or separate transcript store. - **D31-L — A four-axis meta-rubric is a soft heuristic for fan-out comparison rubrics across all three flows; not architecturally enforced.** When generating comparison rubrics for fan-out alternatives across candidate-spec, technical-design, and verification-design flows, the elicitor attempts to express each axis in terms of (*legibility / cost-of-knowing*, *failure modes*, *coverage / range*, *commitment*). Project-specific axes are allowed alongside; the meta-frame is dropped when it doesn't fit. The hypothesis (uniform comparison UI across all three flows is more useful than per-flow improvisation) is testable via fixture comparison; promote to schema/UI only if it holds up. Depends on: D25-L, D26-L. Supersedes: a hardcoded per-flow rubric. -- **D45-L — Spec readiness is stored as a DB-row grade, not as session-local phase, workflow location, or elicitation posture.** The `specs` row owns `readiness_grade = grounding_onboarding | elicitation_ready | commitments_ready | planning_ready`; it also owns identity fields `id`, `name`, and `slug`. 
Grade is a forward gate: it unlocks later strategies, commitment review sets, and eventual export/plan/execute operational modes, but it never forbids returning to earlier gathering/refinement when new ambiguity appears. `elicitation_posture` and `commitment_focus` are retired as spec fields; active review-set state, strategy/lens selection, and workspace posture should carry those concerns when they become concrete. Grade changes route through `CommandExecutor` and carry audit context in the change log. Depends on: D18-L, D20-L, D30-L. Supersedes: treating “phase” as a user-facing location/stepper or hidden session memory, storing `elicitation_posture`, or adding `commitment_focus` as canonical spec state. -- **D46-L — Retired: commitment posture as persisted spec state.** Design and oracle lenses may still create accepted graph material, and cohesive review sets still commit atomically through `acceptReviewSet` per D27-L, but Brunch no longer models `pinning` or `commitment_focus` as spec-row state. Future commitment projection should derive from readiness grade, active strategy/lens/review-set state, and graph evidence rather than a persisted posture enum. Depends on: D27-L, D28-L, D45-L. Supersedes: per-item requirement/criterion confirmation, treating design/oracle commitment phases as first permission to discuss design/oracle topics, and storing commitment posture/focus on the spec. +- **D45-L — Spec readiness is not a stored grade; it decomposes into JIT capability-readiness (the gate), a soft derived readiness estimate (UI only), and a deferred milestone gate.** The earlier POC stored `specs.readiness_grade = grounding_onboarding | elicitation_ready | commitments_ready | planning_ready` and mutated it via `updateReadinessGrade`; that scalar is **retired** because one enum was conflating three jobs (gate, display, milestone). Readiness now splits: (1) **capability-readiness** — a just-in-time judgment made when a capability is requested (e.g. 
a generative lens / `propose-graph`), evaluated over the `elicitation_gaps` relevant to that capability (D74-L); it is the only gate, it never bars attempting work, and its outcome is proceed / proceed-at-low-epistemic-status (D30-L) / negotiate (an `establishment_offer` — "I can, but answer X and Y first", D32-L). (2) **readiness estimate** — a soft, derived, live per-band coverage projection for UI surfacing only, never authority; it may regress honestly because it gates nothing (this is what dissolves the old grade ratchet/two-value problem). (3) a **milestone gate** for export/plan/execute op-modes, deferred to Future Direction until such an op-mode exists. The vestigial `chrome.phase` and `chrome.chatMode` display fields are **retired**: the readiness estimate supersedes `phase`, and `chatMode` was a redundant `spec ? … : …` restatement of spec-selection (also removing the phase-language those fields carried). `specs` still owns identity (`id`, `name`, `slug`); `elicitation_posture` / `commitment_focus` remain retired. Depends on: D18-L, D20-L, D30-L, D32-L, D57-L, D64-L, D65-L, D74-L. Supersedes: storing `readiness_grade` as a spec-row scalar, `updateReadinessGrade`, grade-as-authority for tool/strategy/lens gating, the `chrome.phase` / `chrome.chatMode` fields, and treating “phase” as a user-facing location/stepper or hidden session memory. +- **D46-L — Retired: commitment posture as persisted spec state.** Design and oracle lenses may still create accepted graph material, and cohesive review sets still commit atomically through `acceptReviewSet` per D27-L, but Brunch no longer models `pinning` or `commitment_focus` as spec-row state. Future commitment projection should derive from capability-readiness (D74-L), active strategy/lens/review-set state, and graph evidence rather than a persisted posture enum. Depends on: D27-L, D28-L, D45-L, D74-L. 
Supersedes: per-item requirement/criterion confirmation, treating design/oracle commitment phases as first permission to discuss design/oracle topics, and storing commitment posture/focus on the spec. - **D47-L — Structured-exchange `preface` is the near-term carrier for non-committed elicitor interpretation.** The structured-exchange payload's plain prose `preface` summarizes working context before the next question: exploratory file-reading/tool-use findings, implied graph candidates, low-confidence edges, and the rationale for what is being asked next. Preface text is transcript truth and user-visible orientation, but it is not graph truth, not candidate-artefact schema, and not a hidden side store. High-confidence facts still commit through `CommandExecutor`; low-confidence implications stay in preface/question material until clarified, accepted, or escalated to reconciliation needs. Future `capture_*` analysis entries provide a separate post-exchange/review evidence surface for candidate semantic changes; they do not replace preface as next-question orientation and do not become graph truth. Structured candidate metadata is deferred until fixtures/projections prove plain prose is insufficient. Depends on: D12-L, D18-L, D37-L, D50-L. Supersedes: inventing a candidate-artefact substrate merely to carry ordinary next-question disambiguation material. - **D50-L — `capture_*` tools persist transcript-native ANALYSIS, not graph mutations.** Brunch may add a third structured-exchange tool family such as `capture_analysis` alongside `present_*` and `request_*`. A `capture_*` tool returns a normal persisted Pi `toolResult` with Brunch details and markdown content describing likely graph/node/edge changes, grouped into high-confidence candidates that could be committed later and low-confidence candidates that should drive clarification. 
`capture_*` output is transcript-visible evidence for Markdown/ASCII review and later graph-mutation cross-checking, but it is not graph truth and never bypasses the `CommandExecutor`. Product UI should hide capture analysis entirely if Pi exposes a supported hide seam; otherwise `renderResult` should be maximally collapsed/minimal while preserving full persisted `toolResult.content`/`details` for transcript renderers. The current schema layer deliberately defines only minimum capture details (`schema`, `v`, `exchange_id`, `tool_meta`) and rejects graph payloads; richer analysis payloads and shared component subparts (`Preface`, prompt body, option list, answer summary, capture analysis) require a later `ln-design` pass before implementation. Depends on: D12-L, D17-L, D18-L, D37-L, D41-L, D47-L. Supersedes: using ad hoc hidden custom entries, probe-only side files, or graph writes as the first carrier for pre-graph analysis. - **D44-L — Subagents are main-agent-invoked, blocking Pi tool calls that gather data and propose variants for candidate-proposal generation.** Brunch may register a single `subagent` Pi tool whose parameters are `{ agent, task }` or `{ tasks: [] }` (parallel). Each invocation runs as an isolated `pi --mode json -p --no-session --no-skills --no-extensions` subprocess inheriting Brunch's sealed Pi Profile (D39-L); the subagent has no inherited conversation context so the task string must carry everything it needs. Agent definitions are declarative markdown files under `src/.pi/extensions/subagents/agents/*.md` with TypeBox-validated frontmatter (`name`, `description`, `tools`, `model`) plus a system-prompt body. Concurrency cap lives in an externalized [src/.pi/extensions/subagents/config.json](file:///Users/lunelson/Code/hashintel/brunch-next/src/.pi/extensions/subagents/config.json) (default 4) so it can be reviewed and updated without SPEC churn. 
The subagent's result text is returned directly to the main agent as tool result content; subagents do not append custom messages to the session log on their own behalf, do not invoke the `CommandExecutor`, and do not gain access to the parent's Brunch RPC handlers. POC starter agents split into two families: @@ -252,7 +254,7 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c This division mirrors the batch-proposal flow in D26-L: `propose-graph` and `project-graph` strategies can delegate variant generation to fan-out `proposer` invocations while `intent` / `design` / `oracle` lenses frame the proposal subject; purely extractive single-exchange work may stay main-agent-only. Worker-style write-capable subagents are deferred until an execute operational mode lands. Cross-extension agent registration (Amos's `globalThis.__pi_subagents` bridge) is deferred because it conflicts with profile sealing; the POC registry is Brunch-owned only. NDJSON stream events from the subprocess drive TUI tool-progress UI; a `subagent.progress` RPC subscription for headless/web is deferred. Subagents are an optional enhancement to candidate-proposal diversity, not a load-bearing M0–M9 substrate: they enhance R20/D27-L proposal generation when bandwidth permits. Depends on: D2-L, D26-L, D27-L, D30-L, D31-L, D39-L, D41-L. Distinct from: D15-L Side task (non-blocking, status-via-custom-message), the deferred Side chat (user-invoked overlay; see Future Direction Register). Supersedes: —. - **D36-L — Spec/session selection is a reusable hierarchical decision model with transport-specific presentations.** Brunch owns a pure spec/session selection model that renders cwd-scoped inventory under the discovered project name without calling the user-created object a “workspace”. 
In TUI mode, the model may present a fast “continue last session” affordance when `.brunch/workspace.json` points to a valid spec+session; otherwise, or after “other spec/session”, the durable tree is: `create new spec → provide spec name → session created automatically`; `resume existing spec → choose existing spec → create a new session OR resume existing session → choose existing session`. The UI should not list every spec as a top-level action label; “resume existing spec” is the top-level intent, and the spec list is the next screen/scrollable selector. The model returns a product decision (`new spec`, `new session for spec`, `open session`, `continue selected session`, `cancel/quit`) without opening Pi sessions or mutating `.brunch/workspace.json` itself. The `WorkspaceSessionCoordinator` activates that decision and owns all persistence/session-binding effects. TUI startup and in-session paths share branded `pi-tui` components and colocated logo assets under `src/.pi/components/workspace-dialog`; adapters differ only in terminal lifecycle and Pi session-replacement mechanics (`ProcessTerminal`/`TUI.showOverlay` before Pi starts, `ctx.ui.custom(..., { overlay: true })` inside Pi), not in product semantics. RPC/headless transports must not invoke the TUI picker; they expose the same initial-selection requirement and activation decisions as JSON-RPC/product results so CLI JSON-RPC clients can select or create spec/session correctly. Depends on: D11-L, D21-L, D24-L, D33-L. Supersedes: implicit resume of `.brunch/workspace.json` on TUI launch, Pi `/resume`/`/new` as Brunch's product session chooser, one-off startup-only picker implementations, a flat action list that says “workspace” for specs, top-level `resume spec X` labels, and a separate intermediate action chooser for switching. 
- **D42-L — Session naming is Pi `session_info` presentation metadata, not spec identity.** Brunch-created sessions should be named at creation with neutral workspace-global defaults (`Untitled Session 1`, `Untitled Session 2`, …) so pickers/chrome never show an unnamed Brunch session and unchanged defaults do not collide across specs in the same cwd. These defaults are immediate lifecycle metadata, not LLM-generated summaries and not derived from the selected spec title. Brunch may later use Pi session lifecycle hooks to opportunistically replace a default with a short human-readable name that characterizes what happened in the transcript. The preferred generation trigger is `session_shutdown` for `quit`, `new`, and `resume` replacements because it sees the just-finished transcript and can name it before later picker lists need to distinguish sessions; `session_before_compact` or post-compaction (`session_compact`) may be used to refresh names after major summarization, and a manual/user rename command can force or override naming. The generation call should mirror the model-selection pattern in the local `summarize.ts` extension example: choose a cheap/fast authorized model, extract user/assistant text plus salient tool calls from the current branch, ask for a concise title, and append a Pi `session_info` entry through `SessionManager.appendSessionInfo`. Naming must be best-effort and non-blocking with a tight budget: failures, missing auth, empty transcripts, or shutdown aborts preserve the existing default/user label rather than blocking session replacement or exit. Session display names label sessions in pickers and chrome, but do not affect spec ids, session bindings, graph truth, or replay semantics. Depends on: D6-L, D17-L, D21-L, D35-L. 
Supersedes: using spec title or session UUID alone as the only durable display label once transcripts have meaningful content, leaving Brunch-created sessions unnamed, spec-local default numbering, or treating generated session names as canonical spec identity. -- **D58-L — Brunch prompt composition is a thin runtime header plus a gated prompt-resource manifest, not eager selection of every objective pack.** `.pi/agents/compose(agentId, sessionState, spec, workspace, context)` runs before Pi provider requests through Brunch's prompt extension and emits: **(1) agent control header** — keyed agent identity, model/thinking expectation, foreground role derived from `op_mode`, and mode/tool-authority summary; **(2) runtime-state header** — current pinned/AUTO `goal`, `strategy`, and `lens`, `spec.readiness_grade`, and workspace posture; **(3) resource manifests** — XML-style ``, ``, ``, and `` entries filtered by `.pi/agents/state.ts` legal tuples, grade, `op_mode`, and the agent allow-list, each carrying `{name, description, location}` for a Brunch-owned markdown resource under `src/.pi/{agents,skills}/`; the `{name, description, location}` triples are code-owned in `.pi/agents/state.ts`, not filesystem-discovered, honoring D39-L sealing; **(4) compact pushed context** — only the minimal context handles and rendered context needed to orient the turn, with deeper context access still governed by D60-L. Detailed goal/strategy/lens/method instructions live in Brunch prompt resources and are loaded by the agent with `read` when needed, following the same simple mechanism Pi uses for skills. Method resources are the prompt-level home for Brunch tool-routing and sequencing guidance; tool definitions remain boundary schemas/execution hooks, not the whole Brunch guide to when or how tools should be composed. 
`AUTO` means the axis is unpinned: the manifest lists legal choices and router instructions tell the agent to choose only from the current manifest, reading the selected resource before applying it when detail matters. Pinned axes point to the pinned resource; code enforces legality and tool gating but does not choose or concatenate large semantic packs on the agent's behalf. Pi-native skills may still carry startup-scoped capabilities, but runtime-state-gated availability is Brunch's manifest, not ambient Pi discovery. `.pi/agents/` is the keyed agent prompt assembly layer (`definitions/`, `contexts/`); `.pi/skills/` carries goal/strategy/lens/method resources; `.pi/agents/contexts/` is the D60-L agent-context orchestration layer (code), not a manifest resource family or general renderer bucket. Reusable text renderers may migrate to `renderers/` under D52-L. Composition is projection, not a behavioral state machine. Depends on: D23-L, D25-L, D39-L, D40-L, D52-L, D59-L, D60-L. Supersedes: the flat "base + mode + role + strategy + lens + grade + …" layering; the fixed all-packs concatenation in `compose-brunch-prompt.ts`; "role preset / runtime bundle" as the composition unit; direct Layer-2 eager prompt-pack injection as the default mechanism; top-level `src/agents/` for Pi-only agents; and `capability` as a parallel name for `method` / ``. 
+- **D58-L — Brunch prompt composition is a thin runtime header plus a gated prompt-resource manifest, not eager selection of every objective pack.** `.pi/agents/compose(agentId, sessionState, spec, workspace, context)` runs before Pi provider requests through Brunch's prompt extension and emits: **(1) agent control header** — keyed agent identity, model/thinking expectation, foreground role derived from `op_mode`, and mode/tool-authority summary; **(2) runtime-state header** — current pinned/AUTO `goal`, `strategy`, and `lens`, the readiness estimate (D45-L), and workspace posture; **(3) resource manifests** — XML-style `<goal>`, `<strategy>`, `<lens>`, and `<method>` entries filtered by `.pi/agents/state.ts` legal tuples, grade, `op_mode`, and the agent allow-list, each carrying `{name, description, location}` for a Brunch-owned markdown resource under `src/.pi/{agents,skills}/`; the `{name, description, location}` triples are code-owned in `.pi/agents/state.ts`, not filesystem-discovered, honoring D39-L sealing; **(4) compact pushed context** — only the minimal context handles and rendered context needed to orient the turn, with deeper context access still governed by D60-L. Detailed goal/strategy/lens/method instructions live in Brunch prompt resources and are loaded by the agent with `read` when needed, following the same simple mechanism Pi uses for skills. Method resources are the prompt-level home for Brunch tool-routing and sequencing guidance; tool definitions remain boundary schemas/execution hooks, not the whole Brunch guide to when or how tools should be composed. `AUTO` means the axis is unpinned: the manifest lists legal choices and router instructions tell the agent to choose only from the current manifest, reading the selected resource before applying it when detail matters. Pinned axes point to the pinned resource; code enforces legality and tool gating but does not choose or concatenate large semantic packs on the agent's behalf. 
Pi-native skills may still carry startup-scoped capabilities, but runtime-state-gated availability is Brunch's manifest, not ambient Pi discovery. `.pi/agents/` is the keyed agent prompt assembly layer (`definitions/`, `contexts/`); `.pi/skills/` carries goal/strategy/lens/method resources; `.pi/agents/contexts/` is the D60-L agent-context orchestration layer (code), not a manifest resource family or general renderer bucket. Reusable text renderers may migrate to `renderers/` under D52-L. Composition is projection, not a behavioral state machine. Depends on: D23-L, D25-L, D39-L, D40-L, D45-L, D52-L, D59-L, D60-L. Supersedes: the flat "base + mode + role + strategy + lens + grade + …" layering; the fixed all-packs concatenation in `compose-brunch-prompt.ts`; "role preset / runtime bundle" as the composition unit; direct Layer-2 eager prompt-pack injection as the default mechanism; top-level `src/agents/` for Pi-only agents; and `capability` as a parallel name for `method` / `<method>`. #### Development experience (DX) @@ -261,8 +263,8 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c - **D69-L — Agent-input introspection is one read-only, dev-gated Brunch extension; mechanical and conversational modes are separate planes.** A single Brunch-owned extension family, wired through `brunch-pi-extensions.ts` (never ambient discovery), provides **mechanical** introspection two ways: (a) a passive `before_provider_request`/`before_agent_start` tap that records *exactly the final payload the model receives* (system prompt, tool JSON schemas, D58-L prompt-resource manifest), and (b) an on-demand `/introspect` command that reports the **base** system-prompt inputs via `ctx.getSystemPromptOptions()` (base inputs only — `getSystemPromptOptions` returns pi's `_baseSystemPromptOptions`, so it does *not* reflect later `before_agent_start`/`before_provider_request` mutations) and the latest passive capture. 
The extension returns every payload unchanged so it observes but never shapes product behavior (D39-L sealing); because `before_provider_request` is a registration-ordered transformation chain in pi, the introspection tap must be registered *after* all Brunch prompt/tool/policy mutators to record the post-mutation payload. **Conversational** introspection is the sibling read-only query-tool plane: under the same `BRUNCH_DEV`/`introspection.enabled` gate, `brunch_session_query` reads `ctx.sessionManager.getBranch()` and `brunch_introspect_query` reads the captured provider payload plus base prompt options. Both tools project exact values with the shared capped dot/`[n]`/`[*]` grammar, truncate/spill large output, and rely on the agent's normal chat reply to echo/discuss the returned bytes. The D40-L active-tool allow-list explicitly unions this dev query-tool set only when the factory's dev gate is on, subtracts blocked tools, and intersects registered tools; registration alone is not advertisement. Tool-description nudges are the only prompt surface; no product prompt resource or fixed self-report schema is added. Subjective live interrogation remains an injected turn driven from the dev front-door launcher (`session.prompt`) or typed interactively, not a separate slash command. Captured scratch runs still write under `.fixtures/scratch/introspection//` (D70-L) so "what was sent" and "how the model read it" stay correlated. The launcher performs no global environment mutation; real TUI launches keep Pi startup update suppression scoped at the session-construction site with save/restore scoping (D71-L). Direct diagnostic for the "Prompt-resource discretionary loading" blind spot (I38-L). Depends on: D39-L, D40-L, D58-L, D68-L, D70-L; I38-L. Supersedes: treating "how the model sees our tools/skills" as an outer-loop-only, non-instrumented concern, and the fixed structured self-report schema as the default conversational surface. 
- **D70-L — `.fixtures/` is a four-role tree (seeds / workbenches / runs / scratch); dev-loop artifacts decouple operating-cwd from artifact-root.** `.fixtures/` separates four lifecycles, each with its own git policy: **`seeds/`** — tracked, reusable explicit-basis starting truth loaded via `npm run seed` (INPUT); **`workbenches/`** — launchable Brunch workspaces whose `.brunch/` is gitignored local state (the directories a dev `--cwd` targets, D71-L); **`runs/`** — tracked, *curated/promoted* probe evidence under `//`, probe-first per D68-L (EVIDENCE); **`scratch/`** — gitignored, ephemeral live dev-loop output under `//` (SCRATCH). Dev launchers (faux/introspection) must resolve their artifact root to the package-relative repo `.fixtures/scratch/`, **not** to the operating `cwd` — the same operating-cwd-vs-`fixtureRoot` decoupling the probe layer already uses (`mkdtemp` ephemeral cwd + repo-resolved `fixtureRoot`). This removes the `join(cwd, '.fixtures', …)` nesting defect where launching against a workbench would write `/.fixtures/…`. An exploratory scratch run becomes durable evidence only by explicit promotion (move `scratch///` → `runs///`, then track it), keeping curated `runs/` clean. `.fixtures/scratch/` is the chosen scratch home (over reusing `tmp/`) so promotion is a move within one tree. Depends on: D52-L, D68-L; the probe/transcript model. Supersedes: pinning dev-run artifacts to the operating cwd; treating all `.fixtures/runs/` output as tracked evidence; leaving the `workbenches/` role undocumented. - **D71-L — One `BRUNCH_DEV` switch gates all dev affordances; the main CLI accepts `--cwd`; introspection is present-but-dead in prod.** The over-specific `BRUNCH_DEV_RPC` env var is generalized to a single `BRUNCH_DEV` switch that, when set, enables dev affordances together: dev RPC methods (`dev.*`), registration of the read-only introspection extension (D69-L), and routing of dev-loop artifacts to `.fixtures/scratch/` (D70-L). 
`runBrunchCli` parses a `--cwd ` flag (defaulting to `process.cwd()`) so a dev session can target a `.fixtures/workbenches/` workspace without `cd`. Two independent prod-safety gates hold: (1) `src/dev/**` is build-excluded by `tsconfig.build.json`, so launchers/harness/alias never ship; (2) the introspection extension, though compiled into `dist` under `src/.pi/`, only *registers* when `createBrunchPiExtensions(..., { introspection: { enabled } })` opts in — and the TUI call site sets `enabled` from `BRUNCH_DEV` only, so absent the switch it is present-but-dead, never wired, honoring D39-L explicit-opt-in sealing (no ambient discovery). Brunch-launched TUI sessions keep Pi startup update suppression on in both product and `BRUNCH_DEV` runs by scoping `PI_OFFLINE=1` through `InteractiveMode.run()` unless the user already set a value; prior `PI_OFFLINE` / `PI_SKIP_VERSION_CHECK` state is restored in `finally`, never as a leaked global `process.env` mutation. Depends on: D39-L, D67-L, D68-L, D69-L, D70-L. Supersedes: the `BRUNCH_DEV_RPC`-only dev gate; relying on the operating cwd to choose the dev workspace; the assumption that the introspection extension needs build-exclusion (runtime opt-in suffices); lifting Pi offline mode in `BRUNCH_DEV` TUI sessions merely to enable live-provider behavior. -- **D59-L — `goal` is a grade-derived, AUTO-able objective axis, distinct from strategy.** A *goal* is what the session agent currently pursues; a *strategy* is the reusable interaction shape used to pursue it — a goal is pursued *via* a strategy *through* a lens (three orthogonal axes). The goal set is derived/gated by `spec.readiness_grade`: `grounding-advance` (fill grounding and advance the grade), `elicit-expand` (expand the elicited specification graph while ambiguity remains productive), `commit-converge` (reduce / lock down reviewable commitments), plus an always-on `capture-posture` (capture or confirm dev `posture`, D45-L). 
`goal` defaults to the grade-derived objective, may be pinned, or left `AUTO`; in either case D58-L manifests advertise the legal resource(s) rather than injecting the whole goal body. For now `goal` is **internal/grade-derived and not part of the user posture-change surface** (it is too contingent to expose as a user-mutable axis); the pin affordance is reserved for system/internal logic, and unlike `strategy`/`lens` the user does not switch it (D40-L, Q4). `elicit-expand` and `commit-converge` intentionally form the diverge/converge pair for the elicitation diamond; `elicit-I` / `elicit-II` are retired because they were phase-like labels, not objectives. "Advance the grade" is a goal, not a strategy — though the `grounding-advance` goal may carry a dedicated default interaction pattern. Depends on: D45-L, D57-L, D58-L. Supersedes: conflating the elicit-lifecycle objective with strategy selection. -- **D66-L — `freestyle` is a structure-optional elicitation strategy; it and generalized free-text capture are one slice.** `freestyle` joins the strategy axis (D25-L) as a fifth value alongside `step-wise-decision-tree`, `step-wise-disambiguate`, `propose-graph`, and `project-graph`. The four existing strategies impose structured-exchange turn discipline (offer-first `present_*`/`request_*` ritual, D37-L); `freestyle` makes that discipline *optional* — the turn may be ordinary user-driven chat, structured-exchange tools remain available (not prohibited), and user-invoked slash/skill-commands are ergonomic here precisely because no pending structured exchange is consuming the turn. It is **initiative/interaction-style, not authority**: it is not a new `op_mode`, adds no tool authority, and `op_mode`-gated tool policy (D40-L) is unchanged. 
Because freestyle has no mandatory exchange, the only way it grows graph truth is **generalized capture**, so the two land together: post-exchange capture (D18-L) is now wired onto the ordinary-message path (`session.submitMessage`, D49-L) over the same `session exchange` unit — which already spans plain user text — routing high-confidence directly-stated facts through `CommandExecutor.mutateGraph({createBasis: explicit, ops})` exactly as the structured-response capture tracer does, while low-confidence implications stay in preface / `capture_*` analysis (D47-L, D50-L) and never become graph truth. Freestyle therefore *composes with*, and does not replace, the `goal` (D59-L) and `lens` (D25-L) axes: the user still pursues `grounding-advance` / `elicit-expand` / etc., just through free chat, and freestyle capture can both resolve and spawn `elicitation_backlog` entries (D65-L). **AUTO must not select `freestyle`** — it is an explicit user pin only (a "let me just talk" escape hatch); the runtime manifest now omits it under AUTO while still allowing explicit pins, so spontaneous AUTO entry cannot silently abandon the offer-first product thesis (R16). Remaining open quality questions are limited to capture scope beyond directly-labeled facts (fitness evidence under A22-L, materially harder without a structured prompt), whether capture eventually runs on every freestyle turn or on demand, and the exact slash/skill-command surface (the Q6 method-vs-command question). Depends on: D18-L, D25-L, D26-L, D40-L, D45-L, D49-L, D50-L, D59-L, D63-L, D65-L. Refines: R16. Supersedes: treating offer-first (R16) as a universal per-turn session invariant; treating freestyle as a new operational mode or authority posture. 
+- **D59-L — `goal` is a readiness-derived, AUTO-able objective axis, distinct from strategy.** A *goal* is what the session agent currently pursues; a *strategy* is the reusable interaction shape used to pursue it — a goal is pursued *via* a strategy *through* a lens (three orthogonal axes). The goal set is derived from readiness-band coverage (D64-L) rather than a stored grade: `grounding-advance` (fill grounding gaps and raise grounding coverage), `elicit-expand` (expand the elicited specification graph while ambiguity remains productive), `commit-converge` (reduce / lock down reviewable commitments), plus an always-on `capture-posture` (capture or confirm dev `posture`, D45-L). `goal` defaults to the readiness-derived objective (e.g. while grounding coverage is thin, `grounding-advance`), may be pinned, or left `AUTO`; in either case D58-L manifests advertise the legal resource(s) rather than injecting the whole goal body. For now `goal` is **internal/readiness-derived and not part of the user posture-change surface** (it is too contingent to expose as a user-mutable axis); the pin affordance is reserved for system/internal logic, and unlike `strategy`/`lens` the user does not switch it (D40-L, Q4). `elicit-expand` and `commit-converge` intentionally form the diverge/converge pair for the elicitation diamond; `elicit-I` / `elicit-II` are retired because they were phase-like labels, not objectives. "Advance grounding" is a goal, not a strategy — though the `grounding-advance` goal may carry a dedicated default interaction pattern. Depends on: D45-L, D57-L, D58-L, D64-L. Supersedes: conflating the elicit-lifecycle objective with strategy selection, and deriving the goal set from a stored readiness grade. 
+- **D66-L — `freestyle` is a structure-optional elicitation strategy; it and generalized free-text capture are one slice.** `freestyle` joins the strategy axis (D25-L) as a fifth value alongside `step-wise-decision-tree`, `step-wise-disambiguate`, `propose-graph`, and `project-graph`. The four existing strategies impose structured-exchange turn discipline (offer-first `present_*`/`request_*` ritual, D37-L); `freestyle` makes that discipline *optional* — the turn may be ordinary user-driven chat, structured-exchange tools remain available (not prohibited), and user-invoked slash/skill-commands are ergonomic here precisely because no pending structured exchange is consuming the turn. It is **initiative/interaction-style, not authority**: it is not a new `op_mode`, adds no tool authority, and `op_mode`-gated tool policy (D40-L) is unchanged. Because freestyle has no mandatory exchange, the only way it grows graph truth is **generalized capture**, so the two land together: post-exchange capture (D18-L) is now wired onto the ordinary-message path (`session.submitMessage`, D49-L) over the same `session exchange` unit — which already spans plain user text — routing high-confidence directly-stated facts through `CommandExecutor.mutateGraph({createBasis: explicit, ops})` exactly as the structured-response capture tracer does, while low-confidence implications stay in preface / `capture_*` analysis (D47-L, D50-L) and never become graph truth. Freestyle therefore *composes with*, and does not replace, the `goal` (D59-L) and `lens` (D25-L) axes: the user still pursues `grounding-advance` / `elicit-expand` / etc., just through free chat, and freestyle capture can both resolve and spawn `elicitation_gaps` (D65-L). 
**AUTO must not select `freestyle`** — it is an explicit user pin only (a "let me just talk" escape hatch); the runtime manifest now omits it under AUTO while still allowing explicit pins, so spontaneous AUTO entry cannot silently abandon the offer-first product thesis (R16). Remaining open quality questions are limited to capture scope beyond directly-labeled facts (fitness evidence under A22-L, materially harder without a structured prompt), whether capture eventually runs on every freestyle turn or on demand, and the exact slash/skill-command surface (the Q6 method-vs-command question). Depends on: D18-L, D25-L, D26-L, D40-L, D45-L, D49-L, D50-L, D59-L, D63-L, D65-L. Refines: R16. Supersedes: treating offer-first (R16) as a universal per-turn session invariant; treating freestyle as a new operational mode or authority posture. - **D60-L — Agent context splits into pull / projection / render / surface, distinguishes graph-truth from active-context reads, and keeps `workspace.state` separate.** **Agent context** = content the agent reasons over: `cwd` (filesystem kickoff heuristic — `.brunch?`, session count/length, README/markdown sizes, file counts), `graph` (overview/list/query), or `node` (variable-hop neighborhood). **PULL** is typed, read-only data access owned by the data layer (`graph/queries.ts` for graph/node; `session/` for cwd) and bypasses `CommandExecutor` (reads only); the typed value *is* the JSON form. Graph pulls must make the read projection explicit: `graph_truth` includes accepted truth records, while `active_context` hides superseded predecessors and must also omit edges whose endpoints are hidden so active-context reads do not contain dangling references. 
The graph read family should support the observed query shapes without becoming a generic records API: list nodes by kind(s), list nodes by D64-L readiness band(s), find nodes related to anchor node(s) by edge category/direction/hop depth, and find class-members lacking an edge of a given category in a given direction (gap query — a single named absence shape, not a generic NOT-predicate language). `src/graph/README.md` owns the consumer coverage ledger: `read_graph` exposes the six agent shapes, while RPC and web deliberately expose only overview + neighborhood until a scoped feature promotes another shape. **PROJECTION** is optional info-preserving shaping for reusable DTOs; when multiple adapters need the same structured view, it belongs in `projections/`, but many callers can consume the typed read directly. **RENDER** turns a typed or projected value into either an LLM-friendly string or JSON (trivial serialization). Reusable lossy text/markdown rendering belongs in `renderers/`; `.pi/agents/contexts/` owns the agent-context orchestration decision — which typed pull to expose, how much detail to include, and how lens-plane/grade-depth shape the prompt-facing string — and may call reusable renderers. Rendered projected stable node codes (D62-L) remain the primary handles. **SURFACE** delivers it: *pushed* (compose injects at turn boundary), *pulled* (`read_graph`, `read_workspace_context`, `read_session_context` wrap the relevant reads/renderers — markdown in `toolResult.content`, typed JSON in `toolResult.details` per I33-L), or *rpc/ui*. The separate **workspace projection** (`workspace.state` — workspace/session/spec/chrome product state) is a different subject and keeps that name. Depends on: D35-L, D52-L, D53-L, D62-L, D64-L. Supersedes: pre-rendering context strings in the pull layer, scattering context-build logic across `graph/`, `.pi/agents/contexts/`, and tool adapters, or silently mixing graph-truth and active-context reads. 
### Critical Invariants @@ -293,13 +295,13 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c | I22-L | Brunch TUI startup must not render prior session transcript entries or enter an agent loop until the user has explicitly activated a spec/session decision; creating a new spec implicitly creates its first session, creating a new session for an existing spec lands in a binding-only session, resuming a prior transcript is opt-in, and RPC/headless startup exposes structured initial-selection state rather than invoking TUI picker code. | covered (FE-744 coordinator tests; hierarchical spec/session picker model + component tests; `workspace.selectionState` / `workspace.activate` JSON-RPC contract tests with source assertion that RPC does not import TUI picker code; `src/probes/scripts/verify-startup-no-resume.sh` pty/ANSI-stripped TUI probe oracle proving stale transcript text is absent before explicit activation) | D11-L, D21-L, D22-L, D36-L | | I23-L | Every structured elicitation interaction that owns the response surface persists durable semantic display only through Pi `toolResult` rows rendered by `renderResult`; `renderCall` and live `ctx.ui.*` surfaces are transient. A structured-exchange tuple has a recoverable `present_*` result and, when required, exactly one matching terminal `request_*` result before the next agent turn consumes it. The target details model is checked by `schema` + `v`, `exchange_id`, and `tool_meta`; request outcomes are an exactly-one property-presence union; user-authored text is `comment` and runtime-authored text is `message`; present-side status/kind/expected-request aliases and capture graph payloads are invalid in the Zod-authored schema layer. `toolResult.content` is rich markdown suitable for both TUI transcript display and model context; `toolResult.details` carries structured projection/recovery data. 
| covered for current structured-exchange tools (registered sequential `present_question`, `present_options`, `present_review_set`, `request_answer`, `request_choice`, `request_choices`, and `request_review`; runtime details are emitted from canonical `schema`/`v`/snake_case Zod shapes; tests cover non-semantic `renderCall`, markdown `renderResult`, present/request details, unmatched-present recovery, active-vs-stub registry, JSON-editor fallback for multi-choice, terminal `answered`/`cancelled`/`unavailable` projection closure, option content/rationale parity, review-set `nodes`/`edges` details parity, invalid review proposal non-recovery, review pending-exchange recovery, public-RPC deterministic permutations, capture response-to-graph proof, and same-assistant-message `present_options → request_choice` ordering over a real Pi RPC run. The Zod-authored schema layer is covered by JSON Schema export, drift-rejection, and source-boundary tests for present/request/capture details. `present_candidates` remains a named stub and intentionally unregistered.) | D12-L, D13-L, D17-L, D37-L, D38-L, D41-L | | I24-L | A Brunch-launched Pi runtime does not load ambient user/project Pi context files, extensions, skills, prompt templates, themes, or behavior-shaping settings unless Brunch's sealed Pi settings/extension boundary explicitly allows them; Brunch-owned extension-discovered resources are identified as intentional product resources. | covered for TUI-launch settings/extension boundary by contract tests: ambient resource flags and explicit extension factories are preserved; hostile ambient global/project settings are ignored by the in-memory Brunch settings policy before and after reload; audited Pi settings getters are tracked in `src/.pi/brunch-pi-settings.ts`. Subagent subprocess inheritance remains future coverage under I29-L. 
| D2-L, D39-L | -| I25-L | The active `op_mode`, `strategy`, `lens`, and `goal` are reconstructable from linear `brunch.agent_runtime_state` entries at turn start and through `session.runtimeState`; concrete axis ids stay separate from the `auto` selection sentinel; the foreground session-agent role is derived from `op_mode`, not separately stored; tool gating follows the reconstructed `op_mode` so `elicit` cannot use execute/dangerous tools such as raw `bash`/`write` unless explicitly permitted. Runtime-state projection remains transcript-backed and exposes empty/default mention, world-watermark, and lifecycle slots without inventing hidden extension memory; legal option/default affordances are pure projections over resolved runtime state plus readiness grade, not persisted state. | covered (`src/session/runtime-state.test.ts` covers default state, cumulative last-writer-wins posture, mention/world/lifecycle slot projection, and non-linear rejection; `src/rpc/handlers.test.ts` covers explicit-target `session.runtimeState` discovery/params/spec validation; `src/.pi/__tests__/operational-mode.test.ts` covers append/project/switch helpers over the reconciled axis vocabulary, AUTO selection for every objective axis, init idempotence, previous-state values, malformed/illegal tuple rejection, role derivation from `op_mode`, and Pi JSONL reload projection; `prompting.test.ts` covers prompt/tool-policy projection from the same transcript-backed runtime state, including selected-spec grade activation for commitment-grade `present_review_set` / `request_review` proposal tools; `src/.pi/extensions/runtime/authority-matrix.test.ts` covers the current POC authority matrix for `elicit-read-only`, blocking `bash`/`edit`/`write`, and structured `needs_human` result representability while leaving A18-L strict built-in suppression as residue; `src/projections/session/affordances.test.ts` covers shared goal/strategy/lens legal options, defaults, AUTO freestyle exclusion, pinned 
freestyle, and grade-sensitive legality; `src/session/runtime-affordances-coverage.test.ts` guards the required-vs-deferred affordance ledger). | D17-L, D23-L, D40-L, D58-L, D59-L, D66-L | +| I25-L | The active `op_mode`, `strategy`, `lens`, and `goal` are reconstructable from linear `brunch.agent_runtime_state` entries at turn start and through `session.runtimeState`; concrete axis ids stay separate from the `auto` selection sentinel; the foreground session-agent role is derived from `op_mode`, not separately stored; tool gating follows the reconstructed `op_mode` so `elicit` cannot use execute/dangerous tools such as raw `bash`/`write` unless explicitly permitted. Runtime-state projection remains transcript-backed and exposes empty/default mention, world-watermark, and lifecycle slots without inventing hidden extension memory; legal option/default affordances are pure projections over resolved runtime state plus capability-readiness over gaps (D74-L), not persisted state. | covered (`src/session/runtime-state.test.ts` covers default state, cumulative last-writer-wins posture, mention/world/lifecycle slot projection, and non-linear rejection; `src/rpc/handlers.test.ts` covers explicit-target `session.runtimeState` discovery/params/spec validation; `src/.pi/__tests__/operational-mode.test.ts` covers append/project/switch helpers over the reconciled axis vocabulary, AUTO selection for every objective axis, init idempotence, previous-state values, malformed/illegal tuple rejection, role derivation from `op_mode`, and Pi JSONL reload projection; `prompting.test.ts` covers prompt/tool-policy projection from the same transcript-backed runtime state, including selected-spec grade activation for commitment-grade `present_review_set` / `request_review` proposal tools; `src/.pi/extensions/runtime/authority-matrix.test.ts` covers the current POC authority matrix for `elicit-read-only`, blocking `bash`/`edit`/`write`, and structured `needs_human` result representability while 
leaving A18-L strict built-in suppression as residue; `src/projections/session/affordances.test.ts` covers shared goal/strategy/lens legal options, defaults, AUTO freestyle exclusion, pinned freestyle, and grade-sensitive legality; `src/session/runtime-affordances-coverage.test.ts` guards the required-vs-deferred affordance ledger). | D17-L, D23-L, D40-L, D58-L, D59-L, D66-L | | I27-L | Session display names are presentation metadata only: every Brunch-created session gets a neutral workspace-global default `session_info` label (`Untitled Session N`) at creation, unchanged defaults do not collide across specs in one cwd, later user/generated names may replace the default, and no naming path mutates spec identity, session binding, or graph truth. | planned (creation/boundary tests for workspace-global default allocation across specs and replacement sessions; session-lifecycle naming tests with empty transcript/auth failure/success paths; picker/chrome projection tests read session names when present) | D6-L, D21-L, D35-L, D42-L | | I26-L | Runtime schema-library imports stay deliberately scoped: Zod may appear in D41-L-acknowledged product/protocol schema seams — the structured-exchange schemas (`src/.pi/extensions/exchanges/schemas/`) and the dev-gated query-tool params (`src/.pi/extensions/{session-query,introspect-query}/`), each converting to Pi `TSchema` only through a single per-plane `z.toJSONSchema(..., { unrepresentable: 'throw' })` cast adapter (`exchanges/pi-schema.ts`, `shared/pi-tool-schema.ts`); TypeBox remains valid for unrelated Pi tool parameters (e.g. graph tools), small config/frontmatter contracts, and Drizzle-derived row schemas; no boundary may hand-author parallel Zod and TypeBox sources for the same shape. Pi tool parameter schemas authored in Zod must export JSON Schema draft 2020-12 (Zod v4 default), so tuples emit `prefixItems` rather than the draft-07 array-`items`/`additionalItems` form that strict provider validators (Anthropic) reject. 
Drizzle row/insert/update schemas are not hand-authored alongside their target tables. | covered (structured-exchange schema tests prove Zod parse/export and assert semantic details contracts stay in `src/.pi/extensions/exchanges/schemas/`; the legacy `shared/model.ts` details interface is retired; structured-exchange TypeBox usage is quarantined to the single Pi `TSchema` cast adapter in `src/.pi/extensions/exchanges/pi-schema.ts`, and the dev query tools to `src/.pi/extensions/shared/pi-tool-schema.ts`; `session-query`/`introspect-query` tests assert the advertised parameter schema is draft 2020-12 with no draft-07 tuple form; grep-based architectural boundary test in `architecture.test.ts` enforces no direct `db/` imports outside `graph/`; Drizzle derivation via `drizzle-typebox` in `row-schemas.ts`) | D41-L | | I28-L | Auto-compaction output preserves the configured anchor set byte-stable: every entry kind listed in [src/.pi/extensions/compaction/index.ts](file:///Users/lunelson/Code/hashintel/brunch-next/src/.pi/extensions/compaction/index.ts) is reconstructable post-compaction according to its `select` rule (`first | latest | active-leaves | all-unresolved`); LLM-generated narrative summary never replaces or rephrases preserved-anchor content; extension failure falls through to Pi default compaction rather than dropping anchors silently. 
| planned (compaction round-trip property tests at M9 plus inner-loop anchor-rendering unit tests and TypeBox schema validation of the anchor contract) | D43-L; R15, R13; I3-L, I4-L, I8-L, I12-L | | I29-L | Subagent subprocesses inherit Brunch Pi Profile sealing: every `subagent` tool invocation spawns `pi --mode json -p --no-session --no-skills --no-extensions` with an explicit per-agent tool allowlist and per-agent model; subagents never load ambient user/project `.pi/` skills, prompts, themes, extensions, context files, or behavior-shaping settings; subagents never gain direct access to the parent's `CommandExecutor`, Brunch RPC handlers, or graph persistence; subagent results return to the main agent only as tool result content (no side-effect transcript writes). | planned (subagent subprocess argv tests; isolation audit asserting absent ambient-resource leakage; tool-allowlist conformance test per starter agent) | D2-L, D39-L, D44-L; I2-L, I11-L, I24-L | -| I30-L | Elicitor post-exchange capture only commits high-confidence extractive facts, concrete reconciliation needs, and justified spec readiness-grade updates; low-confidence implications remain in structured-exchange preface/question material and do not become graph truth until clarified, accepted, or explicitly escalated. 
| partially covered (`src/graph/capture/structured-response.test.ts` accepts only directly labeled text facts for the current tracer, rejects implication-only prose as `no_capture`, preserves structural diagnostics, `src/probes/capture-response-to-graph-proof.test.ts` proves public RPC response capture into selected-spec graph truth, and `src/probes/submit-message-capture-proof.test.ts` proves the same explicit-text capture path for ordinary `session.submitMessage` turns; reconciliation-needs and readiness-grade capture remain planned) | D18-L, D47-L; A22-L | -| I31-L | `readiness_grade` is a forward gate, not a workflow location or kind whitelist: higher grades unlock later strategies/commitments/export paths but do not make earlier gathering/refinement invalid or unavailable, and the `CommandExecutor` must not reject a graph node solely because its kind belongs to a later readiness band. All grade mutations route through `CommandExecutor` and carry audit through the change log. | partially covered (`createSpec` / `getSpec` / `updateReadinessGrade` command tests cover storage and mutation audit; `src/.pi/agents/compose.test.ts` covers prompt-resource grade gates rejecting illegal pinned commitment selections and filtering AUTO availability; kind-vs-grade write permissiveness remains planned with graph-code/readiness-band work) | D20-L, D45-L, D64-L | +| I30-L | Elicitor post-exchange capture only commits high-confidence extractive facts, concrete reconciliation needs, and justified `elicitation_gaps` disposition updates (D65-L); low-confidence implications remain in structured-exchange preface/question material and do not become graph truth until clarified, accepted, or explicitly escalated. 
| partially covered (`src/graph/capture/structured-response.test.ts` accepts only directly labeled text facts for the current tracer, rejects implication-only prose as `no_capture`, preserves structural diagnostics, `src/probes/capture-response-to-graph-proof.test.ts` proves public RPC response capture into selected-spec graph truth, and `src/probes/submit-message-capture-proof.test.ts` proves the same explicit-text capture path for ordinary `session.submitMessage` turns; reconciliation-needs and gap-disposition capture remain planned) | D18-L, D47-L, D65-L; A22-L | +| I31-L | Readiness never bars graph truth or work; it is just-in-time capability-readiness over relevant gaps, not a stored grade or kind whitelist. There is no `readiness_grade` scalar; capability availability is judged on request against the relevant `elicitation_gaps` (D74-L) and may proceed, proceed at low epistemic status, or negotiate — it never refuses outright. The `CommandExecutor` must not reject a graph node solely because its kind belongs to a later readiness band (D64-L). The soft `readiness estimate` (D45-L) is UI-only and gates nothing. | partially covered (current `createSpec` / `getSpec` / `updateReadinessGrade` and `compose.test.ts` grade-gate tests predate the D45-L/D74-L remodel and will be replaced; JIT capability-readiness + readiness-estimate coverage is planned) | D20-L, D45-L, D64-L, D74-L | | I32-L | Public RPC structured-exchange driving never requires a client to speak raw Pi RPC: after Brunch method discovery and workspace/spec/session activation, each pending assistant-originated exchange is answered exactly once through `session.submitExchangeResponse`, and the deterministic permutation run produces linear Pi JSONL whose structured exchange projection preserves the same prompt/answer/status/comment artifacts as the equivalent TUI structured-exchange path. 
| covered for deterministic FE-744 parity under canonical session method names (`session.triggerExchange`, `session.pendingExchange`, `session.submitExchangeResponse`, `session.exchanges`): `rpc.discover` contract tests, pending/respond lifecycle tests, current public-RPC structured-exchange permutations, terminal non-answered status handling, option content/rationale parity, no repeated deterministic prompts, and transcript/exchange parity assertions. | D5-L, D48-L, D49-L; I10-L, I13-L, I21-L, I23-L | | I33-L | `capture_*` analysis entries are transcript evidence only: they persist as Brunch structured-exchange `toolResult` rows, are included by Brunch-semantic transcript renderers, are hidden or collapsed in TUI display, and never mutate graph truth or bypass `CommandExecutor`. | partially covered (minimum capture details schemas parse/export and reject graph payload fields; future runtime capture-analysis schema/rendering tests plus transcript renderer fixtures still need to prove persisted result rendering and TUI hide/collapse behavior; later graph-capture fixtures compare analysis candidates against committed graph mutations) | D17-L, D18-L, D37-L, D47-L, D50-L; I2-L, I11-L, I23-L, I30-L | | I34-L | `mutateGraph` batch validation is all-or-nothing: if any node or edge in the batch is structurally illegal, the entire batch is rejected and no partial state is persisted; the agent receives diagnostics sufficient for bounded self-correction retry. 
| covered (`command-executor/commit-graph-batch.test.ts` and graph-tool adapter tests cover dry-run/commit diagnostic parity for invalid basis, missing refs/codes, invalid category/stance, self-loop, invalid node kind/detail shape, rollback of nodes/edges/change_log/counters, transaction-local planning before LSN allocation/writes, and structured adapter diagnostics without thrown projected-code errors or fake endpoint refs) | D53-L; I1-L, I11-L | @@ -359,20 +361,20 @@ src/.pi/ - Manifest metadata is code-owned, not filesystem-discovered: `.pi/agents/state.ts` binds each legal axis value to its `{name, description, location}`, and `compose()` emits that binding; the agent `read`s the `.md` body at the listed `location` only when detail matters. This keeps the legal set and its labels in one tested place and honors D39-L sealing (no runtime resource discovery). Frontmatter-sourced manifest metadata is a deferred ergonomics option, not the POC mechanism. - `.pi/agents/contexts/` is the D60-L agent-context orchestration layer (TypeScript), surfaced as the header's compact pushed context or via the read tools; reusable text renderers may migrate to `renderers/`, and contexts are not part of the `read`-on-demand resource manifest and carry no `` family. - Workspace **posture** is workspace-scoped product state persisted in `.brunch/workspace.json`, not spec state, session state, or graph truth. D57-L keeps it off the spec row and graph; D58-L composition injects known posture values into the runtime header as an axis of agent influence, and the `capture-posture` goal (D59-L) can confirm or refine those values conversationally. -- Readiness is an internal forward gate, not a user-facing workflow stepper, session-local phase, or graph-node-kind whitelist. `readiness_grade` lives on the spec row per D45-L; D64-L readiness bands describe non-exclusive evidence/rubric groupings for goal selection and context filtering. 
Validators may warn when graph/transcript evidence and assigned grade diverge. Before readiness drives hard tool/agent authority beyond the POC, Brunch needs explicit rubrics for what evidence advances, blocks, or regresses grade. +- Readiness is judged just-in-time per requested capability, not as a user-facing workflow stepper, a stored grade, a session-local phase, or a graph-node-kind whitelist. There is no `readiness_grade` on the spec row (D45-L); capability-readiness (D74-L) is evaluated over the relevant `elicitation_gaps`, and D64-L readiness bands describe non-exclusive evidence groupings feeding the readiness-estimate rollup, goal selection, and context filtering. The soft readiness estimate may surface in UI but gates nothing. A future structural milestone gate for export/plan/execute op-modes is deferred until such an op-mode exists; before readiness drives hard tool/agent authority beyond the POC, Brunch needs explicit `capability → relevant gaps` mappings and per-typology predicates (A27-L). - Prompt resources and Pi skills are both progressive-disclosure mechanisms, but they are not authority. Brunch code owns runtime-state projection, legal tuple filtering, grade/allow-list gating, tool activation, and tool-call blocking. Pi-native skills may be used for startup-scoped capabilities; runtime-state-specific objective/method availability is advertised through Brunch's per-turn manifest so ambient user/project resources cannot leak into product behavior. ### Coherence and readiness semantics - Coherence must remain bounded for the POC: a visible verdict tied to structural legality and actionable reconciliation needs, not a vague promise that the specification “makes sense.” M8 owns the sharper rubric and adversarial examples. -- Avoid phase/stage/maturity language for the elicit lifecycle except when referring to legacy docs. 
The canonical internal model is readiness grade plus the session-agent `goal` / `strategy` / `lens` axes and active review-set state. PLAN/frontier text should describe concrete readiness gates rather than imply a user-facing phase machine. +- Avoid phase/stage/maturity language for the elicit lifecycle except when referring to legacy docs. The canonical internal model is capability-readiness over `elicitation_gaps` plus the session-agent `goal` / `strategy` / `lens` axes and active review-set state; the readiness estimate is a soft UI projection, not a stage. PLAN/frontier text should describe concrete capability-readiness gates rather than imply a user-facing phase machine. ### Vocabulary evolution - Whether public graph commands eventually split from one `graph.*` umbrella into `intent.*` / `oracle.*` / `design.*` / `plan.*` namespaces is deferred; current posture is unified `graph.*` for the POC. - ~~Whether `framing_as` values graduate to first-class node kinds~~ — resolved: `framing_as` retired, absorbed by `thesis`, `term`, `constraint`, and `goal` (D54-L, D56-L). - `posture` is a workspace-level POC-stubbed property set for now; whether it earns richer persistence or graph-native representation is deferred until product pressure shows concrete readers beyond startup/prompt context. -- **Durable `risk` / `unknown` node (deferred).** A domain-epistemic gap — something nobody knows and cannot economically find out now — is distinct from the prospective elicitation backlog register (D65-L) and from `assumption` (which proceeds on a believed-but-unprovable value; a risk is upstream and must be structurally accommodated via the assumptions, decisions, design, verification, and planning it spawns). Because it carries real cross-plane edges, a `risk` — if adopted — is a first-class intent-plane node kind, not a table; deferred because it reopens the locked kind set (D54-L/D56-L), and the sub-question (own kind vs an `assumption` variant) needs its own pass. 
Modelled on the prior prototype's `risk` node. +- **Durable `risk` / `unknown` node (deferred).** A domain-epistemic gap — something nobody knows and cannot economically find out now — is distinct from the prospective `elicitation_gaps` register (D65-L) and from `assumption` (which proceeds on a believed-but-unprovable value; a risk is upstream and must be structurally accommodated via the assumptions, decisions, design, verification, and planning it spawns). Because it carries real cross-plane edges, a `risk` — if adopted — is a first-class intent-plane node kind, not a table; deferred because it reopens the locked kind set (D54-L/D56-L), and the sub-question (own kind vs an `assumption` variant) needs its own pass. Modelled on the prior prototype's `risk` node. ### Thin transport/read posture @@ -406,7 +408,7 @@ src/.pi/ ### Spec initiative & claim model -- D61-L locks only the vocabulary (spec = initiative answering a problem; claims = truth-bearing nodes resolved at node level). The richer model in [`docs/design/SPEC_INITIATIVE_MODEL.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/SPEC_INITIATIVE_MODEL.md) is deferred and surfaces only when multi-spec work lands: **claims survive their parent spec** and may be adopted by later specs (a cross-spec claim graph, not per-spec); an **initiative-status lifecycle** (`proposed` / `drafting` / `active` / `adopted` / `done` / `superseded` / `abandoned`) distinct from `readiness_grade`; a small closed set of **spec-to-spec relationships** (`informed_by`, `supersedes`, `parallel_to`, `depends_on`, `conflicts_with`) rather than a generic "related" edge; and **current truth as a projection** over surviving claims with explicit precedence — explicit supersession wins, `adopted` / `active` / `done` outrank `drafting` / `abandoned`, and unresolved overlap surfaces a reconciliation need instead of silent last-writer-wins. Adopting any of these is a frontier decision, not yet product contract. 
+- D61-L locks only the vocabulary (spec = initiative answering a problem; claims = truth-bearing nodes resolved at node level). The richer model in [`docs/design/SPEC_INITIATIVE_MODEL.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/SPEC_INITIATIVE_MODEL.md) is deferred and surfaces only when multi-spec work lands: **claims survive their parent spec** and may be adopted by later specs (a cross-spec claim graph, not per-spec); an **initiative-status lifecycle** (`proposed` / `drafting` / `active` / `adopted` / `done` / `superseded` / `abandoned`) distinct from capability-readiness and the readiness estimate (D45-L); a small closed set of **spec-to-spec relationships** (`informed_by`, `supersedes`, `parallel_to`, `depends_on`, `conflicts_with`) rather than a generic "related" edge; and **current truth as a projection** over surviving claims with explicit precedence — explicit supersession wins, `adopted` / `active` / `done` outrank `drafting` / `abandoned`, and unresolved overlap surfaces a reconciliation need instead of silent last-writer-wins. Adopting any of these is a frontier decision, not yet product contract. ## Lexicon @@ -420,15 +422,17 @@ src/.pi/ | **Session agent** | The main-thread agent that drives the session forward — `elicitor` now, future `executor` — resolved from `op_mode`. It is the only agent represented in session state (D40-L); side/sub-agents are out-of-band. | | **Strategy** | An optional, AUTO-able session-agent axis (D25-L) describing interaction shape: `step-wise-decision-tree` (single-exchange Q&A), `step-wise-disambiguate` (contrastive examples), `propose-graph` (novel coherent subgraph via direct commit), `project-graph` (derived nodes/edges via review-set), `freestyle` (structure-optional, user-driven turns; D66-L). Strategy determines the commitment mechanism (D26-L). Detailed strategy behavior lives in a Brunch prompt resource advertised through D58-L manifests. 
| | **Lens** | An optional, AUTO-able session-agent axis (D25-L) describing topical focus: `intent`, `design`, `oracle` for elicit mode; future execute-mode `plan`, `sync`, `scope`. Orthogonal to strategy; stamped onto elicitor-emitted entries as provenance (I18-L). Detailed lens behavior lives in a Brunch prompt resource advertised through D58-L manifests. | -| **Goal** | An optional, AUTO-able session-agent objective axis (D59-L): what the agent currently pursues, derived/gated by `spec.readiness_grade` — `grounding-advance`, `elicit-expand`, `commit-converge`, plus always-on `capture-posture`. Distinct from strategy (the *how*) and lens (the topical focus); `elicit-expand` / `commit-converge` are the diverge/converge pair in the elicitation diamond. | +| **Goal** | An optional, AUTO-able session-agent objective axis (D59-L): what the agent currently pursues, derived from readiness-band coverage (D64-L) rather than a stored grade — `grounding-advance`, `elicit-expand`, `commit-converge`, plus always-on `capture-posture`. Distinct from strategy (the *how*) and lens (the topical focus); `elicit-expand` / `commit-converge` are the diverge/converge pair in the elicitation diamond. | | **AUTO** | The unpinned state of an objective axis (`goal` / `strategy` / `lens`): composition advertises the legal choices in the current prompt-resource manifest and instructs the agent to self-select from that manifest only, reading the selected resource when detail matters (D58-L). | | **Brunch Pi Profile** | The sealed programmatic wrapper around embedded Pi: settings policy, resource-loader policy, extension factories, keybinding/command policy, tool policy, and prompt policy. It allows Brunch-owned resources while suppressing ambient `.pi/` behavior. | | **Prompt resource** | A Brunch-owned markdown file under `src/.pi/` containing detailed goal, strategy, lens, method, or agent-definition guidance. 
Prompt resources are loaded by the agent with `read` when needed; they are product control-plane assets, not ambient Pi prompt templates. | | **Prompt-resource manifest** | The small per-turn D58-L manifest injected into the system prompt, listing only runtime-legal Brunch resources with `name`, `description`, and `location`. The `name`/`description`/`location` for each entry are code-owned in `.pi/agents/state.ts` (not filesystem-discovered), honoring D39-L sealing; `.pi/agents/contexts/` context renderers are not manifest resources. It mirrors Pi's skill-list pattern but is filtered by Brunch runtime state, grade, and allow-lists. | | **Method** | A tool-usage or workflow competence advertised as a Brunch prompt resource (`.pi/skills/methods/*.md`): run structured exchanges, infer-and-capture (D50-L), generate proposals/projections, read context, mutate the graph, review for gaps. Method resources explain when to use a tool family and how to sequence it with other tools; executable tool definitions should stay focused on schemas, authority, and runtime behavior. A method may also be backed by a Pi-native skill, but actual tool authority remains code-owned through `op_mode` policy and active-tool gating. `capability` is retired as a synonym — use `method` and ``. | | **Agent context** | The content the agent reasons over — `cwd`, `graph`, or `node` (D60-L): pulled (typed, read-only) from `graph/`/`session/`, optionally projected when a reusable DTO helps, rendered to LLM-string or JSON, surfaced pushed (compose) or pulled (`read_graph` / `read_workspace_context` / `read_session_context`). Graph context explicitly chooses graph-truth vs active-context reads and may filter by node kind, readiness band, edge category/direction, or absence of an edge category (gap query). Distinct from the **workspace projection** (`workspace.state`), which is product/UI state, not agent content. 
| -| **Readiness grade** | Spec-owned forward gate stored on the `specs` row: `grounding_onboarding | elicitation_ready | commitments_ready | planning_ready`. It unlocks later strategies, review sets, and eventual export/plan/execute posture, but never forbids earlier gathering, refinement, or capture of clear later-band node kinds. | -| **Elicitation posture** | Retired as persisted spec state. Use readiness grade plus active strategy/lens/review-set state to explain elicit behavior. | +| **Readiness estimate** | A soft, derived, live per-band coverage projection over `elicitation_gaps`, for UI surfacing only (D45-L). It is *not* stored, *not* authority, and gates nothing — it may regress honestly. Replaces the retired stored `readiness_grade`. | +| **Capability-readiness** | The only readiness gate (D74-L): a just-in-time, capability-relative judgment made when a capability is requested, evaluated over the `elicitation_gaps` declared relevant to it. Structural gaps are checked mechanically; `manual` gaps consume an LLM satisficiency judgment (D57-L). Outcome: proceed / proceed-at-low-epistemic-status / negotiate. Never bars attempting work. | +| **Readiness grade** *(retired)* | Formerly a spec-row forward-gate scalar (`grounding_onboarding | …`). Retired (D45-L): it conflated gate, display, and milestone. Superseded by capability-readiness (gate), readiness estimate (display), and a deferred milestone gate. | +| **Elicitation posture** | Retired as persisted spec state. Use capability-readiness plus active strategy/lens/review-set state to explain elicit behavior. | | **Commitment focus** | Retired as persisted spec state. Future commitment projection should derive from active review-set state and graph evidence if needed. | | **Coherence** | Bounded product-visible verdict over whether the current spec graph is structurally legal and free of known unresolved contradictions/gaps at the current maturity. 
It is backed by reconciliation needs and remains intentionally narrower than a general judgment that the whole idea is good or complete. | | **Structural legality** | Synchronous schema/ontology validity of graph mutations: edge categories from the closed set in `docs/design/GRAPH_MODEL.md`, per-category stance/cardinality/acyclicity rules (including supersession cycles), immutable accepted-edge identity (`category`, `sourceId`, `targetId`, `stance`), per-plane closed node `kind` enums, stable kind-ordinal uniqueness/counter allocation, approval-basis enum validity, required `detail` sub-schemas for `decision`/`term`, and transaction invariants. Structural legality can fail even before semantic coherence is evaluated. | @@ -502,14 +506,16 @@ src/.pi/ | **Framing-as** | ~~Orthogonal modality classifying a node's product role.~~ **Retired.** Absorbed by `thesis`, `term`, `constraint`, and `goal` (D54-L, D56-L). | | **Thesis** | A first-class intent node kind (`kind: "thesis"`). A chosen position or bet about the product — falsifiable, carries "what/who/why/for whom" material (La Carte Blanche style). Not a requirement (it's a bet, not a need), not a goal (it's falsifiable, not aspirational), not an assumption (it's a chosen position, not a dependency). Natural edge relationships: criteria and evidence witness for/against a thesis via `proof` edges. | | **Term** | A first-class intent node kind (`kind: "term"`). A canonical naming commitment for ubiquitous language and conceptual consistency. Requires `detail: { definition, aliases? }`. Participates in graph edges: downstream nodes may `dependency`-depend on the term's definition; a term may `boundary`-scope what counts as X; a newer term may `supersession`-replace a prior term. 
| -| **Graph basis** | Provenance-directness field (`explicit | implicit`) on accepted graph nodes and edges: `explicit` when the item came directly from the user (stated or user-reviewed); `implicit` when the agent materialized it from user input after concept-level acceptance. Approval strength is the claim-flavored reading of this axis; the same `explicit | implicit` distinction also applies to non-claim registers such as the elicitation backlog (user-raised vs agent-inferred, D65-L). Mutation path lives in `change_log`, not in `basis` (D63-L). | +| **Graph basis** | Provenance-directness field (`explicit | implicit`) on accepted graph nodes and edges: `explicit` when the item came directly from the user (stated or user-reviewed); `implicit` when the agent materialized it from user input after concept-level acceptance. Approval strength is the claim-flavored reading of this axis; the same `explicit | implicit` distinction also applies to non-claim registers such as `elicitation_gaps` (user-raised vs agent-inferred, D65-L). Mutation path lives in `change_log`, not in `basis` (D63-L). | | **Node source** | Free-form string on `GraphNode.source` for epistemic attribution (e.g. "stakeholder", "regulatory", "derived", "agent synthesis"). Convention by prompt, not structural validation. Exists for context-render enrichment — rendered back into sparse text in prompt context, not used for policy or filtering. Not applicable to edges. | -| **Elicitation backlog** | A prospective process-agenda register — the elicitor's "prospective memory" of open questions the user has not answered yet (knowable now by asking). Seeded at spec creation, read every turn to choose what to ask next, grown by capture-reflection; resolution yields a claim, a `risk`, or more entries. Async and unordered (named `backlog` over `agenda`/`need`): entries may be logged now but only matter under a later grade or different lens. 
A flat `elicitation_backlog` table (not a graph node); the *prospective* sibling of the *retrospective* `reconciliation_need` register. See D65-L. | +| **Elicitation gap** | A typed coverage *obligation* (a typology of coverage that must be addressed — e.g. "the spec must anchor its primary constraint(s)"), **not** a literal question and not domain content (which lives in the graph). Each gap carries both a **name** (typology key — machine identity + display label) and a **rationale** (meta prose: what coverage it represents, why it matters, what satisfies it), plus a band (D64-L), a predicate shape (`presence | field | coverage | manual`), an importance (driver-weight), a derived coverage strength, and a disposition (`open | answered | not_applicable | irrelevant | reopened`). Stored in a flat `elicitation_gaps` table (not a graph node); seeded at spec creation for grounding, generatively spawned for elicitation, derived for commitment. Serves as the elicitor's agenda, the substrate of capability-readiness, and a density signal. The *prospective* sibling of the *retrospective* `reconciliation_need` register. See D65-L. | +| **Grounding typology catalog** | The seeded fixed set of grounding-band gap typologies (D65-L), collated from the D30-L anchor bundle, D57-L Walter drivers, `ELICITATION_LENSES.md`, and shaping kickoff/framing material. **Floor** (gates generative capabilities): `domain`, `protagonist`, `pain_pull`, `constraint`. **Progressive drivers** (enrich, never floor): `value`, `context_of_use`, `success_sketch`, `solution_boundary` (non-goals). | +| **Elicitation backlog** *(renamed)* | Former name for the elicitation-gaps register and its question-instance / `open|closed` model. Renamed and reconceived as **elicitation gap** (D65-L). 
| | **Risk** *(deferred)* | A durable domain-epistemic gap: something nobody knows and cannot economically find out now, requiring strategic accommodation (assumptions, decisions, design/verification/planning) rather than elicitation. Distinct from `assumption` (which proceeds on a believed-but-unprovable value); a risk is upstream and cannot yet pick a value. If adopted it is a first-class intent-plane node kind (it carries real cross-plane edges), not a table; deferred because it reopens the locked kind set (D54-L/D56-L). Future Direction §Vocabulary evolution; D65-L. | | **Node detail** | Optional JSON column on `GraphNode.detail` with per-kind validated sub-structures. `decision` requires `{ chosen_option, rejected, rationale }`; `term` requires `{ definition, aliases? }`. All other kinds omit `detail`. | | **Context (node kind)** | A first-class intent node kind (`kind: "context"`). A descriptive claim about the environment — observed facts that color interpretation without driving decisions directly. Last-resort basic bucket: before filing as context, check the promotion heuristic (must be true for success → requirement/invariant; limits solutions → constraint; may be false → assumption; chooses among alternatives → decision; bet about users/market → thesis). | | **Intent kind category** | Derived semantic grouping of intent node kinds: `basic` (goal, thesis, term, context), `structural` (requirement, assumption, constraint, invariant), `reasoning` (decision, criterion, example). A pure function of `kind`, not stored. Distinct from readiness bands. | -| **Readiness band** | Non-exclusive derived grouping over node kinds — `grounding`, `elicitation`, `commitment` — used by elicitor goals, graph context filters, and grade-advancement rubrics. A band is not a validation gate; clear later-band nodes may be captured at earlier grades (D64-L). 
| +| **Readiness band** | The coarse level of one coverage axis (`grounding`, `elicitation`, `commitment`); gap typologies (D65-L) are its finer members — one axis, two granularities. A non-exclusive derived grouping over node kinds, used by elicitor goals, graph context filters, the readiness-estimate rollup, and capability-readiness weighting. A band is not a validation gate; clear later-band nodes may be captured at any time (D64-L). | | **Posture** | A workspace-level POC-stubbed property set declaring project epistemic/strategic stance (certainty, stakes, audience, horizon, migration, sourcing). Not a graph node kind or spec-row field in the POC. Grounding elicitation may help establish it, but startup persists only the workspace stub. | | **Kernel** | A behavioural elicitation pattern from `docs/design/BEHAVIORAL_KERNELS.md` (state/lifecycle, containment, concurrency, etc.). | | **Probe run** | A scripted or executable check of a Brunch seam that drives the public product surface and persists reviewable artifacts under `.fixtures/runs///`. | diff --git a/src/db/README.md b/src/db/README.md index 4cc08793..afd3a890 100644 --- a/src/db/README.md +++ b/src/db/README.md @@ -7,7 +7,7 @@ SPEC decisions: D16-L, D41-L, D52-L, D54-L, D62-L - **Drizzle table definitions** (`schema.ts`) — the canonical column-level source of truth for persisted graph/workspace rows. It owns the SQLite table names and column names. Domain enum taxonomy (`INTENT_KINDS`, - `READINESS_GRADES`, `EDGE_CATEGORIES`, etc.) is owned by + `GAP_DISPOSITIONS`, `EDGE_CATEGORIES`, etc.) is owned by `graph/schema/kinds.ts`; `db/schema.ts` imports those literals only for column constraints. @@ -99,14 +99,14 @@ their boundary. The current graph and graph-adjacent tables are spec-scoped: `specs`, `nodes`, `edges`, `node_kind_counters`, `graph_clock`, `change_log`, -`reconciliation_need`, and `elicitation_backlog`. `graph_clock` is keyed by +`reconciliation_need`, and `elicitation_gaps`. 
`graph_clock` is keyed by `spec_id`; `change_log` carries `spec_id` and is keyed by `(spec_id, lsn)`, so a bare LSN is comparable only inside one spec. -`elicitation_backlog` is the prospective sibling of `reconciliation_need`: a -flat process-agenda register, not a graph plane or node table. It still lives -here only as storage substrate; graph-owned command/query code continues to own -its semantics. +`elicitation_gaps` is the prospective sibling of `reconciliation_need`: a +flat typed coverage-obligation register, not a graph plane or node table. It +stores obligation/disposition/meta only; graph-owned command/query code derives +structural coverage from graph truth at read time. `nodes.kind_ordinal` is persisted as the storage half of the D62-L projected-code contract. `node_kind_counters` owns monotonic per-`(spec_id, plane, kind)` diff --git a/src/db/row-schemas.ts b/src/db/row-schemas.ts index 0abc61ec..6c80d2c1 100644 --- a/src/db/row-schemas.ts +++ b/src/db/row-schemas.ts @@ -13,7 +13,7 @@ import { createInsertSchema, createSelectSchema } from 'drizzle-typebox'; import { changeLog, edges, - elicitationBacklog, + elicitationGaps, graphClock, nodeKindCounters, nodes, @@ -47,6 +47,6 @@ export const selectNodeKindCounterSchema = createSelectSchema(nodeKindCounters); export const insertReconciliationNeedSchema = createInsertSchema(reconciliationNeed); export const selectReconciliationNeedSchema = createSelectSchema(reconciliationNeed); -// --- Elicitation backlog schemas --- -export const insertElicitationBacklogSchema = createInsertSchema(elicitationBacklog); -export const selectElicitationBacklogSchema = createSelectSchema(elicitationBacklog); +// --- Elicitation gaps schemas --- +export const insertElicitationGapSchema = createInsertSchema(elicitationGaps); +export const selectElicitationGapSchema = createSelectSchema(elicitationGaps); diff --git a/src/db/schema.ts b/src/db/schema.ts index 0950cb19..d4aa66ce 100644 --- a/src/db/schema.ts +++ 
b/src/db/schema.ts @@ -21,7 +21,8 @@ import { import { EDGE_CATEGORIES, EDGE_STANCES, - ELICITATION_BACKLOG_STATUSES, + GAP_DISPOSITIONS, + GAP_PREDICATE_KINDS, LENS_AFFINITIES, NODE_BASES, NODE_PLANES, @@ -145,21 +146,23 @@ export const reconciliationNeed = sqliteTable('reconciliation_need', { resolved_at_lsn: integer(), }); -export const elicitationBacklog = sqliteTable('elicitation_backlog', { +export const elicitationGaps = sqliteTable('elicitation_gaps', { id: integer().primaryKey({ autoIncrement: true }), spec_id: integer() .notNull() .references(() => specs.id), - kind: text().notNull(), // open taxonomy: grounding anchors today, richer agenda kinds later - question: text().notNull(), - status: text({ enum: ELICITATION_BACKLOG_STATUSES }).notNull().default('open'), + name: text().notNull(), + rationale: text().notNull(), + disposition: text({ enum: GAP_DISPOSITIONS }).notNull().default('open'), basis: text({ enum: NODE_BASES }).notNull().default('explicit'), readiness_band: text({ enum: READINESS_BANDS }).notNull(), + predicate_kind: text({ enum: GAP_PREDICATE_KINDS }).notNull(), + predicate: text().notNull(), + importance: integer().notNull().default(1), plane_affinity: text({ enum: NODE_PLANES }), lens_affinity: text({ enum: LENS_AFFINITIES }), - arose_from_entry_id: integer().references((): AnySQLiteColumn => elicitationBacklog.id), + arose_from_gap_id: integer().references((): AnySQLiteColumn => elicitationGaps.id), resolved_by_node_id: integer().references(() => nodes.id), - rationale: text(), created_at_lsn: integer().notNull(), - closed_at_lsn: integer(), + disposition_set_at_lsn: integer(), }); diff --git a/src/graph/README.md b/src/graph/README.md index fd9465a9..9b190d0e 100644 --- a/src/graph/README.md +++ b/src/graph/README.md @@ -9,8 +9,8 @@ SPEC decisions: D4-L, D20-L, D27-L, D51-L, D52-L, D53-L, D54-L, D60-L, D62-L, D6 graph/spec writes. 
It hides structural validation, transaction mechanics, spec-local LSN allocation, per-kind node ordinal allocation, change-log append, and structured command results. It also owns prospective-register writes for - `elicitation_backlog` (`createSpec` seeding plus create/close entry commands), - because the backlog shares the same spec-local LSN and audit boundary. + `elicitation_gaps` (`createSpec` seeding plus create/disposition commands), + because the gap register shares the same spec-local LSN and audit boundary. - **mutateGraph** — atomic graph mutation for direct writers and future curation: one tool call, one transaction, one selected-spec LSN, all-or-nothing. The @@ -33,12 +33,12 @@ SPEC decisions: D4-L, D20-L, D27-L, D51-L, D52-L, D53-L, D54-L, D60-L, D62-L, D6 - **Readers / query functions** (`queries.ts`) — graph reads at multiple detail levels: active-context and graph-truth overview, node neighborhood, selected-spec graph-code lookup, open reconciliation needs, and - open elicitation-backlog entries. These return typed domain objects or + elicitation gaps. These return typed domain objects or internal ids, not Drizzle rows. - **Domain schema types** (`schema/`) — `GraphNode`, `GraphEdge`, - `ReconciliationNeed`, `ElicitationBacklogEntry`, kind/category types, + `ReconciliationNeed`, `ElicitationGap`, kind/category types, per-kind node ordinals, and derived intent-kind grouping. Raw domain enum taxonomy lives in the zero-import `schema/kinds.ts` leaf so web-facing graph imports do not pull in Drizzle. @@ -70,7 +70,7 @@ D60-L read-shape ownership is explicit: every durable graph read shape has one c | `gaps` | `getGraphGaps` | required | n/a | n/a | Agent/RPC-only diagnostic shape; not a web observer projection. | | `related` | `getRelatedNodes` | required | n/a | n/a | Agent/RPC-only traversal helper; not a web observer projection. 
| | `reconciliation_needs` | `getOpenReconciliationNeeds` | deferred | deferred | deferred | Agent-internal register read; no transport consumer yet. | -| `elicitation_backlog` | `getOpenElicitationBacklogEntries` | deferred | deferred | deferred | Agent-internal prospective-register read; per-turn driver follow-on owns exposure. | +| `elicitation_gaps` | `getElicitationGaps` | deferred | deferred | deferred | Agent-internal prospective-register read; per-turn driver follow-on owns exposure. | `observed-shapes-coverage.test.ts` guards the required subsets against accidental drift: the tool mode union must stay at the six required agent shapes, while RPC and web stay at `overview` + `neighborhood` until a scoped feature deliberately promotes another row. @@ -120,7 +120,7 @@ graph/ CommandExecutor command input/result types createSpec - create/close elicitation-backlog entry + create/set elicitation-gap disposition updateReadinessGrade createNode per-kind node ordinal allocation @@ -153,7 +153,7 @@ graph/ getGraphOverview getNodeNeighborhood resolveGraphNodeCode - getOpenElicitationBacklogEntries + getElicitationGaps getOpenReconciliationNeeds row -> domain mapping @@ -165,7 +165,7 @@ graph/ schema/ kinds.ts zero-import domain enum taxonomy leaf - elicitation-backlog.ts + elicitation-gaps.ts nodes.ts edges.ts reconciliation-need.ts diff --git a/src/graph/architecture.test.ts b/src/graph/architecture.test.ts index 61411700..6120abba 100644 --- a/src/graph/architecture.test.ts +++ b/src/graph/architecture.test.ts @@ -49,7 +49,7 @@ describe('I26-L architectural boundary', () => { it('db/schema.ts does not own domain enum const arrays', () => { const result = execSync( - `rg "export const (INTENT_KINDS|ORACLE_KINDS|DESIGN_KINDS|PLAN_KINDS|NODE_PLANES|NODE_BASES|EDGE_CATEGORIES|EDGE_STANCES|READINESS_GRADES|READINESS_BANDS|LENS_AFFINITIES|ELICITATION_BACKLOG_STATUSES)" src/db/schema.ts || true`, + `rg "export const 
(INTENT_KINDS|ORACLE_KINDS|DESIGN_KINDS|PLAN_KINDS|NODE_PLANES|NODE_BASES|EDGE_CATEGORIES|EDGE_STANCES|READINESS_GRADES|READINESS_BANDS|LENS_AFFINITIES|GAP_DISPOSITIONS|GAP_PREDICATE_KINDS)" src/db/schema.ts || true`, { cwd: process.cwd(), encoding: 'utf-8' }, ); diff --git a/src/graph/command-executor.test.ts b/src/graph/command-executor.test.ts index 75f20b1a..61aeee25 100644 --- a/src/graph/command-executor.test.ts +++ b/src/graph/command-executor.test.ts @@ -11,7 +11,7 @@ import { describe, expect, it, beforeEach } from 'vitest'; import { createDb, type BrunchDb } from '../db/connection.js'; import { changeLog, - elicitationBacklog, + elicitationGaps, graphClock, nodeKindCounters, nodes, @@ -406,7 +406,7 @@ describe('CommandExecutor', () => { ).toEqual([{ specId: result.specId, lsn: 1 }]); }); - it('seeds explicit grounding backlog entries for the new spec at create-spec LSN', () => { + it('seeds grounding typology gaps for the new spec at create-spec LSN', () => { const result = executor.createSpec({ name: 'Grounded Spec', slug: 'grounded-spec' }); expect(result.status).toBe('success'); if (result.status !== 'success') throw new Error('unreachable'); @@ -414,60 +414,41 @@ describe('CommandExecutor', () => { expect( db .select({ - kind: elicitationBacklog.kind, - question: elicitationBacklog.question, - status: elicitationBacklog.status, - basis: elicitationBacklog.basis, - readinessBand: elicitationBacklog.readiness_band, - planeAffinity: elicitationBacklog.plane_affinity, - lensAffinity: elicitationBacklog.lens_affinity, - createdAtLsn: elicitationBacklog.created_at_lsn, + name: elicitationGaps.name, + disposition: elicitationGaps.disposition, + basis: elicitationGaps.basis, + readinessBand: elicitationGaps.readiness_band, + predicateKind: elicitationGaps.predicate_kind, + importance: elicitationGaps.importance, + planeAffinity: elicitationGaps.plane_affinity, + lensAffinity: elicitationGaps.lens_affinity, + createdAtLsn: elicitationGaps.created_at_lsn, }) - 
.from(elicitationBacklog) - .where(eq(elicitationBacklog.spec_id, result.specId)) + .from(elicitationGaps) + .where(eq(elicitationGaps.spec_id, result.specId)) .all(), - ).toEqual([ - { - kind: 'domain_anchor_question', - question: 'What is the thing or domain we are specifying?', - status: 'open', - basis: 'explicit', - readinessBand: 'grounding', - planeAffinity: 'intent', - lensAffinity: 'intent', - createdAtLsn: result.lsn, - }, - { - kind: 'protagonist_anchor_question', - question: 'Who is this for, or who is most affected by it?', - status: 'open', - basis: 'explicit', + ).toEqual( + [ + 'domain', + 'protagonist', + 'pain_pull', + 'constraint', + 'value', + 'context_of_use', + 'success_sketch', + 'solution_boundary', + ].map((name, index) => ({ + name, + disposition: 'open', + basis: 'implicit', readinessBand: 'grounding', + predicateKind: 'presence', + importance: index < 4 ? 3 : 1, planeAffinity: 'intent', lensAffinity: 'intent', createdAtLsn: result.lsn, - }, - { - kind: 'pain_anchor_question', - question: 'What problem, pain, or pull is driving this work?', - status: 'open', - basis: 'explicit', - readinessBand: 'grounding', - planeAffinity: 'intent', - lensAffinity: 'intent', - createdAtLsn: result.lsn, - }, - { - kind: 'constraint_anchor_question', - question: 'What constraints or non-negotiable boundaries already shape it?', - status: 'open', - basis: 'explicit', - readinessBand: 'grounding', - planeAffinity: 'intent', - lensAffinity: 'intent', - createdAtLsn: result.lsn, - }, - ]); + })), + ); }); it('scopes create_spec audit LSNs to the newly created spec', () => { @@ -679,120 +660,145 @@ describe('CommandExecutor', () => { }); }); - describe('createElicitationBacklogEntry', () => { - it('creates an open backlog entry and preserves the arose-from pointer', () => { - const parent = executor.createElicitationBacklogEntry({ + describe('createElicitationGap', () => { + it('creates an open gap and preserves the arose-from pointer', () => { + const parent 
= executor.createElicitationGap({ specId, - kind: 'domain_anchor_question', - question: 'What is the thing or domain we are specifying?', - readinessBand: 'grounding', + name: 'domain', + rationale: 'Name the product domain.', + band: 'grounding', + predicate: { kind: 'presence', plane: 'intent', nodeKind: 'context', minimum: 1 }, planeAffinity: 'intent', lensAffinity: 'intent', }); expect(parent.status).toBe('success'); if (parent.status !== 'success') throw new Error('unreachable'); - const child = executor.createElicitationBacklogEntry({ + const child = executor.createElicitationGap({ specId, - kind: 'follow_on_question', - question: 'Which user is blocked most by the current version?', - readinessBand: 'grounding', + name: 'follow_on', + rationale: 'Clarify which user is blocked most by the current version.', + band: 'grounding', + predicate: { kind: 'presence', plane: 'intent', nodeKind: 'context', minimum: 1 }, planeAffinity: 'intent', lensAffinity: 'intent', - aroseFromEntryId: parent.id, + aroseFromGapId: parent.id, }); expect(child.status).toBe('success'); if (child.status !== 'success') throw new Error('unreachable'); - expect( - db.select().from(elicitationBacklog).where(eq(elicitationBacklog.id, child.id)).get(), - ).toMatchObject({ + expect(db.select().from(elicitationGaps).where(eq(elicitationGaps.id, child.id)).get()).toMatchObject({ spec_id: specId, - kind: 'follow_on_question', - question: 'Which user is blocked most by the current version?', - status: 'open', + name: 'follow_on', + rationale: 'Clarify which user is blocked most by the current version.', + disposition: 'open', basis: 'explicit', readiness_band: 'grounding', + predicate_kind: 'presence', plane_affinity: 'intent', lens_affinity: 'intent', - arose_from_entry_id: parent.id, + arose_from_gap_id: parent.id, created_at_lsn: child.lsn, - closed_at_lsn: null, + disposition_set_at_lsn: null, }); }); - it('rejects malformed entries without writing rows or advancing the clock', () => { - const 
result = executor.createElicitationBacklogEntry({ + it('rejects malformed gaps without writing rows or advancing the clock', () => { + const result = executor.createElicitationGap({ specId, - kind: ' ', - question: ' ', - readinessBand: 'later' as never, + name: ' ', + rationale: ' ', + band: 'later' as never, + predicate: { kind: 'presence', minimum: 0 }, }); expect(result.status).toBe('structural_illegal'); if (result.status !== 'structural_illegal') throw new Error('unreachable'); expect(result.diagnostics.map((diagnostic) => diagnostic.field)).toEqual( - expect.arrayContaining(['kind', 'question', 'readinessBand']), + expect.arrayContaining(['name', 'rationale', 'band', 'predicate.minimum']), ); - expect(db.select().from(elicitationBacklog).all()).toEqual([]); + expect(db.select().from(elicitationGaps).all()).toEqual([]); expect(graphClockLsn(db, specId)).toBe(0); expect(db.select().from(changeLog).all()).toEqual([]); }); }); - describe('closeElicitationBacklogEntry', () => { - it('closes an open entry and records resolvedByNodeId and closedAtLsn', () => { - const entry = executor.createElicitationBacklogEntry({ + describe('setElicitationGapDisposition', () => { + it('sets a non-derivable disposition and records resolvedByNodeId and dispositionSetAtLsn', () => { + const entry = executor.createElicitationGap({ specId, - kind: 'domain_anchor_question', - question: 'What is the thing or domain we are specifying?', - readinessBand: 'grounding', + name: 'manual_grounding', + rationale: 'Judge whether grounding is sufficient.', + band: 'grounding', + predicate: { kind: 'manual', rubric: 'Sufficiently grounded for generative work.' 
}, }); expect(entry.status).toBe('success'); if (entry.status !== 'success') throw new Error('unreachable'); - const node = executor.createNode({ - specId, - plane: 'intent', - kind: 'goal', - title: 'Clarified goal', - }); + const node = executor.createNode({ specId, plane: 'intent', kind: 'goal', title: 'Clarified goal' }); expect(node.status).toBe('success'); if (node.status !== 'success') throw new Error('unreachable'); - const close = executor.closeElicitationBacklogEntry({ + const setDisposition = executor.setElicitationGapDisposition({ specId, id: entry.id, + disposition: 'answered', resolvedByNodeId: node.nodeId, }); - expect(close.status).toBe('success'); - if (close.status !== 'success') throw new Error('unreachable'); - expect(close.lsn).toBeGreaterThan(node.lsn); + expect(setDisposition.status).toBe('success'); + if (setDisposition.status !== 'success') throw new Error('unreachable'); + expect(setDisposition.lsn).toBeGreaterThan(node.lsn); expect( db .select({ - status: elicitationBacklog.status, - resolvedByNodeId: elicitationBacklog.resolved_by_node_id, - closedAtLsn: elicitationBacklog.closed_at_lsn, + disposition: elicitationGaps.disposition, + resolvedByNodeId: elicitationGaps.resolved_by_node_id, + dispositionSetAtLsn: elicitationGaps.disposition_set_at_lsn, }) - .from(elicitationBacklog) - .where(eq(elicitationBacklog.id, entry.id)) + .from(elicitationGaps) + .where(eq(elicitationGaps.id, entry.id)) .get(), ).toEqual({ - status: 'closed', + disposition: 'answered', resolvedByNodeId: node.nodeId, - closedAtLsn: close.lsn, + dispositionSetAtLsn: setDisposition.lsn, + }); + }); + + it('rejects hand-setting answered for structural predicates', () => { + const entry = executor.createElicitationGap({ + specId, + name: 'domain', + rationale: 'Name the product domain.', + band: 'grounding', + predicate: { kind: 'presence', plane: 'intent', nodeKind: 'context', minimum: 1 }, }); + expect(entry.status).toBe('success'); + if (entry.status !== 'success') 
throw new Error('unreachable'); + + const result = executor.setElicitationGapDisposition({ specId, id: entry.id, disposition: 'answered' }); + + expect(result.status).toBe('structural_illegal'); + if (result.status !== 'structural_illegal') throw new Error('unreachable'); + expect(result.diagnostics[0]!.field).toBe('disposition'); + expect( + db + .select({ disposition: elicitationGaps.disposition }) + .from(elicitationGaps) + .where(eq(elicitationGaps.id, entry.id)) + .get(), + ).toEqual({ disposition: 'open' }); }); it('rejects a resolved-by node from another spec', () => { - const entry = executor.createElicitationBacklogEntry({ + const entry = executor.createElicitationGap({ specId, - kind: 'domain_anchor_question', - question: 'What is the thing or domain we are specifying?', - readinessBand: 'grounding', + name: 'manual_grounding', + rationale: 'Judge whether grounding is sufficient.', + band: 'grounding', + predicate: { kind: 'manual', rubric: 'Sufficiently grounded for generative work.' 
}, }); expect(entry.status).toBe('success'); if (entry.status !== 'success') throw new Error('unreachable'); @@ -809,30 +815,27 @@ describe('CommandExecutor', () => { expect(otherNode.status).toBe('success'); if (otherNode.status !== 'success') throw new Error('unreachable'); - const close = executor.closeElicitationBacklogEntry({ + const result = executor.setElicitationGapDisposition({ specId, id: entry.id, + disposition: 'answered', resolvedByNodeId: otherNode.nodeId, }); - expect(close.status).toBe('structural_illegal'); - if (close.status !== 'structural_illegal') throw new Error('unreachable'); - expect(close.diagnostics[0]!.field).toBe('resolvedByNodeId'); + expect(result.status).toBe('structural_illegal'); + if (result.status !== 'structural_illegal') throw new Error('unreachable'); + expect(result.diagnostics[0]!.field).toBe('resolvedByNodeId'); expect( db .select({ - status: elicitationBacklog.status, - resolvedByNodeId: elicitationBacklog.resolved_by_node_id, - closedAtLsn: elicitationBacklog.closed_at_lsn, + disposition: elicitationGaps.disposition, + resolvedByNodeId: elicitationGaps.resolved_by_node_id, + dispositionSetAtLsn: elicitationGaps.disposition_set_at_lsn, }) - .from(elicitationBacklog) - .where(eq(elicitationBacklog.id, entry.id)) + .from(elicitationGaps) + .where(eq(elicitationGaps.id, entry.id)) .get(), - ).toEqual({ - status: 'open', - resolvedByNodeId: null, - closedAtLsn: null, - }); + ).toEqual({ disposition: 'open', resolvedByNodeId: null, dispositionSetAtLsn: null }); }); }); diff --git a/src/graph/command-executor.ts b/src/graph/command-executor.ts index d5e6c311..8b61eb9e 100644 --- a/src/graph/command-executor.ts +++ b/src/graph/command-executor.ts @@ -34,10 +34,12 @@ import type { } from './command-executor/graph-mutation-types.js'; import { writeGraphMutation } from './command-executor/graph-mutation-writer.js'; import { translateReviewSetPayloadToMutateGraph } from './review-set.js'; -import type { ElicitationBacklogLensAffinity 
} from './schema/elicitation-backlog.js'; +import type { ElicitationGapLensAffinity, GapDisposition, GapPredicate } from './schema/elicitation-gaps.js'; import { DESIGN_KINDS, INTENT_KINDS, + GAP_DISPOSITIONS, + GAP_PREDICATE_KINDS, LENS_AFFINITIES, NODE_BASES, ORACLE_KINDS, @@ -109,15 +111,15 @@ interface CreateSpecSuccess { readonly lsn: number; } -/** Successful elicitation-backlog creation. */ -interface ElicitationBacklogSuccess { +/** Successful elicitation-gap creation. */ +interface ElicitationGapSuccess { readonly status: 'success'; readonly id: number; readonly lsn: number; } -/** Successful elicitation-backlog close. */ -interface ElicitationBacklogCloseSuccess { +/** Successful elicitation-gap disposition update. */ +interface ElicitationGapDispositionSuccess { readonly status: 'success'; readonly lsn: number; } @@ -144,8 +146,8 @@ export type CommandResult = | ReconNeedSuccess | ReconNeedResolveSuccess | CreateSpecSuccess - | ElicitationBacklogSuccess - | ElicitationBacklogCloseSuccess + | ElicitationGapSuccess + | ElicitationGapDispositionSuccess | UpdateReadinessGradeSuccess | StructuralIllegal | NeedsHuman @@ -164,11 +166,11 @@ export type ResolveReconNeedResult = ReconNeedResolveSuccess | StructuralIllegal /** Result of a createSpec command. */ export type CreateSpecResult = CreateSpecSuccess | StructuralIllegal; -/** Result of a createElicitationBacklogEntry command. */ -export type CreateElicitationBacklogEntryResult = ElicitationBacklogSuccess | StructuralIllegal; +/** Result of a createElicitationGap command. */ +export type CreateElicitationGapResult = ElicitationGapSuccess | StructuralIllegal; -/** Result of a closeElicitationBacklogEntry command. */ -export type CloseElicitationBacklogEntryResult = ElicitationBacklogCloseSuccess | StructuralIllegal; +/** Result of a setElicitationGapDisposition command. 
*/ +export type SetElicitationGapDispositionResult = ElicitationGapDispositionSuccess | StructuralIllegal; /** Result of an updateReadinessGrade command. */ export type UpdateReadinessGradeResult = UpdateReadinessGradeSuccess | StructuralIllegal; @@ -208,23 +210,28 @@ export interface AcceptReviewSetInput { readonly payload: unknown; } -/** Input for creating an elicitation-backlog entry. */ -export interface CreateElicitationBacklogEntryInput { +/** Input for creating an elicitation gap. */ +export interface CreateElicitationGapInput { readonly specId: number; - readonly kind: string; - readonly question: string; + readonly name: string; + readonly rationale: string; readonly basis?: NodeBasis | undefined; - readonly readinessBand: ReadinessBand; + readonly band: ReadinessBand; + readonly predicate: GapPredicate; + readonly importance?: number | undefined; readonly planeAffinity?: NodePlane | undefined; - readonly lensAffinity?: ElicitationBacklogLensAffinity | undefined; - readonly aroseFromEntryId?: number | undefined; - readonly rationale?: string | undefined; + readonly lensAffinity?: ElicitationGapLensAffinity | undefined; + readonly aroseFromGapId?: number | undefined; } -/** Input for closing an elicitation-backlog entry. */ -export interface CloseElicitationBacklogEntryInput { +/** Input for updating an elicitation gap's non-derivable disposition. 
*/ +export interface SetElicitationGapDispositionInput { readonly specId: number; readonly id: number; + readonly disposition: Extract< + GapDisposition, + 'open' | 'answered' | 'not_applicable' | 'irrelevant' | 'reopened' + >; readonly resolvedByNodeId?: number | undefined; } @@ -289,45 +296,97 @@ const KINDS_REQUIRING_DETAIL = new Set(['decision', 'term']); const VALID_READINESS_GRADES = READINESS_GRADES as unknown as string[]; const VALID_NODE_BASES = NODE_BASES as unknown as string[]; const VALID_READINESS_BANDS = READINESS_BANDS as unknown as string[]; +const VALID_GAP_DISPOSITIONS = GAP_DISPOSITIONS as unknown as string[]; +const VALID_GAP_PREDICATE_KINDS = GAP_PREDICATE_KINDS as unknown as string[]; const VALID_LENS_AFFINITIES = LENS_AFFINITIES as unknown as string[]; -const SEEDED_ELICITATION_BACKLOG: readonly { - readonly kind: string; - readonly question: string; +const SEEDED_ELICITATION_GAPS: readonly { + readonly name: string; + readonly rationale: string; readonly basis: NodeBasis; - readonly readinessBand: ReadinessBand; + readonly band: ReadinessBand; + readonly predicate: GapPredicate; + readonly importance: number; readonly planeAffinity: NodePlane; - readonly lensAffinity: ElicitationBacklogLensAffinity; + readonly lensAffinity: ElicitationGapLensAffinity; }[] = [ { - kind: 'domain_anchor_question', - question: 'What is the thing or domain we are specifying?', - basis: 'explicit', - readinessBand: 'grounding', + name: 'domain', + rationale: 'Anchors what kind of thing is being specified and the domain it belongs to.', + basis: 'implicit', + band: 'grounding', + predicate: { kind: 'presence', plane: 'intent', nodeKind: 'context', minimum: 1 }, + importance: 3, + planeAffinity: 'intent', + lensAffinity: 'intent', + }, + { + name: 'protagonist', + rationale: 'Identifies who the spec is for or who is most affected by the outcome.', + basis: 'implicit', + band: 'grounding', + predicate: { kind: 'presence', plane: 'intent', nodeKind: 'context', 
minimum: 1 }, + importance: 3, + planeAffinity: 'intent', + lensAffinity: 'intent', + }, + { + name: 'pain_pull', + rationale: 'States the problem, pain, or pull that makes the work worth doing.', + basis: 'implicit', + band: 'grounding', + predicate: { kind: 'presence', plane: 'intent', nodeKind: 'goal', minimum: 1 }, + importance: 3, + planeAffinity: 'intent', + lensAffinity: 'intent', + }, + { + name: 'constraint', + rationale: 'Captures binding constraints or non-negotiable boundaries already shaping the work.', + basis: 'implicit', + band: 'grounding', + predicate: { kind: 'presence', plane: 'intent', nodeKind: 'constraint', minimum: 1 }, + importance: 3, planeAffinity: 'intent', lensAffinity: 'intent', }, { - kind: 'protagonist_anchor_question', - question: 'Who is this for, or who is most affected by it?', - basis: 'explicit', - readinessBand: 'grounding', + name: 'value', + rationale: 'Clarifies the benefit or value the work should create.', + basis: 'implicit', + band: 'grounding', + predicate: { kind: 'presence', plane: 'intent', nodeKind: 'goal', minimum: 1 }, + importance: 1, planeAffinity: 'intent', lensAffinity: 'intent', }, { - kind: 'pain_anchor_question', - question: 'What problem, pain, or pull is driving this work?', - basis: 'explicit', - readinessBand: 'grounding', + name: 'context_of_use', + rationale: 'Describes when, where, or under what conditions the result will be used.', + basis: 'implicit', + band: 'grounding', + predicate: { kind: 'presence', plane: 'intent', nodeKind: 'context', minimum: 1 }, + importance: 1, planeAffinity: 'intent', lensAffinity: 'intent', }, { - kind: 'constraint_anchor_question', - question: 'What constraints or non-negotiable boundaries already shape it?', - basis: 'explicit', - readinessBand: 'grounding', + name: 'success_sketch', + rationale: 'Sketches what success looks like or how goodness will be recognized.', + basis: 'implicit', + band: 'grounding', + predicate: { kind: 'presence', plane: 'intent', 
nodeKind: 'criterion', minimum: 1 }, + importance: 1, + planeAffinity: 'intent', + lensAffinity: 'intent', + }, + { + name: 'solution_boundary', + rationale: 'Names non-goals or boundaries around what the solution is explicitly not.', + basis: 'implicit', + band: 'grounding', + predicate: { kind: 'presence', plane: 'intent', nodeKind: 'constraint', minimum: 1 }, + importance: 1, planeAffinity: 'intent', lensAffinity: 'intent', }, @@ -349,10 +408,41 @@ function isReadinessBand(value: string): value is ReadinessBand { return VALID_READINESS_BANDS.includes(value); } -function isElicitationBacklogLensAffinity(value: string): value is ElicitationBacklogLensAffinity { +function isElicitationGapLensAffinity(value: string): value is ElicitationGapLensAffinity { return VALID_LENS_AFFINITIES.includes(value); } +function isGapDisposition(value: string): value is GapDisposition { + return VALID_GAP_DISPOSITIONS.includes(value); +} + +function validateGapPredicate(predicate: GapPredicate, diagnostics: Diagnostic[]): void { + if (typeof predicate !== 'object' || predicate === null) { + diagnostics.push({ field: 'predicate', message: 'predicate must be an object' }); + return; + } + + if (!VALID_GAP_PREDICATE_KINDS.includes(predicate.kind)) { + diagnostics.push({ field: 'predicate.kind', message: 'predicate kind is not valid' }); + return; + } + + if (predicate.kind === 'presence') { + if (!Number.isInteger(predicate.minimum) || predicate.minimum < 1) { + diagnostics.push({ field: 'predicate.minimum', message: 'minimum must be a positive integer' }); + } + if (predicate.plane !== undefined && !isNodePlane(predicate.plane)) { + diagnostics.push({ field: 'predicate.plane', message: 'plane is not valid' }); + } + if (predicate.band !== undefined && !isReadinessBand(predicate.band)) { + diagnostics.push({ field: 'predicate.band', message: 'band is not valid' }); + } + if (predicate.nodeKind === undefined && predicate.band === undefined) { + diagnostics.push({ field: 'predicate', 
message: 'presence predicate needs nodeKind or band' }); + } + } +} + function validateCreateNode(input: CreateNodeInput): Diagnostic[] { const diagnostics: Diagnostic[] = []; @@ -489,28 +579,34 @@ function validateEdgePatch(patch: EdgePatch): Diagnostic[] { return diagnostics; } -function validateCreateElicitationBacklogEntry(input: CreateElicitationBacklogEntryInput): Diagnostic[] { +function validateCreateElicitationGap(input: CreateElicitationGapInput): Diagnostic[] { const diagnostics: Diagnostic[] = []; - if (!input.kind.trim()) { - diagnostics.push({ field: 'kind', message: 'kind must be non-empty' }); + if (!input.name.trim()) { + diagnostics.push({ field: 'name', message: 'name must be non-empty' }); } - if (!input.question.trim()) { - diagnostics.push({ field: 'question', message: 'question must be non-empty' }); + if (!input.rationale.trim()) { + diagnostics.push({ field: 'rationale', message: 'rationale must be non-empty' }); } if (input.basis !== undefined && !isNodeBasis(input.basis)) { diagnostics.push({ field: 'basis', message: 'basis must be explicit or implicit' }); } - if (!isReadinessBand(input.readinessBand)) { + if (!isReadinessBand(input.band)) { diagnostics.push({ - field: 'readinessBand', - message: `"${String(input.readinessBand)}" is not a valid readiness band`, + field: 'band', + message: `"${String(input.band)}" is not a valid readiness band`, }); } + if (input.importance !== undefined && (!Number.isInteger(input.importance) || input.importance < 1)) { + diagnostics.push({ field: 'importance', message: 'importance must be a positive integer' }); + } + + validateGapPredicate(input.predicate, diagnostics); + if (input.planeAffinity !== undefined && !isNodePlane(input.planeAffinity)) { diagnostics.push({ field: 'planeAffinity', @@ -518,7 +614,7 @@ function validateCreateElicitationBacklogEntry(input: CreateElicitationBacklogEn }); } - if (input.lensAffinity !== undefined && !isElicitationBacklogLensAffinity(input.lensAffinity)) { + if 
(input.lensAffinity !== undefined && !isElicitationGapLensAffinity(input.lensAffinity)) { diagnostics.push({ field: 'lensAffinity', message: `"${String(input.lensAffinity)}" is not a valid lens affinity`, @@ -674,15 +770,18 @@ export class CommandExecutor { return existing.nextOrdinal; } - private seedElicitationBacklog(tx: Pick, specId: number, lsn: number): void { - tx.insert(schema.elicitationBacklog) + private seedElicitationGaps(tx: Pick, specId: number, lsn: number): void { + tx.insert(schema.elicitationGaps) .values( - SEEDED_ELICITATION_BACKLOG.map((entry) => ({ + SEEDED_ELICITATION_GAPS.map((entry) => ({ spec_id: specId, - kind: entry.kind, - question: entry.question, + name: entry.name, + rationale: entry.rationale, basis: entry.basis, - readiness_band: entry.readinessBand, + readiness_band: entry.band, + predicate_kind: entry.predicate.kind, + predicate: JSON.stringify(entry.predicate), + importance: entry.importance, plane_affinity: entry.planeAffinity, lens_affinity: entry.lensAffinity, created_at_lsn: lsn, @@ -717,7 +816,7 @@ export class CommandExecutor { const lsn = this.createInitialSpecClock(tx, row!.id); - this.seedElicitationBacklog(tx, row!.id, lsn); + this.seedElicitationGaps(tx, row!.id, lsn); tx.insert(schema.changeLog) .values({ @@ -732,11 +831,9 @@ export class CommandExecutor { }); } - /** Create an elicitation-backlog entry through the command boundary. */ - createElicitationBacklogEntry( - input: CreateElicitationBacklogEntryInput, - ): CreateElicitationBacklogEntryResult { - const diagnostics = validateCreateElicitationBacklogEntry(input); + /** Create an elicitation gap through the command boundary. 
*/ + createElicitationGap(input: CreateElicitationGapInput): CreateElicitationGapResult { + const diagnostics = validateCreateElicitationGap(input); if (diagnostics.length > 0) { return { status: 'structural_illegal', diagnostics }; } @@ -754,21 +851,18 @@ export class CommandExecutor { }; } - if (input.aroseFromEntryId != null) { + if (input.aroseFromGapId != null) { const parent = tx - .select({ id: schema.elicitationBacklog.id, specId: schema.elicitationBacklog.spec_id }) - .from(schema.elicitationBacklog) - .where(eq(schema.elicitationBacklog.id, input.aroseFromEntryId)) + .select({ id: schema.elicitationGaps.id, specId: schema.elicitationGaps.spec_id }) + .from(schema.elicitationGaps) + .where(eq(schema.elicitationGaps.id, input.aroseFromGapId)) .get(); if (!parent) { return { status: 'structural_illegal' as const, diagnostics: [ - { - field: 'aroseFromEntryId', - message: `elicitation backlog entry ${input.aroseFromEntryId} does not exist`, - }, + { field: 'aroseFromGapId', message: `elicitation gap ${input.aroseFromGapId} does not exist` }, ], }; } @@ -778,10 +872,8 @@ export class CommandExecutor { status: 'structural_illegal' as const, diagnostics: [ { - field: 'aroseFromEntryId', - message: - `elicitation backlog entry ${input.aroseFromEntryId} belongs to a different spec ` + - `(command spec ${input.specId})`, + field: 'aroseFromGapId', + message: `elicitation gap ${input.aroseFromGapId} belongs to a different spec`, }, ], }; @@ -791,35 +883,38 @@ export class CommandExecutor { const lsn = this.bumpExistingSpecLsn(tx, input.specId); const entry = tx - .insert(schema.elicitationBacklog) + .insert(schema.elicitationGaps) .values({ spec_id: input.specId, - kind: input.kind.trim(), - question: input.question.trim(), + name: input.name.trim(), + rationale: input.rationale.trim(), basis: input.basis ?? 
'explicit', - readiness_band: input.readinessBand, + readiness_band: input.band, + predicate_kind: input.predicate.kind, + predicate: JSON.stringify(input.predicate), + importance: input.importance ?? 1, plane_affinity: input.planeAffinity ?? null, lens_affinity: input.lensAffinity ?? null, - arose_from_entry_id: input.aroseFromEntryId ?? null, - rationale: input.rationale ?? null, + arose_from_gap_id: input.aroseFromGapId ?? null, created_at_lsn: lsn, }) - .returning({ id: schema.elicitationBacklog.id }) + .returning({ id: schema.elicitationGaps.id }) .get(); tx.insert(schema.changeLog) .values({ spec_id: input.specId, lsn, - operation: 'create_elicitation_backlog_entry', + operation: 'create_elicitation_gap', payload: JSON.stringify({ id: entry!.id, specId: input.specId, - kind: input.kind.trim(), - readinessBand: input.readinessBand, + name: input.name.trim(), + band: input.band, + predicateKind: input.predicate.kind, planeAffinity: input.planeAffinity, lensAffinity: input.lensAffinity, - ...(input.aroseFromEntryId != null ? { aroseFromEntryId: input.aroseFromEntryId } : {}), + ...(input.aroseFromGapId != null ? { aroseFromGapId: input.aroseFromGapId } : {}), }), }) .run(); @@ -828,36 +923,40 @@ export class CommandExecutor { }); } - /** Close an elicitation-backlog entry through the command boundary. */ - closeElicitationBacklogEntry(input: CloseElicitationBacklogEntryInput): CloseElicitationBacklogEntryResult { + /** Set an elicitation gap's non-derivable disposition through the command boundary. 
*/ + setElicitationGapDisposition(input: SetElicitationGapDispositionInput): SetElicitationGapDispositionResult { + if (!isGapDisposition(input.disposition)) { + return { + status: 'structural_illegal', + diagnostics: [{ field: 'disposition', message: 'disposition is not valid' }], + }; + } + return this.db.transaction((tx) => { - const entry = tx + const gap = tx .select() - .from(schema.elicitationBacklog) - .where( - and( - eq(schema.elicitationBacklog.id, input.id), - eq(schema.elicitationBacklog.spec_id, input.specId), - ), - ) + .from(schema.elicitationGaps) + .where(and(eq(schema.elicitationGaps.id, input.id), eq(schema.elicitationGaps.spec_id, input.specId))) .get(); - if (!entry) { + if (!gap) { return { status: 'structural_illegal' as const, diagnostics: [ - { - field: 'id', - message: `elicitation backlog entry ${input.id} does not exist for spec ${input.specId}`, - }, + { field: 'id', message: `elicitation gap ${input.id} does not exist for spec ${input.specId}` }, ], }; } - if (entry.status === 'closed') { + if (input.disposition === 'answered' && gap.predicate_kind !== 'manual') { return { status: 'structural_illegal' as const, - diagnostics: [{ field: 'id', message: `elicitation backlog entry ${input.id} is already closed` }], + diagnostics: [ + { + field: 'disposition', + message: 'structural gap answered state is graph-derived, not hand-settable', + }, + ], }; } @@ -872,10 +971,7 @@ export class CommandExecutor { return { status: 'structural_illegal' as const, diagnostics: [ - { - field: 'resolvedByNodeId', - message: `node ${input.resolvedByNodeId} does not exist`, - }, + { field: 'resolvedByNodeId', message: `node ${input.resolvedByNodeId} does not exist` }, ], }; } @@ -886,9 +982,7 @@ export class CommandExecutor { diagnostics: [ { field: 'resolvedByNodeId', - message: - `node ${input.resolvedByNodeId} belongs to a different spec ` + - `(command spec ${input.specId})`, + message: `node ${input.resolvedByNodeId} belongs to a different spec`, }, 
], }; @@ -897,28 +991,24 @@ export class CommandExecutor { const lsn = this.bumpExistingSpecLsn(tx, input.specId); - tx.update(schema.elicitationBacklog) + tx.update(schema.elicitationGaps) .set({ - status: 'closed', + disposition: input.disposition, resolved_by_node_id: input.resolvedByNodeId ?? null, - closed_at_lsn: lsn, + disposition_set_at_lsn: lsn, }) - .where( - and( - eq(schema.elicitationBacklog.id, input.id), - eq(schema.elicitationBacklog.spec_id, input.specId), - ), - ) + .where(and(eq(schema.elicitationGaps.id, input.id), eq(schema.elicitationGaps.spec_id, input.specId))) .run(); tx.insert(schema.changeLog) .values({ spec_id: input.specId, lsn, - operation: 'close_elicitation_backlog_entry', + operation: 'set_elicitation_gap_disposition', payload: JSON.stringify({ id: input.id, specId: input.specId, + disposition: input.disposition, ...(input.resolvedByNodeId != null ? { resolvedByNodeId: input.resolvedByNodeId } : {}), }), }) diff --git a/src/graph/index.ts b/src/graph/index.ts index 4cce0fb2..7171f17f 100644 --- a/src/graph/index.ts +++ b/src/graph/index.ts @@ -22,10 +22,12 @@ export { READINESS_GRADES, READINESS_BANDS, LENS_AFFINITIES, - ELICITATION_BACKLOG_STATUSES, + GAP_DISPOSITIONS, + GAP_PREDICATE_KINDS, } from './schema/kinds.js'; export type { EdgeCategory, GraphEdge } from './schema/edges.js'; +export type { ElicitationGap, GapDisposition, GapPredicate } from './schema/elicitation-gaps.js'; export type { GraphNode, NodeKind, ReadinessBand } from './schema/nodes.js'; @@ -45,12 +47,7 @@ export type { AnchorRole, EdgeLabelInput } from './projection/labels.js'; export { edgeImpact, relationFromAnchor } from './projection/direction.js'; export type { AnchoredRelation, EdgeImpact, EdgeRelation } from './projection/direction.js'; -export { - queryGraph, - getNodes, - getOpenElicitationBacklogEntries, - getOpenReconciliationNeeds, -} from './queries.js'; +export { queryGraph, getNodes, getElicitationGaps, getOpenReconciliationNeeds } from 
'./queries.js'; export type { EdgeDirection, GraphSlice, diff --git a/src/graph/observed-shapes-coverage.test.ts b/src/graph/observed-shapes-coverage.test.ts index 3199c778..70f60542 100644 --- a/src/graph/observed-shapes-coverage.test.ts +++ b/src/graph/observed-shapes-coverage.test.ts @@ -55,8 +55,8 @@ const observedShapeLedger = [ web: 'deferred', }, { - shape: 'elicitation_backlog', - owner: 'getOpenElicitationBacklogEntries', + shape: 'elicitation_gaps', + owner: 'getElicitationGaps', tool: 'deferred', rpc: 'deferred', web: 'deferred', @@ -91,7 +91,7 @@ describe('graph observed-shape coverage ledger', () => { 'getGraphGaps', 'getRelatedNodes', 'getOpenReconciliationNeeds', - 'getOpenElicitationBacklogEntries', + 'getElicitationGaps', ]); }); diff --git a/src/graph/queries.test.ts b/src/graph/queries.test.ts index e127c13a..d46593fe 100644 --- a/src/graph/queries.test.ts +++ b/src/graph/queries.test.ts @@ -3,7 +3,7 @@ import { beforeEach, describe, expect, it } from 'vitest'; import { createDb, type BrunchDb } from '../db/connection.js'; import { graphClock, specs } from '../db/schema.js'; import { CommandExecutor } from './command-executor.js'; -import { getOpenElicitationBacklogEntries, getOpenReconciliationNeeds } from './queries.js'; +import { getElicitationGaps, getOpenReconciliationNeeds } from './queries.js'; import { NODE_KIND_METADATA, parseGraphNodeCode } from './schema/nodes.js'; import { runCreateOnlyMutation } from './test-support/create-only-mutation.js'; @@ -75,7 +75,7 @@ describe('getOpenReconciliationNeeds', () => { }); }); -describe('getOpenElicitationBacklogEntries', () => { +describe('getElicitationGaps', () => { let db: BrunchDb; let executor: CommandExecutor; let specId: number; @@ -89,40 +89,31 @@ describe('getOpenElicitationBacklogEntries', () => { specId = created.specId; }); - it('returns only open entries for the requested spec', () => { + it('returns gaps for the requested spec with live presence-derived coverage', () => { const 
other = executor.createSpec({ name: 'Other Spec', slug: 'other-spec' }); expect(other.status).toBe('success'); if (other.status !== 'success') throw new Error('unreachable'); - const created = executor.createElicitationBacklogEntry({ - specId, - kind: 'follow_on_question', - question: 'What evidence would prove this is working?', - readinessBand: 'elicitation', - planeAffinity: 'oracle', - lensAffinity: 'oracle', - }); - expect(created.status).toBe('success'); - if (created.status !== 'success') throw new Error('unreachable'); + const before = getElicitationGaps(db, specId).find((gap) => gap.name === 'domain')!; + expect(before.coverage).toBe(0); + expect(before.answered).toBe(false); + expect(before.disposition).toBe('open'); const resolvedNode = executor.createNode({ specId, plane: 'intent', - kind: 'goal', - title: 'Goal clarified', + kind: 'context', + title: 'Brunch is a local spec-workspace product', }); expect(resolvedNode.status).toBe('success'); - if (resolvedNode.status !== 'success') throw new Error('unreachable'); - - expect( - executor.closeElicitationBacklogEntry({ - specId, - id: created.id, - resolvedByNodeId: resolvedNode.nodeId, - }).status, - ).toBe('success'); - - expect(getOpenElicitationBacklogEntries(db, specId)).toHaveLength(4); - expect(getOpenElicitationBacklogEntries(db, other.specId)).toHaveLength(4); + + const after = getElicitationGaps(db, specId).find((gap) => gap.name === 'domain')!; + expect(after.coverage).toBe(1); + expect(after.answered).toBe(true); + expect(after.disposition).toBe('answered'); + + expect(getElicitationGaps(db, specId)).toHaveLength(8); + expect(getElicitationGaps(db, other.specId)).toHaveLength(8); + expect(getElicitationGaps(db, other.specId).find((gap) => gap.name === 'domain')!.answered).toBe(false); }); }); diff --git a/src/graph/queries.ts b/src/graph/queries.ts index 35590c85..c69e35b8 100644 --- a/src/graph/queries.ts +++ b/src/graph/queries.ts @@ -13,7 +13,7 @@ import type { BrunchDb } from 
'../db/connection.js'; import * as schema from '../db/schema.js'; import type { Lsn } from './atoms.js'; import type { EdgeCategory, GraphEdge } from './schema/edges.js'; -import type { ElicitationBacklogEntry } from './schema/elicitation-backlog.js'; +import type { ElicitationGap, GapDisposition, GapPredicate } from './schema/elicitation-gaps.js'; import { NODE_KIND_METADATA, parseGraphNodeCode, @@ -335,57 +335,91 @@ export function getOpenReconciliationNeeds(db: BrunchDb, specId: number): Reconc return rows.map(rowToReconNeed); } -function rowToElicitationBacklogEntry( - row: typeof schema.elicitationBacklog.$inferSelect, -): ElicitationBacklogEntry { - type MutableElicitationBacklogEntry = { - -readonly [K in keyof ElicitationBacklogEntry]: ElicitationBacklogEntry[K]; +function derivePresenceCoverage( + db: BrunchDb, + specId: number, + predicate: Extract, +): number { + const rows = db.select().from(schema.nodes).where(eq(schema.nodes.spec_id, specId)).all(); + const count = rows.filter((row) => { + if (predicate.plane !== undefined && row.plane !== predicate.plane) return false; + if (predicate.nodeKind !== undefined && row.kind !== predicate.nodeKind) return false; + if (predicate.band !== undefined) { + const metadata = NODE_KIND_METADATA[row.kind as NodeKind]; + if (!(metadata.readinessBands as readonly ReadinessBand[]).includes(predicate.band)) return false; + } + return true; + }).length; + return Math.min(1, count / predicate.minimum); +} + +function deriveGapCoverage( + db: BrunchDb, + specId: number, + predicate: GapPredicate, + disposition: GapDisposition, +): number { + if (disposition === 'not_applicable' || disposition === 'irrelevant' || disposition === 'answered') + return 1; + if (predicate.kind === 'presence') return derivePresenceCoverage(db, specId, predicate); + return 0; +} + +function rowToElicitationGap(db: BrunchDb, row: typeof schema.elicitationGaps.$inferSelect): ElicitationGap { + type MutableElicitationGap = { + -readonly [K in keyof 
ElicitationGap]: ElicitationGap[K]; }; - const entry: MutableElicitationBacklogEntry = { + const storedDisposition = row.disposition as GapDisposition; + const predicate = JSON.parse(row.predicate) as GapPredicate; + const coverage = deriveGapCoverage(db, row.spec_id, predicate, storedDisposition); + const answered = coverage >= 1; + const disposition = answered && storedDisposition === 'open' ? 'answered' : storedDisposition; + + const entry: MutableElicitationGap = { id: String(row.id), specId: row.spec_id, - kind: row.kind, - question: row.question, - status: row.status as ElicitationBacklogEntry['status'], - basis: row.basis as ElicitationBacklogEntry['basis'], - readinessBand: row.readiness_band as ElicitationBacklogEntry['readinessBand'], + name: row.name, + rationale: row.rationale, + disposition, + basis: row.basis as ElicitationGap['basis'], + band: row.readiness_band as ElicitationGap['band'], + predicate, + importance: row.importance, + coverage, + answered, createdAtLsn: row.created_at_lsn, }; if (row.plane_affinity != null) { - entry.planeAffinity = row.plane_affinity as NonNullable; + entry.planeAffinity = row.plane_affinity as NonNullable; } if (row.lens_affinity != null) { - entry.lensAffinity = row.lens_affinity as NonNullable; + entry.lensAffinity = row.lens_affinity as NonNullable; } - if (row.arose_from_entry_id != null) { - entry.aroseFromEntryId = String(row.arose_from_entry_id); + if (row.arose_from_gap_id != null) { + entry.aroseFromGapId = String(row.arose_from_gap_id); } if (row.resolved_by_node_id != null) { entry.resolvedByNodeId = row.resolved_by_node_id; } - if (row.rationale != null) { - entry.rationale = row.rationale; - } - - if (row.closed_at_lsn != null) { - entry.closedAtLsn = row.closed_at_lsn; + if (row.disposition_set_at_lsn != null) { + entry.dispositionSetAtLsn = row.disposition_set_at_lsn; } return entry; } -export function getOpenElicitationBacklogEntries(db: BrunchDb, specId: number): ElicitationBacklogEntry[] { +export 
function getElicitationGaps(db: BrunchDb, specId: number): ElicitationGap[] { const rows = db .select() - .from(schema.elicitationBacklog) - .where(and(eq(schema.elicitationBacklog.status, 'open'), eq(schema.elicitationBacklog.spec_id, specId))) - .orderBy(schema.elicitationBacklog.created_at_lsn, schema.elicitationBacklog.id) + .from(schema.elicitationGaps) + .where(eq(schema.elicitationGaps.spec_id, specId)) + .orderBy(schema.elicitationGaps.created_at_lsn, schema.elicitationGaps.id) .all(); - return rows.map(rowToElicitationBacklogEntry); + return rows.map((row) => rowToElicitationGap(db, row)); } diff --git a/src/graph/schema/elicitation-backlog.ts b/src/graph/schema/elicitation-backlog.ts deleted file mode 100644 index 47551cd8..00000000 --- a/src/graph/schema/elicitation-backlog.ts +++ /dev/null @@ -1,34 +0,0 @@ -/** - * Elicitation-backlog type definitions. - * - * Canonical reference: memory/SPEC.md D65-L - * - * The elicitation_backlog is the elicitor's prospective process-agenda register: - * open questions the user has not answered yet, seeded at spec creation and grown - * later by capture-reflection. It is a flat table, not a graph node/plane. 
- */ - -import type { Lsn, NodeId } from '../atoms.js'; -import { ELICITATION_BACKLOG_STATUSES, LENS_AFFINITIES } from './kinds.js'; -import type { NodeBasis, NodePlane, ReadinessBand } from './nodes.js'; - -type ElicitationBacklogStatus = (typeof ELICITATION_BACKLOG_STATUSES)[number]; - -export type ElicitationBacklogLensAffinity = (typeof LENS_AFFINITIES)[number]; - -export interface ElicitationBacklogEntry { - readonly id: string; - readonly specId: number; - readonly kind: string; - readonly question: string; - readonly status: ElicitationBacklogStatus; - readonly basis: NodeBasis; - readonly readinessBand: ReadinessBand; - readonly planeAffinity?: NodePlane; - readonly lensAffinity?: ElicitationBacklogLensAffinity; - readonly aroseFromEntryId?: string; - readonly resolvedByNodeId?: NodeId; - readonly rationale?: string; - readonly createdAtLsn: Lsn; - readonly closedAtLsn?: Lsn; -} diff --git a/src/graph/schema/elicitation-gaps.ts b/src/graph/schema/elicitation-gaps.ts new file mode 100644 index 00000000..b4e26cf5 --- /dev/null +++ b/src/graph/schema/elicitation-gaps.ts @@ -0,0 +1,62 @@ +/** + * Elicitation-gaps type definitions. + * + * Canonical reference: memory/SPEC.md D65-L + * + * The elicitation_gaps register is the elicitor's prospective coverage-obligation + * register: typed obligations seeded at spec creation and grown later by + * capture-reflection. It is a flat table, not a graph node/plane. Structural + * coverage is derived from the graph at read time, not stored here. 
+ */ + +import type { Lsn, NodeId } from '../atoms.js'; +import { GAP_DISPOSITIONS, GAP_PREDICATE_KINDS, LENS_AFFINITIES } from './kinds.js'; +import type { NodeBasis, NodeKind, NodePlane, ReadinessBand } from './nodes.js'; + +export type GapDisposition = (typeof GAP_DISPOSITIONS)[number]; +export type GapPredicateKind = (typeof GAP_PREDICATE_KINDS)[number]; + +export type ElicitationGapLensAffinity = (typeof LENS_AFFINITIES)[number]; + +export type GapPredicate = + | { + readonly kind: 'presence'; + readonly minimum: number; + readonly plane?: NodePlane; + readonly nodeKind?: NodeKind; + readonly band?: ReadinessBand; + } + | { + readonly kind: 'field'; + readonly nodeKind: NodeKind; + readonly field: string; + } + | { + readonly kind: 'coverage'; + readonly subjectKind: NodeKind; + readonly relation: string; + } + | { + readonly kind: 'manual'; + readonly rubric: string; + }; + +export interface ElicitationGap { + readonly id: string; + readonly specId: number; + readonly name: string; + readonly rationale: string; + readonly basis: NodeBasis; + readonly band: ReadinessBand; + readonly predicate: GapPredicate; + readonly importance: number; + readonly coverage: number; + readonly answered: boolean; + readonly disposition: GapDisposition; + readonly planeAffinity?: NodePlane; + readonly lensAffinity?: ElicitationGapLensAffinity; + readonly aroseFromGapId?: string; + readonly resolvedByNodeId?: NodeId; + readonly createdAtLsn: Lsn; + readonly dispositionSetAtLsn?: Lsn; +} diff --git a/src/graph/schema/kinds.ts b/src/graph/schema/kinds.ts index 62303f75..5974eac7 100644 --- a/src/graph/schema/kinds.ts +++ b/src/graph/schema/kinds.ts @@ -46,4 +46,6 @@ export const READINESS_BANDS = ['grounding', 'elicitation', 'commitment'] as con export const LENS_AFFINITIES = ['intent', 'design', 'oracle'] as const; -export const ELICITATION_BACKLOG_STATUSES = ['open', 'closed'] as const; +export const GAP_DISPOSITIONS = ['open', 'answered', 'not_applicable', 'irrelevant', 
'reopened'] as const; + +export const GAP_PREDICATE_KINDS = ['presence', 'field', 'coverage', 'manual'] as const; From 6c37f1b52432f3e7786472682cda391428b7adc1 Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 10 Jun 2026 12:33:52 +0200 Subject: [PATCH 2/4] Add JIT capability-readiness gate over elicitation gaps Introduce the D74-L gate (src/projections/session/capability-readiness.ts): an explicit capability -> relevant-gaps map and evaluateCapabilityReadiness returning proceed / proceed_low_epistemic / negotiate(EstablishmentOffer), driven by live gap coverage with no stored readiness grade. Read-only judgment; never refuses outright (I31-L). Coexists with the existing grade gating for now. Reconcile SPEC/PLAN and projections README. Deferred follow-ons (capability-readiness frontier): readiness-estimate projection, MIN_GRADE consumer rewire, stored-grade deletion, and chrome.phase/chatMode removal. --- memory/PLAN.md | 1 + memory/SPEC.md | 7 +- src/projections/README.md | 1 + .../session/capability-readiness.test.ts | 147 ++++++++++++++++++ .../session/capability-readiness.ts | 90 +++++++++++ 5 files changed, 245 insertions(+), 1 deletion(-) create mode 100644 src/projections/session/capability-readiness.test.ts create mode 100644 src/projections/session/capability-readiness.ts diff --git a/memory/PLAN.md b/memory/PLAN.md index cb8437ca..5795eb52 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -223,6 +223,7 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - **Cross-cutting obligations:** Readiness never bars graph truth or work (I31-L); `CommandExecutor` must not reject a node for a later-band kind (D64-L). The deferred milestone gate for export/plan/execute op-modes stays deferred (D45-L). Replace grade-gate tests across `compose.test.ts` / `prompting.test.ts` and createSpec/getSpec rather than preserving them. 
- **Traceability:** D25-L, D30-L, D32-L, D45-L, D57-L, D58-L, D59-L, D64-L, D65-L, D73-L, D74-L / A27-L / I25-L, I31-L. Supersedes stored-grade gating and the `chrome.phase` / `chrome.chatMode` fields. - **Design docs:** `memory/SPEC.md` D45-L / D74-L; `src/projections/session/runtime-policy.ts`; `src/projections/workspace/workspace-state.ts`. +- **Current execution pointer:** D74-L JIT gate tracer done 2026-06-10 via `memory/cards/capability-readiness--jit-gate.md`: explicit capability→grounding-gap map, proceed / low-epistemic / negotiate outcome, live presence-coverage flip, no grade-symbol import. Deferred follow-ons remain to re-scope because their shape depends on the gate interface: readiness-estimate projection, consumer rewire off `MIN_GRADE`, stored-grade deletion, `chrome.phase`/`chatMode` removal. ### runtime-vocab-leaf diff --git a/memory/SPEC.md b/memory/SPEC.md index 081d62ec..54106adb 100644 --- a/memory/SPEC.md +++ b/memory/SPEC.md @@ -120,8 +120,13 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c | A22-L | The elicitor can perform synchronous post-exchange capture well enough for the POC: high-confidence extractive facts can be committed to the graph immediately and gap dispositions updated, while low-confidence implications can be kept out of graph truth and used as disambiguation material. | medium | partially validated | D18-L, D26-L, D45-L, D65-L, I30-L | 2026-06-05 `capture-response-to-graph` validated the product wiring for narrow labeled text facts (`Goal:`, `Context:`, `Constraint:`, `Criterion:`) on `session.submitExchangeResponse`. 
2026-06-07 generalized the same explicit-text capture core onto `session.submitMessage`: ordinary labeled user text now appends to transcript truth, commits through `graph/capture` → `CommandExecutor.mutateGraph({createBasis: explicit, ops})`, targets the transcript binding's spec, and publishes graph invalidations; explicit interruptions are transcript-visible but do not capture or silently answer a pending exchange. 2026-06-08 `capture-quality-spike` added a fixed scenario measurement over free prose, file/ref-bearing prose, and implication-heavy prose; the sample extraction report reached precision 1.0 / recall 1.0 with zero false commits, moving generalized capture from parked evidence-gate to a narrow graduate recommendation with an explicit false-commit guard. Readiness-grade capture remains open fitness evidence. | | A24-L | A flat `elicitation_gaps` table (prospective memory) is sufficient to drive elicitor questioning, seed grounding, and feed capability-readiness without graph structure — gaps are typed coverage obligations (typologies), not graph nodes; apparent dependency among gaps is mediated by the claims their resolution produces. | medium | validated | D65-L, D74-L | 2026-06-08 FE-823 materialized the flat table (built as `elicitation_backlog`) on the real LSN/change-log seam. 2026-06-10 `elicitation-gaps-remodel` replaced that question-instance shape with the typed obligation register (`name`/`rationale`/band/predicate/importance/disposition), regenerated the table as `elicitation_gaps`, seeded the grounding typology catalog, and proved live presence-derived coverage/answered read-back without stored structural answers. Remaining downstream proof is capability-readiness over the register (D74-L) and capture-reflection spawning; if genuine gap→gap dependency or rich traversal emerges, promote the table to a plane (rows→nodes, FK pointers→edges). 
| | A25-L | Tracking the latest `pi-coding-agent` release continuously (via source-alias in dev + package dependency bumps) keeps Brunch adaptable without routinely destabilizing it, because Brunch's pi product-behavior surface is concentrated in a few sealed integration seams (the `src/.pi/` extension bundle and the session/runtime adapters) behind the D39-L profile — even though pi *types* are imported across ~25 files, those are mostly type-only and pass through that small set of seams. | medium | partially validated | D67-L | 2026-06-09 FE-825 bumped Brunch to pi 0.79, kept type/default resolution on installed `dist`, added a `PI_SOURCE`-gated vite/vitest runtime alias to sibling `pi-mono` source, preserved product default sealed-profile/offline behavior, and passed `npm run verify`. Each later pi bump that lands without product-behavior regressions raises confidence; a bump that silently breaks sealed-profile assumptions falsifies it. | +<<<<<<< HEAD | A26-L | The refined "conversational introspection" goal can be built as a *read-only session-query-back tool*: under `BRUNCH_DEV`, the agent can call `brunch_session_query` over `ctx.sessionManager.getBranch()`, find entries by predicate, project capped dot/`[n]`/`[*]` paths, and surface exact returned values in chat without weakening D39-L sealing or turning self-reporting into product behavior. | medium | validated | D69-L, D71-L | 2026-06-09 `dx-introspection-live` slice 2 replaced the earlier fixed structured self-report/schema idea with `src/.pi/extensions/session-query/`: a dev-gated read-only tool registered only through `createBrunchPiExtensions(..., { introspection: { enabled } })`, covered by find/project/truncation unit tests, default-off/default-on registration tests, and a faux turn that returns verbatim projected session values. Live-model compliance with "call then echo verbatim" remains outer-loop fitness, not a merge gate. 
| | A27-L | Gap satisfaction is expressible band-by-band at acceptable LLM cost: **commitment** typologies are structural `presence`/`field`/`coverage` predicates over the graph; **grounding** typologies are a `presence` floor plus `manual` LLM satisficiency (D57-L); **elicitation** typologies are generatively spawned. The explicit `capability → relevant gaps` map (D74-L) carries enough signal to drive proceed / negotiate without a standing grade. | medium | partially validated | D65-L, D74-L | 2026-06-10 `elicitation-gaps-remodel` validated the structural `presence` case: a seeded grounding gap's derived coverage/answered state flips from graph truth with no stored structural answer and sibling-spec isolation holds. Remaining proof: D74-L capability-readiness tracer, `field`/`coverage` predicate derivation, `manual` LLM satisficiency, and elicitation/commitment fixtures. Falsified if grounding readiness cannot decompose into per-typology presence+manual judgments, or if commitment obligations need logic the predicate union can't express. | +======= +| A26-L | The "conversational introspection" goal — the in-product agent reporting, in chat, on what tools sent/returned, how understandable inputs/outputs were, errors/uncertainty it hit, and how cleanly a skill activated — can be built as a *read-only* extension of the D69-L tap (adding `tool_call`/`tool_result` observation) plus a small structured self-report schema and an in-chat surface, **without** weakening D39-L sealing or making the agent's self-report a product behavior. | medium | open | D69-L | Prove with a dev-gated slice: pi's `tool_call`/`tool_result` hooks can observe tool I/O and errors read-only; the agent can emit a parseable self-report (not free prose) on demand; and the report can render back into the conversation paired to the same scratch run — all behind `BRUNCH_DEV`. 
Risk: getting a reliable *structured* self-report rather than narration, and choosing the in-chat-vs-artifact surface, are the open unknowns. | +| A27-L | Gap satisfaction is expressible band-by-band at acceptable LLM cost: **commitment** typologies are structural `presence`/`field`/`coverage` predicates over the graph; **grounding** typologies are a `presence` floor plus `manual` LLM satisficiency (D57-L); **elicitation** typologies are generatively spawned. The explicit `capability → relevant gaps` map (D74-L) carries enough signal to drive proceed / negotiate without a standing grade. | medium | partially validated | D65-L, D74-L | 2026-06-10 `elicitation-gaps-remodel` validated the structural `presence` case: a seeded grounding gap's derived coverage/answered state flips from graph truth with no stored structural answer and sibling-spec isolation holds. 2026-06-10 `capability-readiness--jit-gate` validated the D74-L tracer for the grounding floor: the explicit capability→gap map drives proceed / proceed_low_epistemic / negotiate, live presence coverage flips a generative capability negotiate→proceed, and the gate imports no grade symbols. Remaining proof: `field`/`coverage` predicate derivation, `manual` LLM satisficiency, elicitation/commitment fixtures, and rewiring consumers off grade thresholds. Falsified if grounding readiness cannot decompose into per-typology presence+manual judgments, or if commitment obligations need logic the predicate union can't express. 
| +>>>>>>> f6cd213e (Add JIT capability-readiness gate over elicitation gaps) ### Active Decisions @@ -301,7 +306,7 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c | I28-L | Auto-compaction output preserves the configured anchor set byte-stable: every entry kind listed in [src/.pi/extensions/compaction/index.ts](file:///Users/lunelson/Code/hashintel/brunch-next/src/.pi/extensions/compaction/index.ts) is reconstructable post-compaction according to its `select` rule (`first | latest | active-leaves | all-unresolved`); LLM-generated narrative summary never replaces or rephrases preserved-anchor content; extension failure falls through to Pi default compaction rather than dropping anchors silently. | planned (compaction round-trip property tests at M9 plus inner-loop anchor-rendering unit tests and TypeBox schema validation of the anchor contract) | D43-L; R15, R13; I3-L, I4-L, I8-L, I12-L | | I29-L | Subagent subprocesses inherit Brunch Pi Profile sealing: every `subagent` tool invocation spawns `pi --mode json -p --no-session --no-skills --no-extensions` with an explicit per-agent tool allowlist and per-agent model; subagents never load ambient user/project `.pi/` skills, prompts, themes, extensions, context files, or behavior-shaping settings; subagents never gain direct access to the parent's `CommandExecutor`, Brunch RPC handlers, or graph persistence; subagent results return to the main agent only as tool result content (no side-effect transcript writes). 
| planned (subagent subprocess argv tests; isolation audit asserting absent ambient-resource leakage; tool-allowlist conformance test per starter agent) | D2-L, D39-L, D44-L; I2-L, I11-L, I24-L | | I30-L | Elicitor post-exchange capture only commits high-confidence extractive facts, concrete reconciliation needs, and justified `elicitation_gaps` disposition updates (D65-L); low-confidence implications remain in structured-exchange preface/question material and do not become graph truth until clarified, accepted, or explicitly escalated. | partially covered (`src/graph/capture/structured-response.test.ts` accepts only directly labeled text facts for the current tracer, rejects implication-only prose as `no_capture`, preserves structural diagnostics, `src/probes/capture-response-to-graph-proof.test.ts` proves public RPC response capture into selected-spec graph truth, and `src/probes/submit-message-capture-proof.test.ts` proves the same explicit-text capture path for ordinary `session.submitMessage` turns; reconciliation-needs and gap-disposition capture remain planned) | D18-L, D47-L, D65-L; A22-L | -| I31-L | Readiness never bars graph truth or work; it is just-in-time capability-readiness over relevant gaps, not a stored grade or kind whitelist. There is no `readiness_grade` scalar; capability availability is judged on request against the relevant `elicitation_gaps` (D74-L) and may proceed, proceed at low epistemic status, or negotiate — it never refuses outright. The `CommandExecutor` must not reject a graph node solely because its kind belongs to a later readiness band (D64-L). The soft `readiness estimate` (D45-L) is UI-only and gates nothing. 
| partially covered (current `createSpec` / `getSpec` / `updateReadinessGrade` and `compose.test.ts` grade-gate tests predate the D45-L/D74-L remodel and will be replaced; JIT capability-readiness + readiness-estimate coverage is planned) | D20-L, D45-L, D64-L, D74-L | +| I31-L | Readiness never bars graph truth or work; it is just-in-time capability-readiness over relevant gaps, not a stored grade or kind whitelist. There is no `readiness_grade` scalar; capability availability is judged on request against the relevant `elicitation_gaps` (D74-L) and may proceed, proceed at low epistemic status, or negotiate — it never refuses outright. The `CommandExecutor` must not reject a graph node solely because its kind belongs to a later readiness band (D64-L). The soft `readiness estimate` (D45-L) is UI-only and gates nothing. | partially covered (`src/projections/session/capability-readiness.test.ts` covers the D74-L tracer gate, including proceed / proceed_low_epistemic / negotiate, no-refusal, no grade-symbol import, and a live `presence` coverage flip; current `createSpec` / `getSpec` / `updateReadinessGrade`, prompt composition, and affordance tests still predate the full D45-L/D74-L remodel and will be replaced by follow-on consumer rewire / grade deletion / readiness-estimate coverage) | D20-L, D45-L, D64-L, D74-L | | I32-L | Public RPC structured-exchange driving never requires a client to speak raw Pi RPC: after Brunch method discovery and workspace/spec/session activation, each pending assistant-originated exchange is answered exactly once through `session.submitExchangeResponse`, and the deterministic permutation run produces linear Pi JSONL whose structured exchange projection preserves the same prompt/answer/status/comment artifacts as the equivalent TUI structured-exchange path. 
| covered for deterministic FE-744 parity under canonical session method names (`session.triggerExchange`, `session.pendingExchange`, `session.submitExchangeResponse`, `session.exchanges`): `rpc.discover` contract tests, pending/respond lifecycle tests, current public-RPC structured-exchange permutations, terminal non-answered status handling, option content/rationale parity, no repeated deterministic prompts, and transcript/exchange parity assertions. | D5-L, D48-L, D49-L; I10-L, I13-L, I21-L, I23-L | | I33-L | `capture_*` analysis entries are transcript evidence only: they persist as Brunch structured-exchange `toolResult` rows, are included by Brunch-semantic transcript renderers, are hidden or collapsed in TUI display, and never mutate graph truth or bypass `CommandExecutor`. | partially covered (minimum capture details schemas parse/export and reject graph payload fields; future runtime capture-analysis schema/rendering tests plus transcript renderer fixtures still need to prove persisted result rendering and TUI hide/collapse behavior; later graph-capture fixtures compare analysis candidates against committed graph mutations) | D17-L, D18-L, D37-L, D47-L, D50-L; I2-L, I11-L, I23-L, I30-L | | I34-L | `mutateGraph` batch validation is all-or-nothing: if any node or edge in the batch is structurally illegal, the entire batch is rejected and no partial state is persisted; the agent receives diagnostics sufficient for bounded self-correction retry. 
| covered (`command-executor/commit-graph-batch.test.ts` and graph-tool adapter tests cover dry-run/commit diagnostic parity for invalid basis, missing refs/codes, invalid category/stance, self-loop, invalid node kind/detail shape, rollback of nodes/edges/change_log/counters, transaction-local planning before LSN allocation/writes, and structured adapter diagnostics without thrown projected-code errors or fake endpoint refs) | D53-L; I1-L, I11-L | diff --git a/src/projections/README.md b/src/projections/README.md index 6fadb6cb..efd78bd7 100644 --- a/src/projections/README.md +++ b/src/projections/README.md @@ -28,6 +28,7 @@ Disposition: `✓` locked · `●` keep + lock (earns place, needs invariant) · | `session/transcript-context` | 2 | ● | Real transform: filters session entries + Pi-SDK convert. Invariant: no non-empty transcript entry dropped. Consumes the Pi SDK (external trust boundary), not a PULL surface we own. | | `session/runtime-state` | 13 | ● | Most-consumed projection; flattens runtime state. Direct flattened-shape invariant guards the field set every consumer relies on. | | `session/affordances` | 1 | ✓ | `affordances.test.ts` — legality + default-on-switch derivation tested directly. | +| `session/capability-readiness` | 0 | ✓ | D74-L tracer gate, not a reusable DTO. `capability-readiness.test.ts` locks the explicit capability→grounding-gap map, proceed / low-epistemic / negotiate outcomes, no-refusal invariant, and live presence-coverage flip. Consumer rewire remains deferred by the active scope card. | | `session/runtime-policy` | 4 | ○ | Policy/definitions data, not a DTO transform. Legality source already guarded via `affordances.test.ts` + `.pi` state tests. | | `workspace/workspace-context` | 1 | ✗ | Pure `{ mode, data }` tag wrapper — zero transform, single consumer (`.pi/extensions/context/get-cwd.ts`). Source `session/workspace-context.ts` already exports the shapes + `inspect*` and can feed the consumer directly. Delete / inline. 
| | `workspace/workspace-state` | 4 | ● | Real flatten of the `WorkspaceSessionState` union to a narrow DTO. Shape invariant across status variants (`ready` / `needs_human` / base). | diff --git a/src/projections/session/capability-readiness.test.ts b/src/projections/session/capability-readiness.test.ts new file mode 100644 index 00000000..a408ca19 --- /dev/null +++ b/src/projections/session/capability-readiness.test.ts @@ -0,0 +1,147 @@ +import { readFileSync } from 'node:fs'; +import { fileURLToPath } from 'node:url'; + +import { describe, expect, it } from 'vitest'; + +import { createDb, type BrunchDb } from '../../db/connection.js'; +import { CommandExecutor } from '../../graph/command-executor.js'; +import { getElicitationGaps } from '../../graph/queries.js'; +import type { ElicitationGap } from '../../graph/schema/elicitation-gaps.js'; +import { + CAPABILITY_RELEVANT_GAPS, + evaluateCapabilityReadiness, + type CapabilityReadinessOutcome, +} from './capability-readiness.js'; + +function gap(overrides: Partial & Pick): ElicitationGap { + return { + id: overrides.id ?? overrides.name, + specId: overrides.specId ?? 1, + name: overrides.name, + rationale: overrides.rationale ?? `${overrides.name} rationale`, + basis: overrides.basis ?? 'implicit', + band: overrides.band ?? 'grounding', + predicate: overrides.predicate ?? { kind: 'presence', minimum: 1, band: 'grounding' }, + importance: overrides.importance ?? 1, + coverage: overrides.coverage, + answered: overrides.answered ?? overrides.coverage >= 1, + disposition: overrides.disposition ?? (overrides.coverage >= 1 ? 'answered' : 'open'), + createdAtLsn: overrides.createdAtLsn ?? 
1, + }; +} + +function expectOutcomeStatus( + outcome: CapabilityReadinessOutcome, + status: CapabilityReadinessOutcome['status'], +): void { + expect(outcome.status).toBe(status); +} + +function createTestDb(): BrunchDb { + return createDb(':memory:'); +} + +describe('capability readiness over elicitation gaps', () => { + it('enumerates relevant gap typologies per requested capability', () => { + expect(CAPABILITY_RELEVANT_GAPS).toEqual({ + 'generative-lens': ['domain', 'protagonist', 'pain_pull', 'constraint'], + 'propose-graph': ['domain', 'protagonist', 'pain_pull', 'constraint'], + 'project-graph': ['domain', 'protagonist', 'pain_pull', 'constraint'], + 'commitment-review': ['domain', 'protagonist', 'pain_pull', 'constraint'], + }); + }); + + it('proceeds when all relevant gaps are covered', () => { + const outcome = evaluateCapabilityReadiness('propose-graph', [ + gap({ name: 'domain', coverage: 1 }), + gap({ name: 'protagonist', coverage: 1 }), + gap({ name: 'pain_pull', coverage: 1 }), + gap({ name: 'constraint', coverage: 1 }), + ]); + + expect(outcome).toEqual({ status: 'proceed' }); + }); + + it('negotiates with establishment-offer-shaped missing gaps when relevant grounding gaps are uncovered', () => { + const outcome = evaluateCapabilityReadiness('project-graph', [ + gap({ name: 'domain', coverage: 1 }), + gap({ name: 'protagonist', coverage: 0 }), + gap({ name: 'pain_pull', coverage: 0 }), + gap({ name: 'constraint', coverage: 1 }), + ]); + + expect(outcome.status).toBe('negotiate'); + if (outcome.status !== 'negotiate') return; + expect(outcome.offer.kind).toBe('establishment_offer'); + expect(outcome.offer.missingGaps.map((missing) => missing.name)).toEqual(['protagonist', 'pain_pull']); + expect(outcome.offer.message).toContain('I can try, but'); + }); + + it('proceeds at low epistemic status when relevant gaps have only partial coverage', () => { + const outcome = evaluateCapabilityReadiness('generative-lens', [ + gap({ name: 'domain', coverage: 1 
}), + gap({ name: 'protagonist', coverage: 0.5 }), + gap({ name: 'pain_pull', coverage: 1 }), + gap({ name: 'constraint', coverage: 1 }), + ]); + + expect(outcome).toEqual({ status: 'proceed_low_epistemic', coverage: 0.875 }); + }); + + it('moves from negotiate to proceed when live presence coverage fills the grounding floor', () => { + const db = createTestDb(); + const executor = new CommandExecutor(db); + const created = executor.createSpec({ name: 'Readiness Spec', slug: 'readiness-spec' }); + expect(created.status).toBe('success'); + if (created.status !== 'success') throw new Error('unreachable'); + + expectOutcomeStatus( + evaluateCapabilityReadiness('propose-graph', getElicitationGaps(db, created.specId)), + 'negotiate', + ); + + for (const [kind, title] of [ + ['context', 'Local product spec workspace'], + ['goal', 'Help a builder co-author a graph-native spec'], + ['assumption', 'Current planning is too prose-heavy'], + ['constraint', 'Runs locally over the Pi harness'], + ] as const) { + const result = executor.createNode({ specId: created.specId, plane: 'intent', kind, title }); + expect(result.status).toBe('success'); + } + + expectOutcomeStatus( + evaluateCapabilityReadiness('propose-graph', getElicitationGaps(db, created.specId)), + 'proceed', + ); + }); + + it('never returns a refusal outcome and does not import grade-gate symbols', () => { + const outcomes = [ + evaluateCapabilityReadiness('propose-graph', []), + evaluateCapabilityReadiness('propose-graph', [ + gap({ name: 'domain', coverage: 0.25 }), + gap({ name: 'protagonist', coverage: 1 }), + gap({ name: 'pain_pull', coverage: 1 }), + gap({ name: 'constraint', coverage: 1 }), + ]), + evaluateCapabilityReadiness('propose-graph', [ + gap({ name: 'domain', coverage: 1 }), + gap({ name: 'protagonist', coverage: 1 }), + gap({ name: 'pain_pull', coverage: 1 }), + gap({ name: 'constraint', coverage: 1 }), + ]), + ]; + + expect(outcomes.map((outcome) => outcome.status)).toEqual([ + 'negotiate', + 
'proceed_low_epistemic', + 'proceed', + ]); + expect(outcomes.map((outcome) => outcome.status)).not.toContain('refuse'); + + const sourcePath = fileURLToPath(new URL('./capability-readiness.ts', import.meta.url)); + const source = readFileSync(sourcePath, 'utf8'); + expect(source).not.toMatch(/ReadinessGrade|GRADE_RANK|MIN_GRADE/); + }); +}); diff --git a/src/projections/session/capability-readiness.ts b/src/projections/session/capability-readiness.ts new file mode 100644 index 00000000..dfad5da3 --- /dev/null +++ b/src/projections/session/capability-readiness.ts @@ -0,0 +1,90 @@ +import type { ElicitationGap } from '../../graph/schema/elicitation-gaps.js'; + +export type CapabilityId = 'generative-lens' | 'propose-graph' | 'project-graph' | 'commitment-review'; + +type RelevantGapName = 'domain' | 'protagonist' | 'pain_pull' | 'constraint'; + +export const CAPABILITY_RELEVANT_GAPS: Record = { + 'generative-lens': ['domain', 'protagonist', 'pain_pull', 'constraint'], + 'propose-graph': ['domain', 'protagonist', 'pain_pull', 'constraint'], + 'project-graph': ['domain', 'protagonist', 'pain_pull', 'constraint'], + 'commitment-review': ['domain', 'protagonist', 'pain_pull', 'constraint'], +}; + +interface CapabilityMissingGap { + readonly id: string; + readonly name: string; + readonly rationale: string; + readonly coverage: number; +} + +interface EstablishmentOffer { + readonly kind: 'establishment_offer'; + readonly message: string; + readonly missingGaps: readonly CapabilityMissingGap[]; +} + +export type CapabilityReadinessOutcome = + | { readonly status: 'proceed' } + | { readonly status: 'proceed_low_epistemic'; readonly coverage: number } + | { readonly status: 'negotiate'; readonly offer: EstablishmentOffer }; + +export function evaluateCapabilityReadiness( + capability: CapabilityId, + gaps: readonly ElicitationGap[], +): CapabilityReadinessOutcome { + const relevantGaps = relevantGapRecords(capability, gaps); + const missing = relevantGaps.filter((gap) => 
gap.coverage <= 0); + if (missing.length > 0) { + return { + status: 'negotiate', + offer: { + kind: 'establishment_offer', + message: `I can try, but answering ${formatGapList(missing)} first would make this materially safer.`, + missingGaps: missing.map((gap) => ({ + id: gap.id, + name: gap.name, + rationale: gap.rationale, + coverage: gap.coverage, + })), + }, + }; + } + + const coverage = relevantGaps.length === 0 ? 0 : average(relevantGaps.map((gap) => gap.coverage)); + if (coverage >= 1) return { status: 'proceed' }; + return { status: 'proceed_low_epistemic', coverage }; +} + +function relevantGapRecords( + capability: CapabilityId, + gaps: readonly ElicitationGap[], +): readonly ElicitationGap[] { + const relevantNames = CAPABILITY_RELEVANT_GAPS[capability]; + return relevantNames.map((name) => gaps.find((gap) => gap.name === name) ?? missingGap(name)); +} + +function missingGap(name: RelevantGapName): ElicitationGap { + return { + id: `missing:${name}`, + specId: 0, + name, + rationale: `Missing seeded grounding gap: ${name}`, + basis: 'implicit', + band: 'grounding', + predicate: { kind: 'presence', minimum: 1, band: 'grounding' }, + importance: 1, + coverage: 0, + answered: false, + disposition: 'open', + createdAtLsn: 0, + }; +} + +function average(values: readonly number[]): number { + return values.reduce((total, value) => total + value, 0) / values.length; +} + +function formatGapList(gaps: readonly ElicitationGap[]): string { + return gaps.map((gap) => gap.name).join(', '); +} From 2eebb4ee45bb30ef9a3ca6c0b366fe3c499042cb Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 10 Jun 2026 15:06:37 +0200 Subject: [PATCH 3/4] question catalog re-discussion --- docs/README.md | 1 + docs/design/ELICITATION_QUESTIONS.md | 238 ++++++++++++++++++ memory/PLAN.md | 50 +++- memory/SPEC.md | 18 +- ...ode-kind-reference--node-kind-reference.md | 122 +++++++++ 5 files changed, 407 insertions(+), 22 deletions(-) create mode 100644 docs/design/ELICITATION_QUESTIONS.md 
create mode 100644 memory/cards/gaps-node-kind-reference--node-kind-reference.md diff --git a/docs/README.md b/docs/README.md index 5c5c9e15..6516b8ce 100644 --- a/docs/README.md +++ b/docs/README.md @@ -38,6 +38,7 @@ Older brief-library examples were retired; future behavioral-kernel evidence sho ## Horizon design notes - [`docs/design/SPEC_INITIATIVE_MODEL.md`](../design/SPEC_INITIATIVE_MODEL.md) — working design proposal for spec as initiative/problem lifecycle, claim as truth-bearing unit, projected current truth, and repo-native branching/merge implications for planning data. +- [`docs/design/ELICITATION_QUESTIONS.md`](../design/ELICITATION_QUESTIONS.md) — priming catalog of elicitation questions organized by graph node kind; the questions are projectable examples that feed the elicitor agent, anchoring gaps on the node-kind ontology rather than a parallel typology vocabulary. ## Working conventions diff --git a/docs/design/ELICITATION_QUESTIONS.md b/docs/design/ELICITATION_QUESTIONS.md new file mode 100644 index 00000000..cbcc29a9 --- /dev/null +++ b/docs/design/ELICITATION_QUESTIONS.md @@ -0,0 +1,238 @@ +# Elicitation Question Catalog + +Companion to [`GRAPH_MODEL.md`](GRAPH_MODEL.md) (§Per-plane node kinds, §Prompting) and +[`ELICITATION_LENSES.md`](ELICITATION_LENSES.md). Rationale and texture, not authority — +`memory/SPEC.md` remains the canonical register (D56-L, D65-L). + +## What this is — and what it is not + +This is a **priming catalog for the elicitor agent**, organized by graph node kind. Each row +is phrased as a question, but the questions are **examples, not a schema**. The agent does not +read them off a list; it **projects from the general to the specific** according to its current +strategy (lens), the spec's grounding density, and what the user just said. + +The load-bearing idea from the thread that produced this doc: + +> **The node kind is the closed ontology. 
Questions are the open, projectable layer *inside* a kind.** + +"Who is it for" and "who are the stakeholders" are both `thesis` questions — not two new types. +Adding more questions never adds ontology; it adds priming for an existing kind. A spec +elicitation gap is therefore modelled as a **situated question that refers to a graph node kind**, +not as an entry in a parallel "typology" vocabulary: + +``` +elicitation_gap = ⟨ question (free text), refersTo: NodeKind, band, satisfier, disposition ⟩ +``` + +Every intent kind already ships a canonical **source-question** (GRAPH_MODEL §Per-plane node +kinds, D56-L) described there as *"the abstract driver — not a literal question to parrot, but a +heuristic for what kind of material the node captures."* This catalog expands each driver into a +fan of facets and example phrasings. + +### Three guardrails + +1. **Examples, not enum.** Nothing here is a closed set or a stored value. These prime + projection; they are not persisted as gap names or domain content. +2. **Anti-shadowing.** The catalog lives in prompt/heuristic space. A gap row stores the + *projected* question and the kind it refers to — never the catalog text, never domain content. +3. **Band-gated.** The `band` on each kind (grounding → elicitation → commitment) sequences when + its questions become live. Grounding intent questions open a spec; structural, reasoning, + oracle, design, and plan questions activate as readiness advances. + +The four-anchor "grounding bundle" in ELICITATION_LENSES (Domain / Protagonist / Pain-pull / +Constraint) is the same idea seen at lower resolution: those anchors are facets of `context`, +`thesis`, `goal`, and `constraint`. This catalog generalizes them back onto the kind layer so +there is **one ontology**, not two. 
+ +--- + +## Intent plane · basic (grounding band — opens the spec) + +### `goal` — value or outcome claim +*Source question:* **What outcome are we after?** +*Activating concepts:* outcomes-over-output, jobs-to-be-done, value proposition, payoff, North-Star metric. + +| What it may answer | Example question forms | +| --- | --- | +| the win / desired outcome | What's the win? What does success unlock? What outcome are we chasing? | +| the job it's hired to do | What job does the user hire this to do? What were they doing before? | +| value created | What's the payoff? What's better once this ships? Who benefits and how? | +| the measure of value | What would tell us it worked? What number should move? | + +### `thesis` — position or bet claim +*Source question:* **Who is this for, and why?** +*Activating concepts:* stakeholders, target user / persona, unique value proposition (UVP), positioning, "the bet", problem statement, jobs-to-be-done audience. + +| What it may answer | Example question forms | +| --- | --- | +| whom it's for | Who is the primary user? Who is it *not* for? | +| who the stakeholders are | Who are the stakeholders? Who else is affected, funds it, or signs off? | +| stakeholder beliefs / needs | What does each stakeholder believe they need? Where do they disagree? | +| why we're doing it | Why now? What pull or pain makes this worth doing? | +| what we think it accomplishes | What do we believe it changes for these people? | +| what we think the UVP is | What's the unique value here vs alternatives? Why this and not the obvious substitute? | + +### `term` — naming commitment +*Source question:* **What do we mean when we say X?** +*Activating concepts:* **ubiquitous language** (DDD), glossary, bounded-context vocabulary, conceptual integrity, lexicon (see `memory/SPEC.md` §Lexicon). + +| What it may answer | Example question forms | +| --- | --- | +| canonical definitions | What exactly do we mean by «key word»? 
| +| jargon to pin down | Is there domain jargon a newcomer wouldn't know? | +| one-word-two-meanings | Are we using one word for two things (or two words for one)? | +| naming commitments | What should we *always* call this, so we stop drifting? | + +### `context` — descriptive claim +*Source question:* **What is true about the world this lives in?** +*Activating concepts:* domain, environment, situation of use, deployment topology, platform, ecosystem, integration surface, the system it replaces. + +| What it may answer | Example question forms | +| --- | --- | +| what kind of thing it is | What kind of thing is this — a CLI, a service, a library, a UI? | +| where / when it's used | When and where is it used? Under what conditions? | +| local / remote / both | Does it run locally, remotely, or both? Where does the work happen? | +| connectivity | Does it use the internet? Offline-capable? | +| integrations | What external systems must it talk to? What does it read or write? | +| what it replaces / sits beside | What does this replace? What already exists in this space? | +| platform / environment | What platform, runtime, or environment does it live in? | + +--- + +## Intent plane · structural (elicitation / commitment bands) + +### `requirement` — obligation claim +*Source question:* **What must the system do?** +*Activating concepts:* capabilities, user stories, functional requirements, MVP / walking skeleton, must-have vs nice-to-have. + +| What it may answer | Example question forms | +| --- | --- | +| core capabilities | What must it do? What's the core capability it can't ship without? | +| priority split | What's must-have vs nice-to-have? What's the smallest useful version? | +| observable behavior | From the outside, what should a user be able to do? | + +### `assumption` — uncertainty claim +*Source question:* **What might be false?** +*Activating concepts:* risks, hypotheses, leap-of-faith assumptions (Lean Startup), unknowns, "what we're betting on". 
+ +| What it may answer | Example question forms | +| --- | --- | +| open bets | What are we assuming that we haven't verified? | +| fragility | What could shift under us and break the plan? | +| dependencies on belief | What has to be true for this to work? | + +### `constraint` — boundary claim +*Source question:* **What does this rule out?** +*Activating concepts:* non-functional requirements (NFRs), guardrails, budget / time / regulatory / technical limits, non-goals, fixed technology basis. + +| What it may answer | Example question forms | +| --- | --- | +| fixed technical basis | Is the tech stack / language / framework already decided? What's locked? | +| budget & schedule | What's the deadline or budget? | +| scale / data envelope | What volume, latency, or data size must it handle? | +| regulatory / policy | Any compliance, privacy, or policy limits? | +| non-goals | What is this explicitly *not*? What's off the table? | + +### `invariant` — preservation claim +*Source question:* **What must never be broken?** +*Activating concepts:* safety properties, security guarantees, data integrity, "always holds". + +| What it may answer | Example question forms | +| --- | --- | +| must-always-hold | What must always be true, no matter what? | +| safety / security | What would be catastrophic if violated? | +| integrity rules | What data or state must never be corrupted? | + +--- + +## Intent plane · reasoning + +### `decision` — choice claim +*Source question:* **What did we pick among real alternatives?** +*Activating concepts:* trade-offs, architecture decision records (ADRs), reversibility (one-way vs two-way doors). + +| What it may answer | Example question forms | +| --- | --- | +| the chosen option | What did we pick, and why over the alternatives? | +| rejected options | What did we rule out? Why? | +| reversibility | Is this reversible, or a one-way door? 
| + +### `criterion` — oracle claim +*Source question:* **How will we judge that it holds?** +*Activating concepts:* acceptance criteria, definition of done, success metrics, oracles. + +| What it may answer | Example question forms | +| --- | --- | +| acceptance | How do we know it's good enough? What's the acceptance bar? | +| definition of done | When is this "done"? | +| measurable success | What would we measure to confirm it? | + +### `example` — witness or disambiguator claim +*Source question:* **What concrete case would settle this?** +*Activating concepts:* edge cases, counter-examples, behavioral kernels (see [BEHAVIORAL_KERNELS.md](BEHAVIORAL_KERNELS.md)), Given-When-Then, contrastive disambiguation. + +| What it may answer | Example question forms | +| --- | --- | +| illustrative case | Can you give a concrete example? | +| edge / tricky case | What's a case at the boundary that's easy to get wrong? | +| counter-example | What's a case that should *fail* or be rejected? | +| disambiguator | Here are two readings — which one do you mean? | + +--- + +## Other planes (band-gated; activate later) + +These follow the same pattern; depth here is intentionally lighter because they open after the +intent grounding is in place. + +### Oracle plane — *how we know* +`check`, `validation_method`, `evidence`, `obligation`. +*Activating concepts:* verification, tests, proof, audit trail. + +| Kind | Example question forms | +| --- | --- | +| `check` | How is this verified? What test or gate proves it? | +| `validation_method` | What method establishes the criterion holds? | +| `evidence` | What artifact shows it's true (a run, a measurement)? | +| `obligation` | What ongoing obligation does this create? | + +### Design plane — *how it's shaped* +`module`, `interface`. +*Activating concepts:* deep modules / information hiding (Ousterhout, Parnas), seams, API surface. + +| Kind | Example question forms | +| --- | --- | +| `module` | What are the parts? 
How does it decompose? What does each part hide? | +| `interface` | Where's the boundary? What's the contract across it? | + +### Plan plane — *how it's sequenced* +`milestone`, `frontier`, `slice`. +*Activating concepts:* walking skeleton, tracer-bullet slices, sequencing, risk retirement. + +| Kind | Example question forms | +| --- | --- | +| `milestone` | What's the phase boundary? What bundle must be true to advance? | +| `frontier` | What's the next named unit of work? | +| `slice` | What's the thinnest end-to-end slice to build first? | + +--- + +## How the agent uses this + +```diagram +╭──────────────────────────────────────────────────────────────╮ +│ projection loop (one step of generalized capture) │ +│ │ +│ 1. read open gaps + grounding density for THIS spec │ +│ 2. pick a node kind whose source-question is under-answered │ +│ 3. project: bind the kind's facets to what's already known │ +│ (domain X + protagonist Y → a concrete, situated question) │ +│ 4. emit as an elicitation_gap: ⟨question, refersTo: kind, …⟩ │ +│ 5. NEVER mint a new kind/typology to hold a question — │ +│ attach to the nearest existing kind │ +╰──────────────────────────────────────────────────────────────╯ +``` + +"Expand then contract" is native to this shape: **expand** = project many situated questions; +**contract** = every one of them refers to a single existing node kind. The catalog grows by +brainstorming more facets and phrasings; the ontology never grows. 
diff --git a/memory/PLAN.md b/memory/PLAN.md index 5795eb52..e96830e0 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -39,7 +39,7 @@ A new graph-mutation planning result has been promoted into the rolling plan as **Developer experience promoted to a first-class frontier (2026-06-09 ln-plan).** Working over the pi harness has been slow because the only fast path was ad hoc faux wiring scattered across probes; the user has elevated development feedback loops to first-class product DX (SPEC §Development Feedback Loops, D67-L–D69-L, A25-L). Promoted as `dx-feedback-loops`: bump `@earendil-works/pi-*` to latest and add a dev source-alias to the sibling `pi-mono` `src/` checkout (D67-L); consolidate three named loops (faux / real-provider / introspection) behind one `src/dev/` front door with a shared faux-harness factory (D68-L); and add one read-only, dev-gated introspection extension that captures exactly what the model receives, with mechanical and subjective modes sharing one run (D69-L). It is a DX substrate that accelerates every later frontier, so it leads the `Next` track; its version-bump+alias slice is a shared unblocker that should land before other frontiers' pi-facing churn. It is **not** POC-ship-critical and must preserve the D39-L sealed-profile boundary (introspection observes, never shapes product behavior; offline-lift and extension inclusion are dev-gated only). The context-pipeline coverage trio remains the elevated product-coverage spine right after. -**Readiness / elicitation-gaps remodel promoted (2026-06-09 ln-plan, post-`ln-spec`).** A SPEC pass reconceived the readiness and prospective-agenda model and must now land in code (D45-L, D57-L, D64-L, D65-L, D73-L, D74-L; A24-L, A27-L; I25-L, I30-L, I31-L). 
Four coupled implications: (1) **`elicitation_backlog` → `elicitation_gaps`** — the FE-823 question-instance / `open|closed` table is remodeled into typed coverage *obligations* (each gap carries a `name` typology key + meta `rationale`, a band, a `presence|field|coverage|manual` predicate union, an `importance` + derived `coverage`, and a `disposition`), seeded from the collated **grounding typology catalog** (floor `domain`/`protagonist`/`pain_pull`/`constraint` + progressive drivers `value`/`context_of_use`/`success_sketch`/`solution_boundary`) instead of four literal anchor questions; (2) **JIT capability-readiness** replaces the stored grade gate — readiness is judged on a capability request against the relevant gaps (proceed / proceed-at-low-epistemic-status / negotiate), retiring `readiness_grade`, `updateReadinessGrade`, `READINESS_GRADES`, and the `MIN_GRADE` proxy tables in `runtime-policy.ts`; (3) a soft derived **readiness estimate** (UI-only, gates nothing) plus removal of the vestigial `chrome.phase` / `chrome.chatMode` fields; (4) a small follow-on **session/runtime vocabulary leaf** (`src/session/schema/kinds.ts`) mirroring `graph/schema/kinds.ts` for the `op_mode`/`strategy`/`lens`/`goal` axes. These are promoted as `elicitation-gaps-remodel` → `capability-readiness` (hard chain) plus the parallel `runtime-vocab-leaf`; none are POC-ship-critical (the delivery cut de-scopes elicitation quality). **Sequencing tension with the trio:** `capability-readiness` mutates exactly the shapes the trio would lock (`workspace/workspace-state` drops phase/chatMode and gains the readiness estimate; `session/runtime-state` + composition drop grade). By the trio's own "lock upstream shape before downstream output" principle, the gaps/readiness remodel is *upstream* of the trio's readiness/chrome-touching locks and should land before stage 1 (`projection-shape-coverage`) freezes those shapes — otherwise the locks churn. 
Recommended order: `elicitation-gaps-remodel` → `capability-readiness` first, then the trio; or, if the trio leads, it must explicitly bracket the grade/phase/chatMode fields until the remodel lands. `elicitation-driver` now rides the remodeled gaps substrate, not the FE-823 backlog shape. +**Readiness / elicitation-gaps remodel promoted (2026-06-09 ln-plan, post-`ln-spec`).** A SPEC pass reconceived the readiness and prospective-agenda model and must now land in code (D45-L, D57-L, D64-L, D65-L, D73-L, D74-L; A24-L, A27-L; I25-L, I30-L, I31-L). Four coupled implications: (1) **`elicitation_backlog` → `elicitation_gaps`** — the FE-823 question-instance / `open|closed` table is remodeled into typed coverage *obligations* (each gap carries a `name` typology key + meta `rationale`, a band, a `presence|field|coverage|manual` predicate union, an `importance` + derived `coverage`, and a `disposition`), seeded from the collated **grounding typology catalog** (floor `domain`/`protagonist`/`pain_pull`/`constraint` + progressive drivers `value`/`context_of_use`/`success_sketch`/`solution_boundary`) instead of four literal anchor questions; (2) **JIT capability-readiness** replaces the stored grade gate — readiness is judged on a capability request against the relevant gaps (proceed / proceed-at-low-epistemic-status / negotiate), retiring `readiness_grade`, `updateReadinessGrade`, `READINESS_GRADES`, and the `MIN_GRADE` proxy tables in `runtime-policy.ts`; (3) a soft derived **readiness estimate** (UI-only, gates nothing) plus removal of the vestigial `chrome.phase` / `chrome.chatMode` fields; (4) a small follow-on **session/runtime vocabulary leaf** (`src/session/schema/kinds.ts`) mirroring `graph/schema/kinds.ts` for the `op_mode`/`strategy`/`lens`/`goal` axes. These are promoted as `elicitation-gaps-remodel` → `capability-readiness` (hard chain) plus the parallel `runtime-vocab-leaf`; none are POC-ship-critical (the delivery cut de-scopes elicitation quality). 
**Sequencing tension with the trio:** `capability-readiness` mutates exactly the shapes the trio would lock (`workspace/workspace-state` drops phase/chatMode and gains the readiness estimate; `session/runtime-state` + composition drop grade). By the trio's own "lock upstream shape before downstream output" principle, the gaps/readiness remodel is *upstream* of the trio's readiness/chrome-touching locks and should land before stage 1 (`projection-shape-coverage`) freezes those shapes — otherwise the locks churn. Recommended order: `elicitation-gaps-remodel` → `capability-readiness` first, then the trio; or, if the trio leads, it must explicitly bracket the grade/phase/chatMode fields until the remodel lands. `elicitation-driver` now rides the remodeled gaps substrate, not the FE-823 backlog shape. **2026-06-10 follow-on (D75-L):** a further SPEC pass collapsed the parallel grounding-typology vocabulary onto the node-kind ontology — gaps now reference graph node kinds (`refersTo: NodeKind`) instead of a closed typology `name` enum. This inserts `gaps-node-kind-reference` at the head of the chain (`elicitation-gaps-remodel` → `gaps-node-kind-reference` → `capability-readiness`); it reshapes the gaps substrate and the `capability → NodeKind[]` map, and absorbs the now-retired refactor plan (which had planned to enshrine the typology catalog). ### Context-pipeline coverage (the next design/lock spine) @@ -88,7 +88,8 @@ per ledger row: Post-`ln-spec` implications that are **upstream** of the context-pipeline trio's readiness/chrome-touching locks (see Context §Readiness / elicitation-gaps remodel). Land the hard chain before stage 1 freezes `workspace/workspace-state` + `session/runtime-state` shapes, or bracket those fields in the trio. -1. 
`capability-readiness` — **depends on `elicitation-gaps-remodel` (done).** Replace the stored-grade gate (`readiness_grade`, `updateReadinessGrade`, `READINESS_GRADES`, `MIN_GRADE` proxies) with JIT capability→relevant-gaps judgment; add the soft derived `readiness estimate` (UI-only); remove `chrome.phase` / `chrome.chatMode`. +1. `gaps-node-kind-reference` — **depends on `elicitation-gaps-remodel` (done).** Reshape the gaps substrate onto node kinds per D75-L: `refersTo: NodeKind` + a free-form `question` replaces the typology `name` enum; reseed grounding by node kind (floor `context`/`thesis`/`goal`/`constraint` plus the now-covered `term`/`assumption`); `capability → NodeKind[]` replaces `RelevantGapName`. Absorbs the retired refactor plan (folded into D75-L). Upstream of `capability-readiness`. +2. `capability-readiness` — **depends on `gaps-node-kind-reference`.** Replace the stored-grade gate (`readiness_grade`, `updateReadinessGrade`, `READINESS_GRADES`, `MIN_GRADE` proxies) with JIT capability→relevant-gaps judgment over the node-kind map; add the soft derived `readiness estimate` (UI-only); remove `chrome.phase` / `chrome.chatMode`. ### Next @@ -198,7 +199,32 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - **Cross-cutting obligations:** Anti-shadowing — the table never holds domain content (which lives in the graph). Gaps commit only through `CommandExecutor` (`basis` via provenance-directness, D63-L: user-raised `explicit`, agent-inferred `implicit`). Multi-spec discipline — each gap belongs to one spec's register. - **Traceability:** D8-L, D30-L, D57-L, D60-L, D63-L, D64-L, D65-L, D74-L / A24-L, A27-L / I30-L. Supersedes the FE-823 backlog row shape. - **Design docs:** `memory/SPEC.md` D65-L and §Grounding typology catalog; `src/graph/README.md`; `src/db/README.md`. -- **Current execution pointer:** Done 2026-06-10. 
Replaced FE-823 `elicitation_backlog` with the D65-L `elicitation_gaps` obligation register, regenerated the table/migration metadata, seeded the grounding typology catalog, routed create/disposition mutations through `CommandExecutor`, and proved live `presence` coverage/answered derivation at read-back with sibling-spec isolation. `field`/`coverage` predicate derivation and `manual` LLM satisficiency remain named follow-ons for capability-readiness / later predicate slices. +- **Current execution pointer:** Done 2026-06-10. Replaced FE-823 `elicitation_backlog` with the D65-L `elicitation_gaps` obligation register, regenerated the table/migration metadata, seeded the grounding typology catalog, routed create/disposition mutations through `CommandExecutor`, and proved live `presence` coverage/answered derivation at read-back with sibling-spec isolation. `field`/`coverage` predicate derivation and `manual` LLM satisficiency remain named follow-ons for capability-readiness / later predicate slices. **Superseded in part by `gaps-node-kind-reference` (D75-L):** the grounding typology catalog and gap-`name` enum are retired in favor of `refersTo: NodeKind` + a free-form question; the flat-table substrate, predicate union, disposition, and live derivation this frontier established stand. + +### gaps-node-kind-reference + +- **Name:** Gaps reference node kinds; retire the grounding-typology vocabulary (D75-L) +- **Linear:** unassigned — create in FE / brunch when the frontier starts. +- **Kind:** structural +- **Status:** next (heads the readiness chain, ahead of `capability-readiness`) +- **Certainty:** proving +- **Depends on:** `elicitation-gaps-remodel` (done — reshapes its `name`-typology output onto node kinds). 
+- **Retires:** the `GROUNDING_GAP_TYPOLOGIES` seed catalog (8 typology names), the closed gap-`name` typology enum, and `capability-readiness`'s `RelevantGapName` union (D75-L); absorbs the retired refactor plan, folded into D75-L (do not enshrine the catalog). +- **Lights up:** an `elicitation_gaps` row that names its obligation by `refersTo: NodeKind` + a free-form `question`; capability-relevant gaps expressed as a `capability → NodeKind[]` map (grounding floor = `context` + `thesis` + `goal` + `constraint`). +- **Stabilizes:** D75-L (one ontology — gaps reference the node-kind taxonomy, not a parallel vocabulary) and the anti-shadowing line (the table holds obligation/disposition/meta, never domain content). +- **Objective:** Implement the D75-L substrate reshape. (1) `graph/schema/elicitation-gaps.ts`: replace `name` (typology key) with `refersTo: NodeKind` + a free-form `question`, keeping `rationale` / `band` / `predicate` / `importance` / derived `coverage` / `disposition`; regenerate the table + migration (pre-release free-rewrite, no typology residue). (2) `graph/command-executor.ts`: reseed grounding from node kinds — floor `context` / `thesis` / `goal` / `constraint` plus the now-covered `term` / `assumption` — instead of the 8-entry `SEEDED_ELICITATION_GAPS` catalog; draw seeded question text from the `docs/design/ELICITATION_QUESTIONS.md` priming examples. (3) `projections/session/capability-readiness.ts`: replace `RelevantGapName` + `CAPABILITY_RELEVANT_GAPS` with a `capability → NodeKind[]` map; a referenced kind absent from the register still fails loud (config bug ≠ uncovered). (4) Reconcile the graph / db / projections topology READMEs + the seed-set and capability-readiness tests. +- **Why now / unlocks:** D75-L is canonical but the code still implements the typology catalog; this is the upstream substrate reshape `capability-readiness` builds its gate on, so it lands before that frontier rewires the gate. 
It is also upstream of the trio's projection-shape lock (the gaps register surfaces through projections). +- **Acceptance:** + - `ElicitationGap` carries `refersTo: NodeKind` + `question`; no typology `name` enum, no `GROUNDING_GAP_TYPOLOGIES`, no `RelevantGapName` remain; table/migration regenerated with no typology residue. + - `createSpec` seeds grounding gaps by node kind (floor + `term` / `assumption`), not the eight literal typologies. + - capability-readiness reads a `capability → NodeKind[]` map; the grounding floor is `context` + `thesis` + `goal` + `constraint`; a referenced kind absent from the register fails loud. + - Live presence-derived coverage/answered still flips from graph truth; two same-kind gaps (e.g. two `thesis` questions) are discriminated by question + `manual` / `coverage` satisfier, not aliased by a blunt presence count. + - graph / db / projections READMEs and the affected tests reconciled. +- **Verification:** Inner — gaps schema test (`refersTo: NodeKind`, no name enum); reseed test asserting the grounding floor by node kind incl. `term` / `assumption`; capability-readiness map test over node kinds incl. loud-fail-on-miss; live presence coverage flip preserved. Middle — the **discrimination probe** (the proving unknown): two `thesis`-referencing gaps resolve independently via question + judgment, not one shared presence count — retiring the presence-aliasing risk the retired refactor plan only deferred. Outer — per-spec seeded read-back probe. +- **Cross-cutting obligations:** anti-shadowing (D65-L/D75-L) — the table never stores domain content; the `NodeKind` union stays owned by the drizzle-free leaf `graph/schema/kinds.ts` (D73-L) — gaps import it, never redefine it; the `CommandExecutor` boundary + shared `{specId, lsn}` / `change_log` clock are unchanged. +- **Traceability:** D54-L, D56-L, D57-L, D60-L, D64-L, D65-L, D73-L, D74-L, D75-L / A24-L, A27-L / I30-L.
Supersedes the grounding typology catalog, the gap-`name` typology enum, and `RelevantGapName`; absorbs the retired refactor plan. +- **Design docs:** `memory/SPEC.md` D75-L / D65-L; `docs/design/ELICITATION_QUESTIONS.md`; `src/graph/schema/elicitation-gaps.ts`; `src/graph/command-executor.ts`; `src/projections/session/capability-readiness.ts`; `src/graph/README.md`; `src/db/README.md`; `src/projections/README.md`. +- **Current execution pointer:** `memory/cards/gaps-node-kind-reference--node-kind-reference.md` (active, single full card). ### capability-readiness @@ -207,11 +233,11 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - **Kind:** structural - **Status:** next (recommended ahead of the trio) - **Certainty:** proving -- **Depends on:** `elicitation-gaps-remodel` (hard — both capability-readiness and the readiness estimate read the remodeled gaps). +- **Depends on:** `gaps-node-kind-reference` (hard — the gate reads node-kind-referencing gaps and a `capability → NodeKind[]` map; transitively `elicitation-gaps-remodel`, done). - **Retires:** the stored `readiness_grade` scalar and grade-as-authority (D45-L); A27-L (the `capability → relevant gaps` map carries enough signal to drive proceed / negotiate without a standing grade). - **Lights up:** capability-readiness — on a capability request, evaluate the relevant `elicitation_gaps` → **proceed / proceed-at-low-epistemic-status / negotiate** (`establishment_offer`) — replacing `MIN_GRADE` gating. - **Stabilizes:** I31-L (readiness never bars work; no grade scalar; no kind whitelist) and I25-L (legal affordances are projections over resolved runtime state plus capability-readiness over gaps). -- **Objective:** Replace the grade gate with JIT capability-readiness. 
(1) Remove `specs.readiness_grade`, `updateReadinessGrade`, and `READINESS_GRADES`; (2) replace `GRADE_RANK` / `GOAL_MIN_GRADE` / `STRATEGY_MIN_GRADE` / `LENS_MIN_GRADE` in `src/projections/session/runtime-policy.ts` with an explicit `capability → relevant gaps` map plus JIT evaluation (structural predicates checked mechanically; `manual` gaps consume an LLM satisficiency judgment, D57-L); (3) add the soft, derived, UI-only `readiness estimate` (per-band coverage rollup over gaps) projection; (4) remove the vestigial `chrome.phase` / `chrome.chatMode` fields from `workspace-session-coordinator.ts` and `workspace-state.ts` (the readiness estimate supersedes `phase`; `chatMode` was a redundant spec-selection restatement). +- **Objective:** Replace the grade gate with JIT capability-readiness. (1) Remove `specs.readiness_grade`, `updateReadinessGrade`, and `READINESS_GRADES`; (2) replace `GRADE_RANK` / `GOAL_MIN_GRADE` / `STRATEGY_MIN_GRADE` / `LENS_MIN_GRADE` in `src/projections/session/runtime-policy.ts` with the `capability → NodeKind[]` map from `gaps-node-kind-reference` (D75-L) plus JIT evaluation (structural predicates checked mechanically; `manual` gaps consume an LLM satisficiency judgment, D57-L); (3) add the soft, derived, UI-only `readiness estimate` (per-band coverage rollup over gaps) projection; (4) remove the vestigial `chrome.phase` / `chrome.chatMode` fields from `workspace-session-coordinator.ts` and `workspace-state.ts` (the readiness estimate supersedes `phase`; `chatMode` was a redundant spec-selection restatement). - **Why now / unlocks:** D45-L/D74-L retired the grade as a conflation of gate/display/milestone; this materializes the replacement so goal derivation, affordance legality, and prompt composition stop reading a grade. It also removes the grade/phase/chatMode fields the trio would otherwise lock prematurely. 
- **Acceptance:** - No `readiness_grade` column, `updateReadinessGrade` mutation, or `READINESS_GRADES` enum remains; affected fixtures/seeds/probes regenerated. @@ -221,9 +247,9 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - `chrome.phase` / `chrome.chatMode` are removed from the coordinator and workspace-state projection; the readiness estimate is the only readiness surface. - **Verification:** Inner — capability-readiness unit tests (a structural gap flips readiness with no grade; a `manual` gap routes to satisficiency); readiness-estimate projection test (regresses honestly, gates nothing); affordance legality over gaps (replacing the grade-gate tests). Middle — D74-L tracer: a presence-derived grounding gap flips capability-readiness with no stored grade. Outer — composed-prompt + web observer surface the readiness estimate, not a grade. - **Cross-cutting obligations:** Readiness never bars graph truth or work (I31-L); `CommandExecutor` must not reject a node for a later-band kind (D64-L). The deferred milestone gate for export/plan/execute op-modes stays deferred (D45-L). Replace grade-gate tests across `compose.test.ts` / `prompting.test.ts` and createSpec/getSpec rather than preserving them. -- **Traceability:** D25-L, D30-L, D32-L, D45-L, D57-L, D58-L, D59-L, D64-L, D65-L, D73-L, D74-L / A27-L / I25-L, I31-L. Supersedes stored-grade gating and the `chrome.phase` / `chrome.chatMode` fields. +- **Traceability:** D25-L, D30-L, D32-L, D45-L, D57-L, D58-L, D59-L, D64-L, D65-L, D73-L, D74-L, D75-L / A27-L / I25-L, I31-L. Supersedes stored-grade gating and the `chrome.phase` / `chrome.chatMode` fields. - **Design docs:** `memory/SPEC.md` D45-L / D74-L; `src/projections/session/runtime-policy.ts`; `src/projections/workspace/workspace-state.ts`. 
-- **Current execution pointer:** D74-L JIT gate tracer done 2026-06-10 via `memory/cards/capability-readiness--jit-gate.md`: explicit capability→grounding-gap map, proceed / low-epistemic / negotiate outcome, live presence-coverage flip, no grade-symbol import. Deferred follow-ons remain to re-scope because their shape depends on the gate interface: readiness-estimate projection, consumer rewire off `MIN_GRADE`, stored-grade deletion, `chrome.phase`/`chatMode` removal. +- **Current execution pointer:** D74-L JIT gate tracer done 2026-06-10: explicit capability→grounding-gap map, proceed / low-epistemic / negotiate outcome, live presence-coverage flip, no grade-symbol import. Deferred follow-ons remain to re-scope because their shape depends on the gate interface: readiness-estimate projection, consumer rewire off `MIN_GRADE`, stored-grade deletion, `chrome.phase`/`chatMode` removal. ### runtime-vocab-leaf @@ -638,7 +664,8 @@ nodes: projection-shape-coverage [next · coverage] TRIO stage 1 (#project, PROJECT): create projections ledger + no-loss/shape invariants over dark graph/transcript DTOs; invariant-kind, NOT golden renderer-golden-coverage [next · coverage] TRIO stage 2 (#render, RENDER): create renderer ledger + golden-lock every durable renderer; depends on projection-shape-coverage prompt-composition-golden-coverage [next · coverage] TRIO stage 3 (#compose, COMPOSE): composed-prompt preview + golden-lock partials/composition matrix; depends on renderer-golden-coverage - elicitation-gaps-remodel [done · proving] remodeled elicitation_gaps obligation register; seeded grounding typology catalog; live presence derivation + elicitation-gaps-remodel [done · proving] remodeled elicitation_gaps obligation register; live presence derivation (grounding typology catalog superseded by gaps-node-kind-reference, D75-L) + gaps-node-kind-reference [next · proving] reshape gaps onto node kinds (refersTo NodeKind + question); reseed grounding by kind; capability->NodeKind[] 
replaces RelevantGapName; absorbs retired refactor plan (D75-L) capability-readiness [next · proving] JIT capability->relevant-gaps gate + readiness estimate (UI-only); retire readiness_grade / MIN_GRADE / chrome.phase+chatMode runtime-vocab-leaf [parallel · proving] src/session/schema/kinds.ts source-of-truth leaf for op_mode/strategy/lens/goal (D73-L direction); decision-3 follow-on elicitation-driver [after-trio · proving] live per-turn what-to-ask-next driver on remodeled elicitation_gaps; rides COMPOSE oracle; closes cross-cut Seam 3a @@ -657,10 +684,11 @@ edges: project-graph-review-cycle -[optional]-> poc-live-ship-gate minimal-authority-shell -[hard]-> poc-live-ship-gate elicitation-backlog -[supersedes]-> elicitation-gaps-remodel (FE-823 backlog row shape remodeled into D65-L gaps) - elicitation-gaps-remodel -[hard]-> capability-readiness (capability-readiness + readiness estimate read the remodeled gaps) - elicitation-gaps-remodel -[hard]-> elicitation-driver (driver ranks/selects over the remodeled gaps shape) + elicitation-gaps-remodel -[hard]-> gaps-node-kind-reference (reshape gaps onto node kinds; refersTo NodeKind replaces the typology name enum, D75-L) + gaps-node-kind-reference -[hard]-> capability-readiness (gate + readiness estimate read node-kind-referencing gaps and a capability->NodeKind[] map) + gaps-node-kind-reference -[hard]-> elicitation-driver (driver ranks/selects over the final gap shape: refersTo NodeKind + question) capability-readiness -[shape]-> projection-shape-coverage (mutates workspace-state/runtime-state shapes the trio stage 1 would lock; land first or bracket those fields) - elicitation-gaps-remodel -[shape]-> projection-shape-coverage (gaps register surfaces through projections; lock upstream shape first) + gaps-node-kind-reference -[shape]-> projection-shape-coverage (gaps register surfaces through projections; lock upstream shape first) graph-tool-resilience -[hard]-> role-safe-graph-mutations (current graph tool + edge 
model exist) project-graph-review-cycle -[hard]-> role-safe-graph-mutations (current review-set proposal/accept path exists) role-safe-graph-mutations -[hard]-> exchanges-and-generalized-capture (relation-bearing capture uses mutateGraph grammar) diff --git a/memory/SPEC.md b/memory/SPEC.md index 54106adb..c1ec5a5e 100644 --- a/memory/SPEC.md +++ b/memory/SPEC.md @@ -118,15 +118,10 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c | A20-L | The chosen Drizzle line and row-schema derivation path can be settled during the prep envelope without forcing later M4 rework: Brunch can prove migrations, SQLite fidelity, monotonic counter allocation, change-log writes, and runtime-schema derivation on one representative persistence slice before CRUD proper starts. | high | **validated** | D16-L, D41-L | **Validated by A20-L spike (2026-06-01).** Stack: `drizzle-orm@0.45.2` + `drizzle-kit@0.31.10` + `better-sqlite3@12.8.0` + `drizzle-typebox@0.3.3` + `@sinclair/typebox@0.34.14`. Proved: (1) `drizzle-typebox` derives valid TypeBox insert/select schemas from Drizzle tables; `Value.Check` validates/rejects correctly. (2) Batch `mutateGraph`-shaped transaction (multi-node → intra-batch ref resolution → multi-edge → LSN allocation → change-log append) works atomically; full rollback on FK violation or domain-validation throw. (3) `update().returning()` works for atomic monotonic counter increment; `insert().returning()` gives auto-increment IDs for ref resolution; JSON detail column round-trips cleanly. (4) Pi tool parameters (`typebox` v1.x) and Drizzle row schemas (`@sinclair/typebox` v0.34 via `drizzle-typebox`) serve different roles and never cross — shared enum `const` arrays bridge both. 
| | A21-L | The POC can treat coherence as a bounded product verdict over structural legality plus explicitly detected contradictions, gaps, and unresolved reconciliation needs, without solving a general theory of “spec coherence.” | low | open | D8-L | M8 must sharpen the coherence rubric before implementation: known-bad adversarial briefs should show what counts as incoherent, what is merely immature/underspecified, and what should become a reconciliation need. | | A22-L | The elicitor can perform synchronous post-exchange capture well enough for the POC: high-confidence extractive facts can be committed to the graph immediately and gap dispositions updated, while low-confidence implications can be kept out of graph truth and used as disambiguation material. | medium | partially validated | D18-L, D26-L, D45-L, D65-L, I30-L | 2026-06-05 `capture-response-to-graph` validated the product wiring for narrow labeled text facts (`Goal:`, `Context:`, `Constraint:`, `Criterion:`) on `session.submitExchangeResponse`. 2026-06-07 generalized the same explicit-text capture core onto `session.submitMessage`: ordinary labeled user text now appends to transcript truth, commits through `graph/capture` → `CommandExecutor.mutateGraph({createBasis: explicit, ops})`, targets the transcript binding's spec, and publishes graph invalidations; explicit interruptions are transcript-visible but do not capture or silently answer a pending exchange. 2026-06-08 `capture-quality-spike` added a fixed scenario measurement over free prose, file/ref-bearing prose, and implication-heavy prose; the sample extraction report reached precision 1.0 / recall 1.0 with zero false commits, moving generalized capture from parked evidence-gate to a narrow graduate recommendation with an explicit false-commit guard. Readiness-grade capture remains open fitness evidence. 
| -| A24-L | A flat `elicitation_gaps` table (prospective memory) is sufficient to drive elicitor questioning, seed grounding, and feed capability-readiness without graph structure — gaps are typed coverage obligations (typologies), not graph nodes; apparent dependency among gaps is mediated by the claims their resolution produces. | medium | validated | D65-L, D74-L | 2026-06-08 FE-823 materialized the flat table (built as `elicitation_backlog`) on the real LSN/change-log seam. 2026-06-10 `elicitation-gaps-remodel` replaced that question-instance shape with the typed obligation register (`name`/`rationale`/band/predicate/importance/disposition), regenerated the table as `elicitation_gaps`, seeded the grounding typology catalog, and proved live presence-derived coverage/answered read-back without stored structural answers. Remaining downstream proof is capability-readiness over the register (D74-L) and capture-reflection spawning; if genuine gap→gap dependency or rich traversal emerges, promote the table to a plane (rows→nodes, FK pointers→edges). | +| A24-L | A flat `elicitation_gaps` table (prospective memory) is sufficient to drive elicitor questioning, seed grounding, and feed capability-readiness without graph structure — gaps are typed coverage obligations (typologies), not graph nodes; apparent dependency among gaps is mediated by the claims their resolution produces. | medium | validated | D65-L, D74-L, D75-L | 2026-06-08 FE-823 materialized the flat table (built as `elicitation_backlog`) on the real LSN/change-log seam. 2026-06-10 `elicitation-gaps-remodel` replaced that question-instance shape with the typed obligation register (`name`/`rationale`/band/predicate/importance/disposition), regenerated the table as `elicitation_gaps`, seeded the grounding typology catalog, and proved live presence-derived coverage/answered read-back without stored structural answers. 
Remaining downstream proof is capability-readiness over the register (D74-L) and capture-reflection spawning; if genuine gap→gap dependency or rich traversal emerges, promote the table to a plane (rows→nodes, FK pointers→edges). | | A25-L | Tracking the latest `pi-coding-agent` release continuously (via source-alias in dev + package dependency bumps) keeps Brunch adaptable without routinely destabilizing it, because Brunch's pi product-behavior surface is concentrated in a few sealed integration seams (the `src/.pi/` extension bundle and the session/runtime adapters) behind the D39-L profile — even though pi *types* are imported across ~25 files, those are mostly type-only and pass through that small set of seams. | medium | partially validated | D67-L | 2026-06-09 FE-825 bumped Brunch to pi 0.79, kept type/default resolution on installed `dist`, added a `PI_SOURCE`-gated vite/vitest runtime alias to sibling `pi-mono` source, preserved product default sealed-profile/offline behavior, and passed `npm run verify`. Each later pi bump that lands without product-behavior regressions raises confidence; a bump that silently breaks sealed-profile assumptions falsifies it. | -<<<<<<< HEAD | A26-L | The refined "conversational introspection" goal can be built as a *read-only session-query-back tool*: under `BRUNCH_DEV`, the agent can call `brunch_session_query` over `ctx.sessionManager.getBranch()`, find entries by predicate, project capped dot/`[n]`/`[*]` paths, and surface exact returned values in chat without weakening D39-L sealing or turning self-reporting into product behavior. 
| medium | validated | D69-L, D71-L | 2026-06-09 `dx-introspection-live` slice 2 replaced the earlier fixed structured self-report/schema idea with `src/.pi/extensions/session-query/`: a dev-gated read-only tool registered only through `createBrunchPiExtensions(..., { introspection: { enabled } })`, covered by find/project/truncation unit tests, default-off/default-on registration tests, and a faux turn that returns verbatim projected session values. Live-model compliance with "call then echo verbatim" remains outer-loop fitness, not a merge gate. | -| A27-L | Gap satisfaction is expressible band-by-band at acceptable LLM cost: **commitment** typologies are structural `presence`/`field`/`coverage` predicates over the graph; **grounding** typologies are a `presence` floor plus `manual` LLM satisficiency (D57-L); **elicitation** typologies are generatively spawned. The explicit `capability → relevant gaps` map (D74-L) carries enough signal to drive proceed / negotiate without a standing grade. | medium | partially validated | D65-L, D74-L | 2026-06-10 `elicitation-gaps-remodel` validated the structural `presence` case: a seeded grounding gap's derived coverage/answered state flips from graph truth with no stored structural answer and sibling-spec isolation holds. Remaining proof: D74-L capability-readiness tracer, `field`/`coverage` predicate derivation, `manual` LLM satisficiency, and elicitation/commitment fixtures. Falsified if grounding readiness cannot decompose into per-typology presence+manual judgments, or if commitment obligations need logic the predicate union can't express. 
| -======= -| A26-L | The "conversational introspection" goal — the in-product agent reporting, in chat, on what tools sent/returned, how understandable inputs/outputs were, errors/uncertainty it hit, and how cleanly a skill activated — can be built as a *read-only* extension of the D69-L tap (adding `tool_call`/`tool_result` observation) plus a small structured self-report schema and an in-chat surface, **without** weakening D39-L sealing or making the agent's self-report a product behavior. | medium | open | D69-L | Prove with a dev-gated slice: pi's `tool_call`/`tool_result` hooks can observe tool I/O and errors read-only; the agent can emit a parseable self-report (not free prose) on demand; and the report can render back into the conversation paired to the same scratch run — all behind `BRUNCH_DEV`. Risk: getting a reliable *structured* self-report rather than narration, and choosing the in-chat-vs-artifact surface, are the open unknowns. | -| A27-L | Gap satisfaction is expressible band-by-band at acceptable LLM cost: **commitment** typologies are structural `presence`/`field`/`coverage` predicates over the graph; **grounding** typologies are a `presence` floor plus `manual` LLM satisficiency (D57-L); **elicitation** typologies are generatively spawned. The explicit `capability → relevant gaps` map (D74-L) carries enough signal to drive proceed / negotiate without a standing grade. | medium | partially validated | D65-L, D74-L | 2026-06-10 `elicitation-gaps-remodel` validated the structural `presence` case: a seeded grounding gap's derived coverage/answered state flips from graph truth with no stored structural answer and sibling-spec isolation holds. 2026-06-10 `capability-readiness--jit-gate` validated the D74-L tracer for the grounding floor: the explicit capability→gap map drives proceed / proceed_low_epistemic / negotiate, live presence coverage flips a generative capability negotiate→proceed, and the gate imports no grade symbols. 
Remaining proof: `field`/`coverage` predicate derivation, `manual` LLM satisficiency, elicitation/commitment fixtures, and rewiring consumers off grade thresholds. Falsified if grounding readiness cannot decompose into per-typology presence+manual judgments, or if commitment obligations need logic the predicate union can't express. | ->>>>>>> f6cd213e (Add JIT capability-readiness gate over elicitation gaps) +| A27-L | Gap satisfaction is expressible band-by-band at acceptable LLM cost: **commitment** typologies are structural `presence`/`field`/`coverage` predicates over the graph; **grounding** typologies are a `presence` floor plus `manual` LLM satisficiency (D57-L); **elicitation** typologies are generatively spawned. The explicit `capability → relevant gaps` map (D74-L) carries enough signal to drive proceed / negotiate without a standing grade. | medium | partially validated | D65-L, D74-L, D75-L | 2026-06-10 `elicitation-gaps-remodel` validated the structural `presence` case: a seeded grounding gap's derived coverage/answered state flips from graph truth with no stored structural answer and sibling-spec isolation holds. 2026-06-10 the `capability-readiness` D74-L gate tracer validated the grounding floor: the explicit capability→gap map drives proceed / proceed_low_epistemic / negotiate, live presence coverage flips a generative capability negotiate→proceed, and the gate imports no grade symbols. Remaining proof: `field`/`coverage` predicate derivation, `manual` LLM satisficiency, elicitation/commitment fixtures, and rewiring consumers off grade thresholds. Falsified if grounding readiness cannot decompose into per-typology presence+manual judgments, or if commitment obligations need logic the predicate union can't express. 
| ### Active Decisions @@ -157,8 +152,9 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c - **D62-L — Graph nodes have stable spec-scoped human reference codes projected from stored `kind_ordinal`, separate from integer storage IDs.** `NodeId` remains the SQLite integer primary key/FK used internally. The database stores `kind` and `kind_ordinal`; user/agent-facing handles such as `G1`, `CON2`, `R3`, `CR4`, `VM1`, or `SL2` are projection strings formed by a hard-coded presentation lookup from `kind` to a 1–3 capital-letter label plus `kind_ordinal`. The rendered code string is not a graph column. Labels are unique across all node kinds so `#`-mentions can parse by longest-prefix match, then resolve to `(kind, kind_ordinal)` and finally to `NodeId`. `kind_ordinal` is monotonic per `(spec_id, plane, kind)`, allocated by the `CommandExecutor` in the same transaction as node creation from a counter row (`node_kind_counters` or equivalent), not by `MAX(kind_ordinal)+1`; ordinals are never reused after deletion or supersession. DB constraints must make `(spec_id, plane, kind, kind_ordinal)` unique; there is no `(spec_id, code)` uniqueness constraint because `code` is not stored. Context renders and prompt contexts should use projected codes as primary handles and reserve raw integer IDs for internal diagnostics/adapters. Depends on: D14-L, D16-L, D20-L, D54-L, D56-L, D61-L. Supersedes: the string-`NodeId` examples in earlier GRAPH_MODEL text and the previous app's application-only `MAX(kind_ordinal)+1` allocation pattern. - **D63-L — Graph `basis` records item-level approval strength, not the mutation pathway.** Accepted nodes and edges use `basis ∈ explicit | implicit`. 
`explicit` means the user directly stated the graph item or approved the exact node/edge in a review set; `implicit` means the user accepted a concept/proposal and the agent materialized specific graph items to match it without per-item review (the `propose-graph` direct-commit path). The mutation pathway lives in `change_log.operation` and payload (`mutate_graph`, `accept_review_set`, post-exchange capture, etc.), while epistemic attribution lives in `Node.source` and proposal UI metadata may still carry `epistemic_status`. Low-confidence inferred material is still not graph truth; it remains in preface/capture analysis/review drafts/reconciliation needs until clarified or accepted. More abstractly, `basis` is a *provenance-directness* marker — directly from the user (`explicit`) versus agent-materialized from user input (`implicit`) — of which item-level approval strength is the claim-flavored reading; this lets the same `explicit | implicit` distinction apply to non-claim registers such as `elicitation_gaps` (user-raised vs agent-inferred, D65-L). Depends on: D26-L, D27-L, D53-L, D54-L, D55-L. Supersedes: `basis = accepted_review_set` as a persisted graph enum value and any interpretation of `basis` as a provenance/path field. - **D64-L — Readiness bands are the coarse level of one coverage axis; gap typologies (D65-L) are its finer members. Bands are non-exclusive derived node-kind groupings, not structural legality gates.** Bands are `grounding`, `elicitation`, and `commitment`; each `elicitation_gaps` typology carries exactly one band — band and typology are **one axis at two granularities**, so "bands becoming more differentiated over time" means the typology taxonomy growing, not new bands. A node kind may belong to multiple bands (e.g. `constraint` contributes to grounding as the constraint anchor and to elicitation when it bounds solution space). 
Bands guide what the elicitor is trying to complete, what graph filters and rendered context show, the per-band **readiness estimate** rollup (D45-L), and which gaps a capability-readiness judgment weighs (D74-L). The band's gate-character differs by band: **grounding** is mostly LLM-judged satisficiency with a count floor (D57-L), **elicitation** is generatively spawned (no fixed typology set), **commitment** is more structurally derivable. The `CommandExecutor` must not reject a clear later-band kind merely because of band; readiness controls objectives and capability-judgment, not what graph truth may contain. Depends on: D45-L, D56-L, D57-L, D59-L, D60-L, D65-L. Supersedes: treating the intent `basic | structural | reasoning` category as the readiness taxonomy, treating readiness as a per-kind creation whitelist, or treating bands as a grade rubric for a stored grade. -- **D65-L — `elicitation_gaps` are typed coverage *obligations* (typologies) — the elicitor's prospective-memory agenda and the substrate of capability-readiness judgment; they guide and modulate, they never hard-gate.** Renamed and reconceived from `elicitation_backlog`. A gap is a **typology of coverage that must be addressed** (e.g. "the spec must anchor its primary constraint(s)"), **not** a literal queued question and **not** a specific point of unclarity — that would shadow the intent graph, which already owns the content (decisions, assumptions, constraints, …). The original `unknown`/process-vs-domain split still holds: `elicitation_` scopes the term to *process* gaps (knowable by asking), as opposed to the deferred domain-gap `risk` node (Future Direction §Vocabulary evolution). 
Each gap carries **both** a stable **name** (its typology key — machine identity used for seeding, dedup, and the `capability → relevant gaps` map (D74-L), and a short display label) **and** a **rationale** (the *meta* prose: what coverage this obligation represents, why it matters, and what counts as satisfying it — read by the elicitor to phrase the next question and to make a `manual` satisficiency judgment, D57-L). The two are not redundant: the name is for machine identity/reference, the rationale is for agent reasoning and cannot be compressed into a terse key. In addition each gap carries: a **band** (D64-L — its coarse level, one band per typology); a **predicate shape** — a tagged union of `presence` (≥N nodes of a kind/band present), `field` (a `detail` key present), `coverage` (D60-L `lacksEdge` per-member absence), or `manual` (LLM-judged, the D57-L satisficiency residue) — which routes structural-vs-JIT checking (D74-L); an **importance** (driver-weight / count-floor membership / priority — *not* a hard gate); and a derived **coverage** strength (how well addressed). Importance and coverage are deliberately **two fields, not one ambiguous `rating`**: importance is the pre-answer weight, coverage the post-answer derived strength. **Disposition** (`open | answered | not_applicable | irrelevant | reopened`) is stored *only where it is non-derivable* — scope judgments (`not_applicable` / `irrelevant`, which the agent may set in bulk) and `manual` satisficiency — while `answered` for a structural predicate is derived **live** from the graph and never hand-set; this is the anti-shadowing line: the table holds obligation/disposition/meta only, never domain content. `reopened` is a legitimate disposition (new ambiguity can reopen a typology). 
Gaps serve three roles: **agenda** (what to ask / propose next), **judgment drivers** for capability-readiness (D74-L), and a **density signal** that scales generative-output epistemic status (D30-L) — the candidate-proposal / disambiguation UX is precisely how open grounding gaps fill progressively, so an open gap must never wall that UX. Seeding is band-correlated. The **grounding** band has a seeded fixed catalog of typologies collated from the D30-L anchor bundle, the D57-L Walter drivers, [`docs/design/ELICITATION_LENSES.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/ELICITATION_LENSES.md) §grounding bundle, and the shaping kickoff/framing material — a **floor** of `domain` (what kind of thing is being built), `protagonist` (who it is for / most affected), `pain_pull` (what problem/pain/pull drives it), and `constraint` (what binding non-negotiables already shape it) — the anchor bundle that gates generative capabilities (D30-L) — plus softer **progressive drivers** that enrich and focus elicitation but are *never* floor (the no-moving-the-goalpost line): `value` (what value/benefit), `context_of_use` (when/where used), `success_sketch` (how success is measured / what good looks like), and `solution_boundary` (non-goals / what it is explicitly not). **elicitation** gaps are generatively spawned by capture-reflection as preceding answers raise new coverage obligations (no fixed catalog). **commitment** gaps are derived structural predicates over the graph (e.g. "every requirement has a criterion", "every decision records its rejected options", "every invariant has a proof or check"). It remains a **flat table, not a graph plane/node** — its only relations are filter attributes plus FK pointers (`arose_from`, `resolved_by`), a degenerate bipartite graph promotable later only if genuine gap→gap structure emerges; it is the *prospective* sibling of the *retrospective* `reconciliation_need` register (D8-L). 
`basis` applies via provenance-directness (D63-L): user-raised `explicit`, agent-inferred `implicit`. The flat-table substrate, `createSpec` seeding, `CommandExecutor`-routed mutations, and shared spec-local LSN + `change_log` boundary are settled from FE-823 (built as `elicitation_backlog`); the obligation/predicate/disposition remodel and the rename are what this decision now locks. Still open: whether the register eventually thins the `goal` axis (D59-L), and live per-turn ranking. Depends on: D8-L, D30-L, D45-L, D57-L, D59-L, D60-L, D63-L, D64-L, D74-L. Supersedes: the `elicitation_backlog` name and its question-instance / `open | closed`-status model, treating `unknown` as a graph node kind, and any readiness-grade-projection-over-open-counts as authority. -- **D74-L — Capability-readiness is a just-in-time, capability-relative judgment over relevant gaps — it replaces the standing grade gate.** When a capability is requested (a generative lens, `propose-graph`, `project-graph`, commitment review, eventual export), the agent evaluates readiness *for that capability* against the `elicitation_gaps` (D65-L) declared relevant to it. The `capability → relevant gaps` map is **explicit** and subsumes the retired `STRATEGY_MIN_GRADE` / `GOAL_MIN_GRADE` / `LENS_MIN_GRADE` thresholds in `runtime-policy.ts`, which were a lossy grade-proxy for "enough grounding". Structurally-obvious relevant gaps (`presence` / `field` / `coverage`) are checked **mechanically** (cheap, no LLM); non-obvious (`manual`) ones consume an **LLM satisficiency judgment** (D57-L). The outcome is one of **proceed**, **proceed at low epistemic status** (density-scaled, D30-L), or **negotiate** — surface an `establishment_offer` ("I can, but answer X and Y first", D32-L). Capability-readiness fires **on request, reactive-primary** (proactive nudges are a separate later concern) and is the **only readiness gate**: it never bars attempting work, it scales/negotiates. 
This resolves the prior "lens is never gated" (`ELICITATION_LENSES.md`) vs `LENS_MIN_GRADE` contradiction (lenses are not grade-gated; readiness is JIT-judged) and dissolves the grade-ratchet / two-value problem (the soft `readiness estimate`, D45-L, gates nothing and may regress honestly). A future structural milestone gate for export/plan/execute op-modes is deferred (D45-L) until such an op-mode exists. Depends on: D25-L, D26-L, D30-L, D32-L, D45-L, D57-L, D59-L, D65-L. Supersedes: `GRADE_RANK`-based `MIN_GRADE` hard gating of goal/strategy/lens, and a standing readiness scalar as the authority for capability availability. +- **D65-L — `elicitation_gaps` are typed coverage *obligations* (typologies) — the elicitor's prospective-memory agenda and the substrate of capability-readiness judgment; they guide and modulate, they never hard-gate.** Renamed and reconceived from `elicitation_backlog`. A gap is a **typology of coverage that must be addressed** (e.g. "the spec must anchor its primary constraint(s)"), **not** a literal queued question and **not** a specific point of unclarity — that would shadow the intent graph, which already owns the content (decisions, assumptions, constraints, …). The original `unknown`/process-vs-domain split still holds: `elicitation_` scopes the term to *process* gaps (knowable by asking), as opposed to the deferred domain-gap `risk` node (Future Direction §Vocabulary evolution). Each gap carries **both** a stable **name** (its typology key — machine identity used for seeding, dedup, and the `capability → relevant gaps` map (D74-L), and a short display label) **and** a **rationale** (the *meta* prose: what coverage this obligation represents, why it matters, and what counts as satisfying it — read by the elicitor to phrase the next question and to make a `manual` satisficiency judgment, D57-L). 
The two are not redundant: the name is for machine identity/reference, the rationale is for agent reasoning and cannot be compressed into a terse key. In addition each gap carries: a **band** (D64-L — its coarse level, one band per typology); a **predicate shape** — a tagged union of `presence` (≥N nodes of a kind/band present), `field` (a `detail` key present), `coverage` (D60-L `lacksEdge` per-member absence), or `manual` (LLM-judged, the D57-L satisficiency residue) — which routes structural-vs-JIT checking (D74-L); an **importance** (driver-weight / count-floor membership / priority — *not* a hard gate); and a derived **coverage** strength (how well addressed). Importance and coverage are deliberately **two fields, not one ambiguous `rating`**: importance is the pre-answer weight, coverage the post-answer derived strength. **Disposition** (`open | answered | not_applicable | irrelevant | reopened`) is stored *only where it is non-derivable* — scope judgments (`not_applicable` / `irrelevant`, which the agent may set in bulk) and `manual` satisficiency — while `answered` for a structural predicate is derived **live** from the graph and never hand-set; this is the anti-shadowing line: the table holds obligation/disposition/meta only, never domain content. `reopened` is a legitimate disposition (new ambiguity can reopen a typology). Gaps serve three roles: **agenda** (what to ask / propose next), **judgment drivers** for capability-readiness (D74-L), and a **density signal** that scales generative-output epistemic status (D30-L) — the candidate-proposal / disambiguation UX is precisely how open grounding gaps fill progressively, so an open gap must never wall that UX. Seeding is band-correlated. 
The **grounding** band has a seeded fixed catalog of typologies collated from the D30-L anchor bundle, the D57-L Walter drivers, [`docs/design/ELICITATION_LENSES.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/ELICITATION_LENSES.md) §grounding bundle, and the shaping kickoff/framing material — a **floor** of `domain` (what kind of thing is being built), `protagonist` (who it is for / most affected), `pain_pull` (what problem/pain/pull drives it), and `constraint` (what binding non-negotiables already shape it) — the anchor bundle that gates generative capabilities (D30-L) — plus softer **progressive drivers** that enrich and focus elicitation but are *never* floor (the no-moving-the-goalpost line): `value` (what value/benefit), `context_of_use` (when/where used), `success_sketch` (how success is measured / what good looks like), and `solution_boundary` (non-goals / what it is explicitly not). **elicitation** gaps are generatively spawned by capture-reflection as preceding answers raise new coverage obligations (no fixed catalog). **commitment** gaps are derived structural predicates over the graph (e.g. "every requirement has a criterion", "every decision records its rejected options", "every invariant has a proof or check"). It remains a **flat table, not a graph plane/node** — its only relations are filter attributes plus FK pointers (`arose_from`, `resolved_by`), a degenerate bipartite graph promotable later only if genuine gap→gap structure emerges; it is the *prospective* sibling of the *retrospective* `reconciliation_need` register (D8-L). `basis` applies via provenance-directness (D63-L): user-raised `explicit`, agent-inferred `implicit`. The flat-table substrate, `createSpec` seeding, `CommandExecutor`-routed mutations, and shared spec-local LSN + `change_log` boundary are settled from FE-823 (built as `elicitation_backlog`); the obligation/predicate/disposition remodel and the rename are what this decision now locks. 
Still open: whether the register eventually thins the `goal` axis (D59-L), and live per-turn ranking. Depends on: D8-L, D30-L, D45-L, D57-L, D59-L, D60-L, D63-L, D64-L, D74-L. Refined by: D75-L (gaps reference graph node kinds via `refersTo: NodeKind`; the parallel grounding-typology catalog and the closed gap-`name` enum are retired — substrate, predicate union, disposition, and anti-shadowing line are unchanged). Supersedes: the `elicitation_backlog` name and its question-instance / `open | closed`-status model, treating `unknown` as a graph node kind, and any readiness-grade-projection-over-open-counts as authority. +- **D74-L — Capability-readiness is a just-in-time, capability-relative judgment over relevant gaps — it replaces the standing grade gate.** When a capability is requested (a generative lens, `propose-graph`, `project-graph`, commitment review, eventual export), the agent evaluates readiness *for that capability* against the `elicitation_gaps` (D65-L) declared relevant to it. The `capability → relevant gaps` map is **explicit** and subsumes the retired `STRATEGY_MIN_GRADE` / `GOAL_MIN_GRADE` / `LENS_MIN_GRADE` thresholds in `runtime-policy.ts`, which were a lossy grade-proxy for "enough grounding". Structurally-obvious relevant gaps (`presence` / `field` / `coverage`) are checked **mechanically** (cheap, no LLM); non-obvious (`manual`) ones consume an **LLM satisficiency judgment** (D57-L). The outcome is one of **proceed**, **proceed at low epistemic status** (density-scaled, D30-L), or **negotiate** — surface an `establishment_offer` ("I can, but answer X and Y first", D32-L). Capability-readiness fires **on request, reactive-primary** (proactive nudges are a separate later concern) and is the **only readiness gate**: it never bars attempting work, it scales/negotiates. 
This resolves the prior "lens is never gated" (`ELICITATION_LENSES.md`) vs `LENS_MIN_GRADE` contradiction (lenses are not grade-gated; readiness is JIT-judged) and dissolves the grade-ratchet / two-value problem (the soft `readiness estimate`, D45-L, gates nothing and may regress honestly). A future structural milestone gate for export/plan/execute op-modes is deferred (D45-L) until such an op-mode exists. Depends on: D25-L, D26-L, D30-L, D32-L, D45-L, D57-L, D59-L, D65-L. Refined by: D75-L (the `capability → relevant gaps` map references node kinds, not a closed typology-name enum). Supersedes: `GRADE_RANK`-based `MIN_GRADE` hard gating of goal/strategy/lens, and a standing readiness scalar as the authority for capability availability. +- **D75-L — `elicitation_gaps` reference graph node kinds; the parallel grounding-typology vocabulary is retired.** A gap is a **situated question that refers to a graph node kind** (`refersTo: NodeKind`), not an entry in a separate closed "typology" vocabulary. The grounding typology catalog of D65-L (`GROUNDING_GAP_TYPOLOGIES`: floor `domain` / `protagonist` / `pain_pull` / `constraint` + progressive `value` / `context_of_use` / `success_sketch` / `solution_boundary`) was a denormalized, drift-prone copy of the per-kind **source-question rubric** the intent ontology already owns (D56-L; [`docs/design/GRAPH_MODEL.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/GRAPH_MODEL.md#per-plane-node-kinds) §Per-plane node kinds — *"the abstract driver, not a literal question to parrot"*): `domain` / `context_of_use` are facets of `context`; `protagonist` / `pain_pull` of `thesis`; `value` of `goal`; `constraint` / `solution_boundary` of `constraint`; `success_sketch` of `criterion`. Collapsing onto the kind layer yields **one ontology, not two** — the only closed set is `NodeKind` (D54-L/D56-L), already owned by the drizzle-free taxonomy leaf (D73-L). 
Consequences: (1) the closed gap-`name` typology enum and the `RelevantGapName` union (D74-L) are replaced by `refersTo: NodeKind`; the `capability → relevant gaps` map references node kinds — the grounding floor is grounded `context` + `thesis` + `goal` + `constraint`, a graph query rather than a typology lookup, matching how GRAPH_MODEL already frames the grounding gate ("basic intent nodes are central evidence"). (2) Question text stays **free-form and situated**, projected general→specific by the elicitor per active lens/strategy and grounding density; the presence-aliasing limitation (distinct typologies aliasing one node-kind signal, the deferred finding in the now-retired refactor plan) **dissolves**, because discrimination now lives in the free-form question plus the `manual` / `coverage` satisfier (D57-L), not in a blunt `presence` count or a closed name enum. (3) Coverage extends for free to grounding-band kinds the catalog ignored — `term` (the ubiquitous-language anchor) and `assumption`. The flat-table substrate, `disposition`, `predicate` union, `importance` vs derived `coverage`, the anti-shadowing line (the table holds obligation / disposition / meta only, never domain content), `basis` provenance-directness, and band correlation (D64-L) are all **unchanged** — this decision changes how a gap *names its obligation* (by referring to a kind), not the register substrate. The example phrasings per kind are catalogued in [`docs/design/ELICITATION_QUESTIONS.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/ELICITATION_QUESTIONS.md) as a **priming / example layer for the elicitor, not a schema**: brainstorming more questions adds facets/phrasings for existing kinds and never adds ontology. The code remodel (replace `GROUNDING_GAP_TYPOLOGIES` seeding + `RelevantGapName` with `refersTo: NodeKind`; point capability-readiness at node kinds) is a pending frontier, re-scoping the `capability-readiness` follow-ons in `memory/PLAN.md`. 
Depends on: D54-L, D56-L, D57-L, D64-L, D65-L, D73-L, D74-L; A24-L, A27-L. Refines: D30-L, D65-L, D74-L. Supersedes: the grounding typology catalog as a parallel closed gap vocabulary; the closed gap-`name` typology enum and the `RelevantGapName` union; and the retired refactor plan to enshrine `GROUNDING_GAP_TYPOLOGIES` as a canonical const. #### Authority & mutation @@ -513,8 +509,8 @@ src/.pi/ | **Term** | A first-class intent node kind (`kind: "term"`). A canonical naming commitment for ubiquitous language and conceptual consistency. Requires `detail: { definition, aliases? }`. Participates in graph edges: downstream nodes may `dependency`-depend on the term's definition; a term may `boundary`-scope what counts as X; a newer term may `supersession`-replace a prior term. | | **Graph basis** | Provenance-directness field (`explicit | implicit`) on accepted graph nodes and edges: `explicit` when the item came directly from the user (stated or user-reviewed); `implicit` when the agent materialized it from user input after concept-level acceptance. Approval strength is the claim-flavored reading of this axis; the same `explicit | implicit` distinction also applies to non-claim registers such as `elicitation_gaps` (user-raised vs agent-inferred, D65-L). Mutation path lives in `change_log`, not in `basis` (D63-L). | | **Node source** | Free-form string on `GraphNode.source` for epistemic attribution (e.g. "stakeholder", "regulatory", "derived", "agent synthesis"). Convention by prompt, not structural validation. Exists for context-render enrichment — rendered back into sparse text in prompt context, not used for policy or filtering. Not applicable to edges. | -| **Elicitation gap** | A typed coverage *obligation* (a typology of coverage that must be addressed — e.g. "the spec must anchor its primary constraint(s)"), **not** a literal question and not domain content (which lives in the graph). 
Each gap carries both a **name** (typology key — machine identity + display label) and a **rationale** (meta prose: what coverage it represents, why it matters, what satisfies it), plus a band (D64-L), a predicate shape (`presence | field | coverage | manual`), an importance (driver-weight), a derived coverage strength, and a disposition (`open | answered | not_applicable | irrelevant | reopened`). Stored in a flat `elicitation_gaps` table (not a graph node); seeded at spec creation for grounding, generatively spawned for elicitation, derived for commitment. Serves as the elicitor's agenda, the substrate of capability-readiness, and a density signal. The *prospective* sibling of the *retrospective* `reconciliation_need` register. See D65-L. | -| **Grounding typology catalog** | The seeded fixed set of grounding-band gap typologies (D65-L), collated from the D30-L anchor bundle, D57-L Walter drivers, `ELICITATION_LENSES.md`, and shaping kickoff/framing material. **Floor** (gates generative capabilities): `domain`, `protagonist`, `pain_pull`, `constraint`. **Progressive drivers** (enrich, never floor): `value`, `context_of_use`, `success_sketch`, `solution_boundary` (non-goals). | +| **Elicitation gap** | A typed coverage *obligation* — a **situated question that refers to a graph node kind** (`refersTo: NodeKind`, D75-L), **not** a literal queued question and not domain content (which lives in the graph). Each gap carries a free-form question, the node kind it refers to, plus a band (D64-L), a predicate shape (`presence \| field \| coverage \| manual`), an importance (driver-weight), a derived coverage strength, a `rationale`, and a disposition (`open \| answered \| not_applicable \| irrelevant \| reopened`). Stored in a flat `elicitation_gaps` table (not a graph node); seeded at spec creation for grounding, generatively spawned for elicitation, derived for commitment. Serves as the elicitor's agenda, the substrate of capability-readiness, and a density signal. 
The *prospective* sibling of the *retrospective* `reconciliation_need` register. See D65-L (substrate) and D75-L (node-kind reference; the parallel typology vocabulary retired). | +| **Grounding typology catalog** *(retired — D75-L)* | The former seeded fixed set of grounding-band gap typologies (floor `domain` / `protagonist` / `pain_pull` / `constraint`; progressive `value` / `context_of_use` / `success_sketch` / `solution_boundary`). Retired as a parallel closed vocabulary: it was a denormalized copy of the per-kind **source-question rubric** the intent ontology already owns (D56-L). Gaps now refer to graph node kinds directly (D75-L); example question phrasings per kind live in [`docs/design/ELICITATION_QUESTIONS.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/ELICITATION_QUESTIONS.md) as a priming layer, not an enum. | | **Elicitation backlog** *(renamed)* | Former name for the elicitation-gaps register and its question-instance / `open|closed` model. Renamed and reconceived as **elicitation gap** (D65-L). | | **Risk** *(deferred)* | A durable domain-epistemic gap: something nobody knows and cannot economically find out now, requiring strategic accommodation (assumptions, decisions, design/verification/planning) rather than elicitation. Distinct from `assumption` (which proceeds on a believed-but-unprovable value); a risk is upstream and cannot yet pick a value. If adopted it is a first-class intent-plane node kind (it carries real cross-plane edges), not a table; deferred because it reopens the locked kind set (D54-L/D56-L). Future Direction §Vocabulary evolution; D65-L. | | **Node detail** | Optional JSON column on `GraphNode.detail` with per-kind validated sub-structures. `decision` requires `{ chosen_option, rejected, rationale }`; `term` requires `{ definition, aliases? }`. All other kinds omit `detail`. 
| diff --git a/memory/cards/gaps-node-kind-reference--node-kind-reference.md b/memory/cards/gaps-node-kind-reference--node-kind-reference.md new file mode 100644 index 00000000..e8c0d2e2 --- /dev/null +++ b/memory/cards/gaps-node-kind-reference--node-kind-reference.md @@ -0,0 +1,122 @@ +# Gaps reference node kinds (retire the grounding-typology vocabulary) + +Frontier: gaps-node-kind-reference +Status: active +Mode: single +Created: 2026-06-10 + +## Orientation + +- **Seam:** the `elicitation_gaps` obligation register (`src/graph/schema/elicitation-gaps.ts` type, `src/db/schema.ts` table, `src/graph/command-executor.ts` seeding/create, `src/graph/queries.ts` read derivation) and its single consumer `src/projections/session/capability-readiness.ts`. Just built by `elicitation-gaps-remodel` (2026-06-10); D75-L reshapes it. +- **Frontier:** `gaps-node-kind-reference` (PLAN §Frontier Definitions) — heads the readiness chain `elicitation-gaps-remodel` → **this** → `capability-readiness`. +- **Volatile state:** none in HANDOFF; the prior refactor plan is retired and deleted (its catalog-enshrining direction was wrong, absorbed here under D75-L). +- **Open risk (the proving unknown):** does referring to a node kind + a judgment satisfier preserve the discrimination the eight typology names carried, given `thesis` fuses protagonist+pain_pull and `context` fuses domain+context_of_use? The slice must prove two same-kind gaps resolve independently. + +Posture: proving (inherited from gaps-node-kind-reference). + +Cross-cutting obligations carried: +- **Anti-shadowing** (D65-L/D75-L): the table holds obligation/disposition/meta only — never domain content. `refersTo` is a kind tag, `question`/`rationale` are meta prose, not captured answers. +- **Taxonomy ownership** (D73-L): `NodeKind` stays owned by the drizzle-free leaf `src/graph/schema/kinds.ts`; the gaps schema and capability-readiness *import* the union, never redefine it. 
+- **Command/clock boundary** (D4-L/D16-L): gap mutations stay on `CommandExecutor` + the shared `{specId, lsn}` / `change_log` seam — unchanged. + +## Target Behavior + +Grounding gaps name their obligation by referring to a graph node kind (`refersTo: NodeKind` + a free-form `question`) end-to-end — schema, seeding, read derivation, and the capability-readiness gate — with the typology `name` vocabulary (`GROUNDING_GAP_TYPOLOGIES`, the gap-`name` enum, `RelevantGapName`) removed. + +## Full-card cold-start reads + +``` +- memory/SPEC.md — decisions: D75-L (primary), D65-L, D56-L, D54-L, D73-L, D57-L, D64-L; assumptions A24-L, A27-L; invariant I30-L +- memory/PLAN.md — frontier: gaps-node-kind-reference +- docs/design/ELICITATION_QUESTIONS.md — per-kind question priming examples to seed grounding question text +- docs/design/GRAPH_MODEL.md — §Per-plane node kinds (source-question rubric; grounding-band kinds) +- src/graph/schema/elicitation-gaps.ts, src/graph/schema/kinds.ts, src/graph/schema/nodes.ts — current gap type + NodeKind union +- src/db/schema.ts (elicitationGaps table), src/graph/command-executor.ts (SEEDED_ELICITATION_GAPS, CreateElicitationGapInput, seedElicitationGaps, validateCreateElicitationGap), src/graph/queries.ts (rowToElicitationGap) +- src/projections/session/capability-readiness.ts — the only gate consumer +``` + +## Boundary Crossings + +``` +→ createSpec (CommandExecutor.seedElicitationGaps) +→ db elicitation_gaps table (refers_to + question columns; regenerated migration) +→ queries.rowToElicitationGap (read derivation: coverage/answered preserved) +→ projections/session/capability-readiness (capability → NodeKind[] gate) +``` + +## Risks and Assumptions + +``` +- RISK: same-kind gaps (two `thesis`, two `context`) collapse to one signal under the gate + → MITIGATION: the gate aggregates coverage over ALL gaps of a required kind (floor = ≥1 grounded node of that kind), and discrimination lives in question + manual/coverage satisfier — not 
a per-name lookup. Prove with the discrimination probe. +- RISK: `presence` predicate's `nodeKind` field now overlaps `refersTo` + → MITIGATION: keep the predicate union as-is (D75-L: substrate unchanged); `refersTo` is the obligation referent, the predicate is the satisfaction check. Do not merge them in this slice. +- ASSUMPTION: the gate's per-kind coverage aggregation rule is an in-model implementation detail, not a new durable decision (D75-L already fixes the floor + discrimination locus). + → IMPACT IF FALSE: a genuine gate-semantics decision would promote back to ln-spec. + → VALIDATE: the capability-readiness map test + discrimination probe; if the aggregation rule needs a recorded choice, stop and route to ln-spec. + → memory/SPEC.md §Assumptions A27-L +- ASSUMPTION: pre-release free-rewrite — regenerate the migration and seed; no `name`-column or typology residue to preserve. + → IMPACT IF FALSE: would need a data migration; SPEC/PLAN do not require it. + → VALIDATE: AGENTS §development phase posture. +``` + +## Posture check (proving) + +- **Proof of life:** a gap references a node kind end-to-end (seed → store → read → gate) — a new shape lit across the whole seam. +- **Invariants:** locks the D75-L one-ontology seam (gaps reference `NodeKind`; no parallel vocabulary). +- **Uncertainty:** retires the presence-aliasing / same-kind-discrimination unknown the retired refactor plan only deferred (finding #1). The discrimination probe is the tracer that breaks if the model is wrong. + +Scores on all three — build it. 
+ +## Acceptance Criteria + +``` +✓ elicitation-gaps schema test — ElicitationGap carries `refersTo: NodeKind` + `question`; no `name` typology field; no GROUNDING_GAP_TYPOLOGIES export +✓ command-executor seed-set test — createSpec seeds grounding gaps keyed by node kind: floor `context`/`thesis`/`goal`/`constraint` plus `term`/`assumption`; none of the eight literal typology names +✓ command-executor create/validate test — createElicitationGap requires `refersTo` (a valid NodeKind) + non-empty `question`; rejects an invalid kind +✓ queries read test — rowToElicitationGap maps `refers_to`/`question`; live presence-derived coverage/answered still flips from graph truth with sibling-spec isolation +✓ capability-readiness map test — CAPABILITY_RELEVANT_GAPS is a `Record` mapping each capability to `NodeKind[]`; grounding floor = context+thesis+goal+constraint; a required kind with zero referring gaps in the register fails loud (config bug ≠ uncovered) +✓ discrimination probe (proving) — two `thesis`-referring gaps with different questions resolve independently (one covered, one open) rather than aliasing to a single presence count +✓ no residue — `rg "GROUNDING_GAP_TYPOLOGIES|RelevantGapName|gap\.name"` over src returns nothing; migration regenerated with `refers_to`/`question`, no `name` column +``` + +## Verification Approach + +``` +- Inner: vitest unit — schema/type, seed-set, create/validate, queries read derivation, capability-readiness map + loud-fail (npm run verify gate) +- Middle: the discrimination probe — two same-kind gaps resolved independently via question + satisfier (retires the proving unknown) +- Outer: per-spec seeded read-back over a freshly created spec (existing observed-shapes / read-back probe extended) +``` + +## Cross-cutting obligations + +``` +- Anti-shadowing: table stores obligation/disposition/meta only, never domain content (D65-L/D75-L) +- NodeKind union owned by the drizzle-free leaf graph/schema/kinds.ts; import, never redefine (D73-L) +- Mutations stay on CommandExecutor + shared 
{specId, lsn} / change_log clock (D4-L/D16-L) +- Reconcile topology READMEs that name the catalog/seeding: src/graph/README.md, src/db/README.md, src/projections/README.md +``` + +## Expected touched paths (tentative) + +``` +src/graph/schema/ +├── elicitation-gaps.ts ~ (name → refersTo: NodeKind + question) +drizzle/ +├── 0004_*.sql + (regenerated migration: refers_to + question) +└── meta/ ~ (snapshot) +src/db/ +├── schema.ts ~ (elicitation_gaps: refers_to + question columns) +└── README.md ~ +src/graph/ +├── command-executor.ts ~ (SEEDED_ELICITATION_GAPS → seed by kind; CreateElicitationGapInput; seedElicitationGaps; validate) +├── command-executor.test.ts ~ +├── queries.ts ~ (rowToElicitationGap) +├── queries.test.ts ~ +├── observed-shapes-coverage.test.ts ? +└── README.md ~ +src/projections/ +├── session/capability-readiness.ts ~ (RelevantGapName → NodeKind; capability → NodeKind[]; loud-fail; discrimination) +├── session/capability-readiness.test.ts ~ +└── README.md ~ +``` From 74aa041841136c7dd677858ab4a96a412b3001b6 Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Wed, 10 Jun 2026 18:36:45 +0200 Subject: [PATCH 4/4] final slice of elicitation gaps I --- drizzle/0004_gaps_node_kind_reference.sql | 3 + drizzle/meta/0004_snapshot.json | 788 ++++++++++++++++++ drizzle/meta/_journal.json | 7 + memory/PLAN.md | 10 +- memory/SPEC.md | 8 +- ...ode-kind-reference--node-kind-reference.md | 122 --- src/db/README.md | 8 +- src/db/schema.ts | 3 +- src/graph/README.md | 8 +- src/graph/command-executor.test.ts | 58 +- src/graph/command-executor.ts | 85 +- src/graph/queries.test.ts | 15 +- src/graph/queries.ts | 3 +- src/graph/schema/elicitation-gaps.ts | 3 +- src/projections/README.md | 4 +- .../session/capability-readiness.test.ts | 121 +-- .../session/capability-readiness.ts | 58 +- 17 files changed, 1011 insertions(+), 293 deletions(-) create mode 100644 drizzle/0004_gaps_node_kind_reference.sql create mode 100644 drizzle/meta/0004_snapshot.json delete mode 100644 
memory/cards/gaps-node-kind-reference--node-kind-reference.md diff --git a/drizzle/0004_gaps_node_kind_reference.sql b/drizzle/0004_gaps_node_kind_reference.sql new file mode 100644 index 00000000..695d3b0b --- /dev/null +++ b/drizzle/0004_gaps_node_kind_reference.sql @@ -0,0 +1,3 @@ +ALTER TABLE `elicitation_gaps` ADD `refers_to` text NOT NULL;--> statement-breakpoint +ALTER TABLE `elicitation_gaps` ADD `question` text NOT NULL;--> statement-breakpoint +ALTER TABLE `elicitation_gaps` DROP COLUMN `name`; \ No newline at end of file diff --git a/drizzle/meta/0004_snapshot.json b/drizzle/meta/0004_snapshot.json new file mode 100644 index 00000000..205508df --- /dev/null +++ b/drizzle/meta/0004_snapshot.json @@ -0,0 +1,788 @@ +{ + "version": "6", + "dialect": "sqlite", + "id": "4ae1e0f1-9aa2-4188-b2b0-1c9b2b39a320", + "prevId": "d9b2bf4a-2462-4820-b5ef-4ad514c15a1d", + "tables": { + "change_log": { + "name": "change_log", + "columns": { + "spec_id": { + "name": "spec_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "lsn": { + "name": "lsn", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "operation": { + "name": "operation", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "payload": { + "name": "payload", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "created_at": { + "name": "created_at", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "(datetime('now'))" + } + }, + "indexes": {}, + "foreignKeys": { + "change_log_spec_id_specs_id_fk": { + "name": "change_log_spec_id_specs_id_fk", + "tableFrom": "change_log", + "tableTo": "specs", + "columnsFrom": [ + "spec_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": { + "change_log_spec_lsn_pk": { + "columns": [ + 
"spec_id", + "lsn" + ], + "name": "change_log_spec_lsn_pk" + } + }, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "edges": { + "name": "edges", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "spec_id": { + "name": "spec_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "category": { + "name": "category", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "source_id": { + "name": "source_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "target_id": { + "name": "target_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "stance": { + "name": "stance", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "basis": { + "name": "basis", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'explicit'" + }, + "rationale": { + "name": "rationale", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at_lsn": { + "name": "created_at_lsn", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "updated_at_lsn": { + "name": "updated_at_lsn", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "edges_spec_id_specs_id_fk": { + "name": "edges_spec_id_specs_id_fk", + "tableFrom": "edges", + "tableTo": "specs", + "columnsFrom": [ + "spec_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "edges_source_id_nodes_id_fk": { + "name": "edges_source_id_nodes_id_fk", + "tableFrom": "edges", + "tableTo": "nodes", + "columnsFrom": [ + "source_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", 
+ "onUpdate": "no action" + }, + "edges_target_id_nodes_id_fk": { + "name": "edges_target_id_nodes_id_fk", + "tableFrom": "edges", + "tableTo": "nodes", + "columnsFrom": [ + "target_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "elicitation_gaps": { + "name": "elicitation_gaps", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "spec_id": { + "name": "spec_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "refers_to": { + "name": "refers_to", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "question": { + "name": "question", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "rationale": { + "name": "rationale", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "disposition": { + "name": "disposition", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'open'" + }, + "basis": { + "name": "basis", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'explicit'" + }, + "readiness_band": { + "name": "readiness_band", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "predicate_kind": { + "name": "predicate_kind", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "predicate": { + "name": "predicate", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "importance": { + "name": "importance", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": 1 + }, + "plane_affinity": { + "name": "plane_affinity", + "type": 
"text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "lens_affinity": { + "name": "lens_affinity", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "arose_from_gap_id": { + "name": "arose_from_gap_id", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "resolved_by_node_id": { + "name": "resolved_by_node_id", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at_lsn": { + "name": "created_at_lsn", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "disposition_set_at_lsn": { + "name": "disposition_set_at_lsn", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "elicitation_gaps_spec_id_specs_id_fk": { + "name": "elicitation_gaps_spec_id_specs_id_fk", + "tableFrom": "elicitation_gaps", + "tableTo": "specs", + "columnsFrom": [ + "spec_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "elicitation_gaps_arose_from_gap_id_elicitation_gaps_id_fk": { + "name": "elicitation_gaps_arose_from_gap_id_elicitation_gaps_id_fk", + "tableFrom": "elicitation_gaps", + "tableTo": "elicitation_gaps", + "columnsFrom": [ + "arose_from_gap_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "elicitation_gaps_resolved_by_node_id_nodes_id_fk": { + "name": "elicitation_gaps_resolved_by_node_id_nodes_id_fk", + "tableFrom": "elicitation_gaps", + "tableTo": "nodes", + "columnsFrom": [ + "resolved_by_node_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "graph_clock": { + "name": "graph_clock", + "columns": { + "spec_id": { + "name": "spec_id", + "type": "integer", + 
"primaryKey": true, + "notNull": true, + "autoincrement": false + }, + "lsn": { + "name": "lsn", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": 0 + } + }, + "indexes": {}, + "foreignKeys": { + "graph_clock_spec_id_specs_id_fk": { + "name": "graph_clock_spec_id_specs_id_fk", + "tableFrom": "graph_clock", + "tableTo": "specs", + "columnsFrom": [ + "spec_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "node_kind_counters": { + "name": "node_kind_counters", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "spec_id": { + "name": "spec_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "plane": { + "name": "plane", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "kind": { + "name": "kind", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "next_ordinal": { + "name": "next_ordinal", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": 1 + } + }, + "indexes": { + "node_kind_counters_spec_plane_kind_unique": { + "name": "node_kind_counters_spec_plane_kind_unique", + "columns": [ + "spec_id", + "plane", + "kind" + ], + "isUnique": true + } + }, + "foreignKeys": { + "node_kind_counters_spec_id_specs_id_fk": { + "name": "node_kind_counters_spec_id_specs_id_fk", + "tableFrom": "node_kind_counters", + "tableTo": "specs", + "columnsFrom": [ + "spec_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "nodes": { + "name": "nodes", + "columns": { + "id": { + "name": "id", + "type": "integer", 
+ "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "spec_id": { + "name": "spec_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "plane": { + "name": "plane", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "kind": { + "name": "kind", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "kind_ordinal": { + "name": "kind_ordinal", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "title": { + "name": "title", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "body": { + "name": "body", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "basis": { + "name": "basis", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'explicit'" + }, + "source": { + "name": "source", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "detail": { + "name": "detail", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at_lsn": { + "name": "created_at_lsn", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "updated_at_lsn": { + "name": "updated_at_lsn", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + } + }, + "indexes": { + "nodes_spec_plane_kind_ordinal_unique": { + "name": "nodes_spec_plane_kind_ordinal_unique", + "columns": [ + "spec_id", + "plane", + "kind", + "kind_ordinal" + ], + "isUnique": true + } + }, + "foreignKeys": { + "nodes_spec_id_specs_id_fk": { + "name": "nodes_spec_id_specs_id_fk", + "tableFrom": "nodes", + "tableTo": "specs", + "columnsFrom": [ + "spec_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + 
"compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "reconciliation_need": { + "name": "reconciliation_need", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "spec_id": { + "name": "spec_id", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "target_kind": { + "name": "target_kind", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "target_edge_id": { + "name": "target_edge_id", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "target_a_id": { + "name": "target_a_id", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "target_b_id": { + "name": "target_b_id", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "kind": { + "name": "kind", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "status": { + "name": "status", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'open'" + }, + "reason": { + "name": "reason", + "type": "text", + "primaryKey": false, + "notNull": false, + "autoincrement": false + }, + "created_at_lsn": { + "name": "created_at_lsn", + "type": "integer", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "resolved_at_lsn": { + "name": "resolved_at_lsn", + "type": "integer", + "primaryKey": false, + "notNull": false, + "autoincrement": false + } + }, + "indexes": {}, + "foreignKeys": { + "reconciliation_need_spec_id_specs_id_fk": { + "name": "reconciliation_need_spec_id_specs_id_fk", + "tableFrom": "reconciliation_need", + "tableTo": "specs", + "columnsFrom": [ + "spec_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + 
"reconciliation_need_target_edge_id_edges_id_fk": { + "name": "reconciliation_need_target_edge_id_edges_id_fk", + "tableFrom": "reconciliation_need", + "tableTo": "edges", + "columnsFrom": [ + "target_edge_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "reconciliation_need_target_a_id_nodes_id_fk": { + "name": "reconciliation_need_target_a_id_nodes_id_fk", + "tableFrom": "reconciliation_need", + "tableTo": "nodes", + "columnsFrom": [ + "target_a_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + }, + "reconciliation_need_target_b_id_nodes_id_fk": { + "name": "reconciliation_need_target_b_id_nodes_id_fk", + "tableFrom": "reconciliation_need", + "tableTo": "nodes", + "columnsFrom": [ + "target_b_id" + ], + "columnsTo": [ + "id" + ], + "onDelete": "no action", + "onUpdate": "no action" + } + }, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + }, + "specs": { + "name": "specs", + "columns": { + "id": { + "name": "id", + "type": "integer", + "primaryKey": true, + "notNull": true, + "autoincrement": true + }, + "name": { + "name": "name", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "slug": { + "name": "slug", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false + }, + "readiness_grade": { + "name": "readiness_grade", + "type": "text", + "primaryKey": false, + "notNull": true, + "autoincrement": false, + "default": "'grounding_onboarding'" + } + }, + "indexes": {}, + "foreignKeys": {}, + "compositePrimaryKeys": {}, + "uniqueConstraints": {}, + "checkConstraints": {} + } + }, + "views": {}, + "enums": {}, + "_meta": { + "schemas": {}, + "tables": {}, + "columns": {} + }, + "internal": { + "indexes": {} + } +} \ No newline at end of file diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json index 019f38df..b2834794 100644 --- a/drizzle/meta/_journal.json 
+++ b/drizzle/meta/_journal.json @@ -29,6 +29,13 @@ "when": 1780904720280, "tag": "0003_outstanding_black_bird", "breakpoints": true + }, + { + "idx": 4, + "version": "6", + "when": 1781108939451, + "tag": "0004_gaps_node_kind_reference", + "breakpoints": true } ] } \ No newline at end of file diff --git a/memory/PLAN.md b/memory/PLAN.md index e96830e0..f38e7bcd 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -88,8 +88,8 @@ per ledger row: Post-`ln-spec` implications that are **upstream** of the context-pipeline trio's readiness/chrome-touching locks (see Context §Readiness / elicitation-gaps remodel). Land the hard chain before stage 1 freezes `workspace/workspace-state` + `session/runtime-state` shapes, or bracket those fields in the trio. -1. `gaps-node-kind-reference` — **depends on `elicitation-gaps-remodel` (done).** Reshape the gaps substrate onto node kinds per D75-L: `refersTo: NodeKind` + a free-form `question` replaces the typology `name` enum; reseed grounding by node kind (floor `context`/`thesis`/`goal`/`constraint` plus the now-covered `term`/`assumption`); `capability → NodeKind[]` replaces `RelevantGapName`. Absorbs the retired refactor plan (folded into D75-L). Upstream of `capability-readiness`. -2. `capability-readiness` — **depends on `gaps-node-kind-reference`.** Replace the stored-grade gate (`readiness_grade`, `updateReadinessGrade`, `READINESS_GRADES`, `MIN_GRADE` proxies) with JIT capability→relevant-gaps judgment over the node-kind map; add the soft derived `readiness estimate` (UI-only); remove `chrome.phase` / `chrome.chatMode`. +1. `gaps-node-kind-reference` — **done 2026-06-10.** Reshaped the gaps substrate onto node kinds per D75-L: `refersTo: NodeKind` + a free-form `question` replaced the typology `name` enum; reseeded grounding by node kind (floor `context`/`thesis`/`goal`/`constraint` plus `term`/`assumption`); `capability → NodeKind[]` replaced `RelevantGapName`. Absorbed the retired refactor plan (folded into D75-L). +2. 
`capability-readiness` — **depends on `gaps-node-kind-reference` (done).** Replace the stored-grade gate (`readiness_grade`, `updateReadinessGrade`, `READINESS_GRADES`, `MIN_GRADE` proxies) with JIT capability→relevant-gaps judgment over the node-kind map; add the soft derived `readiness estimate` (UI-only); remove `chrome.phase` / `chrome.chatMode`. ### Next @@ -206,7 +206,7 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - **Name:** Gaps reference node kinds; retire the grounding-typology vocabulary (D75-L) - **Linear:** unassigned — create in FE / brunch when the frontier starts. - **Kind:** structural -- **Status:** next (heads the readiness chain, ahead of `capability-readiness`) +- **Status:** done - **Certainty:** proving - **Depends on:** `elicitation-gaps-remodel` (done — reshapes its `name`-typology output onto node kinds). - **Retires:** the `GROUNDING_GAP_TYPOLOGIES` seed catalog (8 typology names), the closed gap-`name` typology enum, and `capability-readiness`'s `RelevantGapName` union (D75-L); absorbs the retired refactor plan, folded into D75-L (do not enshrine the catalog). @@ -224,7 +224,7 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - **Cross-cutting obligations:** anti-shadowing (D65-L/D75-L) — the table never stores domain content; the `NodeKind` union stays owned by the drizzle-free leaf `graph/schema/kinds.ts` (D73-L) — gaps import it, never redefine it; the `CommandExecutor` boundary + shared `{specId, lsn}` / `change_log` clock are unchanged. - **Traceability:** D54-L, D56-L, D57-L, D60-L, D64-L, D65-L, D73-L, D74-L, D75-L / A24-L, A27-L / I30-L. Supersedes the grounding typology catalog, the gap-`name` typology enum, and `RelevantGapName`; absorbs the retired refactor plan. 
- **Design docs:** `memory/SPEC.md` D75-L / D65-L; `docs/design/ELICITATION_QUESTIONS.md`; `src/graph/schema/elicitation-gaps.ts`; `src/graph/command-executor.ts`; `src/projections/session/capability-readiness.ts`; `src/graph/README.md`; `src/db/README.md`; `src/projections/README.md`. -- **Current execution pointer:** `memory/cards/gaps-node-kind-reference--node-kind-reference.md` (active, single full card). +- **Current execution pointer:** Done 2026-06-10. Replaced gap `name` with `refersTo: NodeKind` + `question` across schema, DB, `CommandExecutor`, reads, and capability-readiness; added migration `0004_gaps_node_kind_reference`; reseeded grounding by node kind (`context`, `thesis`, `goal`, `constraint`, plus `term`/`assumption`); proved live presence coverage still flips, required-kind absence fails loud, and two `thesis` gaps discriminate independently by question+satisfier. Topology READMEs reconciled. ### capability-readiness @@ -665,7 +665,7 @@ nodes: renderer-golden-coverage [next · coverage] TRIO stage 2 (#render, RENDER): create renderer ledger + golden-lock every durable renderer; depends on projection-shape-coverage prompt-composition-golden-coverage [next · coverage] TRIO stage 3 (#compose, COMPOSE): composed-prompt preview + golden-lock partials/composition matrix; depends on renderer-golden-coverage elicitation-gaps-remodel [done · proving] remodeled elicitation_gaps obligation register; live presence derivation (grounding typology catalog superseded by gaps-node-kind-reference, D75-L) - gaps-node-kind-reference [next · proving] reshape gaps onto node kinds (refersTo NodeKind + question); reseed grounding by kind; capability->NodeKind[] replaces RelevantGapName; absorbs retired refactor plan (D75-L) + gaps-node-kind-reference [done · proving] D75-L node-kind gap reference landed; typology name/RelevantGapName retired; same-kind discrimination probe covered capability-readiness [next · proving] JIT capability->relevant-gaps gate + readiness 
estimate (UI-only); retire readiness_grade / MIN_GRADE / chrome.phase+chatMode runtime-vocab-leaf [parallel · proving] src/session/schema/kinds.ts source-of-truth leaf for op_mode/strategy/lens/goal (D73-L direction); decision-3 follow-on elicitation-driver [after-trio · proving] live per-turn what-to-ask-next driver on remodeled elicitation_gaps; rides COMPOSE oracle; closes cross-cut Seam 3a diff --git a/memory/SPEC.md b/memory/SPEC.md index c1ec5a5e..eb0b0925 100644 --- a/memory/SPEC.md +++ b/memory/SPEC.md @@ -118,10 +118,10 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c | A20-L | The chosen Drizzle line and row-schema derivation path can be settled during the prep envelope without forcing later M4 rework: Brunch can prove migrations, SQLite fidelity, monotonic counter allocation, change-log writes, and runtime-schema derivation on one representative persistence slice before CRUD proper starts. | high | **validated** | D16-L, D41-L | **Validated by A20-L spike (2026-06-01).** Stack: `drizzle-orm@0.45.2` + `drizzle-kit@0.31.10` + `better-sqlite3@12.8.0` + `drizzle-typebox@0.3.3` + `@sinclair/typebox@0.34.14`. Proved: (1) `drizzle-typebox` derives valid TypeBox insert/select schemas from Drizzle tables; `Value.Check` validates/rejects correctly. (2) Batch `mutateGraph`-shaped transaction (multi-node → intra-batch ref resolution → multi-edge → LSN allocation → change-log append) works atomically; full rollback on FK violation or domain-validation throw. (3) `update().returning()` works for atomic monotonic counter increment; `insert().returning()` gives auto-increment IDs for ref resolution; JSON detail column round-trips cleanly. (4) Pi tool parameters (`typebox` v1.x) and Drizzle row schemas (`@sinclair/typebox` v0.34 via `drizzle-typebox`) serve different roles and never cross — shared enum `const` arrays bridge both. 
| | A21-L | The POC can treat coherence as a bounded product verdict over structural legality plus explicitly detected contradictions, gaps, and unresolved reconciliation needs, without solving a general theory of “spec coherence.” | low | open | D8-L | M8 must sharpen the coherence rubric before implementation: known-bad adversarial briefs should show what counts as incoherent, what is merely immature/underspecified, and what should become a reconciliation need. | | A22-L | The elicitor can perform synchronous post-exchange capture well enough for the POC: high-confidence extractive facts can be committed to the graph immediately and gap dispositions updated, while low-confidence implications can be kept out of graph truth and used as disambiguation material. | medium | partially validated | D18-L, D26-L, D45-L, D65-L, I30-L | 2026-06-05 `capture-response-to-graph` validated the product wiring for narrow labeled text facts (`Goal:`, `Context:`, `Constraint:`, `Criterion:`) on `session.submitExchangeResponse`. 2026-06-07 generalized the same explicit-text capture core onto `session.submitMessage`: ordinary labeled user text now appends to transcript truth, commits through `graph/capture` → `CommandExecutor.mutateGraph({createBasis: explicit, ops})`, targets the transcript binding's spec, and publishes graph invalidations; explicit interruptions are transcript-visible but do not capture or silently answer a pending exchange. 2026-06-08 `capture-quality-spike` added a fixed scenario measurement over free prose, file/ref-bearing prose, and implication-heavy prose; the sample extraction report reached precision 1.0 / recall 1.0 with zero false commits, moving generalized capture from parked evidence-gate to a narrow graduate recommendation with an explicit false-commit guard. Readiness-grade capture remains open fitness evidence. 
| -| A24-L | A flat `elicitation_gaps` table (prospective memory) is sufficient to drive elicitor questioning, seed grounding, and feed capability-readiness without graph structure — gaps are typed coverage obligations (typologies), not graph nodes; apparent dependency among gaps is mediated by the claims their resolution produces. | medium | validated | D65-L, D74-L, D75-L | 2026-06-08 FE-823 materialized the flat table (built as `elicitation_backlog`) on the real LSN/change-log seam. 2026-06-10 `elicitation-gaps-remodel` replaced that question-instance shape with the typed obligation register (`name`/`rationale`/band/predicate/importance/disposition), regenerated the table as `elicitation_gaps`, seeded the grounding typology catalog, and proved live presence-derived coverage/answered read-back without stored structural answers. Remaining downstream proof is capability-readiness over the register (D74-L) and capture-reflection spawning; if genuine gap→gap dependency or rich traversal emerges, promote the table to a plane (rows→nodes, FK pointers→edges). | +| A24-L | A flat `elicitation_gaps` table (prospective memory) is sufficient to drive elicitor questioning, seed grounding, and feed capability-readiness without graph structure — gaps are typed coverage obligations (typologies), not graph nodes; apparent dependency among gaps is mediated by the claims their resolution produces. | medium | validated | D65-L, D74-L, D75-L | 2026-06-08 FE-823 materialized the flat table (built as `elicitation_backlog`) on the real LSN/change-log seam. 2026-06-10 `elicitation-gaps-remodel` replaced that question-instance shape with the typed obligation register, regenerated the table as `elicitation_gaps`, seeded the grounding typology catalog, and proved live presence-derived coverage/answered read-back without stored structural answers; `gaps-node-kind-reference` then retired the catalog/name vocabulary in favor of `refersTo: NodeKind` + free-form `question`. 
Remaining downstream proof is capture-reflection spawning; if genuine gap→gap dependency or rich traversal emerges, promote the table to a plane (rows→nodes, FK pointers→edges). | | A25-L | Tracking the latest `pi-coding-agent` release continuously (via source-alias in dev + package dependency bumps) keeps Brunch adaptable without routinely destabilizing it, because Brunch's pi product-behavior surface is concentrated in a few sealed integration seams (the `src/.pi/` extension bundle and the session/runtime adapters) behind the D39-L profile — even though pi *types* are imported across ~25 files, those are mostly type-only and pass through that small set of seams. | medium | partially validated | D67-L | 2026-06-09 FE-825 bumped Brunch to pi 0.79, kept type/default resolution on installed `dist`, added a `PI_SOURCE`-gated vite/vitest runtime alias to sibling `pi-mono` source, preserved product default sealed-profile/offline behavior, and passed `npm run verify`. Each later pi bump that lands without product-behavior regressions raises confidence; a bump that silently breaks sealed-profile assumptions falsifies it. | | A26-L | The refined "conversational introspection" goal can be built as a *read-only session-query-back tool*: under `BRUNCH_DEV`, the agent can call `brunch_session_query` over `ctx.sessionManager.getBranch()`, find entries by predicate, project capped dot/`[n]`/`[*]` paths, and surface exact returned values in chat without weakening D39-L sealing or turning self-reporting into product behavior. | medium | validated | D69-L, D71-L | 2026-06-09 `dx-introspection-live` slice 2 replaced the earlier fixed structured self-report/schema idea with `src/.pi/extensions/session-query/`: a dev-gated read-only tool registered only through `createBrunchPiExtensions(..., { introspection: { enabled } })`, covered by find/project/truncation unit tests, default-off/default-on registration tests, and a faux turn that returns verbatim projected session values. 
Live-model compliance with "call then echo verbatim" remains outer-loop fitness, not a merge gate. | -| A27-L | Gap satisfaction is expressible band-by-band at acceptable LLM cost: **commitment** typologies are structural `presence`/`field`/`coverage` predicates over the graph; **grounding** typologies are a `presence` floor plus `manual` LLM satisficiency (D57-L); **elicitation** typologies are generatively spawned. The explicit `capability → relevant gaps` map (D74-L) carries enough signal to drive proceed / negotiate without a standing grade. | medium | partially validated | D65-L, D74-L, D75-L | 2026-06-10 `elicitation-gaps-remodel` validated the structural `presence` case: a seeded grounding gap's derived coverage/answered state flips from graph truth with no stored structural answer and sibling-spec isolation holds. 2026-06-10 the `capability-readiness` D74-L gate tracer validated the grounding floor: the explicit capability→gap map drives proceed / proceed_low_epistemic / negotiate, live presence coverage flips a generative capability negotiate→proceed, and the gate imports no grade symbols. Remaining proof: `field`/`coverage` predicate derivation, `manual` LLM satisficiency, elicitation/commitment fixtures, and rewiring consumers off grade thresholds. Falsified if grounding readiness cannot decompose into per-typology presence+manual judgments, or if commitment obligations need logic the predicate union can't express. | +| A27-L | Gap satisfaction is expressible band-by-band at acceptable LLM cost: **commitment** typologies are structural `presence`/`field`/`coverage` predicates over the graph; **grounding** typologies are a `presence` floor plus `manual` LLM satisficiency (D57-L); **elicitation** typologies are generatively spawned. The explicit `capability → relevant gaps` map (D74-L) carries enough signal to drive proceed / negotiate without a standing grade. 
| medium | partially validated | D65-L, D74-L, D75-L | 2026-06-10 `elicitation-gaps-remodel` validated the structural `presence` case: a seeded grounding gap's derived coverage/answered state flips from graph truth with no stored structural answer and sibling-spec isolation holds. 2026-06-10 the `capability-readiness` D74-L gate tracer validated the grounding floor: the explicit capability→gap map drives proceed / proceed_low_epistemic / negotiate, live presence coverage flips a generative capability negotiate→proceed, and the gate imports no grade symbols. 2026-06-10 `gaps-node-kind-reference` collapsed that map onto `NodeKind` (`context`/`thesis`/`goal`/`constraint`), proved required-kind absence fails loud, and proved same-kind gaps discriminate by question+satisfier rather than typology name. Remaining proof: `field`/`coverage` predicate derivation, `manual` LLM satisficiency, elicitation/commitment fixtures, and rewiring consumers off grade thresholds. Falsified if grounding readiness cannot decompose into per-typology presence+manual judgments, or if commitment obligations need logic the predicate union can't express. | ### Active Decisions @@ -154,7 +154,7 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c - **D64-L — Readiness bands are the coarse level of one coverage axis; gap typologies (D65-L) are its finer members. Bands are non-exclusive derived node-kind groupings, not structural legality gates.** Bands are `grounding`, `elicitation`, and `commitment`; each `elicitation_gaps` typology carries exactly one band — band and typology are **one axis at two granularities**, so "bands becoming more differentiated over time" means the typology taxonomy growing, not new bands. A node kind may belong to multiple bands (e.g. `constraint` contributes to grounding as the constraint anchor and to elicitation when it bounds solution space). 
Bands guide what the elicitor is trying to complete, what graph filters and rendered context show, the per-band **readiness estimate** rollup (D45-L), and which gaps a capability-readiness judgment weighs (D74-L). The band's gate-character differs by band: **grounding** is mostly LLM-judged satisficiency with a count floor (D57-L), **elicitation** is generatively spawned (no fixed typology set), **commitment** is more structurally derivable. The `CommandExecutor` must not reject a clear later-band kind merely because of band; readiness controls objectives and capability-judgment, not what graph truth may contain. Depends on: D45-L, D56-L, D57-L, D59-L, D60-L, D65-L. Supersedes: treating the intent `basic | structural | reasoning` category as the readiness taxonomy, treating readiness as a per-kind creation whitelist, or treating bands as a grade rubric for a stored grade. - **D65-L — `elicitation_gaps` are typed coverage *obligations* (typologies) — the elicitor's prospective-memory agenda and the substrate of capability-readiness judgment; they guide and modulate, they never hard-gate.** Renamed and reconceived from `elicitation_backlog`. A gap is a **typology of coverage that must be addressed** (e.g. "the spec must anchor its primary constraint(s)"), **not** a literal queued question and **not** a specific point of unclarity — that would shadow the intent graph, which already owns the content (decisions, assumptions, constraints, …). The original `unknown`/process-vs-domain split still holds: `elicitation_` scopes the term to *process* gaps (knowable by asking), as opposed to the deferred domain-gap `risk` node (Future Direction §Vocabulary evolution). 
Each gap carries **both** a stable **name** (its typology key — machine identity used for seeding, dedup, and the `capability → relevant gaps` map (D74-L), and a short display label) **and** a **rationale** (the *meta* prose: what coverage this obligation represents, why it matters, and what counts as satisfying it — read by the elicitor to phrase the next question and to make a `manual` satisficiency judgment, D57-L). The two are not redundant: the name is for machine identity/reference, the rationale is for agent reasoning and cannot be compressed into a terse key. In addition each gap carries: a **band** (D64-L — its coarse level, one band per typology); a **predicate shape** — a tagged union of `presence` (≥N nodes of a kind/band present), `field` (a `detail` key present), `coverage` (D60-L `lacksEdge` per-member absence), or `manual` (LLM-judged, the D57-L satisficiency residue) — which routes structural-vs-JIT checking (D74-L); an **importance** (driver-weight / count-floor membership / priority — *not* a hard gate); and a derived **coverage** strength (how well addressed). Importance and coverage are deliberately **two fields, not one ambiguous `rating`**: importance is the pre-answer weight, coverage the post-answer derived strength. **Disposition** (`open | answered | not_applicable | irrelevant | reopened`) is stored *only where it is non-derivable* — scope judgments (`not_applicable` / `irrelevant`, which the agent may set in bulk) and `manual` satisficiency — while `answered` for a structural predicate is derived **live** from the graph and never hand-set; this is the anti-shadowing line: the table holds obligation/disposition/meta only, never domain content. `reopened` is a legitimate disposition (new ambiguity can reopen a typology). 
Gaps serve three roles: **agenda** (what to ask / propose next), **judgment drivers** for capability-readiness (D74-L), and a **density signal** that scales generative-output epistemic status (D30-L) — the candidate-proposal / disambiguation UX is precisely how open grounding gaps fill progressively, so an open gap must never wall that UX. Seeding is band-correlated. The **grounding** band has a seeded fixed catalog of typologies collated from the D30-L anchor bundle, the D57-L Walter drivers, [`docs/design/ELICITATION_LENSES.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/ELICITATION_LENSES.md) §grounding bundle, and the shaping kickoff/framing material — a **floor** of `domain` (what kind of thing is being built), `protagonist` (who it is for / most affected), `pain_pull` (what problem/pain/pull drives it), and `constraint` (what binding non-negotiables already shape it) — the anchor bundle that gates generative capabilities (D30-L) — plus softer **progressive drivers** that enrich and focus elicitation but are *never* floor (the no-moving-the-goalpost line): `value` (what value/benefit), `context_of_use` (when/where used), `success_sketch` (how success is measured / what good looks like), and `solution_boundary` (non-goals / what it is explicitly not). **elicitation** gaps are generatively spawned by capture-reflection as preceding answers raise new coverage obligations (no fixed catalog). **commitment** gaps are derived structural predicates over the graph (e.g. "every requirement has a criterion", "every decision records its rejected options", "every invariant has a proof or check"). It remains a **flat table, not a graph plane/node** — its only relations are filter attributes plus FK pointers (`arose_from`, `resolved_by`), a degenerate bipartite graph promotable later only if genuine gap→gap structure emerges; it is the *prospective* sibling of the *retrospective* `reconciliation_need` register (D8-L). 
`basis` applies via provenance-directness (D63-L): user-raised `explicit`, agent-inferred `implicit`. The flat-table substrate, `createSpec` seeding, `CommandExecutor`-routed mutations, and shared spec-local LSN + `change_log` boundary are settled from FE-823 (built as `elicitation_backlog`); the obligation/predicate/disposition remodel and the rename are what this decision now locks. Still open: whether the register eventually thins the `goal` axis (D59-L), and live per-turn ranking. Depends on: D8-L, D30-L, D45-L, D57-L, D59-L, D60-L, D63-L, D64-L, D74-L. Refined by: D75-L (gaps reference graph node kinds via `refersTo: NodeKind`; the parallel grounding-typology catalog and the closed gap-`name` enum are retired — substrate, predicate union, disposition, and anti-shadowing line are unchanged). Supersedes: the `elicitation_backlog` name and its question-instance / `open | closed`-status model, treating `unknown` as a graph node kind, and any readiness-grade-projection-over-open-counts as authority. - **D74-L — Capability-readiness is a just-in-time, capability-relative judgment over relevant gaps — it replaces the standing grade gate.** When a capability is requested (a generative lens, `propose-graph`, `project-graph`, commitment review, eventual export), the agent evaluates readiness *for that capability* against the `elicitation_gaps` (D65-L) declared relevant to it. The `capability → relevant gaps` map is **explicit** and subsumes the retired `STRATEGY_MIN_GRADE` / `GOAL_MIN_GRADE` / `LENS_MIN_GRADE` thresholds in `runtime-policy.ts`, which were a lossy grade-proxy for "enough grounding". Structurally-obvious relevant gaps (`presence` / `field` / `coverage`) are checked **mechanically** (cheap, no LLM); non-obvious (`manual`) ones consume an **LLM satisficiency judgment** (D57-L). 
The outcome is one of **proceed**, **proceed at low epistemic status** (density-scaled, D30-L), or **negotiate** — surface an `establishment_offer` ("I can, but answer X and Y first", D32-L). Capability-readiness fires **on request, reactive-primary** (proactive nudges are a separate later concern) and is the **only readiness gate**: it never bars attempting work, it scales/negotiates. This resolves the prior "lens is never gated" (`ELICITATION_LENSES.md`) vs `LENS_MIN_GRADE` contradiction (lenses are not grade-gated; readiness is JIT-judged) and dissolves the grade-ratchet / two-value problem (the soft `readiness estimate`, D45-L, gates nothing and may regress honestly). A future structural milestone gate for export/plan/execute op-modes is deferred (D45-L) until such an op-mode exists. Depends on: D25-L, D26-L, D30-L, D32-L, D45-L, D57-L, D59-L, D65-L. Refined by: D75-L (the `capability → relevant gaps` map references node kinds, not a closed typology-name enum). Supersedes: `GRADE_RANK`-based `MIN_GRADE` hard gating of goal/strategy/lens, and a standing readiness scalar as the authority for capability availability. -- **D75-L — `elicitation_gaps` reference graph node kinds; the parallel grounding-typology vocabulary is retired.** A gap is a **situated question that refers to a graph node kind** (`refersTo: NodeKind`), not an entry in a separate closed "typology" vocabulary. 
The grounding typology catalog of D65-L (`GROUNDING_GAP_TYPOLOGIES`: floor `domain` / `protagonist` / `pain_pull` / `constraint` + progressive `value` / `context_of_use` / `success_sketch` / `solution_boundary`) was a denormalized, drift-prone copy of the per-kind **source-question rubric** the intent ontology already owns (D56-L; [`docs/design/GRAPH_MODEL.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/GRAPH_MODEL.md#per-plane-node-kinds) §Per-plane node kinds — *"the abstract driver, not a literal question to parrot"*): `domain` / `context_of_use` are facets of `context`; `protagonist` / `pain_pull` of `thesis`; `value` of `goal`; `constraint` / `solution_boundary` of `constraint`; `success_sketch` of `criterion`. Collapsing onto the kind layer yields **one ontology, not two** — the only closed set is `NodeKind` (D54-L/D56-L), already owned by the drizzle-free taxonomy leaf (D73-L). Consequences: (1) the closed gap-`name` typology enum and the `RelevantGapName` union (D74-L) are replaced by `refersTo: NodeKind`; the `capability → relevant gaps` map references node kinds — the grounding floor is grounded `context` + `thesis` + `goal` + `constraint`, a graph query rather than a typology lookup, matching how GRAPH_MODEL already frames the grounding gate ("basic intent nodes are central evidence"). (2) Question text stays **free-form and situated**, projected general→specific by the elicitor per active lens/strategy and grounding density; the presence-aliasing limitation (distinct typologies aliasing one node-kind signal, the deferred finding in the now-retired refactor plan) **dissolves**, because discrimination now lives in the free-form question plus the `manual` / `coverage` satisfier (D57-L), not in a blunt `presence` count or a closed name enum. (3) Coverage extends for free to grounding-band kinds the catalog ignored — `term` (the ubiquitous-language anchor) and `assumption`. 
The flat-table substrate, `disposition`, `predicate` union, `importance` vs derived `coverage`, the anti-shadowing line (the table holds obligation / disposition / meta only, never domain content), `basis` provenance-directness, and band correlation (D64-L) are all **unchanged** — this decision changes how a gap *names its obligation* (by referring to a kind), not the register substrate. The example phrasings per kind are catalogued in [`docs/design/ELICITATION_QUESTIONS.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/ELICITATION_QUESTIONS.md) as a **priming / example layer for the elicitor, not a schema**: brainstorming more questions adds facets/phrasings for existing kinds and never adds ontology. The code remodel (replace `GROUNDING_GAP_TYPOLOGIES` seeding + `RelevantGapName` with `refersTo: NodeKind`; point capability-readiness at node kinds) is a pending frontier, re-scoping the `capability-readiness` follow-ons in `memory/PLAN.md`. Depends on: D54-L, D56-L, D57-L, D64-L, D65-L, D73-L, D74-L; A24-L, A27-L. Refines: D30-L, D65-L, D74-L. Supersedes: the grounding typology catalog as a parallel closed gap vocabulary; the closed gap-`name` typology enum and the `RelevantGapName` union; and the retired refactor plan to enshrine `GROUNDING_GAP_TYPOLOGIES` as a canonical const. +- **D75-L — `elicitation_gaps` reference graph node kinds; the parallel grounding-typology vocabulary is retired.** A gap is a **situated question that refers to a graph node kind** (`refersTo: NodeKind`), not an entry in a separate closed "typology" vocabulary. 
The grounding typology catalog of D65-L (`GROUNDING_GAP_TYPOLOGIES`: floor `domain` / `protagonist` / `pain_pull` / `constraint` + progressive `value` / `context_of_use` / `success_sketch` / `solution_boundary`) was a denormalized, drift-prone copy of the per-kind **source-question rubric** the intent ontology already owns (D56-L; [`docs/design/GRAPH_MODEL.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/GRAPH_MODEL.md#per-plane-node-kinds) §Per-plane node kinds — *"the abstract driver, not a literal question to parrot"*): `domain` / `context_of_use` are facets of `context`; `protagonist` / `pain_pull` of `thesis`; `value` of `goal`; `constraint` / `solution_boundary` of `constraint`; `success_sketch` of `criterion`. Collapsing onto the kind layer yields **one ontology, not two** — the only closed set is `NodeKind` (D54-L/D56-L), already owned by the drizzle-free taxonomy leaf (D73-L). Consequences: (1) the closed gap-`name` typology enum and the `RelevantGapName` union (D74-L) are replaced by `refersTo: NodeKind`; the `capability → relevant gaps` map references node kinds — the grounding floor is grounded `context` + `thesis` + `goal` + `constraint`, a graph query rather than a typology lookup, matching how GRAPH_MODEL already frames the grounding gate ("basic intent nodes are central evidence"). (2) Question text stays **free-form and situated**, projected general→specific by the elicitor per active lens/strategy and grounding density; the presence-aliasing limitation (distinct typologies aliasing one node-kind signal, the deferred finding in the now-retired refactor plan) **dissolves**, because discrimination now lives in the free-form question plus the `manual` / `coverage` satisfier (D57-L), not in a blunt `presence` count or a closed name enum. (3) Coverage extends for free to grounding-band kinds the catalog ignored — `term` (the ubiquitous-language anchor) and `assumption`. 
The flat-table substrate, `disposition`, `predicate` union, `importance` vs derived `coverage`, the anti-shadowing line (the table holds obligation / disposition / meta only, never domain content), `basis` provenance-directness, and band correlation (D64-L) are all **unchanged** — this decision changes how a gap *names its obligation* (by referring to a kind), not the register substrate. The example phrasings per kind are catalogued in [`docs/design/ELICITATION_QUESTIONS.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/ELICITATION_QUESTIONS.md) as a **priming / example layer for the elicitor, not a schema**: brainstorming more questions adds facets/phrasings for existing kinds and never adds ontology. The code remodel landed 2026-06-10: `ElicitationGap` and the table now carry `refersTo: NodeKind` + free-form `question`, `createSpec` seeds grounding gaps by node kind (`context`, `thesis`, `goal`, `constraint`, plus `term`/`assumption`), and capability-readiness points at a `capability → NodeKind[]` map with loud failure for a missing required kind. Depends on: D54-L, D56-L, D57-L, D64-L, D65-L, D73-L, D74-L; A24-L, A27-L. Refines: D30-L, D65-L, D74-L. Supersedes: the grounding typology catalog as a parallel closed gap vocabulary; the closed gap-`name` typology enum and the `RelevantGapName` union; and the retired refactor plan to enshrine `GROUNDING_GAP_TYPOLOGIES` as a canonical const. #### Authority & mutation @@ -509,7 +509,7 @@ src/.pi/ | **Term** | A first-class intent node kind (`kind: "term"`). A canonical naming commitment for ubiquitous language and conceptual consistency. Requires `detail: { definition, aliases? }`. Participates in graph edges: downstream nodes may `dependency`-depend on the term's definition; a term may `boundary`-scope what counts as X; a newer term may `supersession`-replace a prior term. 
| | **Graph basis** | Provenance-directness field (`explicit | implicit`) on accepted graph nodes and edges: `explicit` when the item came directly from the user (stated or user-reviewed); `implicit` when the agent materialized it from user input after concept-level acceptance. Approval strength is the claim-flavored reading of this axis; the same `explicit | implicit` distinction also applies to non-claim registers such as `elicitation_gaps` (user-raised vs agent-inferred, D65-L). Mutation path lives in `change_log`, not in `basis` (D63-L). | | **Node source** | Free-form string on `GraphNode.source` for epistemic attribution (e.g. "stakeholder", "regulatory", "derived", "agent synthesis"). Convention by prompt, not structural validation. Exists for context-render enrichment — rendered back into sparse text in prompt context, not used for policy or filtering. Not applicable to edges. | -| **Elicitation gap** | A typed coverage *obligation* — a **situated question that refers to a graph node kind** (`refersTo: NodeKind`, D75-L), **not** a literal queued question and not domain content (which lives in the graph). Each gap carries a free-form question, the node kind it refers to, plus a band (D64-L), a predicate shape (`presence | field | coverage | manual`), an importance (driver-weight), a derived coverage strength, a `rationale`, and a disposition (`open | answered | not_applicable | irrelevant | reopened`). Stored in a flat `elicitation_gaps` table (not a graph node); seeded at spec creation for grounding, generatively spawned for elicitation, derived for commitment. Serves as the elicitor's agenda, the substrate of capability-readiness, and a density signal. The *prospective* sibling of the *retrospective* `reconciliation_need` register. See D65-L (substrate) and D75-L (node-kind reference; the parallel typology vocabulary retired). 
| +| **Elicitation gap** | A typed coverage *obligation* — a **situated question that refers to a graph node kind** (`refersTo: NodeKind`, D75-L), **not** a literal queued question and not domain content (which lives in the graph). Each gap carries a free-form `question`, the node kind it refers to, plus a band (D64-L), a predicate shape (`presence | field | coverage | manual`), an importance (driver-weight), a derived coverage strength, a `rationale`, and a disposition (`open | answered | not_applicable | irrelevant | reopened`). Stored in a flat `elicitation_gaps` table (not a graph node); seeded at spec creation for grounding, generatively spawned for elicitation, derived for commitment. Serves as the elicitor's agenda, the substrate of capability-readiness, and a density signal. The *prospective* sibling of the *retrospective* `reconciliation_need` register. See D65-L (substrate) and D75-L (node-kind reference; the parallel typology vocabulary retired). | | **Grounding typology catalog** *(retired — D75-L)* | The former seeded fixed set of grounding-band gap typologies (floor `domain` / `protagonist` / `pain_pull` / `constraint`; progressive `value` / `context_of_use` / `success_sketch` / `solution_boundary`). Retired as a parallel closed vocabulary: it was a denormalized copy of the per-kind **source-question rubric** the intent ontology already owns (D56-L). Gaps now refer to graph node kinds directly (D75-L); example question phrasings per kind live in [`docs/design/ELICITATION_QUESTIONS.md`](file:///Users/lunelson/Code/hashintel/brunch-next/docs/design/ELICITATION_QUESTIONS.md) as a priming layer, not an enum. | | **Elicitation backlog** *(renamed)* | Former name for the elicitation-gaps register and its question-instance / `open|closed` model. Renamed and reconceived as **elicitation gap** (D65-L). 
| | **Risk** *(deferred)* | A durable domain-epistemic gap: something nobody knows and cannot economically find out now, requiring strategic accommodation (assumptions, decisions, design/verification/planning) rather than elicitation. Distinct from `assumption` (which proceeds on a believed-but-unprovable value); a risk is upstream and cannot yet pick a value. If adopted it is a first-class intent-plane node kind (it carries real cross-plane edges), not a table; deferred because it reopens the locked kind set (D54-L/D56-L). Future Direction §Vocabulary evolution; D65-L. | diff --git a/memory/cards/gaps-node-kind-reference--node-kind-reference.md b/memory/cards/gaps-node-kind-reference--node-kind-reference.md deleted file mode 100644 index e8c0d2e2..00000000 --- a/memory/cards/gaps-node-kind-reference--node-kind-reference.md +++ /dev/null @@ -1,122 +0,0 @@ -# Gaps reference node kinds (retire the grounding-typology vocabulary) - -Frontier: gaps-node-kind-reference -Status: active -Mode: single -Created: 2026-06-10 - -## Orientation - -- **Seam:** the `elicitation_gaps` obligation register (`src/graph/schema/elicitation-gaps.ts` type, `src/db/schema.ts` table, `src/graph/command-executor.ts` seeding/create, `src/graph/queries.ts` read derivation) and its single consumer `src/projections/session/capability-readiness.ts`. Just built by `elicitation-gaps-remodel` (2026-06-10); D75-L reshapes it. -- **Frontier:** `gaps-node-kind-reference` (PLAN §Frontier Definitions) — heads the readiness chain `elicitation-gaps-remodel` → **this** → `capability-readiness`. -- **Volatile state:** none in HANDOFF; the prior refactor plan is retired and deleted (its catalog-enshrining direction was wrong, absorbed here under D75-L). -- **Open risk (the proving unknown):** does referring to a node kind + a judgment satisfier preserve the discrimination the eight typology names carried, given `thesis` fuses protagonist+pain_pull and `context` fuses domain+context_of_use? 
The slice must prove two same-kind gaps resolve independently. - -Posture: proving (inherited from gaps-node-kind-reference). - -Cross-cutting obligations carried: -- **Anti-shadowing** (D65-L/D75-L): the table holds obligation/disposition/meta only — never domain content. `refersTo` is a kind tag, `question`/`rationale` are meta prose, not captured answers. -- **Taxonomy ownership** (D73-L): `NodeKind` stays owned by the drizzle-free leaf `src/graph/schema/kinds.ts`; the gaps schema and capability-readiness *import* the union, never redefine it. -- **Command/clock boundary** (D4-L/D16-L): gap mutations stay on `CommandExecutor` + the shared `{specId, lsn}` / `change_log` seam — unchanged. - -## Target Behavior - -Grounding gaps name their obligation by referring to a graph node kind (`refersTo: NodeKind` + a free-form `question`) end-to-end — schema, seeding, read derivation, and the capability-readiness gate — with the typology `name` vocabulary (`GROUNDING_GAP_TYPOLOGIES`, the gap-`name` enum, `RelevantGapName`) removed. 
- -## Full-card cold-start reads - -``` -- memory/SPEC.md — decisions: D75-L (primary), D65-L, D56-L, D54-L, D73-L, D57-L, D64-L; assumptions A24-L, A27-L; invariant I30-L -- memory/PLAN.md — frontier: gaps-node-kind-reference -- docs/design/ELICITATION_QUESTIONS.md — per-kind question priming examples to seed grounding question text -- docs/design/GRAPH_MODEL.md — §Per-plane node kinds (source-question rubric; grounding-band kinds) -- src/graph/schema/elicitation-gaps.ts, src/graph/schema/kinds.ts, src/graph/schema/nodes.ts — current gap type + NodeKind union -- src/db/schema.ts (elicitationGaps table), src/graph/command-executor.ts (SEEDED_ELICITATION_GAPS, CreateElicitationGapInput, seedElicitationGaps, validateCreateElicitationGap), src/graph/queries.ts (rowToElicitationGap) -- src/projections/session/capability-readiness.ts — the only gate consumer -``` - -## Boundary Crossings - -``` -→ createSpec (CommandExecutor.seedElicitationGaps) -→ db elicitation_gaps table (refers_to + question columns; regenerated migration) -→ queries.rowToElicitationGap (read derivation: coverage/answered preserved) -→ projections/session/capability-readiness (capability → NodeKind[] gate) -``` - -## Risks and Assumptions - -``` -- RISK: same-kind gaps (two `thesis`, two `context`) collapse to one signal under the gate - → MITIGATION: the gate aggregates coverage over ALL gaps of a required kind (floor = ≥1 grounded node of that kind), and discrimination lives in question + manual/coverage satisfier — not a per-name lookup. Prove with the discrimination probe. -- RISK: `presence` predicate's `nodeKind` field now overlaps `refersTo` - → MITIGATION: keep the predicate union as-is (D75-L: substrate unchanged); `refersTo` is the obligation referent, the predicate is the satisfaction check. Do not merge them in this slice. 
-- ASSUMPTION: the gate's per-kind coverage aggregation rule is an in-model implementation detail, not a new durable decision (D75-L already fixes the floor + discrimination locus). - → IMPACT IF FALSE: a genuine gate-semantics decision would promote back to ln-spec. - → VALIDATE: the capability-readiness map test + discrimination probe; if the aggregation rule needs a recorded choice, stop and route to ln-spec. - → memory/SPEC.md §Assumptions A27-L -- ASSUMPTION: pre-release free-rewrite — regenerate the migration and seed; no `name`-column or typology residue to preserve. - → IMPACT IF FALSE: would need a data migration; SPEC/PLAN do not require it. - → VALIDATE: AGENTS §development phase posture. -``` - -## Posture check (proving) - -- **Proof of life:** a gap references a node kind end-to-end (seed → store → read → gate) — a new shape lit across the whole seam. -- **Invariants:** locks the D75-L one-ontology seam (gaps reference `NodeKind`; no parallel vocabulary). -- **Uncertainty:** retires the presence-aliasing / same-kind-discrimination unknown the retired refactor plan only deferred (finding #1). The discrimination probe is the tracer that breaks if the model is wrong. - -Scores on all three — build it. 
- -## Acceptance Criteria - -``` -✓ elicitation-gaps schema test — ElicitationGap carries `refersTo: NodeKind` + `question`; no `name` typology field; no GROUNDING_GAP_TYPOLOGIES export -✓ command-executor seed-set test — createSpec seeds grounding gaps keyed by node kind: floor `context`/`thesis`/`goal`/`constraint` plus `term`/`assumption`; no eight literal typology names -✓ command-executor create/validate test — createElicitationGap requires `refersTo` (a valid NodeKind) + non-empty `question`; rejects an invalid kind -✓ queries read test — rowToElicitationGap maps `refers_to`/`question`; live presence-derived coverage/answered still flips from graph truth with sibling-spec isolation -✓ capability-readiness map test — CAPABILITY_RELEVANT_GAPS is `Record`; grounding floor = context+thesis+goal+constraint; a required kind with zero referring gaps in the register fails loud (config bug ≠ uncovered) -✓ discrimination probe (proving) — two `thesis`-referring gaps with different questions resolve independently (one covered, one open) rather than aliasing to a single presence count -✓ no residue — `rg "GROUNDING_GAP_TYPOLOGIES|RelevantGapName|gap\.name"` over src returns nothing; migration regenerated with `refers_to`/`question`, no `name` column -``` - -## Verification Approach - -``` -- Inner: vitest unit — schema/type, seed-set, create/validate, queries read derivation, capability-readiness map + loud-fail (npm run verify gate) -- Middle: the discrimination probe — two same-kind gaps resolved independently via question + satisfier (retires the proving unknown) -- Outer: per-spec seeded read-back over a freshly created spec (existing observed-shapes / read-back probe extended) -``` - -## Cross-cutting obligations - -``` -- Anti-shadowing: table stores obligation/disposition/meta only, never domain content (D65-L/D75-L) -- NodeKind union owned by the drizzle-free leaf graph/schema/kinds.ts; import, never redefine (D73-L) -- Mutations stay on CommandExecutor + shared 
{specId, lsn} / change_log clock (D4-L/D16-L) -- Reconcile topology READMEs that name the catalog/seeding: src/graph/README.md, src/db/README.md, src/projections/README.md -``` - -## Expected touched paths (tentative) - -``` -src/graph/schema/ -├── elicitation-gaps.ts ~ (name → refersTo: NodeKind + question) -drizzle/ -├── 0004_.sql + (regenerated migration: refers_to + question) -└── meta/ ~ (snapshot) -src/db/ -├── schema.ts ~ (elicitation_gaps: refers_to + question columns) -└── README.md ~ -src/graph/ -├── command-executor.ts ~ (SEEDED_ELICITATION_GAPS → seed by kind; CreateElicitationGapInput; seedElicitationGaps; validate) -├── command-executor.test.ts ~ -├── queries.ts ~ (rowToElicitationGap) -├── queries.test.ts ~ -├── observed-shapes-coverage.test.ts ? -└── README.md ~ -src/projections/ -├── session/capability-readiness.ts ~ (RelevantGapName → NodeKind; capability → NodeKind[]; loud-fail; discrimination) -├── session/capability-readiness.test.ts ~ -└── README.md ~ -``` diff --git a/src/db/README.md b/src/db/README.md index afd3a890..f0611f48 100644 --- a/src/db/README.md +++ b/src/db/README.md @@ -1,6 +1,6 @@ # db/ — Persistence substrate -SPEC decisions: D16-L, D41-L, D52-L, D54-L, D62-L +SPEC decisions: D16-L, D41-L, D52-L, D54-L, D62-L, D75-L ## Owns @@ -105,8 +105,10 @@ a bare LSN is comparable only inside one spec. `elicitation_gaps` is the prospective sibling of `reconciliation_need`: a flat typed coverage-obligation register, not a graph plane or node table. It -stores obligation/disposition/meta only; graph-owned command/query code derives -structural coverage from graph truth at read time. +stores obligation/disposition/meta only: `refers_to` names the graph node kind +that the obligation points at, `question` carries situated elicitor prose, and +graph-owned command/query code derives structural coverage from graph truth at +read time. `nodes.kind_ordinal` is persisted as the storage half of the D62-L projected-code contract. 
`node_kind_counters` owns monotonic per-`(spec_id, plane, kind)` diff --git a/src/db/schema.ts b/src/db/schema.ts index d4aa66ce..3412cdd8 100644 --- a/src/db/schema.ts +++ b/src/db/schema.ts @@ -151,7 +151,8 @@ export const elicitationGaps = sqliteTable('elicitation_gaps', { spec_id: integer() .notNull() .references(() => specs.id), - name: text().notNull(), + refers_to: text().notNull(), + question: text().notNull(), rationale: text().notNull(), disposition: text({ enum: GAP_DISPOSITIONS }).notNull().default('open'), basis: text({ enum: NODE_BASES }).notNull().default('explicit'), diff --git a/src/graph/README.md b/src/graph/README.md index 9b190d0e..1037295f 100644 --- a/src/graph/README.md +++ b/src/graph/README.md @@ -1,7 +1,7 @@ # graph/ — Graph domain layer Canonical reference: `docs/design/GRAPH_MODEL.md` -SPEC decisions: D4-L, D20-L, D27-L, D51-L, D52-L, D53-L, D54-L, D60-L, D62-L, D63-L +SPEC decisions: D4-L, D20-L, D27-L, D51-L, D52-L, D53-L, D54-L, D60-L, D62-L, D63-L, D75-L ## Owns @@ -11,6 +11,8 @@ SPEC decisions: D4-L, D20-L, D27-L, D51-L, D52-L, D53-L, D54-L, D60-L, D62-L, D6 and structured command results. It also owns prospective-register writes for `elicitation_gaps` (`createSpec` seeding plus create/disposition commands), because the gap register shares the same spec-local LSN and audit boundary. + Gaps name obligations by `refersTo: NodeKind` + free-form `question`, not + a parallel typology enum. - **mutateGraph** — atomic graph mutation for direct writers and future curation: one tool call, one transaction, one selected-spec LSN, all-or-nothing. The @@ -38,8 +40,8 @@ SPEC decisions: D4-L, D20-L, D27-L, D51-L, D52-L, D53-L, D54-L, D60-L, D62-L, D6 - **Domain schema types** (`schema/`) — `GraphNode`, `GraphEdge`, - `ReconciliationNeed`, `ElicitationGap`, kind/category types, - per-kind node ordinals, and derived intent-kind grouping. 
Raw domain enum + `ReconciliationNeed`, `ElicitationGap` (`refersTo` + `question`), + kind/category types, per-kind node ordinals, and derived intent-kind grouping. Raw domain enum taxonomy lives in the zero-import `schema/kinds.ts` leaf so web-facing graph imports do not pull in Drizzle. diff --git a/src/graph/command-executor.test.ts b/src/graph/command-executor.test.ts index 61aeee25..952774aa 100644 --- a/src/graph/command-executor.test.ts +++ b/src/graph/command-executor.test.ts @@ -414,7 +414,8 @@ describe('CommandExecutor', () => { expect( db .select({ - name: elicitationGaps.name, + refersTo: elicitationGaps.refers_to, + question: elicitationGaps.question, disposition: elicitationGaps.disposition, basis: elicitationGaps.basis, readinessBand: elicitationGaps.readiness_band, @@ -429,21 +430,20 @@ describe('CommandExecutor', () => { .all(), ).toEqual( [ - 'domain', - 'protagonist', - 'pain_pull', - 'constraint', - 'value', - 'context_of_use', - 'success_sketch', - 'solution_boundary', - ].map((name, index) => ({ - name, + ['context', 'What kind of thing is this, and what domain or environment does it live in?', 3], + ['thesis', 'Who is this for, and what pull or pain makes it worth doing?', 3], + ['goal', 'What outcome or value should this create?', 3], + ['constraint', 'What binding constraints, non-goals, or boundaries already shape the work?', 3], + ['term', 'What key word or domain term needs a shared definition?', 1], + ['assumption', 'What are we assuming that might be false?', 1], + ].map(([refersTo, question, importance]) => ({ + refersTo, + question, disposition: 'open', basis: 'implicit', readinessBand: 'grounding', predicateKind: 'presence', - importance: index < 4 ? 
3 : 1, + importance, planeAffinity: 'intent', lensAffinity: 'intent', createdAtLsn: result.lsn, @@ -664,7 +664,8 @@ describe('CommandExecutor', () => { it('creates an open gap and preserves the arose-from pointer', () => { const parent = executor.createElicitationGap({ specId, - name: 'domain', + refersTo: 'context', + question: 'What kind of product is this?', rationale: 'Name the product domain.', band: 'grounding', predicate: { kind: 'presence', plane: 'intent', nodeKind: 'context', minimum: 1 }, @@ -676,10 +677,11 @@ describe('CommandExecutor', () => { const child = executor.createElicitationGap({ specId, - name: 'follow_on', + refersTo: 'thesis', + question: 'Which user is blocked most by the current version?', rationale: 'Clarify which user is blocked most by the current version.', band: 'grounding', - predicate: { kind: 'presence', plane: 'intent', nodeKind: 'context', minimum: 1 }, + predicate: { kind: 'presence', plane: 'intent', nodeKind: 'thesis', minimum: 1 }, planeAffinity: 'intent', lensAffinity: 'intent', aroseFromGapId: parent.id, @@ -690,7 +692,8 @@ describe('CommandExecutor', () => { expect(db.select().from(elicitationGaps).where(eq(elicitationGaps.id, child.id)).get()).toMatchObject({ spec_id: specId, - name: 'follow_on', + refers_to: 'thesis', + question: 'Which user is blocked most by the current version?', rationale: 'Clarify which user is blocked most by the current version.', disposition: 'open', basis: 'explicit', @@ -707,16 +710,24 @@ describe('CommandExecutor', () => { it('rejects malformed gaps without writing rows or advancing the clock', () => { const result = executor.createElicitationGap({ specId, - name: ' ', + refersTo: 'not_a_kind' as never, + question: ' ', rationale: ' ', band: 'later' as never, - predicate: { kind: 'presence', minimum: 0 }, + predicate: { kind: 'presence', minimum: 0, nodeKind: 'not_a_kind' as never }, }); expect(result.status).toBe('structural_illegal'); if (result.status !== 'structural_illegal') throw new 
Error('unreachable'); expect(result.diagnostics.map((diagnostic) => diagnostic.field)).toEqual( - expect.arrayContaining(['name', 'rationale', 'band', 'predicate.minimum']), + expect.arrayContaining([ + 'refersTo', + 'question', + 'rationale', + 'band', + 'predicate.minimum', + 'predicate.nodeKind', + ]), ); expect(db.select().from(elicitationGaps).all()).toEqual([]); expect(graphClockLsn(db, specId)).toBe(0); @@ -728,7 +739,8 @@ describe('CommandExecutor', () => { it('sets a non-derivable disposition and records resolvedByNodeId and dispositionSetAtLsn', () => { const entry = executor.createElicitationGap({ specId, - name: 'manual_grounding', + refersTo: 'thesis', + question: 'Is the audience and pain clear enough?', rationale: 'Judge whether grounding is sufficient.', band: 'grounding', predicate: { kind: 'manual', rubric: 'Sufficiently grounded for generative work.' }, @@ -770,7 +782,8 @@ describe('CommandExecutor', () => { it('rejects hand-setting answered for structural predicates', () => { const entry = executor.createElicitationGap({ specId, - name: 'domain', + refersTo: 'context', + question: 'What kind of product is this?', rationale: 'Name the product domain.', band: 'grounding', predicate: { kind: 'presence', plane: 'intent', nodeKind: 'context', minimum: 1 }, @@ -795,7 +808,8 @@ describe('CommandExecutor', () => { it('rejects a resolved-by node from another spec', () => { const entry = executor.createElicitationGap({ specId, - name: 'manual_grounding', + refersTo: 'thesis', + question: 'Is the audience and pain clear enough?', rationale: 'Judge whether grounding is sufficient.', band: 'grounding', predicate: { kind: 'manual', rubric: 'Sufficiently grounded for generative work.' 
}, diff --git a/src/graph/command-executor.ts b/src/graph/command-executor.ts index 8b61eb9e..b8c02e5e 100644 --- a/src/graph/command-executor.ts +++ b/src/graph/command-executor.ts @@ -47,7 +47,7 @@ import { READINESS_BANDS, READINESS_GRADES, } from './schema/kinds.js'; -import { type NodeBasis, type NodePlane, type ReadinessBand } from './schema/nodes.js'; +import { type NodeBasis, type NodeKind, type NodePlane, type ReadinessBand } from './schema/nodes.js'; export type ReadinessGrade = (typeof READINESS_GRADES)[number]; export type { @@ -213,7 +213,8 @@ export interface AcceptReviewSetInput { /** Input for creating an elicitation gap. */ export interface CreateElicitationGapInput { readonly specId: number; - readonly name: string; + readonly refersTo: NodeKind; + readonly question: string; readonly rationale: string; readonly basis?: NodeBasis | undefined; readonly band: ReadinessBand; @@ -296,12 +297,19 @@ const KINDS_REQUIRING_DETAIL = new Set(['decision', 'term']); const VALID_READINESS_GRADES = READINESS_GRADES as unknown as string[]; const VALID_NODE_BASES = NODE_BASES as unknown as string[]; const VALID_READINESS_BANDS = READINESS_BANDS as unknown as string[]; +const VALID_NODE_KINDS = [ + ...INTENT_KINDS, + ...ORACLE_KINDS, + ...DESIGN_KINDS, + ...PLAN_KINDS, +] as readonly string[]; const VALID_GAP_DISPOSITIONS = GAP_DISPOSITIONS as unknown as string[]; const VALID_GAP_PREDICATE_KINDS = GAP_PREDICATE_KINDS as unknown as string[]; const VALID_LENS_AFFINITIES = LENS_AFFINITIES as unknown as string[]; const SEEDED_ELICITATION_GAPS: readonly { - readonly name: string; + readonly refersTo: NodeKind; + readonly question: string; readonly rationale: string; readonly basis: NodeBasis; readonly band: ReadinessBand; @@ -311,7 +319,8 @@ const SEEDED_ELICITATION_GAPS: readonly { readonly lensAffinity: ElicitationGapLensAffinity; }[] = [ { - name: 'domain', + refersTo: 'context', + question: 'What kind of thing is this, and what domain or environment does it live 
in?', rationale: 'Anchors what kind of thing is being specified and the domain it belongs to.', basis: 'implicit', band: 'grounding', @@ -321,18 +330,20 @@ const SEEDED_ELICITATION_GAPS: readonly { lensAffinity: 'intent', }, { - name: 'protagonist', - rationale: 'Identifies who the spec is for or who is most affected by the outcome.', + refersTo: 'thesis', + question: 'Who is this for, and what pull or pain makes it worth doing?', + rationale: 'Identifies the primary audience and why the work matters for them.', basis: 'implicit', band: 'grounding', - predicate: { kind: 'presence', plane: 'intent', nodeKind: 'context', minimum: 1 }, + predicate: { kind: 'presence', plane: 'intent', nodeKind: 'thesis', minimum: 1 }, importance: 3, planeAffinity: 'intent', lensAffinity: 'intent', }, { - name: 'pain_pull', - rationale: 'States the problem, pain, or pull that makes the work worth doing.', + refersTo: 'goal', + question: 'What outcome or value should this create?', + rationale: 'Clarifies the desired outcome or payoff the work should create.', basis: 'implicit', band: 'grounding', predicate: { kind: 'presence', plane: 'intent', nodeKind: 'goal', minimum: 1 }, @@ -341,7 +352,8 @@ const SEEDED_ELICITATION_GAPS: readonly { lensAffinity: 'intent', }, { - name: 'constraint', + refersTo: 'constraint', + question: 'What binding constraints, non-goals, or boundaries already shape the work?', rationale: 'Captures binding constraints or non-negotiable boundaries already shaping the work.', basis: 'implicit', band: 'grounding', @@ -351,41 +363,23 @@ const SEEDED_ELICITATION_GAPS: readonly { lensAffinity: 'intent', }, { - name: 'value', - rationale: 'Clarifies the benefit or value the work should create.', - basis: 'implicit', - band: 'grounding', - predicate: { kind: 'presence', plane: 'intent', nodeKind: 'goal', minimum: 1 }, - importance: 1, - planeAffinity: 'intent', - lensAffinity: 'intent', - }, - { - name: 'context_of_use', - rationale: 'Describes when, where, or under what 
conditions the result will be used.', + refersTo: 'term', + question: 'What key word or domain term needs a shared definition?', + rationale: 'Pins ubiquitous language before naming drift becomes specification ambiguity.', basis: 'implicit', band: 'grounding', - predicate: { kind: 'presence', plane: 'intent', nodeKind: 'context', minimum: 1 }, + predicate: { kind: 'presence', plane: 'intent', nodeKind: 'term', minimum: 1 }, importance: 1, planeAffinity: 'intent', lensAffinity: 'intent', }, { - name: 'success_sketch', - rationale: 'Sketches what success looks like or how goodness will be recognized.', + refersTo: 'assumption', + question: 'What are we assuming that might be false?', + rationale: 'Surfaces early bets and fragility without turning them into hidden readiness gates.', basis: 'implicit', band: 'grounding', - predicate: { kind: 'presence', plane: 'intent', nodeKind: 'criterion', minimum: 1 }, - importance: 1, - planeAffinity: 'intent', - lensAffinity: 'intent', - }, - { - name: 'solution_boundary', - rationale: 'Names non-goals or boundaries around what the solution is explicitly not.', - basis: 'implicit', - band: 'grounding', - predicate: { kind: 'presence', plane: 'intent', nodeKind: 'constraint', minimum: 1 }, + predicate: { kind: 'presence', plane: 'intent', nodeKind: 'assumption', minimum: 1 }, importance: 1, planeAffinity: 'intent', lensAffinity: 'intent', @@ -437,6 +431,9 @@ function validateGapPredicate(predicate: GapPredicate, diagnostics: Diagnostic[] if (predicate.band !== undefined && !isReadinessBand(predicate.band)) { diagnostics.push({ field: 'predicate.band', message: 'band is not valid' }); } + if (predicate.nodeKind !== undefined && !VALID_NODE_KINDS.includes(predicate.nodeKind)) { + diagnostics.push({ field: 'predicate.nodeKind', message: 'node kind is not valid' }); + } if (predicate.nodeKind === undefined && predicate.band === undefined) { diagnostics.push({ field: 'predicate', message: 'presence predicate needs nodeKind or band' }); 
} @@ -582,8 +579,12 @@ function validateEdgePatch(patch: EdgePatch): Diagnostic[] { function validateCreateElicitationGap(input: CreateElicitationGapInput): Diagnostic[] { const diagnostics: Diagnostic[] = []; - if (!input.name.trim()) { - diagnostics.push({ field: 'name', message: 'name must be non-empty' }); + if (!VALID_NODE_KINDS.includes(input.refersTo)) { + diagnostics.push({ field: 'refersTo', message: `"${String(input.refersTo)}" is not a valid node kind` }); + } + + if (!input.question.trim()) { + diagnostics.push({ field: 'question', message: 'question must be non-empty' }); } if (!input.rationale.trim()) { @@ -775,7 +776,8 @@ export class CommandExecutor { .values( SEEDED_ELICITATION_GAPS.map((entry) => ({ spec_id: specId, - name: entry.name, + refers_to: entry.refersTo, + question: entry.question, rationale: entry.rationale, basis: entry.basis, readiness_band: entry.band, @@ -886,7 +888,8 @@ export class CommandExecutor { .insert(schema.elicitationGaps) .values({ spec_id: input.specId, - name: input.name.trim(), + refers_to: input.refersTo, + question: input.question.trim(), rationale: input.rationale.trim(), basis: input.basis ?? 
'explicit', readiness_band: input.band, @@ -909,7 +912,7 @@ export class CommandExecutor { payload: JSON.stringify({ id: entry!.id, specId: input.specId, - name: input.name.trim(), + refersTo: input.refersTo, band: input.band, predicateKind: input.predicate.kind, planeAffinity: input.planeAffinity, diff --git a/src/graph/queries.test.ts b/src/graph/queries.test.ts index d46593fe..20802901 100644 --- a/src/graph/queries.test.ts +++ b/src/graph/queries.test.ts @@ -94,7 +94,7 @@ describe('getElicitationGaps', () => { expect(other.status).toBe('success'); if (other.status !== 'success') throw new Error('unreachable'); - const before = getElicitationGaps(db, specId).find((gap) => gap.name === 'domain')!; + const before = getElicitationGaps(db, specId).find((gap) => gap.refersTo === 'context')!; expect(before.coverage).toBe(0); expect(before.answered).toBe(false); expect(before.disposition).toBe('open'); @@ -107,13 +107,18 @@ describe('getElicitationGaps', () => { }); expect(resolvedNode.status).toBe('success'); - const after = getElicitationGaps(db, specId).find((gap) => gap.name === 'domain')!; + const after = getElicitationGaps(db, specId).find((gap) => gap.refersTo === 'context')!; expect(after.coverage).toBe(1); expect(after.answered).toBe(true); expect(after.disposition).toBe('answered'); - expect(getElicitationGaps(db, specId)).toHaveLength(8); - expect(getElicitationGaps(db, other.specId)).toHaveLength(8); - expect(getElicitationGaps(db, other.specId).find((gap) => gap.name === 'domain')!.answered).toBe(false); + expect(before.question).toBe( + 'What kind of thing is this, and what domain or environment does it live in?', + ); + expect(getElicitationGaps(db, specId)).toHaveLength(6); + expect(getElicitationGaps(db, other.specId)).toHaveLength(6); + expect(getElicitationGaps(db, other.specId).find((gap) => gap.refersTo === 'context')!.answered).toBe( + false, + ); }); }); diff --git a/src/graph/queries.ts b/src/graph/queries.ts index c69e35b8..d0c44522 100644 --- 
a/src/graph/queries.ts +++ b/src/graph/queries.ts @@ -379,7 +379,8 @@ function rowToElicitationGap(db: BrunchDb, row: typeof schema.elicitationGaps.$i const entry: MutableElicitationGap = { id: String(row.id), specId: row.spec_id, - name: row.name, + refersTo: row.refers_to as ElicitationGap['refersTo'], + question: row.question, rationale: row.rationale, disposition, basis: row.basis as ElicitationGap['basis'], diff --git a/src/graph/schema/elicitation-gaps.ts b/src/graph/schema/elicitation-gaps.ts index b4e26cf5..59904bbe 100644 --- a/src/graph/schema/elicitation-gaps.ts +++ b/src/graph/schema/elicitation-gaps.ts @@ -44,7 +44,8 @@ export type GapPredicate = export interface ElicitationGap { readonly id: string; readonly specId: number; - readonly name: string; + readonly refersTo: NodeKind; + readonly question: string; readonly rationale: string; readonly basis: NodeBasis; readonly band: ReadinessBand; diff --git a/src/projections/README.md b/src/projections/README.md index efd78bd7..721b02b6 100644 --- a/src/projections/README.md +++ b/src/projections/README.md @@ -1,6 +1,6 @@ # projections/ — reusable DTO boundaries -SPEC decisions: D52-L +SPEC decisions: D52-L, D75-L ## Owns @@ -28,7 +28,7 @@ Disposition: `✓` locked · `●` keep + lock (earns place, needs invariant) · | `session/transcript-context` | 2 | ● | Real transform: filters session entries + Pi-SDK convert. Invariant: no non-empty transcript entry dropped. Consumes the Pi SDK (external trust boundary), not a PULL surface we own. | | `session/runtime-state` | 13 | ● | Most-consumed projection; flattens runtime state. Direct flattened-shape invariant guards the field set every consumer relies on. | | `session/affordances` | 1 | ✓ | `affordances.test.ts` — legality + default-on-switch derivation tested directly. | -| `session/capability-readiness` | 0 | ✓ | D74-L tracer gate, not a reusable DTO. 
`capability-readiness.test.ts` locks the explicit capability→grounding-gap map, proceed / low-epistemic / negotiate outcomes, no-refusal invariant, and live presence-coverage flip. Consumer rewire remains deferred by the active scope card. | +| `session/capability-readiness` | 0 | ✓ | D74-L/D75-L tracer gate, not a reusable DTO. `capability-readiness.test.ts` locks the explicit capability→node-kind map, proceed / low-epistemic / negotiate outcomes, no-refusal invariant, loud failure when the gap register lacks a required kind, same-kind discrimination through `question`, and live presence-coverage flip. Consumer rewire remains deferred by the next capability-readiness frontier. | | `session/runtime-policy` | 4 | ○ | Policy/definitions data, not a DTO transform. Legality source already guarded via `affordances.test.ts` + `.pi` state tests. | | `workspace/workspace-context` | 1 | ✗ | Pure `{ mode, data }` tag wrapper — zero transform, single consumer (`.pi/extensions/context/get-cwd.ts`). Source `session/workspace-context.ts` already exports the shapes + `inspect*` and can feed the consumer directly. Delete / inline. | | `workspace/workspace-state` | 4 | ● | Real flatten of the `WorkspaceSessionState` union to a narrow DTO. Shape invariant across status variants (`ready` / `needs_human` / base). 
| diff --git a/src/projections/session/capability-readiness.test.ts b/src/projections/session/capability-readiness.test.ts index a408ca19..015b38d0 100644 --- a/src/projections/session/capability-readiness.test.ts +++ b/src/projections/session/capability-readiness.test.ts @@ -7,21 +7,25 @@ import { createDb, type BrunchDb } from '../../db/connection.js'; import { CommandExecutor } from '../../graph/command-executor.js'; import { getElicitationGaps } from '../../graph/queries.js'; import type { ElicitationGap } from '../../graph/schema/elicitation-gaps.js'; +import type { NodeKind } from '../../graph/schema/nodes.js'; import { CAPABILITY_RELEVANT_GAPS, evaluateCapabilityReadiness, type CapabilityReadinessOutcome, } from './capability-readiness.js'; -function gap(overrides: Partial & Pick): ElicitationGap { +function gap( + overrides: Partial & Pick, +): ElicitationGap { return { - id: overrides.id ?? overrides.name, + id: overrides.id ?? `${overrides.refersTo}:${overrides.question ?? 'gap'}`, specId: overrides.specId ?? 1, - name: overrides.name, - rationale: overrides.rationale ?? `${overrides.name} rationale`, + refersTo: overrides.refersTo, + question: overrides.question ?? `${overrides.refersTo} question`, + rationale: overrides.rationale ?? `${overrides.refersTo} rationale`, basis: overrides.basis ?? 'implicit', band: overrides.band ?? 'grounding', - predicate: overrides.predicate ?? { kind: 'presence', minimum: 1, band: 'grounding' }, + predicate: overrides.predicate ?? { kind: 'presence', minimum: 1, nodeKind: overrides.refersTo }, importance: overrides.importance ?? 1, coverage: overrides.coverage, answered: overrides.answered ?? overrides.coverage >= 1, @@ -30,6 +34,12 @@ function gap(overrides: Partial & Pick> = {}): ElicitationGap[] { + return ['context', 'thesis', 'goal', 'constraint'].map((kind) => + gap({ refersTo: kind as NodeKind, coverage: coverage[kind as NodeKind] ?? 
1 }), + ); +} + function expectOutcomeStatus( outcome: CapabilityReadinessOutcome, status: CapabilityReadinessOutcome['status'], @@ -42,52 +52,47 @@ function createTestDb(): BrunchDb { } describe('capability readiness over elicitation gaps', () => { - it('enumerates relevant gap typologies per requested capability', () => { + it('enumerates relevant node kinds per requested capability', () => { expect(CAPABILITY_RELEVANT_GAPS).toEqual({ - 'generative-lens': ['domain', 'protagonist', 'pain_pull', 'constraint'], - 'propose-graph': ['domain', 'protagonist', 'pain_pull', 'constraint'], - 'project-graph': ['domain', 'protagonist', 'pain_pull', 'constraint'], - 'commitment-review': ['domain', 'protagonist', 'pain_pull', 'constraint'], + 'generative-lens': ['context', 'thesis', 'goal', 'constraint'], + 'propose-graph': ['context', 'thesis', 'goal', 'constraint'], + 'project-graph': ['context', 'thesis', 'goal', 'constraint'], + 'commitment-review': ['context', 'thesis', 'goal', 'constraint'], }); }); it('proceeds when all relevant gaps are covered', () => { - const outcome = evaluateCapabilityReadiness('propose-graph', [ - gap({ name: 'domain', coverage: 1 }), - gap({ name: 'protagonist', coverage: 1 }), - gap({ name: 'pain_pull', coverage: 1 }), - gap({ name: 'constraint', coverage: 1 }), - ]); + const outcome = evaluateCapabilityReadiness('propose-graph', floorGaps()); expect(outcome).toEqual({ status: 'proceed' }); }); it('negotiates with establishment-offer-shaped missing gaps when relevant grounding gaps are uncovered', () => { - const outcome = evaluateCapabilityReadiness('project-graph', [ - gap({ name: 'domain', coverage: 1 }), - gap({ name: 'protagonist', coverage: 0 }), - gap({ name: 'pain_pull', coverage: 0 }), - gap({ name: 'constraint', coverage: 1 }), - ]); + const outcome = evaluateCapabilityReadiness('project-graph', floorGaps({ thesis: 0, goal: 0 })); expect(outcome.status).toBe('negotiate'); if (outcome.status !== 'negotiate') return; 
expect(outcome.offer.kind).toBe('establishment_offer'); - expect(outcome.offer.missingGaps.map((missing) => missing.name)).toEqual(['protagonist', 'pain_pull']); + expect(outcome.offer.missingGaps.map((missing) => missing.refersTo)).toEqual(['thesis', 'goal']); + expect(outcome.offer.missingGaps.map((missing) => missing.question)).toEqual([ + 'thesis question', + 'goal question', + ]); expect(outcome.offer.message).toContain('I can try, but'); }); it('proceeds at low epistemic status when relevant gaps have only partial coverage', () => { - const outcome = evaluateCapabilityReadiness('generative-lens', [ - gap({ name: 'domain', coverage: 1 }), - gap({ name: 'protagonist', coverage: 0.5 }), - gap({ name: 'pain_pull', coverage: 1 }), - gap({ name: 'constraint', coverage: 1 }), - ]); + const outcome = evaluateCapabilityReadiness('generative-lens', floorGaps({ thesis: 0.5 })); expect(outcome).toEqual({ status: 'proceed_low_epistemic', coverage: 0.875 }); }); + it('fails loud when a required kind has no referring gap record', () => { + expect(() => evaluateCapabilityReadiness('propose-graph', floorGaps().slice(0, 3))).toThrow( + /no elicitation gap for constraint/, + ); + }); + it('moves from negotiate to proceed when live presence coverage fills the grounding floor', () => { const db = createTestDb(); const executor = new CommandExecutor(db); @@ -100,13 +105,15 @@ describe('capability readiness over elicitation gaps', () => { 'negotiate', ); - for (const [kind, title] of [ - ['context', 'Local product spec workspace'], - ['goal', 'Help a builder co-author a graph-native spec'], - ['assumption', 'Current planning is too prose-heavy'], - ['constraint', 'Runs locally over the Pi harness'], + for (const [kind, title, detail] of [ + ['context', 'Local product spec workspace', undefined], + ['thesis', 'For builders who need agent-aided specification', undefined], + ['goal', 'Help a builder co-author a graph-native spec', undefined], + ['constraint', 'Runs locally over the Pi 
harness', undefined], + ['term', 'Spec workspace', { definition: 'A local graph-backed specification authoring space' }], + ['assumption', 'Current planning is too prose-heavy', undefined], ] as const) { - const result = executor.createNode({ specId: created.specId, plane: 'intent', kind, title }); + const result = executor.createNode({ specId: created.specId, plane: 'intent', kind, title, detail }); expect(result.status).toBe('success'); } @@ -116,21 +123,39 @@ describe('capability readiness over elicitation gaps', () => { ); }); + it('proves same-kind gaps resolve independently through their own question and satisfier', () => { + const outcome = evaluateCapabilityReadiness('propose-graph', [ + ...floorGaps(), + gap({ + id: 'thesis:stakeholder', + refersTo: 'thesis', + question: 'Who is the primary user?', + coverage: 1, + }), + gap({ + id: 'thesis:pain', + refersTo: 'thesis', + question: 'Why is this painful enough to solve now?', + coverage: 0, + }), + ]); + + expect(outcome.status).toBe('negotiate'); + if (outcome.status !== 'negotiate') return; + expect(outcome.offer.missingGaps).toEqual([ + expect.objectContaining({ + id: 'thesis:pain', + refersTo: 'thesis', + question: 'Why is this painful enough to solve now?', + }), + ]); + }); + it('never returns a refusal outcome and does not import grade-gate symbols', () => { const outcomes = [ - evaluateCapabilityReadiness('propose-graph', []), - evaluateCapabilityReadiness('propose-graph', [ - gap({ name: 'domain', coverage: 0.25 }), - gap({ name: 'protagonist', coverage: 1 }), - gap({ name: 'pain_pull', coverage: 1 }), - gap({ name: 'constraint', coverage: 1 }), - ]), - evaluateCapabilityReadiness('propose-graph', [ - gap({ name: 'domain', coverage: 1 }), - gap({ name: 'protagonist', coverage: 1 }), - gap({ name: 'pain_pull', coverage: 1 }), - gap({ name: 'constraint', coverage: 1 }), - ]), + evaluateCapabilityReadiness('propose-graph', floorGaps({ context: 0 })), + evaluateCapabilityReadiness('propose-graph', 
floorGaps({ context: 0.25 })), + evaluateCapabilityReadiness('propose-graph', floorGaps()), ]; expect(outcomes.map((outcome) => outcome.status)).toEqual([ diff --git a/src/projections/session/capability-readiness.ts b/src/projections/session/capability-readiness.ts index dfad5da3..ceb3f52a 100644 --- a/src/projections/session/capability-readiness.ts +++ b/src/projections/session/capability-readiness.ts @@ -1,19 +1,19 @@ import type { ElicitationGap } from '../../graph/schema/elicitation-gaps.js'; +import type { NodeKind } from '../../graph/schema/nodes.js'; export type CapabilityId = 'generative-lens' | 'propose-graph' | 'project-graph' | 'commitment-review'; -type RelevantGapName = 'domain' | 'protagonist' | 'pain_pull' | 'constraint'; - -export const CAPABILITY_RELEVANT_GAPS: Record = { - 'generative-lens': ['domain', 'protagonist', 'pain_pull', 'constraint'], - 'propose-graph': ['domain', 'protagonist', 'pain_pull', 'constraint'], - 'project-graph': ['domain', 'protagonist', 'pain_pull', 'constraint'], - 'commitment-review': ['domain', 'protagonist', 'pain_pull', 'constraint'], +export const CAPABILITY_RELEVANT_GAPS: Record = { + 'generative-lens': ['context', 'thesis', 'goal', 'constraint'], + 'propose-graph': ['context', 'thesis', 'goal', 'constraint'], + 'project-graph': ['context', 'thesis', 'goal', 'constraint'], + 'commitment-review': ['context', 'thesis', 'goal', 'constraint'], }; interface CapabilityMissingGap { readonly id: string; - readonly name: string; + readonly refersTo: NodeKind; + readonly question: string; readonly rationale: string; readonly coverage: number; } @@ -34,24 +34,25 @@ export function evaluateCapabilityReadiness( gaps: readonly ElicitationGap[], ): CapabilityReadinessOutcome { const relevantGaps = relevantGapRecords(capability, gaps); - const missing = relevantGaps.filter((gap) => gap.coverage <= 0); + const missing = relevantGaps.filter((record) => record.coverage <= 0); if (missing.length > 0) { return { status: 'negotiate', 
offer: { kind: 'establishment_offer', message: `I can try, but answering ${formatGapList(missing)} first would make this materially safer.`, - missingGaps: missing.map((gap) => ({ - id: gap.id, - name: gap.name, - rationale: gap.rationale, - coverage: gap.coverage, + missingGaps: missing.map((record) => ({ + id: record.id, + refersTo: record.refersTo, + question: record.question, + rationale: record.rationale, + coverage: record.coverage, })), }, }; } - const coverage = relevantGaps.length === 0 ? 0 : average(relevantGaps.map((gap) => gap.coverage)); + const coverage = relevantGaps.length === 0 ? 0 : average(relevantGaps.map((record) => record.coverage)); if (coverage >= 1) return { status: 'proceed' }; return { status: 'proceed_low_epistemic', coverage }; } @@ -60,25 +61,12 @@ function relevantGapRecords( capability: CapabilityId, gaps: readonly ElicitationGap[], ): readonly ElicitationGap[] { - const relevantNames = CAPABILITY_RELEVANT_GAPS[capability]; - return relevantNames.map((name) => gaps.find((gap) => gap.name === name) ?? 
missingGap(name)); -} - -function missingGap(name: RelevantGapName): ElicitationGap { - return { - id: `missing:${name}`, - specId: 0, - name, - rationale: `Missing seeded grounding gap: ${name}`, - basis: 'implicit', - band: 'grounding', - predicate: { kind: 'presence', minimum: 1, band: 'grounding' }, - importance: 1, - coverage: 0, - answered: false, - disposition: 'open', - createdAtLsn: 0, - }; + const relevantKinds = CAPABILITY_RELEVANT_GAPS[capability]; + return relevantKinds.flatMap((kind) => { + const records = gaps.filter((record) => record.refersTo === kind); + if (records.length === 0) throw new Error(`capability ${capability} has no elicitation gap for ${kind}`); + return records; + }); } function average(values: readonly number[]): number { @@ -86,5 +74,5 @@ function average(values: readonly number[]): number { } function formatGapList(gaps: readonly ElicitationGap[]): string { - return gaps.map((gap) => gap.name).join(', '); + return gaps.map((record) => `${record.refersTo}: ${record.question}`).join('; '); }