From 3134c434da326d0bb762fef611e653f5731a161f Mon Sep 17 00:00:00 2001 From: Lu Nelson Date: Thu, 11 Jun 2026 10:38:43 +0200 Subject: [PATCH] spec and plan updates re fixture protocols --- .fixtures/README.md | 13 ++ .../workbenches/live-graph-observer/README.md | 32 +++-- memory/PLAN.md | 50 ++++--- memory/SPEC.md | 10 +- src/app/brunch.test.ts | 32 +++++ src/graph/seed-fixtures.test.ts | 135 +++++++++++++++++- src/graph/seed-fixtures.ts | 129 ++++++++++++----- 7 files changed, 325 insertions(+), 76 deletions(-) diff --git a/.fixtures/README.md b/.fixtures/README.md index c45737dd..db35591f 100644 --- a/.fixtures/README.md +++ b/.fixtures/README.md @@ -40,6 +40,19 @@ probe report/transcript artifacts, then track it. Dev launchers must resolve scratch from the repo-root `.fixtures/scratch/`, independent of the workspace cwd they target. +Seed workbench state explicitly; `npm run dev` never seeds by implication. From +the repo root, load one tracked seed into one named workspace with: + +```sh +npm run seed -- --workspace .fixtures/workbenches/live-graph-observer --seed workspace-spread/alpha-grounding +npm run dev -- --cwd .fixtures/workbenches/live-graph-observer +``` + +The seed command writes only the target workspace's `.brunch/data.db` and reports +that destination path plus the `set/slug → specId` mapping. Running `npm run seed` +without `--workspace` and `--seed` fails with usage instead of loading every seed +into the shell cwd. + ## Current runs - `runs/public-rpc-parity/2026-05-29-public-rpc-parity/` — FE-744 public Brunch diff --git a/.fixtures/workbenches/live-graph-observer/README.md b/.fixtures/workbenches/live-graph-observer/README.md index 514fb1ac..e35eac98 100644 --- a/.fixtures/workbenches/live-graph-observer/README.md +++ b/.fixtures/workbenches/live-graph-observer/README.md @@ -13,22 +13,26 @@ every contributor agrees on where the manual smoke happens. 
## How to use it -From the repo root, run: +From the repo root, seed a chosen starting graph explicitly, then launch against +this workbench cwd: ```sh +npm run seed -- --workspace .fixtures/workbenches/live-graph-observer --seed workspace-spread/alpha-grounding + # Dev build, against TS source (no build step needed) -( cd .fixtures/workbenches/live-graph-observer && npx tsx ../../../src/brunch.ts --mode print ) +npm run dev -- --cwd .fixtures/workbenches/live-graph-observer --mode print # Built bin (after `npm run build`) -( cd .fixtures/workbenches/live-graph-observer && node ../../../bin/brunch-cli.js --mode print ) +node bin/brunch-cli.js --cwd .fixtures/workbenches/live-graph-observer --mode print # Once installed (e.g. via `npm link` or a published install) -( cd .fixtures/workbenches/live-graph-observer && brunch-cli --mode print ) +brunch-cli --cwd .fixtures/workbenches/live-graph-observer --mode print ``` -On first launch Brunch scaffolds a local `.brunch/` directory containing -`data.db` and Pi session files **inside this workbench directory**, not in the -repo root. That state is per-cwd by design and must not be committed. +Brunch scaffolds a local `.brunch/` directory containing `data.db` and Pi session +files **inside this workbench directory**, not in the repo root. That state is +per-cwd by design and must not be committed. `npm run dev` only opens the named +workspace; it never loads seed fixtures implicitly. ## What is and is not committed @@ -42,11 +46,9 @@ repo root. That state is per-cwd by design and must not be committed. - `--mode print` — non-interactive workspace projection; smoke for CLI identity and DB scaffolding. -- `--mode tui` — interactive writer session; once the `live-graph-observer` - observer host card lands, this is also the launch path that exposes a local - web observer URL. -- `--mode web` — standalone web host; useful for web-only iteration before the - TUI-hosted observer path is wired in. 
+- `--mode tui` — interactive writer session and the product-supported launch + path for the local web observer sidecar. Use `--auto-open=false` when an agent + will open the printed sidecar URL manually. ## Browser feedback loop @@ -56,11 +58,11 @@ gives the agent accessibility-tree snapshots, clicks, form input, and screenshot without becoming product runtime behavior. CDP-style tools remain useful for console/network detail when needed. -Launch the web host from this workbench: +Launch the TUI sidecar against this workbench: ```sh -# Terminal A: standalone web observer host -( cd .fixtures/workbenches/live-graph-observer && node ../../../bin/brunch-cli.js --mode web ) +# Terminal A: TUI writer plus web observer sidecar +npm run dev -- --cwd .fixtures/workbenches/live-graph-observer --mode tui --auto-open=false ``` The host prints a localhost URL such as: diff --git a/memory/PLAN.md b/memory/PLAN.md index 47034ca3..17302098 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -121,7 +121,7 @@ The near-term spine has two tracks. The **context-pipeline coverage trio** remai - `probes-and-transcripts-evolution` — continuous probe/report/transcript hardening as each delivery frontier lands evidence. - `topology-readmes-and-boundaries` — small doc/test hardening when a frontier moves files or exposes a boundary; should remain attached to the frontier when possible rather than becoming an abstract cleanup project. -- `dev-seed-fixtures` — rich, real seed data for local dev / manual / observer testing: the consolidated seed contract, the `npm run seed` loader, and growing/enhancing fixture sets (Bilal-port + legacy). Its semantic curation mutation slice is folded into / blocked by `role-safe-graph-mutations`; ongoing seed-data maintenance remains low-conflict. 
+- `dev-seed-fixtures` — **partially built as a folded-in FE-848 DX hardening slice**: clarified the seed/workbench contract from SPEC D79-L, replaced the catch-all current-cwd `npm run seed` flow with explicit target-workspace + seed selection, and proved one seeded workbench through `npm run dev -- --cwd ...` / product RPC. Remaining follow-up is the seed disposition catalog and optional explicit all-seeds opt-in. Its semantic curation mutation slice is complete via `role-safe-graph-mutations`; ongoing seed-data maintenance remains low-conflict. - `dx-introspection-live` — done 2026-06-11. DX follow-on to `dx-feedback-loops`: hardened the four-role `.fixtures/` topology + `--cwd` launch (D70-L), unified dev gating under `BRUNCH_DEV`, wired introspection into the real TUI (D71-L), made introspection conversational (A26-L), and added the workspace-local `.brunch/debug/` cache for final system prompt + Brunch-owned tool-result contents. `tool-renders` flattening remains deferred until a concrete renderer-debugging need appears. - `runtime-vocab-leaf` — establish `src/session/schema/kinds.ts` as the drizzle-free source-of-truth leaf for the session/runtime axis enums (`op_mode`, `strategy`, `lens`, `goal`, `auto` sentinel), mirroring `graph/schema/kinds.ts` (D73-L ownership direction). The decision-3 follow-on; independent of the remodel chain and the trio. Must not recreate `READINESS_GRADES` (retired by `capability-readiness`). @@ -675,26 +675,30 @@ The near-term spine has two tracks. 
The **context-pipeline coverage trio** remai ### dev-seed-fixtures -- **Name:** Development seed-fixture substrate (Bilal-port + legacy specs) -- **Linear:** unassigned -- **Kind:** tooling / dev-substrate -- **Status:** parallel / continuous -- **Objective:** Maintain rich, real seed data for local dev and manual/observer testing: the consolidated `{spec,nodes,edges}` seed contract under `.fixtures/seeds/<set>/<slug>.json`, the `src/graph/seed-fixtures.ts` loader (`npm run seed`) that commits each fixture through `CommandExecutor`, and the throwaway per-set port scripts that produce seed files. Grow set coverage and graph quality as delivery frontiers need data to exercise. -- **Why now / unlocks:** Delivery frontiers (`capture-response-to-graph`, the live-graph observer follow-on, `poc-live-ship-gate`) need real multi-spec graph data to exercise UI/agent/observer behavior without hand-authoring. The Bilal port already provides three loadable specs; enhancing them surfaces under-represented planes/kinds (notably `thesis`/`goal`) for richer capture and observer demos. +- **Name:** Explicit dev seeding and launchable workbench flow +- **Linear:** FE-848 — folded into the current prompt-context refinement branch by user decision on 2026-06-11; no separate Linear issue for this low-conflict DX hardening slice. +- **Kind:** hardening / dev-substrate +- **Status:** parallel / partially built (folded into FE-848 branch) +- **Certainty:** proving +- **Lights up:** A fresh `.fixtures/workbenches/<name>` can be seeded with one named fixture, launched with `npm run dev -- --cwd .fixtures/workbenches/<name>`, and inspected as that workbench's DB — not the repo-root `.brunch/` and not an accidental all-seeds dump. +- **Stabilizes:** D70-L/D79-L fixture topology and I48-L target-workspace-scoped seeding; gives later manual, observer, and capture probes a reproducible local graph state to aim from. +- **Objective:** Clarify and harden the dev DB seeding flow around the four-role `.fixtures/` contract. 
Replace the current ambiguous mental model — `npm run seed` loads every tracked seed into whatever shell cwd happens to be active — with an explicit seed command that names the target workspace and selected seed set/slug (with all-seeds as an explicit opt-in). Catalog the captured seed fixtures by consumer disposition, update workbench docs to name the seed(s) they expect, and prove a seeded workbench through the real launch path. +- **Why now / unlocks:** The current root-dev behavior and `--cwd` workbench convention now conflict: root `.brunch/` can contain stale local DB state, workbench `.brunch/` is untracked but under-documented, and several newly captured seeds exist without a consumer. This frontier is the cheapest tracer bullet for D79-L/I48-L and prevents later manual/observer tests from depending on invisible local state. - **Acceptance:** - - Seed contract stays loadable: each set's port script self-validates every `.json` through the real loader (same structural checks `CommandExecutor.mutateGraph` enforces) before writing. - - `npm run seed` loads every `.fixtures/seeds//.json` into the workspace DB through `CommandExecutor` (never direct row inserts), preserving spec-local graph clock / change log / LSN coherence. - - New seed sets follow the established shape: vendored `_originals/`, throwaway `_port-script.ts`, consolidated `.json`, generated `README.md`; derived variant sets may instead document the deterministic filter over an existing seed set and keep mixed-basis product-run output under `.fixtures/runs/`. - - Product curation runs over seeds leave transcript-backed artifacts (`session.jsonl`, `transcript.md`, `report.json`, and graph readback when graph truth is the proof target) and prove real `mutate_graph` transcript evidence plus implicit graph rows; any retained pre-migration `commit_graph` runs are historical only. Mixed-basis graph readbacks are not registered as reusable seeds. 
-- **Enhancement backlog (captured, not yet scoped):** - 1. Enhance Bilal-port fixtures *through Brunch itself* by feeding the original briefs Bilal authored, to recover `thesis`/`goal` structure the current ported graphs under-express. - 2. Port and enhance the earlier product version's fixtures (the legacy walkthrough scenarios in `docs/praxis/manual-testing.md`), raising quality through better semantic definition (kinds, detail) and internal connection (edges). -- **Verification:** Inner — `src/graph/seed-fixtures.test.ts` seeds real fixtures into an in-memory DB and asserts spec/node/edge counts plus spec-local change-log/clock coherence independent of seed order, rejects non-`explicit` basis, and covers the `macro-view-grounded-intent` explicit intent-only variant; `src/probes/fixture-curation-loop.test.ts` proves curation report/artifact evidence detection without an LLM. Outer — `npm run seed` smoke against a fresh cwd; real fixture-curation runs under `.fixtures/runs/fixture-curation/`; seeded-dev-rpc smoke proves `dev.graph.mutateGraph` advances only the mutated spec's overview LSN. -- **Topology materialization:** Seed data and throwaway prep scripts live under `.fixtures/seeds/`; the loader lives in `src/graph/seed-fixtures.ts` (graph/ owns `CommandExecutor` orchestration; db/ is imported only by graph/, never the reverse); no seed-only graph runtime the product launch does not use. -- **Cross-cutting obligations:** Seeds commit only through `CommandExecutor`; directly-authored items use `basis: explicit` (the retired `accepted_review_set` value is not a basis). Respect multi-spec discipline — each fixture is one spec's own graph (D61-L). Pre-release posture: regenerate fixtures when the schema moves rather than preserving stale shapes. 
**Known drift:** `docs/praxis/manual-testing.md` still describes the earlier seed system (scenario-arg `npm run seed`, `.brunch/brunch.db`); reconcile it to the current loader (all-sets `npm run seed`, `.brunch/data.db`) when the legacy port (backlog item 2) lands — coordinate with the doc-reconciliation track rather than double-editing. -- **Current execution pointer:** The semantic-mutation curation scope is complete via `role-safe-graph-mutations`, so dev curation no longer mints a second graph-write dialect. Product-driven fixture-curation tracer evidence remains the quality-review input: `macro-view-grounded-intent` is a deterministic explicit-basis Bilal variant; fresh runs now prove `mutate_graph`, while the checked-in `.fixtures/runs/fixture-curation/fixture-curation-2026-06-05T104440Z/` artifact is historical pre-migration `commit_graph` evidence until regenerated. -- **Traceability:** D4-L, D16-L, D19-L, D20-L, D52-L, D61-L, D62-L, D63-L / I1-L / A4-L, A14-L. -- **Design docs:** `.fixtures/seeds/bilal-port/README.md`; `docs/design/GRAPH_MODEL.md`; `docs/praxis/manual-testing.md`. + - ✅ Seed CLI supports selecting one fixture by set/slug and target workspace by path; malformed, unknown, duplicate, or unsafe flag input fails with usage before any workspace DB opens. + - ◐ An all-seeds batch remains possible only through a future explicit flag or explicit command name; no ambient all-seeds default remains. + - ✅ Every seeded spec routes through `seedFixture`/`CommandExecutor`, preserving spec-local LSN, change-log, elicitation-gap seeding, and structural validation; no seed path writes SQLite rows directly. + - ✅ CLI output names the destination `.brunch/data.db` and each selected `set/slug → specId`; defaults are explicit in help text and tests. + - ✅ `npm run dev` / `npm run dev -- --cwd ` never seeds implicitly; launch observes existing workspace DB state only. 
+ - ✅ `.fixtures/README.md` and the `live-graph-observer` workbench README document the canonical flow (`seed` then `dev -- --cwd`) and clarify root/workbench `.brunch/` as local runtime state, not canonical fixture truth; the workbench docs name the TUI sidecar instead of unsupported standalone `--mode web`. + - ◐ Captured seeds (`brunch-self`, `dumpchat`, `fable`, `rd-loop`, `yamlbase`, plus existing Bilal/coverage sets) still need a small disposition catalog: `test`, `preview`, `manual workbench`, `probe input`, or `parked`. + - ✅ A fresh-workbench tracer seeds one named fixture, reads `workspace.selectionState` through product RPC with `--cwd`, and proves graph state came from the workbench `.brunch/data.db` only. +- **Verification:** Inner — seed CLI parse/target-resolution tests; set/slug filtering tests; explicit all-seeds mode test (pending until the all-seeds opt-in lands); CommandExecutor/change-log assertions on a temp workspace DB; docs/help snapshot or string tests for visible destination reporting. Middle — fresh workbench smoke using a temp or fixture workbench: seed one fixture, launch via `runBrunchCli({ argv: ['--cwd', workbench, '--mode', 'print' | 'rpc'] })` or equivalent, assert selected workspace state plus graph overview are scoped to that workbench. Optional outer — manual `BRUNCH_DEV=1 npm run dev -- --cwd .fixtures/workbenches/<name>` against a live model after the deterministic tracer passes. +- **Topology materialization:** Seed data and throwaway prep scripts remain under `.fixtures/seeds/`; launchable cwd containers remain under `.fixtures/workbenches/`; the graph-domain seed loader remains in `src/graph/seed-fixtures.ts` unless the CLI grows enough to warrant a thin `src/scripts/` wrapper; workbench runtime DBs stay under gitignored `.brunch/` and are never committed. +- **Cross-cutting obligations:** Preserve D20-L/D52-L graph ownership — the loader orchestrates `CommandExecutor`, not DB internals. 
Preserve D70-L role separation — seed JSON is input, workbench DB state is local runtime, runs are curated evidence, scratch is ephemeral. Do not add auto-seeding to app startup, and do not treat repo-root `.brunch/` as canonical test fixture state. Pre-release posture allows regenerating or reclassifying stale seed files rather than maintaining compatibility with obsolete local DBs. +- **Branch:** `ln/fe-848-prompt-context-refine` (folded-in slice; no separate Graphite branch). +- **Traceability:** D16-L, D20-L, D52-L, D61-L, D63-L, D70-L, D71-L, D79-L; I1-L, I11-L, I48-L. +- **Design docs:** `.fixtures/README.md`; `.fixtures/workbenches/live-graph-observer/README.md`; `docs/design/GRAPH_MODEL.md`. ### web-design-system-port @@ -762,7 +766,7 @@ nodes: capture-quality-spike [done · spike] A22-L fitness evidence graduated the narrow exchanges-and-generalized-capture feature probes-and-transcripts-evolution [parallel] continuous evidence substrate topology-readmes-and-boundaries [parallel] attach-to-frontier topology hardening - dev-seed-fixtures [parallel] rich seed data substrate for dev/observer testing + dev-seed-fixtures [parallel · proving] explicit seed selection + target-workspace-scoped workbench launch; catalog captured seeds; prove D79/I48 tracer web-design-system-port [done · earned] ported prior-trunk tokens + card primitives into src/web; retired invented warm aesthetic; read-only, no spine deps dx-tier-2-harness [active · proving] FE-847 Tier-2 DX chassis (real boot + faux turn + payload/transcript oracle + fixture resume) + coverage-first scaffold (skipped I45-I47) + topology stubs turn-boundary-reconciliation [next · proving] M7 product write-side: watermark projection (S1) + prepareNextTurn reconciler/worldUpdate/own-write stamping (S2) + submit-time mention ledger/staleness (S3) @@ -784,7 +788,7 @@ edges: graph-tool-resilience -[hard]-> role-safe-graph-mutations (current graph tool + edge model exist) project-graph-review-cycle -[hard]-> 
role-safe-graph-mutations (current review-set proposal/accept path exists) role-safe-graph-mutations -[hard]-> exchanges-and-generalized-capture (relation-bearing capture uses mutateGraph grammar) - role-safe-graph-mutations -[hard]-> dev-seed-fixtures (semantic curation slice uses mutateGraph grammar) + role-safe-graph-mutations -[already-satisfied]-> dev-seed-fixtures (semantic curation now uses the canonical mutateGraph grammar; D79 hardening no longer needs a second graph-write dialect) capture-quality-spike -[evidence]-> exchanges-and-generalized-capture projection-shape-coverage -[hard]-> renderer-golden-coverage (lock DTO shape before renderer golden) renderer-golden-coverage -[hard]-> prompt-composition-golden-coverage (lock rendered text before prompt golden) @@ -800,7 +804,7 @@ edges: parallel obligations: probes-and-transcripts-evolution -[evidence]-> every P0/P1 frontier topology-readmes-and-boundaries -[boundary]-> every frontier that moves/claims source topology - dev-seed-fixtures -[data]-> capture-response-to-graph, poc-live-ship-gate (real multi-spec graphs to exercise observer/capture; semantic curation waits on role-safe-graph-mutations) + dev-seed-fixtures -[data]-> capture-response-to-graph, poc-live-ship-gate (explicit seeded workbenches provide reproducible real graphs for observer/capture; ongoing semantic curation already rides mutateGraph) horizon: coherence-first-class diff --git a/memory/SPEC.md b/memory/SPEC.md index 9b87ed9d..b9a0b5e2 100644 --- a/memory/SPEC.md +++ b/memory/SPEC.md @@ -84,7 +84,7 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c #### Verification & fixtures -24. Brunch must ship probe drivers over the public JSON-RPC surface that produce replayable transcript artifacts and property-checkable reports. 
The first product-level driver proof is a deterministic public-RPC structured-exchange permutation run: the current `present_question`/`request_answer`, `present_options`/`request_choice`, and `present_options`/`request_choices` permutations are driven through activated workspace/spec/session state, with Pi JSONL and Brunch projections comparable in kind and quality to an equivalent TUI-driven session. Coherent ten-turn elicitation progress belongs to future generative/adversarial probes, not the deterministic transport-permutation proof. Brief-based golden fixtures are a future input style, not a separate required subsystem. +24. Brunch must ship probe drivers over the public JSON-RPC surface that produce replayable transcript artifacts and property-checkable reports. The first product-level driver proof is a deterministic public-RPC structured-exchange permutation run: the current `present_question`/`request_answer`, `present_options`/`request_choice`, and `present_options`/`request_choices` permutations are driven through activated workspace/spec/session state, with Pi JSONL and Brunch projections comparable in kind and quality to an equivalent TUI-driven session. Coherent ten-turn elicitation progress belongs to future generative/adversarial probes, not the deterministic transport-permutation proof. Brief-based golden fixtures are a future input style, not a separate required subsystem. Reusable seed fixtures, launchable workbenches, promoted probe runs, and scratch output must stay distinct so local dev DB state cannot masquerade as reusable truth or durable evidence. 
#### Runtime profile & prompting @@ -268,8 +268,9 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c - **D67-L — Brunch tracks the latest pi release; dev iterates against pi source via a gated runtime alias.** Brunch keeps `@earendil-works/pi-*` current with upstream rather than pinning to an old line; version bumps are routine adaptation work, not deferred migrations. Local vite/vitest development aliases `@earendil-works/pi-ai`, `@earendil-works/pi-agent-core`, `@earendil-works/pi-tui`, and `@earendil-works/pi-coding-agent` to the sibling `pi-mono` `src/` checkout via an explicit `PI_SOURCE` runtime flag so cross-package iteration needs no rebuild in those loops; published builds, TypeScript, editors, and default runtime resolve the normal installed `dist`. Base `tsconfig.json` deliberately carries no pi source `paths` because paths cannot be env-gated; if a `tsx` real-provider loop later needs no-rebuild pi source, add an opt-in `tsconfig.dev.json` rather than weakening the default. Inaugural bump: `^0.75.5 → 0.79.0`. Depends on: A25-L, D39-L. Supersedes: pinning Brunch to a fixed older pi line, treating pi upgrades as discrete migration projects, or making a personal source checkout the unconditional type/default resolution path. - **D68-L — Development feedback loops are first-class DX, consolidated behind one front door, distinct from product-verification probes.** Brunch maintains three named developer loops: (1) **faux loop** — deterministic, in-process `AgentSession` over the pi faux provider + `.inMemory()` services, the inner/middle-loop substrate for wrapper logic and regressions; (2) **real-provider TUI/CLI loop** — `tsx`-run Brunch source against a live model for interactive use, with pi-source resolution opt-in per D67-L only when needed; (3) **introspection loop** — real provider plus payload/manifest capture (D69-L). 
These loops live behind a single consolidated dev front door (`src/dev/`) that owns the dev launchers and the shared faux-harness factory; ad hoc per-file faux setup is absorbed into that factory. The dev loops are the *means of building and iterating on* Brunch and are distinct from `src/probes/` **probe runs**, which are durable *product-verification* artifacts (`.fixtures/runs/`, `docs/architecture/probes-and-transcripts.md`); where a dev loop produces durable evidence it does so as a probe run rather than a parallel artifact path. Depends on: D39-L, D67-L; the probe/transcript model. Supersedes: scattered, unnamed dev-iteration scripts and ad hoc faux-provider wiring as the wrapper's test substrate. - **D69-L — Agent-input introspection is one read-only, dev-gated Brunch extension; mechanical and conversational modes are separate planes.** A single Brunch-owned extension family, wired through `brunch-pi-extensions.ts` (never ambient discovery), provides **mechanical** introspection two ways: (a) a passive `before_provider_request`/`before_agent_start` tap that records *exactly the final payload the model receives* (system prompt, tool JSON schemas, D58-L prompt-resource manifest), and (b) an on-demand `/introspect` command that reports the **base** system-prompt inputs via `ctx.getSystemPromptOptions()` (base inputs only — `getSystemPromptOptions` returns pi's `_baseSystemPromptOptions`, so it does *not* reflect later `before_agent_start`/`before_provider_request` mutations) and the latest passive capture. The extension returns every payload unchanged so it observes but never shapes product behavior (D39-L sealing); because `before_provider_request` is a registration-ordered transformation chain in pi, the introspection tap must be registered *after* all Brunch prompt/tool/policy mutators to record the post-mutation payload. 
**Conversational** introspection is the sibling read-only query-tool plane: under the same `BRUNCH_DEV`/`introspection.enabled` gate, `brunch_session_query` reads `ctx.sessionManager.getBranch()` and `brunch_introspect_query` reads the captured provider payload plus base prompt options. Both tools project exact values with the shared capped dot/`[n]`/`[*]` grammar, truncate/spill large output, and rely on the agent's normal chat reply to echo/discuss the returned bytes. The D40-L active-tool allow-list explicitly unions this dev query-tool set only when the factory's dev gate is on, subtracts blocked tools, and intersects registered tools; registration alone is not advertisement. Tool-description nudges are the only prompt surface; no product prompt resource or fixed self-report schema is added. Subjective live interrogation remains an injected turn driven from the dev front-door launcher (`session.prompt`) or typed interactively, not a separate slash command. Captured scratch runs still write under `.fixtures/scratch/introspection//` (D70-L) so "what was sent" and "how the model read it" stay correlated. In `BRUNCH_DEV` real TUI launches, the same passive capture may also mirror the latest final system prompt bytes into the launch workspace's ephemeral `.brunch/debug/system-prompt.md`, and explicit Brunch-owned text `tool_result` content appends to `.brunch/debug/tool-contents.md`; this cache is not probe evidence, never reconstructs or shapes prompt state, and does not attempt `renderResult()` flattening. The launcher performs no global environment mutation; real TUI launches keep Pi startup update suppression scoped at the session-construction site with save/restore scoping (D71-L). Direct diagnostic for the "Prompt-resource discretionary loading" blind spot (I38-L). Depends on: D39-L, D40-L, D58-L, D68-L, D70-L; I38-L. 
Supersedes: treating "how the model sees our tools/skills" as an outer-loop-only, non-instrumented concern, and the fixed structured self-report schema as the default conversational surface. -- **D70-L — `.fixtures/` is a four-role tree (seeds / workbenches / runs / scratch); dev-loop artifacts decouple operating-cwd from artifact-root.** `.fixtures/` separates four lifecycles, each with its own git policy: **`seeds/`** — tracked, reusable explicit-basis starting truth loaded via `npm run seed` (INPUT); **`workbenches/`** — launchable Brunch workspaces whose `.brunch/` is gitignored local state (the directories a dev `--cwd` targets, D71-L); **`runs/`** — tracked, *curated/promoted* probe evidence under `//`, probe-first per D68-L (EVIDENCE); **`scratch/`** — gitignored, ephemeral live dev-loop output under `//` (SCRATCH). Dev launchers (faux/introspection) must resolve their artifact root to the package-relative repo `.fixtures/scratch/`, **not** to the operating `cwd` — the same operating-cwd-vs-`fixtureRoot` decoupling the probe layer already uses (`mkdtemp` ephemeral cwd + repo-resolved `fixtureRoot`). This removes the `join(cwd, '.fixtures', …)` nesting defect where launching against a workbench would write `/.fixtures/…`. An exploratory scratch run becomes durable evidence only by explicit promotion (move `scratch///` → `runs///`, then track it), keeping curated `runs/` clean. `.fixtures/scratch/` is the chosen scratch home (over reusing `tmp/`) so promotion is a move within one tree. Depends on: D52-L, D68-L; the probe/transcript model. Supersedes: pinning dev-run artifacts to the operating cwd; treating all `.fixtures/runs/` output as tracked evidence; leaving the `workbenches/` role undocumented. 
+- **D70-L — `.fixtures/` is a four-role tree (seeds / workbenches / runs / scratch); dev-loop artifacts decouple operating-cwd from artifact-root.** `.fixtures/` separates four lifecycles, each with its own git policy: **`seeds/`** — tracked, reusable explicit-basis starting truth consumed by the seed loader (INPUT), never local runtime DB state; **`workbenches/`** — launchable Brunch workspaces whose `.brunch/` is gitignored local state (the directories a dev `--cwd` targets, D71-L); **`runs/`** — tracked, *curated/promoted* probe evidence under `<probe>/<run-id>/`, probe-first per D68-L (EVIDENCE); **`scratch/`** — gitignored, ephemeral live dev-loop output under `<loop>/<run-id>/` (SCRATCH). Dev launchers (faux/introspection) must resolve their artifact root to the package-relative repo `.fixtures/scratch/`, **not** to the operating `cwd` — the same operating-cwd-vs-`fixtureRoot` decoupling the probe layer already uses (`mkdtemp` ephemeral cwd + repo-resolved `fixtureRoot`). This removes the `join(cwd, '.fixtures', …)` nesting defect where launching against a workbench would write `<workbench>/.fixtures/…`. An exploratory scratch run becomes durable evidence only by explicit promotion (move `scratch/<loop>/<run-id>/` → `runs/<probe>/<run-id>/`, then track it), keeping curated `runs/` clean. `.fixtures/scratch/` is the chosen scratch home (over reusing `tmp/`) so promotion is a move within one tree. Depends on: D52-L, D68-L; the probe/transcript model. Supersedes: pinning dev-run artifacts to the operating cwd; treating all `.fixtures/runs/` output as tracked evidence; leaving the `workbenches/` role undocumented. 
- **D71-L — One `BRUNCH_DEV` switch gates all dev affordances; the main CLI accepts `--cwd`; introspection is present-but-dead in prod.** The over-specific `BRUNCH_DEV_RPC` env var is generalized to a single `BRUNCH_DEV` switch that, when set, enables dev affordances together: dev RPC methods (`dev.*`), registration of the read-only introspection extension (D69-L), and routing of dev-loop artifacts to `.fixtures/scratch/` (D70-L). `runBrunchCli` parses a `--cwd ` flag (defaulting to `process.cwd()`) so a dev session can target a `.fixtures/workbenches/` workspace without `cd`. Two independent prod-safety gates hold: (1) `src/dev/**` is build-excluded by `tsconfig.build.json`, so launchers/harness/alias never ship; (2) the introspection extension, though compiled into `dist` under `src/.pi/`, only *registers* when `createBrunchPiExtensions(..., { introspection: { enabled } })` opts in — and the TUI call site sets `enabled` from `BRUNCH_DEV` only, so absent the switch it is present-but-dead, never wired, honoring D39-L explicit-opt-in sealing (no ambient discovery). Brunch-launched TUI sessions keep Pi startup update suppression on in both product and `BRUNCH_DEV` runs by scoping `PI_OFFLINE=1` through `InteractiveMode.run()` unless the user already set a value; prior `PI_OFFLINE` / `PI_SKIP_VERSION_CHECK` state is restored in `finally`, never as a leaked global `process.env` mutation. Depends on: D39-L, D67-L, D68-L, D69-L, D70-L. Supersedes: the `BRUNCH_DEV_RPC`-only dev gate; relying on the operating cwd to choose the dev workspace; the assumption that the introspection extension needs build-exclusion (runtime opt-in suffices); lifting Pi offline mode in `BRUNCH_DEV` TUI sessions merely to enable live-provider behavior. 
+- **D79-L — Dev DB seeding is explicit, selected, and target-workspace-scoped; `npm run dev` never implies a seed.** A Brunch workspace DB is local runtime state under that launch cwd's `.brunch/`; running `npm run dev` against the repo root or a workbench may create/open that workspace, but it must not silently load reusable seed fixtures. Reusable graph seeds under `.fixtures/seeds/<set>/<slug>.json` are loaded only by an explicit seed command that names the target workspace and the seed set/slug (or an explicitly requested all-seeds batch); the loader remains a graph-domain utility over `seedFixture`/`CommandExecutor`, so seeded specs get normal `create_spec`/`mutate_graph` change-log entries, spec-local LSNs, elicitation-gap seeding, and structural validation. Workbenches under `.fixtures/workbenches/<name>/` are launchable cwd containers, not seed truth: their `.brunch/` may be reset or re-seeded locally, but tracked files must document which seed(s) a human or script should apply. Captured or newly-authored seed JSON is parked until it has at least one named consumer disposition (`test`, `preview`, `manual workbench`, `probe input`, or `parked`); existence under `seeds/` alone does not make it part of the default dev database. Depends on: D16-L, D20-L, D52-L, D70-L, D71-L. Supersedes: the catch-all `npm run seed` mental model that loads every seed into the current shell cwd; treating the repo-root `.brunch/` as canonical dev fixture state; auto-seeding because a dev host starts. - **D59-L — `goal` is a readiness-derived, AUTO-able objective axis, distinct from strategy.** A *goal* is what the session agent currently pursues; a *strategy* is the reusable interaction shape used to pursue it — a goal is pursued *via* a strategy *through* a lens (three orthogonal axes).
The goal set is derived from readiness-band coverage (D64-L) rather than a stored grade: `grounding-advance` (fill grounding gaps and raise grounding coverage), `elicit-expand` (expand the elicited specification graph while ambiguity remains productive), `commit-converge` (reduce / lock down reviewable commitments), plus an always-on `capture-posture` (capture or confirm dev `posture`, D45-L). `goal` defaults to the readiness-derived objective (e.g. while grounding coverage is thin, `grounding-advance`), may be pinned, or left `AUTO`; in either case D58-L manifests advertise the legal resource(s) rather than injecting the whole goal body. For now `goal` is **internal/readiness-derived and not part of the user posture-change surface** (it is too contingent to expose as a user-mutable axis); the pin affordance is reserved for system/internal logic, and unlike `strategy`/`lens` the user does not switch it (D40-L, Q4). `elicit-expand` and `commit-converge` intentionally form the diverge/converge pair for the elicitation diamond; `elicit-I` / `elicit-II` are retired because they were phase-like labels, not objectives. "Advance grounding" is a goal, not a strategy — though the `grounding-advance` goal may carry a dedicated default interaction pattern. Depends on: D45-L, D57-L, D58-L, D64-L. Supersedes: conflating the elicit-lifecycle objective with strategy selection, and deriving the goal set from a stored readiness grade. - **D66-L — `freestyle` is a structure-optional elicitation strategy; it and generalized free-text capture are one slice.** `freestyle` joins the strategy axis (D25-L) as a fifth value alongside `step-wise-decision-tree`, `step-wise-disambiguate`, `propose-graph`, and `project-graph`. 
The four existing strategies impose structured-exchange turn discipline (offer-first `present_*`/`request_*` ritual, D37-L); `freestyle` makes that discipline *optional* — the turn may be ordinary user-driven chat, structured-exchange tools remain available (not prohibited), and user-invoked slash/skill-commands are ergonomic here precisely because no pending structured exchange is consuming the turn. It is **initiative/interaction-style, not authority**: it is not a new `op_mode`, adds no tool authority, and `op_mode`-gated tool policy (D40-L) is unchanged. Because freestyle has no mandatory exchange, the only way it grows graph truth is **generalized capture**, so the two land together: post-exchange capture (D18-L) is now wired onto the ordinary-message path (`session.submitMessage`, D49-L) over the same `session exchange` unit — which already spans plain user text — routing high-confidence directly-stated facts through `CommandExecutor.mutateGraph({createBasis: explicit, ops})` exactly as the structured-response capture tracer does, while low-confidence implications stay in preface / `capture_*` analysis (D47-L, D50-L) and never become graph truth. Freestyle therefore *composes with*, and does not replace, the `goal` (D59-L) and `lens` (D25-L) axes: the user still pursues `grounding-advance` / `elicit-expand` / etc., just through free chat, and freestyle capture can both resolve and spawn `elicitation_gaps` (D65-L). **AUTO must not select `freestyle`** — it is an explicit user pin only (a "let me just talk" escape hatch); the runtime manifest now omits it under AUTO while still allowing explicit pins, so spontaneous AUTO entry cannot silently abandon the offer-first product thesis (R16). 
Remaining open quality questions are limited to capture scope beyond directly-labeled facts (fitness evidence under A22-L, materially harder without a structured prompt), whether capture eventually runs on every freestyle turn or on demand, and the exact slash/skill-command surface (the Q6 method-vs-command question). Depends on: D18-L, D25-L, D26-L, D40-L, D45-L, D49-L, D50-L, D59-L, D63-L, D65-L. Refines: R16. Supersedes: treating offer-first (R16) as a universal per-turn session invariant; treating freestyle as a new operational mode or authority posture. - **D60-L — Agent context splits into pull / projection / render / surface, distinguishes graph-truth from active-context reads, and keeps `workspace.state` separate.** **Agent context** = content the agent reasons over: `cwd` (filesystem kickoff heuristic — `.brunch?`, session count/length, README/markdown sizes, file counts), `graph` (overview/list/query), or `node` (variable-hop neighborhood). **PULL** is typed, read-only data access owned by the data layer (`graph/queries.ts` for graph/node; `session/` for cwd) and bypasses `CommandExecutor` (reads only); the typed value *is* the JSON form. Graph pulls must make the read projection explicit: `graph_truth` includes accepted truth records, while `active_context` hides superseded predecessors and must also omit edges whose endpoints are hidden so active-context reads do not contain dangling references. The graph read family should support the observed query shapes without becoming a generic records API: list nodes by kind(s), list nodes by D64-L readiness band(s), find nodes related to anchor node(s) by edge category/direction/hop depth, and find class-members lacking an edge of a given category in a given direction (gap query — a single named absence shape, not a generic NOT-predicate language). 
`src/graph/README.md` owns the consumer coverage ledger: `read_graph` exposes the six agent shapes, while RPC and web deliberately expose only overview + neighborhood until a scoped feature promotes another shape. **PROJECTION** is optional info-preserving shaping for reusable DTOs; when multiple adapters need the same structured view, it belongs in `projections/`, but many callers can consume the typed read directly. **RENDER** turns a typed or projected value into either an LLM-friendly string or JSON (trivial serialization). Reusable lossy text/markdown rendering belongs in `renderers/`; `.pi/agents/contexts/` owns the agent-context orchestration decision — which typed pull to expose, how much detail to include, and how lens-plane/grade-depth shape the prompt-facing string — and may call reusable renderers. Rendered projected stable node codes (D62-L) remain the primary handles. **SURFACE** delivers it: *pushed* (compose injects at turn boundary), *pulled* (`read_graph`, `read_workspace_context`, `read_session_context` wrap the relevant reads/renderers — markdown in `toolResult.content`, typed JSON in `toolResult.details` per I33-L), or *rpc/ui*. The separate **workspace projection** (`workspace.state` — workspace/session/spec/chrome product state) is a different subject and keeps that name. Depends on: D35-L, D52-L, D53-L, D62-L, D64-L. Supersedes: pre-rendering context strings in the pull layer, scattering context-build logic across `graph/`, `.pi/agents/contexts/`, and tool adapters, or silently mixing graph-truth and active-context reads. 
@@ -325,6 +326,7 @@ The POC's purpose is to prove three things: (a) that pi's coding-agent harness c | I45-L | A session's assistant-visible watermark advances only when a continuity entry naming a strictly higher spec-local LSN is inserted: a boot/context seed or whole-spec overview snapshot, a `worldUpdate` for any write not already assistant-visible through another carrier (naming only items with LSN strictly greater than the pre-update watermark, I4-L), or the session's own graph-mutation `toolResult`. `worldUpdate` covers foreign writes **and** same-session writes that did not ride an own-mutation `toolResult` (e.g. submit-time / freestyle capture); such a same-session capture advances `current_lsn` and is surfaced by the next `worldUpdate`, never silently swallowed. A freshly seeded session whose seed named the current snapshot LSN does not immediately synthesize a redundant `worldUpdate`. Narrow `getNodes`/`queryNodes` reads do not advance the global watermark (they update per-entity read ledgers only). When `current_lsn == watermark` no `worldUpdate` is synthesized, and the session's own already-visible mutations never produce a `worldUpdate`. The watermark is its own projection over the carrier set (distinct from `runtimeState.world.latestLsn`), projected from transcript continuity entries (D43-L), never a stored field. | planned (turn-boundary-reconciliation slice; coverage-first scaffold) | D43-L, D76-L, D77-L; I1-L, I4-L | | I46-L | Session origination never writes a fabricated user transcript entry. 
A new session inserts seed continuity entries and then an assistant-originated exchange before idling; a resumed session decides the kick from the **latest unresolved conversational debt**, computed by ignoring trailing continuity-only entries — any reconciler-inserted notice owing no assistant continuation: seed / `worldUpdate` / `brunch.mention*` / `brunch.session_lifecycle` / side-task & reviewer drains — whether inserted this boot or persisted by a prior boot — it originates a turn iff that debt owed assistant continuation (a user message or an incomplete exchange-tuple awaiting the assistant), and otherwise rests at an assistant/system-originated leaf (I13-L). The kick decision is idempotent across crash/reboot: trailing continuity notices neither mask an older unanswered debt nor manufacture a kick over a satisfied leaf. AUTO never originates a `freestyle` turn (D66-L); only an explicit `freestyle` pin yields a wait-for-user idle. | planned (kick+seeding slice; coverage-first scaffold) | D66-L, D78-L; R16; I13-L | | I47-L | Continuity facts (seed/refresh, `worldUpdate`, `brunch.mention*`, `brunch.session_lifecycle`) persist only as Brunch custom transcript entries — never synthetic `toolCall`s, never prompt-only injection — so the D43-L projection can reconstruct them; boot/resume seeding is idempotent, deriving dedupe from projected transcript state (a seed/world-update already present is not re-emitted) rather than from hidden flags, and survives real restart/resume. The watermark must also survive compaction: the preserved-anchor set retains the latest watermark-carrier entry per spec so the projected global watermark never regresses after compaction+resume (which would otherwise spuriously re-emit `worldUpdate`). 
| planned (kick+seeding + turn-boundary-reconciliation slices; coverage-first scaffold) | D17-L, D37-L, D43-L, D76-L, D78-L | +| I48-L | Dev seeding never mutates an unintended workspace and never loads unrelated reusable seeds by ambient default: the seed path is target-workspace-scoped, selected by seed set/slug unless an all-seeds batch is explicitly requested, routes through `CommandExecutor`, and reports the destination `.brunch/data.db`; dev launch (`npm run dev`, with or without `--cwd`) observes existing workspace DB state but does not imply seeding. | partially validated — seed CLI now requires unambiguous `--workspace` + safe `--seed <set>/<slug>` input, rejects malformed/unknown/duplicate flags before opening a workspace DB, writes only the named workspace DB through `seedFixture`/`CommandExecutor`, reports destination + selected seed ref mapping, and product RPC `workspace.selectionState` through `--cwd` proves seeded-vs-sibling workspace isolation; explicit all-seeds opt-in and full seed disposition catalog remain `dev-seed-fixtures` follow-up. | D70-L, D71-L, D79-L; I1-L, I11-L | ## Future Direction Register @@ -535,7 +537,8 @@ src/.pi/ | **Faux loop** | Deterministic in-process dev loop: an `AgentSession` driven by the pi faux provider with `.inMemory()` auth/registry/session/settings, scripting LLM turns via `setResponses`. The inner/middle-loop substrate for wrapper logic and regressions; no network, keys, or tokens (D68-L). | | **Introspection loop** | Real-provider dev loop that captures exactly what the model receives (system prompt, tool schemas, prompt-resource manifest) via the read-only D69-L extension, and pairs it with interactive interrogation of the model about clarity. Diagnoses I38-L discretionary-loading questions. | | **Dev front door** | The consolidated `src/dev/` surface owning the three DX loop launchers and the shared faux-harness factory (D68-L). Distinct from `src/probes/` product-verification probe runs.
| -| **Workbench** | A launchable Brunch workspace under `.fixtures/workbenches//` that a dev session targets with `--cwd` (D71-L). Its `.brunch/` runtime state is gitignored local state, not tracked evidence. The operating-cwd axis of a dev run, distinct from the artifact-root axis (D70-L). | +| **Seed fixture** | Tracked reusable explicit-basis starting graph truth under `.fixtures/seeds//.json`, consumed by the seed loader through `seedFixture`/`CommandExecutor` (D79-L). It is input data, not a workbench DB snapshot and not probe evidence; each seed needs a named consumer disposition before it becomes part of a default dev/test flow. | +| **Workbench** | A launchable Brunch workspace under `.fixtures/workbenches//` that a dev session targets with `--cwd` (D71-L). Its `.brunch/` runtime state is gitignored local state, not tracked evidence or reusable seed truth. The operating-cwd axis of a dev run, distinct from the artifact-root axis (D70-L); tracked workbench docs name which seed(s) to apply rather than committing the resulting DB. | | **Scratch run** | Gitignored ephemeral dev-loop output under `.fixtures/scratch///`, always resolved to the repo-root `.fixtures/` rather than the operating cwd (D70-L). Becomes durable evidence only by explicit promotion to a tracked `runs///`. | | **Promotion** | The explicit act of moving a `scratch///` run into tracked `runs///` evidence, the only path by which exploratory dev output becomes a curated probe run (D70-L). | | **`BRUNCH_DEV`** | The single env switch gating every dev affordance at once: dev RPC methods, introspection-extension registration, scratch artifact routing, and the scoped offline-default lift (D71-L). Generalizes the former `BRUNCH_DEV_RPC`. 
| @@ -696,6 +699,7 @@ The first required probe is M0: after manual TUI interaction, a checker proves ` | I45-L | Middle — watermark-projection property tests (own-write stamping vs foreign `worldUpdate`; strict-greater item set per I4-L; no-`worldUpdate` when `current==watermark`); **seed/full-overview snapshots advance the watermark while narrow `getNodes`/`queryNodes` reads do not**; **no redundant `worldUpdate` immediately after a seed that named the current snapshot LSN**; **same-session submit/capture write bumps `current_lsn` and is surfaced by the next `worldUpdate` (not swallowed)**; **a foreign write that lands between the snapshot read and seed insertion is not masked by the seed**; change-log-range fixtures driving a foreign writer (a second faux session or a direct `CommandExecutor` write) through the real boot. Inner — projection unit tests over synthetic transcript continuity entries. Authored coverage-first (skipped/`todo`) ahead of the `turn-boundary-reconciliation` slice. | | I46-L | Middle — Tier-2 faux-turn-through-real-boot assertions: new session seeds-then-kicks before the first provider call; resumed-session kick decision classifies **latest unresolved conversational debt** (ignoring trailing continuity-only entries) and still fires when a user tail is followed by reconciler-inserted seed/staleness notices; **crash-after-notice-before-provider reboot still kicks when the underlying debt is an unanswered user/assistant turn** (idempotent re-boot); resumed-session kick stays silent when the latest debt already rests at a `request_*`/system leaf; no fabricated user entry in any path; AUTO never originates `freestyle`. Outer — manual walkthrough of opening-offer quality. Authored coverage-first (skipped/`todo`) ahead of the `kick+seeding` slice. 
| | I47-L | Middle — restart/resume idempotence property tests (repeated boot does not duplicate seed/`worldUpdate`; dedupe derived from projection); **compaction+resume preserves the projected watermark and does not spuriously re-emit `worldUpdate`** (preserved-anchor set retains the latest watermark carrier); carrier-discipline source/architecture tests (continuity facts are custom entries, not synthetic `toolCall`s or prompt-only). Authored coverage-first (skipped/`todo`) ahead of the enabling slices. | +| I48-L | Inner — seed CLI contract tests for target workspace resolution, seed set/slug filtering, explicit all-seeds mode, `CommandExecutor`/change-log routing, and destination reporting. Middle — fresh workbench tracer: seed one named fixture into `.fixtures/workbenches//.brunch/data.db`, launch `npm run dev -- --cwd .fixtures/workbenches/` (or print/RPC equivalent), and assert selected workspace state plus graph overview come only from that workbench DB. | ### Design Notes diff --git a/src/app/brunch.test.ts b/src/app/brunch.test.ts index c1240b84..76dc2b2d 100644 --- a/src/app/brunch.test.ts +++ b/src/app/brunch.test.ts @@ -6,6 +6,7 @@ import { PassThrough } from 'node:stream'; import { SessionManager } from '@earendil-works/pi-coding-agent'; import { describe, expect, it } from 'vitest'; +import { runSeedFixturesCli } from '../graph/seed-fixtures.js'; import { assistantMessage, userMessage } from '../probes/test-helpers.js'; import { createSessionBindingData } from '../session/session-binding.js'; import { @@ -63,6 +64,17 @@ function rpcRequest(method: string, id = 1): PassThrough { return stdin; } +async function runRpcRequest(cwd: string, method: string): Promise { + const stdout = new PassThrough(); + const chunks = collectStream(stdout); + await runBrunchCli({ + argv: ['--cwd', cwd, '--mode=rpc'], + stdin: rpcRequest(method), + stdout, + }); + return JSON.parse(chunks.join('')).result; +} + function collectStream(stream: PassThrough): string[] { const 
chunks: string[] = []; stream.on('data', (chunk) => chunks.push(String(chunk))); @@ -282,6 +294,26 @@ describe('Brunch CLI dispatch', () => { } } }); + it('uses --cwd product RPC to inspect the named workspace rather than the shell cwd', async () => { + const shellCwd = await mkdtemp(join(tmpdir(), 'brunch-cli-shell-')); + const seededWorkspace = await mkdtemp(join(tmpdir(), 'brunch-cli-seeded-')); + const emptySibling = await mkdtemp(join(tmpdir(), 'brunch-cli-empty-')); + await runSeedFixturesCli({ + argv: ['--workspace', seededWorkspace, '--seed', 'workspace-spread/alpha-grounding'], + cwd: shellCwd, + stdout: () => {}, + }); + + const seededSelection = await runRpcRequest(seededWorkspace, 'workspace.selectionState'); + const siblingSelection = await runRpcRequest(emptySibling, 'workspace.selectionState'); + + expect(seededSelection).toMatchObject({ + cwd: seededWorkspace, + specs: [{ spec: { title: 'Alpha Grounding' } }], + }); + expect(siblingSelection).toMatchObject({ cwd: emptySibling, specs: [] }); + }); + it('exposes matching print and RPC workspace states from a real coordinator store', async () => { const cwd = await mkdtemp(join(tmpdir(), 'brunch-parity-')); await createWorkspaceSessionCoordinator({ cwd }).createSetupSession({ diff --git a/src/graph/seed-fixtures.test.ts b/src/graph/seed-fixtures.test.ts index e8a80ebd..d841d0a2 100644 --- a/src/graph/seed-fixtures.test.ts +++ b/src/graph/seed-fixtures.test.ts @@ -4,8 +4,10 @@ * keeping the graph clock and change log coherent. 
*/ -import { readFileSync } from 'node:fs'; -import { dirname, resolve } from 'node:path'; +import { existsSync, readFileSync } from 'node:fs'; +import { mkdtemp } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { dirname, join, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; import { eq } from 'drizzle-orm'; @@ -16,7 +18,8 @@ import { changeLog, edges, graphClock, nodes, specs } from '../db/schema.js'; import { CommandExecutor } from './command-executor.js'; import { EDGE_CATEGORIES } from './schema/kinds.js'; import { NODE_KIND_METADATA, type ReadinessBand } from './schema/nodes.js'; -import { seedFixture, type SeedFixture } from './seed-fixtures.js'; +import { runSeedFixturesCli, seedFixture, type SeedFixture } from './seed-fixtures.js'; +import { openWorkspaceCommandExecutor } from './workspace-store.js'; const HERE = dirname(fileURLToPath(import.meta.url)); @@ -31,6 +34,132 @@ function graphClockLsn(db: BrunchDb, specId: number): number { ); } +describe('seed fixture CLI', () => { + it.each([ + { name: 'missing args', argv: [] }, + { name: 'missing workspace value', argv: ['--workspace', '--seed', 'workspace-spread/alpha-grounding'] }, + { name: 'missing seed value', argv: ['--workspace', 'target', '--seed'] }, + { + name: 'unknown arg', + argv: ['--workspace', 'target', '--seed', 'workspace-spread/alpha-grounding', '--extra'], + }, + { + name: 'duplicate workspace flag', + argv: ['--workspace', 'one', '--workspace', 'two', '--seed', 'workspace-spread/alpha-grounding'], + }, + { + name: 'duplicate seed flag', + argv: [ + '--workspace', + 'target', + '--seed', + 'workspace-spread/alpha-grounding', + '--seed', + 'yamlbase/spec-graph', + ], + }, + { + name: 'parent seed set', + argv: ['--workspace', 'target', '--seed', '../workspace-spread/alpha-grounding'], + }, + { + name: 'parent seed slug', + argv: ['--workspace', 'target', '--seed', 'workspace-spread/../alpha-grounding'], + }, + { + name: 'absolute seed ref', + argv: 
['--workspace', 'target', '--seed', '/workspace-spread/alpha-grounding'], + }, + ])('rejects malformed input without creating a cwd DB: $name', async ({ argv }) => { + const cwd = await mkdtemp(join(tmpdir(), 'brunch-seed-cwd-')); + let stderr = ''; + + const code = await runSeedFixturesCli({ + argv, + cwd, + stderr: (chunk) => { + stderr += chunk; + }, + }); + + expect(code).toBe(1); + expect(stderr).toContain('Usage: npm run seed -- --workspace --seed /'); + expect(existsSync(join(cwd, '.brunch', 'data.db'))).toBe(false); + }); + + it('accepts equals-form flags when values are unambiguous and safe', async () => { + const shellCwd = await mkdtemp(join(tmpdir(), 'brunch-seed-shell-')); + const targetWorkspace = await mkdtemp(join(tmpdir(), 'brunch-seed-target-')); + let stdout = ''; + + const code = await runSeedFixturesCli({ + argv: [`--workspace=${targetWorkspace}`, '--seed=workspace-spread/alpha-grounding'], + cwd: shellCwd, + stdout: (chunk) => { + stdout += chunk; + }, + }); + + expect(code).toBe(0); + expect(stdout).toContain('seeded workspace-spread/alpha-grounding → spec'); + expect(existsSync(join(shellCwd, '.brunch', 'data.db'))).toBe(false); + expect(existsSync(join(targetWorkspace, '.brunch', 'data.db'))).toBe(true); + }); + + it('reports the selected seed ref rather than the fixture internal spec slug', async () => { + const targetWorkspace = await mkdtemp(join(tmpdir(), 'brunch-seed-target-')); + let stdout = ''; + + const code = await runSeedFixturesCli({ + argv: ['--workspace', targetWorkspace, '--seed', 'yamlbase/spec-graph'], + stdout: (chunk) => { + stdout += chunk; + }, + }); + + expect(code).toBe(0); + expect(stdout).toContain('seeded yamlbase/spec-graph → spec'); + expect(stdout).not.toContain('seeded yamlbase/yamlbase → spec'); + }); + + it('seeds only the selected fixture into the named workspace and reports the destination DB', async () => { + const shellCwd = await mkdtemp(join(tmpdir(), 'brunch-seed-shell-')); + const targetWorkspace = 
await mkdtemp(join(tmpdir(), 'brunch-seed-target-')); + let stdout = ''; + + const code = await runSeedFixturesCli({ + argv: ['--workspace', targetWorkspace, '--seed', 'workspace-spread/alpha-grounding'], + cwd: shellCwd, + stdout: (chunk) => { + stdout += chunk; + }, + }); + + expect(code).toBe(0); + expect(stdout).toContain('seeded workspace-spread/alpha-grounding → spec'); + expect(stdout).toContain(`Destination: ${join(targetWorkspace, '.brunch', 'data.db')}`); + expect(existsSync(join(shellCwd, '.brunch', 'data.db'))).toBe(false); + expect(existsSync(join(targetWorkspace, '.brunch', 'data.db'))).toBe(true); + + const executor = await openWorkspaceCommandExecutor(targetWorkspace); + const specRows = executor.listSpecs(); + expect(specRows.map((spec) => spec.slug)).toEqual(['alpha-grounding']); + const alpha = specRows[0]!; + const db = createDb(join(targetWorkspace, '.brunch', 'data.db')); + expect(db.select().from(nodes).where(eq(nodes.spec_id, alpha.id)).all()).toHaveLength( + loadFixture('alpha-grounding', 'workspace-spread').nodes.length, + ); + expect( + db + .select({ operation: changeLog.operation }) + .from(changeLog) + .where(eq(changeLog.spec_id, alpha.id)) + .all() + .map((row) => row.operation), + ).toEqual(['create_spec', 'mutate_graph']); + }); +}); + describe('seedFixture', () => { it('seeds the code-health fixture into a real DB via the command layer', () => { const db: BrunchDb = createDb(':memory:'); diff --git a/src/graph/seed-fixtures.ts b/src/graph/seed-fixtures.ts index c90b9144..7937ca46 100644 --- a/src/graph/seed-fixtures.ts +++ b/src/graph/seed-fixtures.ts @@ -19,12 +19,11 @@ * upstream format. 
* * CLI (dev only, run via tsx): - * npm run seed # seed all sets into /.brunch/data.db - * tsx src/graph/seed-fixtures.ts # same + * npm run seed -- --workspace --seed / */ -import { readdir, readFile } from 'node:fs/promises'; -import { dirname, join, resolve } from 'node:path'; +import { readFile } from 'node:fs/promises'; +import { dirname, isAbsolute, join, resolve } from 'node:path'; import { fileURLToPath } from 'node:url'; import type { GraphMutationOp } from './command-executor.js'; @@ -228,40 +227,106 @@ function roleNamedSeedEdgeDraft( const SEEDS_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), '../../.fixtures/seeds'); -/** Read every `.json` (ignoring `_`-prefixed files) under a seed-set dir. */ -async function readSeedSet(setDir: string): Promise { - const entries = await readdir(setDir); - const files = entries.filter((name) => name.endsWith('.json') && !name.startsWith('_')).sort(); - const fixtures: SeedFixture[] = []; - for (const file of files) { - const raw = await readFile(join(setDir, file), 'utf8'); - fixtures.push(JSON.parse(raw) as SeedFixture); +interface SeedCliOptions { + readonly argv?: readonly string[]; + readonly cwd?: string; + readonly stdout?: (chunk: string) => void; + readonly stderr?: (chunk: string) => void; +} + +interface ParsedSeedCliArgs { + readonly workspace: string; + readonly seed: { + readonly ref: string; + readonly set: string; + readonly slug: string; + }; +} + +/** Read one `.json` fixture under a seed-set dir. */ +async function readSelectedSeed(set: string, slug: string): Promise { + const raw = await readFile(join(SEEDS_ROOT, set, `${slug}.json`), 'utf8'); + return JSON.parse(raw) as SeedFixture; +} + +export async function runSeedFixturesCli(options: SeedCliOptions = {}): Promise { + const stdout = options.stdout ?? ((chunk) => process.stdout.write(chunk)); + const stderr = options.stderr ?? ((chunk) => process.stderr.write(chunk)); + const parsed = parseSeedCliArgs(options.argv ?? 
process.argv.slice(2), options.cwd ?? process.cwd()); + if (!parsed) { + stderr(seedUsage()); + return 1; } - return fixtures; + + const destinationDb = join(parsed.workspace, '.brunch', 'data.db'); + const fixture = await readSelectedSeed(parsed.seed.set, parsed.seed.slug); + const executor = await openWorkspaceCommandExecutor(parsed.workspace); + const result = seedFixture(executor, fixture); + stdout( + `seeded ${parsed.seed.ref} → spec ${result.specId} ` + + `(${result.nodeCount} nodes, ${result.edgeCount} edges)\n`, + ); + stdout(`Destination: ${destinationDb}\n`); + return 0; } -async function main(): Promise { - const cwd = process.cwd(); - const sets = await readdir(SEEDS_ROOT, { withFileTypes: true }); - const setDirs = sets.filter((entry) => entry.isDirectory()).map((entry) => entry.name); - - const executor = await openWorkspaceCommandExecutor(cwd); - for (const set of setDirs) { - const fixtures = await readSeedSet(join(SEEDS_ROOT, set)); - for (const fixture of fixtures) { - const result = seedFixture(executor, fixture); - console.log( - `seeded ${set}/${result.slug} → spec ${result.specId} ` + - `(${result.nodeCount} nodes, ${result.edgeCount} edges)`, - ); +function parseSeedCliArgs(argv: readonly string[], cwd: string): ParsedSeedCliArgs | null { + const values = new Map(); + for (let index = 0; index < argv.length; index += 1) { + const arg = argv[index]!; + if (arg === '--workspace' || arg === '--seed') { + const value = argv[index + 1]; + if (!safeFlagValue(value) || values.has(arg)) return null; + values.set(arg, value); + index += 1; + continue; + } + + const equals = arg.match(/^(--workspace|--seed)=(.*)$/u); + if (equals) { + const flag = equals[1] as '--workspace' | '--seed'; + const value = equals[2]; + if (!safeFlagValue(value) || values.has(flag)) return null; + values.set(flag, value); + continue; } + + return null; } - console.log(`\nDone. 
Seeded into ${join(cwd, '.brunch', 'data.db')}`); + + const workspace = values.get('--workspace'); + const seed = values.get('--seed'); + if (!workspace || !seed) return null; + + const [set, slug, extra] = seed.split('/'); + if (!safeSeedPart(set) || !safeSeedPart(slug) || extra) return null; + + return { + workspace: isAbsolute(workspace) ? workspace : resolve(cwd, workspace), + seed: { ref: seed, set, slug }, + }; +} + +function safeFlagValue(value: string | undefined): value is string { + return value != null && value.length > 0 && !value.startsWith('--'); +} + +function safeSeedPart(value: string | undefined): value is string { + return value != null && /^[a-z0-9][a-z0-9-]*$/u.test(value); +} + +function seedUsage(): string { + return 'Usage: npm run seed -- --workspace --seed /\n'; } if (import.meta.url === `file://${process.argv[1]}`) { - main().catch((error: unknown) => { - console.error(error); - process.exit(1); - }); + runSeedFixturesCli().then( + (code) => { + process.exitCode = code; + }, + (error: unknown) => { + console.error(error); + process.exitCode = 1; + }, + ); }