From ef3a40f3ad4c356dc958dfbb95616ea158f56994 Mon Sep 17 00:00:00 2001 From: Boden Fuller Date: Fri, 29 May 2026 11:56:01 -0400 Subject: [PATCH 1/8] feat(orchestration): dual-runtime OrchestrationPort foundation Implements the 3-category model (Claude Workflow / NTM swarm / plain skill) behind a single OrchestrationPort with safe degradation NTM -> Claude-native -> beads floor and a global AGENTOPS_ORCHESTRATION=off opt-out. - cli/internal/ports/orchestration.go: port + in-memory adapter (+ tests) - cli/internal/orchestration/: Selector, ntm_probe (capability detection via ntm --robot-capabilities), beads_floor + OrchestrationResult parity type, degradation-conformance test (41 tests total) - cli/cmd/ao/orchestrate.go: live 'ao orchestrate select' command (--pin/--opt-out/--json) - schemas/orchestration-{backend,result}.v1 + paired docs/contracts (structural-floor gated) - swarm/shared/crank prose rewritten NTM>native>beads; gc demoted (soc-2rtm0) - automation-shape-routing + workflow-builder skills; meta-skill authoring chain wired - lib/orchestrate-select.sh selector seam Plan: .agents/plans/2026-05-29-dual-runtime-orchestration-foundation.md Spike: ~/dev/agentops-3cat-spike/FINDINGS.md Gates this commit must satisfy in CI: build, vet, structural-floor, skill-count sync. 
--- PRODUCT.md | 6 +- cli/cmd/ao/orchestrate.go | 126 ++++++++++++ cli/cmd/ao/orchestrate_test.go | 162 ++++++++++++++++ cli/cmd/ao/rpi_phased_stream.go | 2 +- cli/docs/COMMANDS.md | 29 +++ cli/internal/orchestration/beads_floor.go | 76 ++++++++ .../orchestration/beads_floor_test.go | 121 ++++++++++++ .../orchestration/conformance_test.go | 183 ++++++++++++++++++ cli/internal/orchestration/doc.go | 16 ++ cli/internal/orchestration/ntm_probe.go | 116 +++++++++++ cli/internal/orchestration/ntm_probe_test.go | 111 +++++++++++ cli/internal/orchestration/result.go | 151 +++++++++++++++ cli/internal/orchestration/select.go | 141 ++++++++++++++ cli/internal/orchestration/select_test.go | 163 ++++++++++++++++ cli/internal/ports/inmemory_orchestration.go | 70 +++++++ .../ports/inmemory_orchestration_test.go | 114 +++++++++++ cli/internal/ports/orchestration.go | 70 +++++++ docs/ARCHITECTURE.md | 2 +- docs/SKILLS.md | 2 +- docs/cli-skills-map.md | 2 +- docs/contracts/context-map.md | 14 +- docs/contracts/orchestration-backend.md | 21 ++ docs/contracts/orchestration-ports.md | 170 ++++++++++++++++ docs/contracts/orchestration-result.md | 22 +++ docs/documentation-index.md | 3 + lib/orchestrate-select.sh | 80 ++++++++ schemas/orchestration-backend.v1.schema.json | 43 ++++ schemas/orchestration-result.v1.schema.json | 45 +++++ skills/SKILL-TIERS.md | 4 +- skills/automation-shape-routing/SKILL.md | 144 ++++++++++++++ .../crank/references/execution-preflight.md | 6 +- skills/crank/references/gc-pool-dispatch.md | 6 +- skills/crank/references/wave-dispatch.md | 6 +- skills/heal-skill/SKILL.md | 4 +- skills/shared/SKILL.md | 16 +- skills/skill-auditor/SKILL.md | 6 +- skills/skill-builder/SKILL.md | 10 +- skills/swarm/references/execution-steps.md | 30 +-- skills/workflow-builder/SKILL.md | 101 ++++++++++ 39 files changed, 2356 insertions(+), 38 deletions(-) create mode 100644 cli/cmd/ao/orchestrate.go create mode 100644 cli/cmd/ao/orchestrate_test.go create mode 100644 
cli/internal/orchestration/beads_floor.go create mode 100644 cli/internal/orchestration/beads_floor_test.go create mode 100644 cli/internal/orchestration/conformance_test.go create mode 100644 cli/internal/orchestration/doc.go create mode 100644 cli/internal/orchestration/ntm_probe.go create mode 100644 cli/internal/orchestration/ntm_probe_test.go create mode 100644 cli/internal/orchestration/result.go create mode 100644 cli/internal/orchestration/select.go create mode 100644 cli/internal/orchestration/select_test.go create mode 100644 cli/internal/ports/inmemory_orchestration.go create mode 100644 cli/internal/ports/inmemory_orchestration_test.go create mode 100644 cli/internal/ports/orchestration.go create mode 100644 docs/contracts/orchestration-backend.md create mode 100644 docs/contracts/orchestration-ports.md create mode 100644 docs/contracts/orchestration-result.md create mode 100755 lib/orchestrate-select.sh create mode 100644 schemas/orchestration-backend.v1.schema.json create mode 100644 schemas/orchestration-result.v1.schema.json create mode 100644 skills/automation-shape-routing/SKILL.md create mode 100644 skills/workflow-builder/SKILL.md diff --git a/PRODUCT.md b/PRODUCT.md index 95a6a6d30..bace121dd 100644 --- a/PRODUCT.md +++ b/PRODUCT.md @@ -67,7 +67,7 @@ The April 2026 Claude Code source analysis confirmed that Anthropic's internal t | Anthropic Concept | AgentOps Equivalent | Status | |---|---|---| | **Learning Loop** — memory extraction, dream cycle consolidation, future session context | Knowledge Flywheel — `/retro` → `/forge` → `/harvest` → `ao lookup` / `ao context assemble`, tiered promotion (learning → pattern → rule), plus bounded Dream via `/dream` | Live with bounds. On-demand capture/promotion works, and Dream provides an operator-started compounding lane. GitHub nightly is the public proof harness for the contracts, not the user's private runtime. 
| -| **Skillify** — AI watches patterns, packages them as reusable skills, compound growth | Skills system — 76 skills, `/heal-skill` audit, `/converter` cross-runtime export, SKILL-TIERS classification | Prototype built. `ao flywheel close-loop` now drafts review-only skills from repeated patterns; promotion polish is the remaining gap. | +| **Skillify** — AI watches patterns, packages them as reusable skills, compound growth | Skills system — 78 skills, `/heal-skill` audit, `/converter` cross-runtime export, SKILL-TIERS classification | Prototype built. `ao flywheel close-loop` now drafts review-only skills from repeated patterns; promotion polish is the remaining gap. | | **Verification Agent** — adversarial AI auditing AI, VERDICT system for human review | Council architecture — `/council`, `/pre-mortem`, `/vibe`, `/post-mortem` with multi-model consensus, prediction tracking. Stage 4 behavioral validation adds holdout scenarios + satisfaction scoring in STEP 1.8. | Live on demand. STEP 1.8 fires automatically inside `/validation` when that skill is invoked. | | **Managed Agents Dreaming** (May 2026) — scheduled session review, pattern extraction, memory curation between sessions | `/dream` + `.github/workflows/nightly.yml` proof jobs + substrate-driven scheduling when needed | Live with operator setup. The bounded private Dream lane runs harvest → forge → close-loop → defrag when the operator or substrate starts it. AgentOps itself no longer ships the daemon executor. | | **Managed Agents Outcomes** (May 2026) — rubric-driven separate-context grader with iterate-until-pass | Live at three scopes: project — `GOALS.md` (rubric) + `ao goals measure` (each gate runs as separate subprocess; `cli/internal/goals/measure.go:132-164`) + `/evolve` (can iterate a worst-failing gate under operator limits; `skills/evolve/SKILL.md:379-388`); plan — `/pre-mortem` council judges as separate-context graders; code — `/vibe` council judges. 
An internal council review (2026-05-06) found these capabilities present across rubric authoring, separate-context grading, iterate-until-pass, and pinpoint-what-changed; this is an internal finding, not an audited external-parity claim. | Live at the capability layer. Empirical workbench A/B (2026-05-06): Δ=+0.0000 across 12 cases at v1 difficulty (both legs 12/12) — task difficulty floor exhausted; v2 substrate (realistic agent tasks where the hook layer differentiates) is roadmap. Counter-stat artifact: `evals/workbench/results/2026-05-06-yjzp9-counterstat.json`. | @@ -176,7 +176,7 @@ The same model used in the README: bookkeeping records the work, the context com - `ao lookup` — decay-ranked retrieval for on-demand knowledge - `ao context assemble` — phase-scoped context packets - `ao compile` — rebuild the knowledge wiki (mine, grow, defrag, lint) -- 76 skills — reusable context packages across Claude Code, Codex, and OpenCode +- 78 skills — reusable context packages across Claude Code, Codex, and OpenCode - `bash <(curl -fsSL .../install.sh)` — 30 seconds, zero config #### Layer 3: Validation Gates @@ -261,7 +261,7 @@ As of 2026-05-10: - GitHub repo: 341 stars, 33 forks, 2 open issues, last pushed 2026-05-10T03:24:01Z - Public surface: GitHub Pages mkdocs site live at boshu2.github.io/agentops/; doctrine site live at 12factoragentops.com -- Distribution/runtime reach: 76 shared skills, 76 checked-in Codex artifacts, and 32 Codex overrides. `/validate` and `/curate` are additive in this train; legacy validation and mining skills remain until their shim/retirement gates are resolved. +- Distribution/runtime reach: 78 shared skills, 76 checked-in Codex artifacts, and 32 Codex overrides. `/validate` and `/curate` are additive in this train; legacy validation and mining skills remain until their shim/retirement gates are resolved. 
**Measured operational proof:** diff --git a/cli/cmd/ao/orchestrate.go b/cli/cmd/ao/orchestrate.go new file mode 100644 index 000000000..a811ed487 --- /dev/null +++ b/cli/cmd/ao/orchestrate.go @@ -0,0 +1,126 @@ +// practices: [hexagonal-architecture, safe-degradation] +package main + +import ( + "context" + "encoding/json" + "fmt" + "os/exec" + "strings" + + "github.com/spf13/cobra" + + "github.com/boshu2/agentops/cli/internal/orchestration" + "github.com/boshu2/agentops/cli/internal/ports" +) + +// orchestrateCmd is the parent for orchestration-backend tooling. It wires +// the library-only OrchestrationPort (internal/orchestration + the typed +// port in internal/ports) into the live `ao` command surface. +var orchestrateCmd = &cobra.Command{ + Use: "orchestrate", + Short: "Resolve and inspect the orchestration backend ladder", + Long: `Tooling for the orchestration safe-degradation ladder +(NTM -> Claude-native -> beads floor). Subcommands resolve which backend +a unit of work would run on, honoring an explicit pin, the +AGENTOPS_ORCHESTRATION env override, and an opt-out to the beads floor.`, +} + +var ( + orchestrateSelectJSON bool + orchestrateSelectPin string + orchestrateSelectOptOut bool +) + +var orchestrateSelectCmd = &cobra.Command{ + Use: "select", + Short: "Select the orchestration backend for a unit of work", + Long: `Resolve the orchestration backend via the safe-degradation ladder +NTM -> Claude-native -> beads floor. + +NTM availability is detected by capability — this shells out to +` + "`ntm --robot-capabilities`" + ` and degrades gracefully when ntm is +absent. Resolution order (first match wins): + + 1. --pin forces that backend. + 2. AGENTOPS_ORCHESTRATION env acts as an explicit pin / opt-out. + 3. --opt-out routes to the beads floor. + 4. NTM probe reports available -> ntm. + 5. 
otherwise -> claude (beads floor remains).`, + RunE: runOrchestrateSelect, +} + +func init() { + orchestrateCmd.GroupID = "workflow" + rootCmd.AddCommand(orchestrateCmd) + orchestrateCmd.AddCommand(orchestrateSelectCmd) + orchestrateSelectCmd.Flags().BoolVar(&orchestrateSelectJSON, "json", false, + "Emit the selection trace as JSON") + orchestrateSelectCmd.Flags().StringVar(&orchestrateSelectPin, "pin", "", + "Force a backend: ntm|claude|codex|beads (overrides --opt-out and availability)") + orchestrateSelectCmd.Flags().BoolVar(&orchestrateSelectOptOut, "opt-out", false, + "Bypass swarm engines and run on the beads floor") + _ = orchestrateSelectCmd.RegisterFlagCompletionFunc("pin", + staticCompletionFunc("ntm", "claude", "codex", "beads")) +} + +// execCommandRunner is the production CommandRunner adapter: it shells out +// via os/exec so ProbeNTM actually invokes `ntm --robot-capabilities`. It +// is a thin consumer of the orchestration package and adds no behavior of +// its own. +type execCommandRunner struct{} + +// Run executes name with args and returns the combined output. A non-zero +// exit (or a missing binary) surfaces as an error, which ProbeNTM reads as +// the canonical "tool absent or unusable" degradation signal. +func (execCommandRunner) Run(ctx context.Context, name string, args ...string) ([]byte, error) { + return exec.CommandContext(ctx, name, args...).CombinedOutput() +} + +// compile-time assertion that the adapter satisfies the probe's contract. +var _ orchestration.CommandRunner = execCommandRunner{} + +// workSpecFromFlags maps the command's flag values onto a port WorkSpec. +// It is split out from the cobra plumbing so the flag->intent mapping can +// be unit-tested without constructing a command. 
+func workSpecFromFlags(pin string, optOut bool) ports.WorkSpec { + return ports.WorkSpec{ + OptOut: optOut, + Pin: ports.Backend(strings.TrimSpace(pin)), + } +} + +// runOrchestrateSelect builds the production Selector over an exec-backed +// runner and resolves the backend for the flag-derived WorkSpec. +func runOrchestrateSelect(cmd *cobra.Command, _ []string) error { + selector := orchestration.NewSelector(execCommandRunner{}) + work := workSpecFromFlags(orchestrateSelectPin, orchestrateSelectOptOut) + + trace, err := selector.Select(cmd.Context(), work) + if err != nil { + return fmt.Errorf("selecting orchestration backend: %w", err) + } + + return emitSelectionTrace(cmd, trace, orchestrateSelectJSON) +} + +// emitSelectionTrace renders a SelectionTrace as JSON (when jsonOut) or as +// a human-readable summary. Kept separate so both branches are testable +// against an injected writer. +func emitSelectionTrace(cmd *cobra.Command, trace ports.SelectionTrace, jsonOut bool) error { + out := cmd.OutOrStdout() + if jsonOut { + enc := json.NewEncoder(out) + enc.SetIndent("", " ") + return enc.Encode(trace) + } + + fmt.Fprintf(out, "Backend: %s\n", trace.Chosen) + fmt.Fprintf(out, "Reason: %s\n", trace.Reason) + considered := make([]string, 0, len(trace.Considered)) + for _, b := range trace.Considered { + considered = append(considered, string(b)) + } + fmt.Fprintf(out, "Ladder: %s\n", strings.Join(considered, " -> ")) + return nil +} diff --git a/cli/cmd/ao/orchestrate_test.go b/cli/cmd/ao/orchestrate_test.go new file mode 100644 index 000000000..92f1becd3 --- /dev/null +++ b/cli/cmd/ao/orchestrate_test.go @@ -0,0 +1,162 @@ +package main + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "strings" + "testing" + + "github.com/spf13/cobra" + + "github.com/boshu2/agentops/cli/internal/orchestration" + "github.com/boshu2/agentops/cli/internal/ports" +) + +// fakeRunner is an in-memory CommandRunner so the Select path can be +// exercised without shelling 
out to a real `ntm` binary. +type fakeRunner struct { + out []byte + err error +} + +func (f fakeRunner) Run(_ context.Context, _ string, _ ...string) ([]byte, error) { + return f.out, f.err +} + +func TestOrchestrate_WorkSpecFromFlags(t *testing.T) { + tests := []struct { + name string + pin string + optOut bool + wantPin ports.Backend + wantOpt bool + }{ + {name: "empty", pin: "", optOut: false, wantPin: "", wantOpt: false}, + {name: "pin trimmed", pin: " claude ", optOut: false, wantPin: ports.BackendClaude, wantOpt: false}, + {name: "opt-out", pin: "", optOut: true, wantPin: "", wantOpt: true}, + {name: "pin wins over opt-out flags", pin: "codex", optOut: true, wantPin: ports.BackendCodex, wantOpt: true}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + got := workSpecFromFlags(tc.pin, tc.optOut) + if got.Pin != tc.wantPin { + t.Fatalf("Pin: got %q, want %q", got.Pin, tc.wantPin) + } + if got.OptOut != tc.wantOpt { + t.Fatalf("OptOut: got %v, want %v", got.OptOut, tc.wantOpt) + } + }) + } +} + +// TestOrchestrate_SelectResolvesBackends drives a real Selector with an +// injected fake runner across the ladder branches, asserting the chosen +// backend for each flag combination. 
+func TestOrchestrate_SelectResolvesBackends(t *testing.T) { + t.Setenv("AGENTOPS_ORCHESTRATION", "") // neutralize any operator override + + tests := []struct { + name string + runner orchestration.CommandRunner + pin string + optOut bool + want ports.Backend + }{ + { + name: "ntm absent degrades to claude", + runner: fakeRunner{err: errors.New("ntm: not found")}, + want: ports.BackendClaude, + }, + { + name: "ntm available selects ntm", + runner: fakeRunner{out: []byte(`{"capabilities":["tmux","git"]}`)}, + want: ports.BackendNTM, + }, + { + name: "opt-out routes to beads floor", + runner: fakeRunner{err: errors.New("ntm: not found")}, + optOut: true, + want: ports.BackendBeads, + }, + { + name: "pin wins over availability", + runner: fakeRunner{out: []byte(`{"capabilities":["tmux","git"]}`)}, + pin: "claude", + want: ports.BackendClaude, + }, + { + name: "pin codex (never auto-selected) honored", + runner: fakeRunner{err: errors.New("ntm: not found")}, + pin: "codex", + want: ports.BackendCodex, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + selector := orchestration.NewSelector(tc.runner) + work := workSpecFromFlags(tc.pin, tc.optOut) + trace, err := selector.Select(context.Background(), work) + if err != nil { + t.Fatalf("Select returned error: %v", err) + } + if trace.Chosen != tc.want { + t.Fatalf("Chosen: got %q, want %q", trace.Chosen, tc.want) + } + if len(trace.Considered) == 0 { + t.Fatal("Considered ladder must be recorded") + } + }) + } +} + +// TestOrchestrate_EmitSelectionTraceJSON asserts the JSON branch emits the +// trace verbatim and parses back into the port shape. 
+func TestOrchestrate_EmitSelectionTraceJSON(t *testing.T) { + trace := ports.SelectionTrace{ + Chosen: ports.BackendBeads, + Reason: "WorkSpec.OptOut -> beads floor", + Considered: []ports.Backend{"pin", "env", "optout"}, + } + cmd := &cobra.Command{} + var buf bytes.Buffer + cmd.SetOut(&buf) + + if err := emitSelectionTrace(cmd, trace, true); err != nil { + t.Fatalf("emitSelectionTrace: %v", err) + } + + var got ports.SelectionTrace + if err := json.Unmarshal(buf.Bytes(), &got); err != nil { + t.Fatalf("output is not valid JSON: %v", err) + } + if got.Chosen != ports.BackendBeads { + t.Fatalf("Chosen: got %q, want %q", got.Chosen, ports.BackendBeads) + } +} + +// TestOrchestrate_EmitSelectionTraceHuman asserts the human-readable branch +// renders the backend, reason, and ladder. +func TestOrchestrate_EmitSelectionTraceHuman(t *testing.T) { + trace := ports.SelectionTrace{ + Chosen: ports.BackendClaude, + Reason: "NTM absent -> claude-native fallback", + Considered: []ports.Backend{"pin", "env", "optout", "ntm", "claude", "beads"}, + } + cmd := &cobra.Command{} + var buf bytes.Buffer + cmd.SetOut(&buf) + + if err := emitSelectionTrace(cmd, trace, false); err != nil { + t.Fatalf("emitSelectionTrace: %v", err) + } + + got := buf.String() + for _, want := range []string{"Backend: claude", "Reason: NTM absent", "pin -> env -> optout -> ntm -> claude -> beads"} { + if !strings.Contains(got, want) { + t.Fatalf("output missing %q\nfull output:\n%s", want, got) + } + } +} diff --git a/cli/cmd/ao/rpi_phased_stream.go b/cli/cmd/ao/rpi_phased_stream.go index 6e09ed4bb..79dd2f6b4 100644 --- a/cli/cmd/ao/rpi_phased_stream.go +++ b/cli/cmd/ao/rpi_phased_stream.go @@ -226,7 +226,7 @@ func selectExecutorFromCaps(caps backendCapabilities, statusPath string, allPhas // The selection policy, chosen backend, and reason are logged to logPath for // observability. Pass an empty logPath to skip log writing (e.g., in tests). 
// -// Selection order: runtime override (stream/direct) > auto (live-status=>stream, else direct). +// Selection order: runtime override (stream/direct/tmux) > auto (always resolves to stream). func selectExecutor(statusPath string, allPhases []PhaseProgress) PhaseExecutor { return selectExecutorWithLog(statusPath, allPhases, "", "", false, defaultPhasedEngineOptions()) } diff --git a/cli/docs/COMMANDS.md b/cli/docs/COMMANDS.md index 2ed34e873..cd9d455e8 100644 --- a/cli/docs/COMMANDS.md +++ b/cli/docs/COMMANDS.md @@ -2069,6 +2069,35 @@ ao handoff [summary] [flags] --- +### `ao orchestrate` + +Tooling for the orchestration safe-degradation ladder + +``` +ao orchestrate [command] +``` + +**Subcommands:** + +#### `ao orchestrate select` + +Resolve the orchestration backend via the safe-degradation ladder + +``` +ao orchestrate select [flags] +``` + +**Flags:** + +``` + -h, --help help for select + --json Emit the selection trace as JSON + --opt-out Bypass swarm engines and run on the beads floor + --pin string Force a backend: ntm|claude|codex|beads (overrides --opt-out and availability) +``` + +--- + ### `ao ratchet` Track progress through the phased RPI workflow. diff --git a/cli/internal/orchestration/beads_floor.go b/cli/internal/orchestration/beads_floor.go new file mode 100644 index 000000000..b25200e2f --- /dev/null +++ b/cli/internal/orchestration/beads_floor.go @@ -0,0 +1,76 @@ +// practices: [output-contract-parity, safe-degradation] +package orchestration + +import ( + "context" + "fmt" + + "github.com/boshu2/agentops/cli/internal/ports" +) + +// BeadsFloorAdapter is the always-available beads "floor" of the +// safe-degradation ladder (NTM -> Claude-native -> beads). It is the tier +// every degradation path terminates in, so it MUST never fail to place +// work for lack of a richer engine. 
+// +// Its sole contractual job in this foundation is OUTPUT-CONTRACT PARITY: +// it emits the same OrchestrationResult shape every other tier emits, so a +// caller that has degraded all the way to the floor reads the outcome +// exactly as it would read an NTM or Claude-native result. Parity is what +// makes the degradation correctness-preserving rather than lossy. +// +// This is a thin stub representing the floor, not a bd execution engine. +// The real bd-driven sequential loop (claim a ready bead, run it, record a +// verdict, advance) is layered on later and will populate ResultPaths and +// Verdict from actual bd state. What is established here is the contract: +// whatever the real loop does, it returns a parity-conformant +// OrchestrationResult with Backend == ports.BackendBeads. +// +// The zero value is ready to use. +type BeadsFloorAdapter struct{} + +// beadsFloorPlaceholderPath is the repo-relative artifact path the floor +// stub reports until the real bd-driven loop replaces it with the actual +// paths a run wrote. It exists so the stub satisfies the non-empty +// ResultPaths requirement of the parity contract. +const beadsFloorPlaceholderPath = ".agents/orchestration/beads-floor.placeholder" + +// Run executes a single unit of work on the beads floor for the given +// taskID and returns a parity-conformant OrchestrationResult tagged with +// Backend == ports.BackendBeads. +// +// In this foundation it is a stub: it does not yet drive bd. It returns a +// result that always passes its own Validate check (SchemaVersion == +// SchemaVersionV1, a non-empty ResultPaths, and a valid WARN/MEDIUM +// verdict signalling "floor stub, not a real bd run"). The WARN/MEDIUM +// pairing is deliberate — it advertises that the floor produced a +// well-formed result without claiming the high-confidence PASS a real bd +// run would earn. The real bd-driven loop will replace the body while +// keeping this signature and the Backend tag. 
+// +// It honors ctx cancellation on a best-effort basis: if the context is +// already done it returns that error without fabricating a result, so a +// cancelled caller never receives a misleading floor verdict. +func (BeadsFloorAdapter) Run(ctx context.Context, taskID string) (OrchestrationResult, error) { + if err := ctx.Err(); err != nil { + return OrchestrationResult{}, fmt.Errorf("beads floor: context done before run: %w", err) + } + + result := OrchestrationResult{ + SchemaVersion: SchemaVersionV1, + Backend: ports.BackendBeads, + ResultPaths: []string{beadsFloorPlaceholderPath}, + Verdict: Verdict{ + Status: VerdictStatusWarn, + Confidence: VerdictConfidenceMedium, + }, + TaskID: taskID, + } + + // Self-check parity before returning: the floor is the contract's + // reference adapter, so it must never emit a non-conformant result. + if err := result.Validate(); err != nil { + return OrchestrationResult{}, fmt.Errorf("beads floor: emitted non-conformant result: %w", err) + } + return result, nil +} diff --git a/cli/internal/orchestration/beads_floor_test.go b/cli/internal/orchestration/beads_floor_test.go new file mode 100644 index 000000000..610140283 --- /dev/null +++ b/cli/internal/orchestration/beads_floor_test.go @@ -0,0 +1,121 @@ +package orchestration + +import ( + "context" + "errors" + "testing" + + "github.com/boshu2/agentops/cli/internal/ports" +) + +// TestBeadsFloorAdapter_Run_EmitsParityConformantResult asserts the floor +// adapter emits a result that conforms to the output-contract parity shape: +// Backend == beads, SchemaVersion == 1, non-empty ResultPaths, a valid +// verdict, the requested TaskID, and that Validate accepts it. 
+func TestBeadsFloorAdapter_Run_EmitsParityConformantResult(t *testing.T) { + var adapter BeadsFloorAdapter + const taskID = "soc-floor-1" + + result, err := adapter.Run(context.Background(), taskID) + if err != nil { + t.Fatalf("Run returned unexpected error: %v", err) + } + + if result.Backend != ports.BackendBeads { + t.Errorf("Backend: want %q, got %q", ports.BackendBeads, result.Backend) + } + if result.SchemaVersion != SchemaVersionV1 { + t.Errorf("SchemaVersion: want %d, got %d", SchemaVersionV1, result.SchemaVersion) + } + if len(result.ResultPaths) == 0 { + t.Error("ResultPaths: want non-empty, got empty") + } + for i, p := range result.ResultPaths { + if p == "" { + t.Errorf("ResultPaths[%d]: want non-empty path, got empty string", i) + } + } + if result.TaskID != taskID { + t.Errorf("TaskID: want %q, got %q", taskID, result.TaskID) + } + if !validVerdictStatuses[result.Verdict.Status] { + t.Errorf("Verdict.Status: %q is not a valid status", result.Verdict.Status) + } + if !validVerdictConfidences[result.Verdict.Confidence] { + t.Errorf("Verdict.Confidence: %q is not a valid confidence", result.Verdict.Confidence) + } + if err := result.Validate(); err != nil { + t.Errorf("Validate: want nil, got %v", err) + } +} + +// TestBeadsFloorAdapter_Run_HonorsCancelledContext asserts the floor does +// not fabricate a result when the caller's context is already cancelled. 
+func TestBeadsFloorAdapter_Run_HonorsCancelledContext(t *testing.T) { + var adapter BeadsFloorAdapter + ctx, cancel := context.WithCancel(context.Background()) + cancel() + + _, err := adapter.Run(ctx, "soc-floor-2") + if err == nil { + t.Fatal("Run: want error for cancelled context, got nil") + } + if !errors.Is(err, context.Canceled) { + t.Errorf("Run: want error wrapping context.Canceled, got %v", err) + } +} + +// TestBeadsFloorResult_Validate_RejectsBadVerdict asserts the parity +// validator rejects results whose verdict fields fall outside the schema +// enums — the negative half of the parity contract. +func TestBeadsFloorResult_Validate_RejectsBadVerdict(t *testing.T) { + tests := []struct { + name string + result OrchestrationResult + }{ + { + name: "bad status", + result: OrchestrationResult{ + SchemaVersion: SchemaVersionV1, + Backend: ports.BackendBeads, + ResultPaths: []string{beadsFloorPlaceholderPath}, + Verdict: Verdict{Status: "MAYBE", Confidence: VerdictConfidenceHigh}, + }, + }, + { + name: "bad confidence", + result: OrchestrationResult{ + SchemaVersion: SchemaVersionV1, + Backend: ports.BackendBeads, + ResultPaths: []string{beadsFloorPlaceholderPath}, + Verdict: Verdict{Status: VerdictStatusPass, Confidence: "PRETTY_SURE"}, + }, + }, + { + name: "wrong schema version", + result: OrchestrationResult{ + SchemaVersion: 99, + Backend: ports.BackendBeads, + ResultPaths: []string{beadsFloorPlaceholderPath}, + Verdict: Verdict{Status: VerdictStatusPass, Confidence: VerdictConfidenceHigh}, + }, + }, + { + name: "empty result paths", + result: OrchestrationResult{ + SchemaVersion: SchemaVersionV1, + Backend: ports.BackendBeads, + ResultPaths: nil, + Verdict: Verdict{Status: VerdictStatusPass, Confidence: VerdictConfidenceHigh}, + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if err := tt.result.Validate(); err == nil { + t.Errorf("Validate: want error for %s, got nil", tt.name) + } + }) + } +} diff --git 
a/cli/internal/orchestration/conformance_test.go b/cli/internal/orchestration/conformance_test.go new file mode 100644 index 000000000..f11df265e --- /dev/null +++ b/cli/internal/orchestration/conformance_test.go @@ -0,0 +1,183 @@ +// practices: [output-contract-parity, safe-degradation] +package orchestration + +import ( + "context" + "encoding/json" + "maps" + "slices" + "testing" + + "github.com/boshu2/agentops/cli/internal/ports" +) + +// conformanceFixtureTaskID is the single fixture task that every tier in the +// degradation ladder produces a result for. Using the SAME task across all +// tiers is what makes the parity proof meaningful: a downstream consumer must +// be able to read the outcome identically no matter which tier produced it. +const conformanceFixtureTaskID = "soc-conformance-fixture" + +// tierResultFor returns a representative OrchestrationResult for the fixture +// task, as the named backend tier would emit it. +// +// The beads floor has a real (stub) adapter — BeadsFloorAdapter.Run — so we +// drive that. The NTM and Claude-native tiers have stubbed executors until +// the application epics, so we construct the OrchestrationResult the way those +// tiers contractually MUST: the SAME struct, populated to be parity-conformant, +// differing only in the Backend value (and a tier-appropriate verdict). This +// is the contract every adapter is obligated to satisfy; constructing it here +// asserts that obligation independently of the (not-yet-built) executors. +func tierResultFor(t *testing.T, backend ports.Backend) OrchestrationResult { + t.Helper() + + if backend == ports.BackendBeads { + res, err := BeadsFloorAdapter{}.Run(context.Background(), conformanceFixtureTaskID) + if err != nil { + t.Fatalf("BeadsFloorAdapter.Run(%q) returned error: %v", conformanceFixtureTaskID, err) + } + return res + } + + // NTM / Claude tiers: same shape, different Backend. 
The richer tiers + // would earn a PASS/HIGH verdict on a clean run; the floor advertises + // WARN/MEDIUM. Verdict CONTENT may differ per tier — what must NOT differ + // is the result SHAPE (the JSON key set), which is the parity invariant + // this test pins. + return OrchestrationResult{ + SchemaVersion: SchemaVersionV1, + Backend: backend, + ResultPaths: []string{".agents/orchestration/" + string(backend) + "-run.artifact"}, + Verdict: Verdict{ + Status: VerdictStatusPass, + Confidence: VerdictConfidenceHigh, + }, + TaskID: conformanceFixtureTaskID, + } +} + +// jsonKeySet marshals a result and returns its sorted top-level JSON key set. +// Comparing sorted key sets across tiers is the mechanical parity proof: a +// tier-agnostic consumer (validation / ledger / provenance) depends on the +// shape, not the values, being identical. +func jsonKeySet(t *testing.T, res OrchestrationResult) []string { + t.Helper() + + raw, err := json.Marshal(res) + if err != nil { + t.Fatalf("json.Marshal(%+v) returned error: %v", res, err) + } + var m map[string]any + if err := json.Unmarshal(raw, &m); err != nil { + t.Fatalf("json.Unmarshal(%s) returned error: %v", raw, err) + } + return slices.Sorted(maps.Keys(m)) +} + +// TestDegradationConformance_AllTiersValidate proves that every tier in the +// degradation ladder (NTM -> Claude -> beads) emits a result that passes the +// parity contract's own Validate() gate and carries the V1 schema version. +// Validate() passing per-tier is the precondition for degradation being +// correctness-preserving: a caller can descend the ladder without a tier ever +// handing back a malformed result. 
+func TestDegradationConformance_AllTiersValidate(t *testing.T) { + for _, backend := range []ports.Backend{ports.BackendNTM, ports.BackendClaude, ports.BackendBeads} { + t.Run(string(backend), func(t *testing.T) { + res := tierResultFor(t, backend) + + if err := res.Validate(); err != nil { + t.Fatalf("tier %q result failed Validate(): %v", backend, err) + } + if res.SchemaVersion != SchemaVersionV1 { + t.Errorf("tier %q SchemaVersion = %d, want %d", backend, res.SchemaVersion, SchemaVersionV1) + } + if res.Backend != backend { + t.Errorf("tier %q produced result tagged Backend = %q, want %q", backend, res.Backend, backend) + } + }) + } +} + +// TestDegradationConformance_IdenticalKeySets is the parity proof. It marshals +// each tier's result for the SAME fixture task and asserts the sorted top-level +// JSON key sets are byte-identical across all three tiers. If any tier added, +// dropped, or renamed a field, this fails — which is exactly the failure mode +// that would make degradation lossy (a downstream consumer reading tier A's +// shape would misread tier B's output). +func TestDegradationConformance_IdenticalKeySets(t *testing.T) { + tiers := []ports.Backend{ports.BackendNTM, ports.BackendClaude, ports.BackendBeads} + + // The contract shape for a result WITH a task_id: schema_version, backend, + // result_paths, verdict, task_id. task_id is omitempty, so all tiers must + // populate it (they share the fixture task) for the sets to match. 
+ wantKeys := []string{"backend", "result_paths", "schema_version", "task_id", "verdict"} + + var reference []string + for _, backend := range tiers { + res := tierResultFor(t, backend) + keys := jsonKeySet(t, res) + + if !slices.Equal(keys, wantKeys) { + t.Errorf("tier %q JSON key set = %v, want contract shape %v", backend, keys, wantKeys) + } + + if reference == nil { + reference = keys + continue + } + if !slices.Equal(keys, reference) { + t.Errorf("tier %q JSON key set = %v diverges from reference tier %q key set %v", + backend, keys, tiers[0], reference) + } + } +} + +// TestDegradationConformance_GateBites is the negative guard. It proves the +// Validate() gate actually rejects a non-conformant result rather than rubber- +// stamping anything. A result missing the required ResultPaths field MUST fail +// Validate(); if it passed, the "every tier is conformant" guarantee above +// would be vacuous. +func TestDegradationConformance_GateBites(t *testing.T) { + cases := []struct { + name string + res OrchestrationResult + }{ + { + name: "empty result_paths", + res: OrchestrationResult{ + SchemaVersion: SchemaVersionV1, + Backend: ports.BackendNTM, + ResultPaths: nil, + Verdict: Verdict{Status: VerdictStatusPass, Confidence: VerdictConfidenceHigh}, + TaskID: conformanceFixtureTaskID, + }, + }, + { + name: "wrong schema_version", + res: OrchestrationResult{ + SchemaVersion: SchemaVersionV1 + 1, + Backend: ports.BackendNTM, + ResultPaths: []string{".agents/x.artifact"}, + Verdict: Verdict{Status: VerdictStatusPass, Confidence: VerdictConfidenceHigh}, + TaskID: conformanceFixtureTaskID, + }, + }, + { + name: "invalid verdict status", + res: OrchestrationResult{ + SchemaVersion: SchemaVersionV1, + Backend: ports.BackendNTM, + ResultPaths: []string{".agents/x.artifact"}, + Verdict: Verdict{Status: "MAYBE", Confidence: VerdictConfidenceHigh}, + TaskID: conformanceFixtureTaskID, + }, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + if err 
:= tc.res.Validate(); err == nil { + t.Fatalf("Validate() accepted a non-conformant result (%s); the gate does not bite", tc.name) + } + }) + } +} diff --git a/cli/internal/orchestration/doc.go b/cli/internal/orchestration/doc.go new file mode 100644 index 000000000..15d82cceb --- /dev/null +++ b/cli/internal/orchestration/doc.go @@ -0,0 +1,16 @@ +// Package orchestration holds capability-detection probes for the +// external multi-agent runtimes AgentOps can drive (NTM tmux swarms, +// and future siblings). +// +// The guiding rule, learned from the NTM detection spike: external +// runtimes MUST be detected by CAPABILITY, not by `command -v`. A +// binary being on PATH says nothing about whether it can actually run +// a swarm — the hard dependencies (tmux, git, a persistent host, the +// agent CLIs) may still be missing. So each probe asks the tool to +// report its own capabilities and degrades gracefully when the tool is +// absent: absence is a normal degradation signal, not an error. +// +// Probes accept a small injectable CommandRunner interface so callers +// can be tested against in-memory fakes instead of shelling out to a +// real subprocess, and return plain structs describing what was found. +package orchestration diff --git a/cli/internal/orchestration/ntm_probe.go b/cli/internal/orchestration/ntm_probe.go new file mode 100644 index 000000000..5166cccf2 --- /dev/null +++ b/cli/internal/orchestration/ntm_probe.go @@ -0,0 +1,116 @@ +// practices: [hexagonal-architecture, capability-detection] +package orchestration + +import ( + "context" + "encoding/json" + "fmt" + "sort" + "strings" +) + +// NTMHardDeps are the dependencies an NTM swarm cannot run without. +// These are the spike-identified HARD deps: a missing entry means the +// host cannot drive a swarm even if the ntm binary is on PATH, which is +// exactly why detection is capability-based rather than `command -v`. +// +// Deliberately NOT in this list: cursors and pipeline-state. 
Those are +// host-bound, non-portable runtime artifacts — they describe a specific +// host's live session, not a portable capability — so their absence is +// not a missing dependency and must not be reported as one. +var NTMHardDeps = []string{ + "tmux", + "git", + "persistent-host", + "agent-CLIs", +} + +// NTMCapabilities is the result of probing the NTM runtime. Available +// reports whether ntm is present and responded to the capabilities +// query. Capabilities is the set of capability tokens ntm reported. +// MissingDeps lists any NTMHardDeps not covered by the reported +// capabilities; it is empty when every hard dep is satisfied and is +// always nil/empty when Available is false (an absent runtime has no +// meaningful per-dep breakdown). +type NTMCapabilities struct { + Available bool + Capabilities []string + MissingDeps []string +} + +// CommandRunner abstracts running an external command so probes can be +// tested against an in-memory fake. Run executes name with args and +// returns the combined output, or an error if the command could not be +// started or exited non-zero. A non-nil error is the canonical signal +// that the tool is absent or unusable. +type CommandRunner interface { + Run(ctx context.Context, name string, args ...string) ([]byte, error) +} + +// robotCapabilitiesPayload is the JSON shape ntm emits for +// `ntm --robot-capabilities`. Only the fields the probe consumes are +// declared; unknown fields are ignored by encoding/json. +type robotCapabilitiesPayload struct { + Capabilities []string `json:"capabilities"` +} + +// ProbeNTM detects the NTM runtime by capability. It invokes +// `ntm --robot-capabilities` via runner and parses the result. +// +// Degradation contract: if ntm is absent — signalled by runner +// returning an error — ProbeNTM returns NTMCapabilities{Available: +// false} and a nil error. 
Absence is a normal degradation signal, not
+// a failure, so callers can branch on Available without handling an
+// error path for the common "ntm not installed" case.
+//
+// A hard error is returned in exactly two cases:
+//
+//   - ctx is already done when the runner reports a failure. An
+//     aborted probe says nothing about whether ntm exists, so it must
+//     not be reported as absence; the context error is wrapped and
+//     returned so callers can match it with errors.Is.
+//   - ntm IS present but its output cannot be parsed, since that is a
+//     genuine contract violation worth surfacing.
+//
+// On any non-nil error the returned NTMCapabilities is the zero value.
+func ProbeNTM(ctx context.Context, runner CommandRunner) (NTMCapabilities, error) {
+	out, err := runner.Run(ctx, "ntm", "--robot-capabilities")
+	if err != nil {
+		// If the caller's context is done, the failure may be the
+		// cancellation/deadline itself rather than a missing binary.
+		// Surface it instead of silently degrading to "absent".
+		if ctxErr := ctx.Err(); ctxErr != nil {
+			return NTMCapabilities{}, fmt.Errorf("ntm --robot-capabilities interrupted: %w", ctxErr)
+		}
+		// ntm is absent or unusable: degrade gracefully.
+		return NTMCapabilities{Available: false}, nil
+	}
+
+	var payload robotCapabilitiesPayload
+	if err := json.Unmarshal(out, &payload); err != nil {
+		return NTMCapabilities{}, fmt.Errorf("parsing ntm --robot-capabilities output: %w", err)
+	}
+
+	caps := normalizeCapabilities(payload.Capabilities)
+	return NTMCapabilities{
+		Available:    true,
+		Capabilities: caps,
+		MissingDeps:  missingHardDeps(caps),
+	}, nil
+}
+
+// normalizeCapabilities trims, drops blanks, and sorts the reported
+// capability tokens so callers get a stable, comparable slice. The
+// input slice is not modified; the result is always non-nil.
+func normalizeCapabilities(raw []string) []string {
+	caps := make([]string, 0, len(raw))
+	for _, c := range raw {
+		c = strings.TrimSpace(c)
+		if c != "" {
+			caps = append(caps, c)
+		}
+	}
+	sort.Strings(caps)
+	return caps
+}
+
+// missingHardDeps returns the NTMHardDeps not present in caps. The
+// result is nil when every hard dep is satisfied.
+func missingHardDeps(caps []string) []string { + present := make(map[string]struct{}, len(caps)) + for _, c := range caps { + present[c] = struct{}{} + } + + var missing []string + for _, dep := range NTMHardDeps { + if _, ok := present[dep]; !ok { + missing = append(missing, dep) + } + } + return missing +} diff --git a/cli/internal/orchestration/ntm_probe_test.go b/cli/internal/orchestration/ntm_probe_test.go new file mode 100644 index 000000000..29b091a6a --- /dev/null +++ b/cli/internal/orchestration/ntm_probe_test.go @@ -0,0 +1,111 @@ +package orchestration + +import ( + "context" + "errors" + "reflect" + "testing" +) + +// fakeRunner is an in-memory CommandRunner. It records the invocation +// and returns canned output/error so tests never shell out. +type fakeRunner struct { + out []byte + err error + + gotName string + gotArgs []string +} + +func (f *fakeRunner) Run(_ context.Context, name string, args ...string) ([]byte, error) { + f.gotName = name + f.gotArgs = args + return f.out, f.err +} + +func TestNTMProbe_PresentReportsCapabilities(t *testing.T) { + // ntm present and reports all hard deps as capabilities. + runner := &fakeRunner{ + out: []byte(`{"capabilities":["agent-CLIs","git","persistent-host","tmux"]}`), + } + + got, err := ProbeNTM(context.Background(), runner) + if err != nil { + t.Fatalf("ProbeNTM returned error: %v", err) + } + if !got.Available { + t.Fatalf("Available = false, want true") + } + + wantCaps := []string{"agent-CLIs", "git", "persistent-host", "tmux"} + if !reflect.DeepEqual(got.Capabilities, wantCaps) { + t.Fatalf("Capabilities = %v, want %v", got.Capabilities, wantCaps) + } + if len(got.MissingDeps) != 0 { + t.Fatalf("MissingDeps = %v, want empty", got.MissingDeps) + } + + // Verify the probe asked by capability, not via command -v. 
+ if runner.gotName != "ntm" { + t.Errorf("invoked %q, want %q", runner.gotName, "ntm") + } + wantArgs := []string{"--robot-capabilities"} + if !reflect.DeepEqual(runner.gotArgs, wantArgs) { + t.Errorf("args = %v, want %v", runner.gotArgs, wantArgs) + } +} + +func TestNTMProbe_PresentWithMissingHardDeps(t *testing.T) { + // ntm present but only reports a subset of hard deps. + runner := &fakeRunner{ + out: []byte(`{"capabilities":["tmux","git"]}`), + } + + got, err := ProbeNTM(context.Background(), runner) + if err != nil { + t.Fatalf("ProbeNTM returned error: %v", err) + } + if !got.Available { + t.Fatalf("Available = false, want true") + } + + wantMissing := []string{"persistent-host", "agent-CLIs"} + if !reflect.DeepEqual(got.MissingDeps, wantMissing) { + t.Fatalf("MissingDeps = %v, want %v", got.MissingDeps, wantMissing) + } +} + +func TestNTMProbe_AbsentDegradesGracefully(t *testing.T) { + // runner returns an error => ntm is absent. Must NOT panic, must + // return Available=false and a nil error. + runner := &fakeRunner{ + err: errors.New("exec: \"ntm\": executable file not found in $PATH"), + } + + got, err := ProbeNTM(context.Background(), runner) + if err != nil { + t.Fatalf("ProbeNTM returned error on absent ntm: %v, want nil", err) + } + if got.Available { + t.Fatalf("Available = true, want false when ntm absent") + } + if len(got.Capabilities) != 0 { + t.Fatalf("Capabilities = %v, want empty when ntm absent", got.Capabilities) + } + if len(got.MissingDeps) != 0 { + t.Fatalf("MissingDeps = %v, want empty when ntm absent", got.MissingDeps) + } +} + +func TestNTMProbe_PresentButUnparsableIsHardError(t *testing.T) { + // ntm present but emits garbage: a genuine contract violation, so a + // hard error is correct (distinct from graceful absence). 
+ runner := &fakeRunner{ + out: []byte("not json at all"), + } + + _, err := ProbeNTM(context.Background(), runner) + if err == nil { + t.Fatalf("ProbeNTM returned nil error on unparsable output, want error") + } +} diff --git a/cli/internal/orchestration/result.go b/cli/internal/orchestration/result.go new file mode 100644 index 000000000..7a6cc2718 --- /dev/null +++ b/cli/internal/orchestration/result.go @@ -0,0 +1,151 @@ +// practices: [design-by-contract, output-contract-parity] +package orchestration + +import ( + "fmt" + + "github.com/boshu2/agentops/cli/internal/ports" +) + +// SchemaVersionV1 is the schema version that OrchestrationResult mirrors: +// schemas/orchestration-result.v1.schema.json. Every conformant result MUST +// carry this exact value so consumers can dispatch on shape stability. +const SchemaVersionV1 = 1 + +// Verdict status enum values. These mirror the `verdict.status` enum in +// orchestration-result.v1.schema.json and are the only legal Status values. +const ( + // VerdictStatusPass marks a run whose work succeeded against its + // acceptance criteria. + VerdictStatusPass = "PASS" + // VerdictStatusWarn marks a run that succeeded with caveats worth + // surfacing to a human or downstream tier. + VerdictStatusWarn = "WARN" + // VerdictStatusFail marks a run whose work did not meet its acceptance + // criteria. + VerdictStatusFail = "FAIL" +) + +// Verdict confidence enum values. These mirror the `verdict.confidence` +// enum in orchestration-result.v1.schema.json and are the only legal +// Confidence values. +const ( + // VerdictConfidenceHigh signals strong evidence behind the Status. + VerdictConfidenceHigh = "HIGH" + // VerdictConfidenceMedium signals moderate evidence behind the Status. + VerdictConfidenceMedium = "MEDIUM" + // VerdictConfidenceLow signals weak evidence behind the Status. 
+ VerdictConfidenceLow = "LOW" +) + +// Verdict is the pass/warn/fail judgement a backend tier reaches about a +// unit of work, paired with the tier's confidence in that judgement. It +// mirrors the `verdict` object of orchestration-result.v1.schema.json. +// +// Both fields are required and enum-constrained: Status is one of +// PASS/WARN/FAIL and Confidence is one of HIGH/MEDIUM/LOW. Use Validate +// (via OrchestrationResult.Validate) to self-check membership. +type Verdict struct { + // Status is the pass/warn/fail outcome. One of VerdictStatus*. + Status string `json:"status"` + // Confidence is the tier's confidence in Status. One of + // VerdictConfidence*. + Confidence string `json:"confidence"` +} + +// OrchestrationResult is the OUTPUT-CONTRACT PARITY shape that EVERY +// backend tier (NTM, Claude-native, Codex, beads floor) MUST emit. It is +// the Go mirror of schemas/orchestration-result.v1.schema.json. +// +// Parity is what makes the safe-degradation ladder correctness-preserving: +// because every tier returns the same shape, a caller can degrade from the +// preferred NTM swarm down to the beads floor without changing how it reads +// the outcome. This type is the canonical contract all adapters conform to; +// each adapter is responsible for populating it and SHOULD call Validate to +// self-check before returning. +// +// Field-to-schema mapping: +// +// - SchemaVersion -> schema_version (required, const 1) +// - Backend -> backend (required, enum ntm/claude/codex/beads) +// - ResultPaths -> result_paths (required, array of repo-relative paths) +// - Verdict -> verdict (required, status + confidence) +// - TaskID -> task_id (optional, e.g. a bead ID) +type OrchestrationResult struct { + // SchemaVersion is the contract version. MUST equal SchemaVersionV1. + SchemaVersion int `json:"schema_version"` + // Backend is the tier that produced this result. 
+ Backend ports.Backend `json:"backend"` + // ResultPaths are repo-root-relative paths to the artifacts this run + // wrote. Required and non-empty for a conformant result. + ResultPaths []string `json:"result_paths"` + // Verdict is the tier's pass/warn/fail judgement plus confidence. + Verdict Verdict `json:"verdict"` + // TaskID identifies the task this result fulfills (e.g. a bead ID). + // Optional; may be empty. + TaskID string `json:"task_id,omitempty"` +} + +// validBackends is the set of backend values the schema's `backend` enum +// permits. Kept in lockstep with the ports.Backend ladder constants. +var validBackends = map[ports.Backend]bool{ + ports.BackendNTM: true, + ports.BackendClaude: true, + ports.BackendCodex: true, + ports.BackendBeads: true, +} + +// validVerdictStatuses is the set of legal Verdict.Status values. +var validVerdictStatuses = map[string]bool{ + VerdictStatusPass: true, + VerdictStatusWarn: true, + VerdictStatusFail: true, +} + +// validVerdictConfidences is the set of legal Verdict.Confidence values. +var validVerdictConfidences = map[string]bool{ + VerdictConfidenceHigh: true, + VerdictConfidenceMedium: true, + VerdictConfidenceLow: true, +} + +// Validate self-checks that the result conforms to the parity contract in +// orchestration-result.v1.schema.json. Any backend tier can call it to +// prove the result it is about to return is parity-conformant before it +// leaves the adapter, which is the mechanism that keeps degradation +// correctness-preserving. +// +// It returns a descriptive error on the first violation found and nil when +// every required field is present and every enum-constrained field is a +// member of its enum: +// +// - SchemaVersion MUST equal SchemaVersionV1. +// - Backend MUST be one of the ports.Backend ladder values. +// - ResultPaths MUST be non-empty and contain no empty strings. +// - Verdict.Status MUST be one of PASS/WARN/FAIL. +// - Verdict.Confidence MUST be one of HIGH/MEDIUM/LOW. 
+// +// TaskID is optional and is not validated. +func (r OrchestrationResult) Validate() error { + if r.SchemaVersion != SchemaVersionV1 { + return fmt.Errorf("schema_version: want %d, got %d", SchemaVersionV1, r.SchemaVersion) + } + if !validBackends[r.Backend] { + return fmt.Errorf("backend: %q is not a valid backend", r.Backend) + } + if len(r.ResultPaths) == 0 { + return fmt.Errorf("result_paths: must be non-empty") + } + for i, p := range r.ResultPaths { + if p == "" { + return fmt.Errorf("result_paths[%d]: must not be empty", i) + } + } + if !validVerdictStatuses[r.Verdict.Status] { + return fmt.Errorf("verdict.status: %q is not one of PASS/WARN/FAIL", r.Verdict.Status) + } + if !validVerdictConfidences[r.Verdict.Confidence] { + return fmt.Errorf("verdict.confidence: %q is not one of HIGH/MEDIUM/LOW", r.Verdict.Confidence) + } + return nil +} diff --git a/cli/internal/orchestration/select.go b/cli/internal/orchestration/select.go new file mode 100644 index 000000000..ab80b26de --- /dev/null +++ b/cli/internal/orchestration/select.go @@ -0,0 +1,141 @@ +// practices: [hexagonal-architecture, safe-degradation] +package orchestration + +import ( + "context" + "fmt" + "os" + + "github.com/boshu2/agentops/cli/internal/ports" +) + +// selectEnvVar is the environment variable that pins or opts out of a +// backend, mirroring the AGENTOPS_HOOKS_DISABLED style of explicit +// operator override. It is the typed-port analogue of the shell seam's +// AGENTOPS_ORCH variable in lib/orchestrate-select.sh. +const selectEnvVar = "AGENTOPS_ORCHESTRATION" + +// Selector is the production OrchestrationPort implementation. It +// resolves a backend via the safe-degradation ladder +// NTM -> Claude-native -> beads floor, honoring an explicit Pin, the +// AGENTOPS_ORCHESTRATION env override, and an OptOut to the floor. 
+// +// It holds a CommandRunner so NTM availability is detected by +// capability (via ProbeNTM) rather than by `command -v`, and reads the +// env override at Select time so operators can flip routing without +// reconstructing the Selector. +type Selector struct { + // runner drives ProbeNTM. It MUST be non-nil for the availability + // step; the explicit-pin, env, and opt-out steps resolve without it. + runner CommandRunner +} + +// NewSelector builds a Selector backed by runner. runner is used only +// to probe NTM availability; the explicit-routing steps (Pin, env, +// OptOut) never touch it. +func NewSelector(runner CommandRunner) *Selector { + return &Selector{runner: runner} +} + +// compile-time assertion that Selector satisfies the port. +var _ ports.OrchestrationPort = (*Selector)(nil) + +// Select resolves the backend for work. Resolution order, first match +// wins, mirroring lib/orchestrate-select.sh: +// +// 1. work.Pin set -> that backend. +// 2. AGENTOPS_ORCHESTRATION env: "off"/"beads" -> beads; +// "ntm"/"claude"/"codex" -> that backend (explicit pin/opt-out). +// 3. work.OptOut -> beads. +// 4. ProbeNTM reports Available -> ntm. +// 5. else -> claude. +// 6. floor -> beads (never reached as a no-op; claude is reachable, but +// the floor is always selectable so work is never unplaceable). +// +// Context cancellation is honored on a best-effort basis: it is checked +// before the NTM probe and propagated into ProbeNTM. +func (s *Selector) Select(ctx context.Context, work ports.WorkSpec) (ports.SelectionTrace, error) { + considered := []ports.Backend{} + + // Step 1: explicit Pin wins over everything. + considered = append(considered, "pin") + if work.Pin != "" { + return ports.SelectionTrace{ + Chosen: work.Pin, + Reason: fmt.Sprintf("explicit WorkSpec.Pin=%s", work.Pin), + Considered: considered, + }, nil + } + + // Step 2: env override acts as an explicit pin / opt-out. 
+ considered = append(considered, "env") + if env := os.Getenv(selectEnvVar); env != "" { + switch env { + case "off", "beads": + return ports.SelectionTrace{ + Chosen: ports.BackendBeads, + Reason: fmt.Sprintf("%s=%s -> beads floor (env opt-out)", selectEnvVar, env), + Considered: considered, + }, nil + case "ntm": + return ports.SelectionTrace{ + Chosen: ports.BackendNTM, + Reason: fmt.Sprintf("%s=ntm (env pin)", selectEnvVar), + Considered: considered, + }, nil + case "claude": + return ports.SelectionTrace{ + Chosen: ports.BackendClaude, + Reason: fmt.Sprintf("%s=claude (env pin)", selectEnvVar), + Considered: considered, + }, nil + case "codex": + return ports.SelectionTrace{ + Chosen: ports.BackendCodex, + Reason: fmt.Sprintf("%s=codex (env pin)", selectEnvVar), + Considered: considered, + }, nil + default: + // Unknown value falls through to the availability ladder, + // matching the shell seam's "unknown -> auto" behavior. + considered = append(considered, "env-unknown") + } + } + + // Step 3: explicit OptOut routes to the beads floor. + considered = append(considered, "optout") + if work.OptOut { + return ports.SelectionTrace{ + Chosen: ports.BackendBeads, + Reason: "WorkSpec.OptOut -> beads floor", + Considered: considered, + }, nil + } + + // Step 4: NTM availability (capability probe). + considered = append(considered, "ntm") + if err := ctx.Err(); err != nil { + return ports.SelectionTrace{}, fmt.Errorf("selecting backend: %w", err) + } + caps, err := ProbeNTM(ctx, s.runner) + if err != nil { + return ports.SelectionTrace{}, fmt.Errorf("probing NTM availability: %w", err) + } + if caps.Available { + return ports.SelectionTrace{ + Chosen: ports.BackendNTM, + Reason: "NTM probe reports available -> ntm (preferred swarm runtime)", + Considered: considered, + }, nil + } + + // Step 5: Claude-native fallback (the "worse NTM"; always present + // in an agent session/CI context). 
+ considered = append(considered, "claude") + considered = append(considered, "beads") + return ports.SelectionTrace{ + Chosen: ports.BackendClaude, + Reason: "NTM absent -> claude-native fallback (beads floor remains available)", + Considered: considered, + }, nil +} diff --git a/cli/internal/orchestration/select_test.go b/cli/internal/orchestration/select_test.go new file mode 100644 index 000000000..edeb0f54c --- /dev/null +++ b/cli/internal/orchestration/select_test.go @@ -0,0 +1,163 @@ +package orchestration + +import ( + "context" + "errors" + "testing" + + "github.com/boshu2/agentops/cli/internal/ports" +) + +// selectUpRunner returns a fakeRunner that reports NTM present with all +// hard deps, so ProbeNTM reports Available=true. +func selectUpRunner() *fakeRunner { + return &fakeRunner{ + out: []byte(`{"capabilities":["agent-CLIs","git","persistent-host","tmux"]}`), + } +} + +// selectDownRunner returns a fakeRunner that errors, so ProbeNTM reports +// Available=false (the NTM-absent degradation signal). +func selectDownRunner() *fakeRunner { + return &fakeRunner{ + err: errors.New(`exec: "ntm": executable file not found in $PATH`), + } +} + +// TestSelect_Ladder reproduces the six degradation-ladder cases the +// shell self-test in lib/orchestrate-select.sh asserts, adapted to the +// typed port's resolution order. 
+func TestSelect_Ladder(t *testing.T) { + tests := []struct { + name string + env string // "" => unset + setEnv bool + runner *fakeRunner + work ports.WorkSpec + want ports.Backend + }{ + { + name: "default (NTM up) -> ntm", + runner: selectUpRunner(), + want: ports.BackendNTM, + }, + { + name: "NTM down -> degrade to claude", + runner: selectDownRunner(), + want: ports.BackendClaude, + }, + { + name: "NTM down + OptOut -> beads floor", + runner: selectDownRunner(), + work: ports.WorkSpec{OptOut: true}, + want: ports.BackendBeads, + }, + { + name: "env=off -> beads (explicit opt-out)", + env: "off", + setEnv: true, + runner: selectUpRunner(), + want: ports.BackendBeads, + }, + { + name: "Pin=claude -> claude", + runner: selectUpRunner(), + work: ports.WorkSpec{Pin: ports.BackendClaude}, + want: ports.BackendClaude, + }, + { + name: "Pin=beads -> beads", + runner: selectUpRunner(), + work: ports.WorkSpec{Pin: ports.BackendBeads}, + want: ports.BackendBeads, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Ensure the env var is in a known state for each case. + if tt.setEnv { + t.Setenv(selectEnvVar, tt.env) + } else { + t.Setenv(selectEnvVar, "") + } + + sel := NewSelector(tt.runner) + trace, err := sel.Select(context.Background(), tt.work) + if err != nil { + t.Fatalf("Select returned error: %v", err) + } + if trace.Chosen != tt.want { + t.Fatalf("Chosen = %q, want %q", trace.Chosen, tt.want) + } + if trace.Reason == "" { + t.Errorf("Reason is empty, want a non-empty explanation") + } + if len(trace.Considered) == 0 { + t.Errorf("Considered is empty, want the evaluated ladder steps") + } + }) + } +} + +// TestSelect_PinBeatsEverything asserts the contract that a non-empty +// Pin wins over both OptOut and the env override. +func TestSelect_PinBeatsEverything(t *testing.T) { + t.Setenv(selectEnvVar, "off") // env says beads... 
+ sel := NewSelector(selectDownRunner()) + + trace, err := sel.Select(context.Background(), ports.WorkSpec{ + Pin: ports.BackendNTM, // ...but Pin says ntm. + OptOut: true, // ...and OptOut says beads. + }) + if err != nil { + t.Fatalf("Select returned error: %v", err) + } + if trace.Chosen != ports.BackendNTM { + t.Fatalf("Chosen = %q, want %q (Pin must win)", trace.Chosen, ports.BackendNTM) + } +} + +// TestSelect_EnvPinNTM asserts AGENTOPS_ORCHESTRATION=ntm pins NTM even +// when the probe would report it down. +func TestSelect_EnvPinNTM(t *testing.T) { + t.Setenv(selectEnvVar, "ntm") + sel := NewSelector(selectDownRunner()) // probe would say down + + trace, err := sel.Select(context.Background(), ports.WorkSpec{}) + if err != nil { + t.Fatalf("Select returned error: %v", err) + } + if trace.Chosen != ports.BackendNTM { + t.Fatalf("Chosen = %q, want %q (env pin must win over probe)", trace.Chosen, ports.BackendNTM) + } +} + +// TestSelect_EnvUnknownFallsThrough asserts an unrecognized env value +// falls through to the availability ladder rather than pinning. +func TestSelect_EnvUnknownFallsThrough(t *testing.T) { + t.Setenv(selectEnvVar, "bogus") + sel := NewSelector(selectUpRunner()) // NTM up + + trace, err := sel.Select(context.Background(), ports.WorkSpec{}) + if err != nil { + t.Fatalf("Select returned error: %v", err) + } + if trace.Chosen != ports.BackendNTM { + t.Fatalf("Chosen = %q, want %q (unknown env -> auto ladder, NTM up)", trace.Chosen, ports.BackendNTM) + } +} + +// TestSelect_ContextCanceled asserts cancellation is honored before the +// probe runs. 
+func TestSelect_ContextCanceled(t *testing.T) {
+	t.Setenv(selectEnvVar, "")
+	ctx, cancel := context.WithCancel(context.Background())
+	cancel()
+
+	// Keep a handle on the runner so the test can prove the probe was
+	// never invoked, not merely that some error came back.
+	runner := selectUpRunner()
+	_, err := NewSelector(runner).Select(ctx, ports.WorkSpec{})
+	if !errors.Is(err, context.Canceled) {
+		t.Fatalf("Select error = %v, want an error wrapping context.Canceled", err)
+	}
+	if runner.gotName != "" {
+		t.Errorf("probe invoked %q after cancellation, want no probe", runner.gotName)
+	}
+}
diff --git a/cli/internal/ports/inmemory_orchestration.go b/cli/internal/ports/inmemory_orchestration.go
new file mode 100644
index 000000000..24fd8d37d
--- /dev/null
+++ b/cli/internal/ports/inmemory_orchestration.go
@@ -0,0 +1,70 @@
+// practices: [hexagonal-architecture, ddd-bounded-context]
+package ports
+
+import "context"
+
+// InMemoryOrchestration is a deterministic OrchestrationPort
+// implementation. It does not retain per-call state; "in-memory" means
+// the availability of each swarm engine is injected as fields and the
+// degradation ladder is evaluated purely from them.
+type InMemoryOrchestration struct {
+	// NTMAvailable reports whether the NTM swarm runtime can take work.
+	NTMAvailable bool
+	// ClaudeAvailable reports whether the Claude-native runtime can
+	// take work.
+	ClaudeAvailable bool
+}
+
+// Select resolves the orchestration backend for the given work,
+// applying the ladder: explicit Pin wins; OptOut routes to the beads
+// floor; otherwise NTM if available, else Claude if available, else the
+// beads floor.
+func (o *InMemoryOrchestration) Select(ctx context.Context, work WorkSpec) (SelectionTrace, error) {
+	// A context that is already done short-circuits before any routing.
+	if ctxErr := ctx.Err(); ctxErr != nil {
+		return SelectionTrace{}, ctxErr
+	}
+
+	// Cases are ordered by precedence and the first match wins, so the
+	// availability fields are only consulted when neither Pin nor
+	// OptOut decided the route. Each case builds a fresh Considered
+	// slice listing the backends evaluated up to and including the
+	// chosen one.
+	var trace SelectionTrace
+	switch {
+	case work.Pin != "":
+		trace = SelectionTrace{
+			Chosen:     work.Pin,
+			Reason:     "explicit-pin",
+			Considered: []Backend{work.Pin},
+		}
+	case work.OptOut:
+		trace = SelectionTrace{
+			Chosen:     BackendBeads,
+			Reason:     "opt-out-to-beads-floor",
+			Considered: []Backend{BackendBeads},
+		}
+	case o.NTMAvailable:
+		trace = SelectionTrace{
+			Chosen:     BackendNTM,
+			Reason:     "ntm-available",
+			Considered: []Backend{BackendNTM},
+		}
+	case o.ClaudeAvailable:
+		trace = SelectionTrace{
+			Chosen:     BackendClaude,
+			Reason:     "ntm-unavailable-claude-available",
+			Considered: []Backend{BackendNTM, BackendClaude},
+		}
+	default:
+		// Nothing richer is available: land on the floor.
+		trace = SelectionTrace{
+			Chosen:     BackendBeads,
+			Reason:     "degraded-to-beads-floor",
+			Considered: []Backend{BackendNTM, BackendClaude, BackendBeads},
+		}
+	}
+	return trace, nil
+}
+
+// Compile-time assertion: InMemoryOrchestration satisfies the port.
+var _ OrchestrationPort = (*InMemoryOrchestration)(nil) diff --git a/cli/internal/ports/inmemory_orchestration_test.go b/cli/internal/ports/inmemory_orchestration_test.go new file mode 100644 index 000000000..fae7e2170 --- /dev/null +++ b/cli/internal/ports/inmemory_orchestration_test.go @@ -0,0 +1,114 @@ +// practices: [hexagonal-architecture, tdd] +package ports + +import ( + "context" + "errors" + "reflect" + "testing" +) + +func TestInMemoryOrchestration_SelectLadder(t *testing.T) { + tests := []struct { + name string + ntmAvailable bool + claudeAvailable bool + work WorkSpec + want Backend + }{ + { + name: "default prefers ntm", + ntmAvailable: true, + claudeAvailable: true, + work: WorkSpec{}, + want: BackendNTM, + }, + { + name: "ntm unavailable falls back to claude", + ntmAvailable: false, + claudeAvailable: true, + work: WorkSpec{}, + want: BackendClaude, + }, + { + name: "ntm and claude unavailable degrades to beads floor", + ntmAvailable: false, + claudeAvailable: false, + work: WorkSpec{}, + want: BackendBeads, + }, + { + name: "opt-out routes to beads despite availability", + ntmAvailable: true, + claudeAvailable: true, + work: WorkSpec{OptOut: true}, + want: BackendBeads, + }, + { + name: "pin claude wins over ntm availability", + ntmAvailable: true, + claudeAvailable: true, + work: WorkSpec{Pin: BackendClaude}, + want: BackendClaude, + }, + { + name: "pin beads wins", + ntmAvailable: true, + claudeAvailable: true, + work: WorkSpec{Pin: BackendBeads}, + want: BackendBeads, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + port := &InMemoryOrchestration{ + NTMAvailable: tc.ntmAvailable, + ClaudeAvailable: tc.claudeAvailable, + } + trace, err := port.Select(context.Background(), tc.work) + if err != nil { + t.Fatal(err) + } + if trace.Chosen != tc.want { + t.Fatalf("Chosen = %q, want %q", trace.Chosen, tc.want) + } + if trace.Reason == "" { + t.Fatal("Reason is empty, want a non-empty selection reason") + } + }) + } +} + 
+func TestInMemoryOrchestration_PinOverridesOptOut(t *testing.T) { + port := &InMemoryOrchestration{NTMAvailable: true, ClaudeAvailable: true} + trace, err := port.Select(context.Background(), WorkSpec{Pin: BackendCodex, OptOut: true}) + if err != nil { + t.Fatal(err) + } + if trace.Chosen != BackendCodex { + t.Fatalf("Chosen = %q, want %q (pin must win over opt-out)", trace.Chosen, BackendCodex) + } +} + +func TestInMemoryOrchestration_ConsideredRecordsLadderSteps(t *testing.T) { + port := &InMemoryOrchestration{NTMAvailable: false, ClaudeAvailable: false} + trace, err := port.Select(context.Background(), WorkSpec{}) + if err != nil { + t.Fatal(err) + } + want := []Backend{BackendNTM, BackendClaude, BackendBeads} + if !reflect.DeepEqual(trace.Considered, want) { + t.Fatalf("Considered = %v, want %v", trace.Considered, want) + } +} + +func TestInMemoryOrchestration_HonorsContextCancellation(t *testing.T) { + port := &InMemoryOrchestration{NTMAvailable: true, ClaudeAvailable: true} + ctx, cancel := context.WithCancel(context.Background()) + cancel() + _, err := port.Select(ctx, WorkSpec{}) + if !errors.Is(err, context.Canceled) { + t.Fatalf("Select error = %v, want context.Canceled", err) + } +} diff --git a/cli/internal/ports/orchestration.go b/cli/internal/ports/orchestration.go new file mode 100644 index 000000000..2c5749971 --- /dev/null +++ b/cli/internal/ports/orchestration.go @@ -0,0 +1,70 @@ +// practices: [hexagonal-architecture, ddd-bounded-context] +package ports + +import "context" + +// Backend names an orchestration engine that can execute a unit of +// agent work. The values form a safe-degradation ladder: NTM is the +// preferred swarm runtime, Claude-native is the fallback when NTM is +// unavailable, and beads is the always-available floor. Codex is a +// pinnable engine that is never auto-selected by the default ladder. +type Backend string + +const ( + // BackendNTM is the NTM tmux swarm runtime — the preferred engine + // when available. 
+ BackendNTM Backend = "ntm" + // BackendClaude is the Claude-native runtime — the fallback when + // NTM is unavailable. + BackendClaude Backend = "claude" + // BackendCodex is the Codex runtime — selectable only via an + // explicit Pin; the default ladder never auto-selects it. + BackendCodex Backend = "codex" + // BackendBeads is the beads floor — the always-available engine + // that every degradation path terminates in. + BackendBeads Backend = "beads" +) + +// WorkSpec describes a single unit of orchestrable work that the port +// must place on a backend. It is intentionally minimal: the port only +// needs the caller's routing intent, not the work's payload. +type WorkSpec struct { + // OptOut requests that the work bypass swarm engines and run on the + // beads floor regardless of NTM/Claude availability. + OptOut bool + // Pin forces a specific backend, overriding both OptOut and the + // availability ladder. The empty Backend ("") means auto-select. + Pin Backend +} + +// SelectionTrace is the resolved routing decision plus the reasoning +// that produced it. Considered lists the backends the ladder evaluated, +// in order, for auditability; it is safe for callers to mutate. +type SelectionTrace struct { + Chosen Backend + Reason string + Considered []Backend +} + +// OrchestrationPort selects an orchestration backend for a unit of +// work. It models the safe-degradation ladder NTM -> Claude-native -> +// beads floor, with an explicit Pin escape hatch and an OptOut path to +// the floor. +// +// Contract: +// +// - A non-empty WorkSpec.Pin MUST win over everything else, +// including OptOut and availability. +// - WorkSpec.OptOut (with no Pin) MUST resolve to BackendBeads. +// - Otherwise Select MUST choose BackendNTM when NTM is available, +// else BackendClaude when Claude is available, else BackendBeads. +// - BackendBeads is the floor: it MUST always be selectable so the +// port never fails to place work for lack of an engine. 
+// - SelectionTrace.Considered MUST record the ladder steps evaluated. +// - Context cancellation MUST be honored on a best-effort basis. +// +// This port is the typed replacement for the ad hoc backend-selection +// shell logic in the spike's orchestrate-select.sh. +type OrchestrationPort interface { + Select(ctx context.Context, work WorkSpec) (SelectionTrace, error) +} diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md index 476946184..08bb5bf35 100644 --- a/docs/ARCHITECTURE.md +++ b/docs/ARCHITECTURE.md @@ -351,7 +351,7 @@ All hooks can be disabled: `AGENTOPS_HOOKS_DISABLED=1` (kill switch) or per-hook . ├── .claude-plugin/ │ └── plugin.json # Plugin manifest -├── skills/ # 76 skills (66 user-facing, 10 internal) +├── skills/ # 78 skills (68 user-facing, 10 internal) │ ├── rpi/ # orchestration — Full RPI lifecycle orchestrator │ ├── council/ # orchestration — Multi-model validation (core primitive) │ ├── crank/ # orchestration — Autonomous epic execution diff --git a/docs/SKILLS.md b/docs/SKILLS.md index 63aa87500..0d6fc4290 100644 --- a/docs/SKILLS.md +++ b/docs/SKILLS.md @@ -1,6 +1,6 @@ # Skills Reference -Complete reference for all 76 AgentOps skills (66 user-facing + 10 internal). +Complete reference for all 78 AgentOps skills (68 user-facing + 10 internal). Skills are the primitive layer of AgentOps. Higher-level entry points like `/implement`, `/validation`, `/rpi`, and `/evolve` compose those primitives diff --git a/docs/cli-skills-map.md b/docs/cli-skills-map.md index 8b5cbfb2f..8400d31bf 100644 --- a/docs/cli-skills-map.md +++ b/docs/cli-skills-map.md @@ -2,7 +2,7 @@ > Which `ao` commands are called by which skills and hooks — and vice versa. -Auto-audited 2026-04-24; targeted runtime-proof update 2026-04-28. 69 generated CLI command headings, 69 source skills, 12 runtime hook event sections. +Auto-audited 2026-04-24; targeted runtime-proof update 2026-04-28. 70 generated CLI command headings, 69 source skills, 12 runtime hook event sections. 
Source-of-truth note: `hooks/hooks.json` currently declares the full Claude runtime event surface. `hooks/codex-hooks.json` declares the Codex-native subset that runtime can support. diff --git a/docs/contracts/context-map.md b/docs/contracts/context-map.md index a63fd29c2..abd6e9525 100644 --- a/docs/contracts/context-map.md +++ b/docs/contracts/context-map.md @@ -67,6 +67,7 @@ and [CDLC](https://github.com/boshu2/agentops/blob/main/docs/cdlc.md) for the ar ### supporting - `autodev` — Manage the PROGRAM.md/AUTODEV.md contract that drives the loop — the config layer Evolve and Factory read each tick, not a loop itself. +- `automation-shape-routing` — Front door for building agent automation — decide the SHAPE (Claude Workflow vs NTM swarm vs plain skill), then hand off to the right builder. Triggers: "build a skill", "build a workflow", "build automation", "create a skill", "create a workflow", "new automation", "convert skills to workflows", or any task involving fan-out / multiple agents / iterative passes. - `codex-team` — Coordinate multiple Codex agents. - `compile` — Compile .agents knowledge wiki. - `curate` — Mine transcripts, .agents, bd, and git for skill diffs, bd updates, or rare wiki entries. @@ -86,11 +87,12 @@ and [CDLC](https://github.com/boshu2/agentops/blob/main/docs/cdlc.md) for the ar - `scaffold` — Create project, component, or boilerplate scaffolds. - `scenario` — Manage holdout scenarios. - `skill-auditor` — Audit an existing SKILL.md against the unified AgentOps template (15 checks). Triggers: "audit skill", "skill quality review", "is this skill ready". -- `skill-builder` — Scaffold or absorb new SKILL.md files against the unified AgentOps template. Triggers: "create a skill", "scaffold skill", "absorb external skill", "new skill". +- `skill-builder` — Scaffold or absorb new SKILL.md files (a leaf capability) against the unified AgentOps template. Triggers: "create a skill", "scaffold skill", "absorb external skill", "new skill". 
If unsure whether the work should be a skill, a Workflow, or an NTM swarm, run automation-shape-routing first. - `swarm` — Dispatch parallel agents. - `system-tuning` — Restore system responsiveness via safe, ordered process cleanup and agent-swarm hygiene. - `test` — Generate tests and coverage plans. - `trace` — Trace decisions through artifacts. +- `workflow-builder` — Scaffold a new Claude Workflow script (.claude/workflows/*.js) — deterministic multi-agent orchestration — from the operating-loop.js template and the Workflow primitives. Triggers: "build a workflow", "create a workflow", "scaffold workflow", "new workflow", "author a workflow". ### generic @@ -106,6 +108,8 @@ and [CDLC](https://github.com/boshu2/agentops/blob/main/docs/cdlc.md) for the ar ```mermaid graph LR + automation-shape-routing -- "supplier-to" --> skill-builder + automation-shape-routing -- "supplier-to" --> workflow-builder beads -- "supplier-to" --> crank beads -- "supplier-to" --> ratchet brainstorm -- "shared-kernel" --> standards @@ -119,6 +123,7 @@ graph LR flywheel -- "shared-kernel" --> standards forge -- "shared-kernel" --> standards goals -- "shared-kernel" --> standards + heal-skill -- "customer-of" --> skill-auditor hooks-authoring -- "shared-kernel" --> standards implement -- "customer-of" --> domain perf -- "shared-kernel" --> standards @@ -146,10 +151,16 @@ graph LR session-bootstrap -- "customer-of" --> AGENTS.md ship-loop -- "customer-of" --> post-mortem ship-loop -- "customer-of" --> rpi + skill-auditor -- "supplier-to" --> heal-skill + skill-auditor -- "customer-of" --> skill-builder + skill-builder -- "customer-of" --> automation-shape-routing + skill-builder -- "supplier-to" --> skill-auditor swarm -- "customer-of" --> crank validate -- "customer-of" --> validation validation -- "shared-kernel" --> standards vibe -- "shared-kernel" --> standards + workflow-builder -- "customer-of" --> automation-shape-routing + workflow-builder -- "shared-kernel" --> 
operating-loop-workflow ``` ## Data flow (consumes / produces) @@ -320,3 +331,4 @@ graph LR | `vibe` | consumes | standards | | `vibe` | produces | result.json | | `vibe` | produces | verdict.json | +| `workflow-builder` | produces | workflow-script | diff --git a/docs/contracts/orchestration-backend.md b/docs/contracts/orchestration-backend.md new file mode 100644 index 000000000..a3dad6cd7 --- /dev/null +++ b/docs/contracts/orchestration-backend.md @@ -0,0 +1,21 @@ +# Orchestration Backend Selection Contract + +Schema: [`schemas/orchestration-backend.v1.schema.json`](../../schemas/orchestration-backend.v1.schema.json) + +The selection trace emitted by the `OrchestrationPort` when it resolves which +backend runs a unit of orchestrable work. The full port semantics, the +`NTM → Claude-native → beads floor` degradation ladder, the +`AGENTOPS_ORCHESTRATION=off` opt-out, and capability-detection live in +[orchestration-ports.md](orchestration-ports.md) — this contract pins the wire +shape of one selection decision so the structural-floor gate validates the schema. + +## Fields + +- `schema_version` (const `1`) — contract version. +- `chosen` — the selected backend: `ntm` \| `claude` \| `codex` \| `beads`. +- `reason` — human-readable explanation of why this backend was chosen. +- `considered` — ordered ladder steps evaluated before the choice. +- `opt_out` — whether the global orchestration opt-out forced the beads floor. +- `pin` — an explicit backend pin (empty/null = auto-select down the ladder). + +Required: `schema_version`, `chosen`, `reason`. diff --git a/docs/contracts/orchestration-ports.md b/docs/contracts/orchestration-ports.md new file mode 100644 index 000000000..275790e53 --- /dev/null +++ b/docs/contracts/orchestration-ports.md @@ -0,0 +1,170 @@ +# Orchestration Ports + +> **Status:** V1 dual-runtime orchestration seam. +> **Owner plan:** `.agents/plans/2026-05-29-dual-runtime-orchestration-foundation.md`. 
+> **Purpose:** name the boundary that selects an orchestration engine for a unit +> of agent work, and the safety property that makes engine degradation free. + +This contract documents the **`OrchestrationPort`** — the typed seam that routes +a unit of work onto an execution engine and records *why*. It is the foundation +that gates Bo's application epics (the dual-runtime *AgentOps × Claude Managed +Agents* integration): every fan-out skill, crank wave, and autodev loop depends +on this selection layer. + +It follows the project-wide [Ports and Adapters](../architecture/ports-and-adapters.md) +model and the established `cli/internal/ports/` triplet pattern (interface + +`inmemory_*` adapter + test), exactly like `convergence_check.go` and +`ci_status.go`. + +## Source of truth + +| Concern | File | +|---|---| +| Port interface + contract | `cli/internal/ports/orchestration.go` | +| Deterministic adapter | `cli/internal/ports/inmemory_orchestration.go` | +| Capability detection (NTM) | `cli/internal/orchestration/ntm_probe.go` | +| Backend-selection schema | `schemas/orchestration-backend.v1.schema.json` | +| Output-contract parity schema | `schemas/orchestration-result.v1.schema.json` | +| Skill-side selection prose | `skills/shared/SKILL.md` (spawn-backend selection) | +| Shape routing (Workflow/NTM/skill) | `skills/automation-shape-routing/SKILL.md` | + +When prose and code disagree, the Go port and the JSON schemas win (see +`CLAUDE.md` Source-of-Truth Precedence). + +## The three-category model + +Before selecting an engine, decide the **shape** of the automation. This is the +front door — `skills/automation-shape-routing/SKILL.md` owns it. + +| Shape | What it is | Mechanism | +|---|---|---| +| **Claude Workflow** | Deterministic, reproducible orchestration of subagents | Claude `Workflow` tool — `agent({schema})`, `parallel()`, `pipeline()`, `phase()`, loop-until-budget. In-process, headless. 
| +| **NTM swarm** | Long-lived, human-in-the-loop multi-agent run | `ntm` / `*-with-ntm` — persistent tmux panes, robot API, mail/locks, attach + nudge + relaunch. | +| **Plain skill** | One model reasoning through a procedure | A single `SKILL.md`. No fan-out, or a strictly sequential edit-loop. | + +The `OrchestrationPort` is invoked **once the shape calls for orchestration** — +it does not decide *whether* to orchestrate (that is the routing rule above); it +decides *which engine* runs the work and how to degrade if the preferred one is +absent. + +## The degradation ladder + +`OrchestrationPort.Select()` resolves a `WorkSpec` to a backend along a safe +degradation ladder. The ladder is **NTM → Claude-native → beads floor**: + +| Tier | Backend | Role | +|---|---|---| +| Top | `ntm` | Preferred swarm runtime when available | +| Fallback | `claude` | Claude-native runtime when NTM is unavailable | +| Floor | `beads` | Always-available floor; every path terminates here | +| Pinnable | `codex` | Selectable **only** by an explicit `Pin`; the default ladder never auto-selects it | + +The contract (mirroring the doc comment in `cli/internal/ports/orchestration.go`): + +- A non-empty `WorkSpec.Pin` **MUST** win over everything else, including + `OptOut` and availability. +- `WorkSpec.OptOut` (with no `Pin`) **MUST** resolve to `BackendBeads`. +- Otherwise `Select` **MUST** choose `BackendNTM` when NTM is available, else + `BackendClaude` when Claude is available, else `BackendBeads`. +- `BackendBeads` is the floor: it **MUST** always be selectable so the port + never fails to place work for lack of an engine. +- `SelectionTrace.Considered` **MUST** record the ladder steps evaluated, in + order, for auditability. +- Context cancellation **MUST** be honored on a best-effort basis. + +`SelectionTrace` (`Chosen`, `Reason`, `Considered`) is the resolved routing +decision plus its reasoning, serialized as +`schemas/orchestration-backend.v1.schema.json` so every degradation is +auditable.
+ +## The global opt-out: `AGENTOPS_ORCHESTRATION=off` + +Setting `AGENTOPS_ORCHESTRATION=off` skips **all** spawn backends and degrades +straight to the **beads floor** (single-agent inline / `--quick`; workers' work +is still tracked through `bd`). This mirrors the `AGENTOPS_HOOKS_DISABLED=1` +convention. At the port level this is the `WorkSpec.OptOut` path: it routes to +`BackendBeads` regardless of NTM/Claude availability, but a non-empty `Pin` +still overrides it. + +## Capability detection: `ntm --robot-capabilities`, NOT `command -v` + +NTM availability is detected by **capability**, not presence. A binary on PATH +is not a usable swarm runtime. `cli/internal/orchestration/ntm_probe.go` invokes +`ntm --robot-capabilities`, parses the reported capability tokens, and checks +them against the hard dependencies a swarm cannot run without: + +``` +NTMHardDeps = [tmux, git, persistent-host, agent-CLIs] +``` + +Deliberately **not** hard deps: cursors and pipeline-state — those are +host-bound, non-portable runtime artifacts (they describe one host's live +session, not a portable capability), so their absence is not a missing +dependency. + +Degradation contract of the probe: + +- If `ntm` is absent (the runner returns an error), `ProbeNTM` returns + `NTMCapabilities{Available: false}` and a **nil error** — absence is a normal + degradation signal, not a failure, so callers branch on `Available` without an + error path for the common "ntm not installed" case. +- A hard error is returned **only** when `ntm` IS present but its output cannot + be parsed — a genuine contract violation worth surfacing. + +This is why detection is capability-based rather than `command -v ntm`: a missing +hard dep means the host cannot drive a swarm even when the `ntm` binary exists. 
+ +## Safety property: output-contract parity + +The property that makes degradation **free** (correctness-preserving) is +**output-contract parity**: *every* tier emits the same result shape, +`schemas/orchestration-result.v1.schema.json`. Whether the backend was NTM, a +Claude-native team, Codex sub-agents, or the beads floor, the run writes a +result carrying: + +- `schema_version` (const `1`) +- `backend` — the tier that produced the result (`ntm` / `claude` / `codex` / `beads`) +- `result_paths[]` — repo-relative artifact paths +- `verdict` — `{ status: PASS|WARN|FAIL, confidence: HIGH|MEDIUM|LOW }` +- `task_id` (optional) — e.g. the bead ID + +Because the *output* is invariant across tiers, the lead can verify-then-trust +the artifact identically no matter which engine ran. Degradation changes *who +does the work*, never *what a finished result looks like*. This is the invariant +already stated in `skills/shared/SKILL.md` ("Output-contract parity is unchanged +across all tiers"), now pinned to a versioned schema. + +## Two ladders — do not conflate + +There are **two distinct orchestration ladders** in the codebase. They govern +different boundaries and must stay separate: + +| | Ladder (A) — spawn-backend | Ladder (B) — CLI phase-executor | +|---|---|---| +| **Question** | Which *engine* spawns the workers? | Which *transport mode* runs an RPI phase? | +| **Tiers** | `ntm` → `claude` → `beads` (+ pinnable `codex`) | `auto` \| `direct` \| `stream` \| `tmux` | +| **Owner** | `OrchestrationPort` + `skills/shared/SKILL.md` spawn-backend selection | `validateRuntimeMode` in `cli/cmd/ao/rpi_phased_context.go` | +| **Opt-out** | `AGENTOPS_ORCHESTRATION=off` → beads floor | n/a (mode is an explicit flag) | + +This contract governs **ladder (A)** only. Ladder (B) (the phase-executor +runtime mode) gains new modes **only if/when** phases route through the +`OrchestrationPort` — that is a deliberate follow-up, out of scope for this +foundation. 
Future work must not collapse the two: a phase-executor mode +(`stream`/`tmux`) is *not* a spawn backend, and a spawn backend (`ntm`/`claude`) +is *not* a phase-executor mode. + +> **`gc` is NOT a selectable tier.** The Gas City (`gc`) CLI bridge was removed +> (soc-2rtm0); `runtime=gc` is rejected by the CLI. Any `gc`-based dispatch +> prose in the swarm/crank references is historical and is never selected. + +## Paired schemas + +This contract is the prose half of a paired contract. Its schemas live at: + +- `schemas/orchestration-backend.v1.schema.json` — the selection/degradation + trace (`chosen`, `reason`, `considered`, `opt_out`, `pin`). +- `schemas/orchestration-result.v1.schema.json` — the output-contract parity + shape every tier emits (`backend`, `result_paths`, `verdict`, `task_id`). + +Both are validated as JSON by `scripts/check-contracts-structural-floor.sh` in +CI; the per-field shape is enforced by the JSON Schemas themselves at runtime. diff --git a/docs/contracts/orchestration-result.md b/docs/contracts/orchestration-result.md new file mode 100644 index 000000000..af586a605 --- /dev/null +++ b/docs/contracts/orchestration-result.md @@ -0,0 +1,22 @@ +# Orchestration Result Parity Contract + +Schema: [`schemas/orchestration-result.v1.schema.json`](../../schemas/orchestration-result.v1.schema.json) + +The output-contract **parity shape** every orchestration tier (NTM swarm, +Claude-native, beads floor) MUST emit. This is the load-bearing safety property +behind safe degradation: a downstream consumer (validation, ledger, provenance) +stays tier-agnostic only because all tiers produce an identical result *shape* — +values legitimately differ (the beads floor advertises `WARN`/`MEDIUM`; richer +tiers earn `PASS`/`HIGH`), but the key set is invariant. Enforced by the +degradation-conformance test in `cli/internal/orchestration/conformance_test.go`. +See [orchestration-ports.md](orchestration-ports.md) for the full port contract. 
+ +## Fields + +- `schema_version` (const `1`) — contract version. +- `backend` — which tier produced the result: `ntm` \| `claude` \| `codex` \| `beads`. +- `result_paths` — artifact locations written by the run (e.g. `.agents/swarm/results/*.json`). +- `verdict` — `{ status: PASS|WARN|FAIL, confidence: HIGH|MEDIUM|LOW }`. +- `task_id` — the unit of work this result corresponds to. + +Required: `schema_version`, `backend`, `result_paths`, `verdict`. diff --git a/docs/documentation-index.md b/docs/documentation-index.md index 41a57ab93..a303fdf60 100644 --- a/docs/documentation-index.md +++ b/docs/documentation-index.md @@ -290,6 +290,9 @@ Bridge / framing docs: - [Ubiquitous Language Contract](contracts/ubiquitous-language.md) — Canonical names per bounded context (BC1 Corpus, BC2 Validation, BC3 Loop, BC4 Factory, BC5 Runtime) for the 5 ranked drifts (Gate/Check, Cycle/Loop, Claim/Evidence, Skill/Pattern/Practice, Session); rename schedule bound to soc-5yuy children - [BC1 Corpus Ports Contract](contracts/bc1-corpus-ports.md) — Core BC1 corpus ports scaffolded under `cli/internal/ports/`; semantics cheat-sheet, adapter triplet pattern, soc-pm5t wire-up order - [BC Ports Inventory](contracts/bc-ports-inventory.md) — Roster of all 20 BC ports with per-port adapter contracts, the universal triplet construction pattern, and per-BC wire-up order. +- [Orchestration Ports](contracts/orchestration-ports.md) — `OrchestrationPort` dual-runtime selection seam: the 3-category model (Claude Workflow / NTM swarm / plain skill), the NTM → Claude-native → beads-floor degradation ladder, `AGENTOPS_ORCHESTRATION=off` opt-out, capability detection via `ntm --robot-capabilities`, output-contract parity (`orchestration-result.v1`), and the two-ladders distinction. Paired schemas `schemas/orchestration-backend.v1.schema.json` + `schemas/orchestration-result.v1.schema.json`. 
+- [Orchestration Backend Selection Contract](contracts/orchestration-backend.md) — wire shape of one `OrchestrationPort` selection decision (chosen/reason/considered/opt_out/pin); pairs `schemas/orchestration-backend.v1.schema.json` for structural-floor validation. +- [Orchestration Result Parity Contract](contracts/orchestration-result.md) — the output-contract parity shape every tier (NTM/Claude/beads) must emit; pairs `schemas/orchestration-result.v1.schema.json`; enforced by the degradation-conformance test. - [Remote Compute Contract](contracts/remote-compute.md) — Product-neutral RemoteTarget, RemoteSession, command ledger, recovery, and GasCity-first remote execution contract - [Rubric Schema](https://github.com/boshu2/agentops/blob/main/schemas/rubric.v1.schema.json) — JSON Schema for rubric files (outcome rubric → target → grader → retry loop) - [Worker Spec Schema](https://github.com/boshu2/agentops/blob/main/schemas/worker-spec.v1.schema.json) — JSON Schema for per-worker model/tool/prompt isolation specs diff --git a/lib/orchestrate-select.sh b/lib/orchestrate-select.sh new file mode 100755 index 000000000..556306938 --- /dev/null +++ b/lib/orchestrate-select.sh @@ -0,0 +1,80 @@ +#!/usr/bin/env bash +# orchestrate-select.sh — dual-runtime SELECTOR SEAM (spike keeper) +# +# Resolves which orchestration backend to use for a task and degrades safely: +# NTM swarm -> Claude-native -> beads (floor) +# +# This is the prototype of the SDK's OrchestrationPort.Select(). It only DECIDES +# + emits a trace; actual dispatch is the adapter's job. The whole point of the +# spike is to prove the *degradation* is clean and explicit. +# +# Resolution order (first match wins): +# 1. AGENTOPS_ORCHESTRATION pin: ntm | claude | beads | off (off => beads) +# (AGENTOPS_ORCH accepted as a back-compat alias) +# 2. NTM available? -> ntm (preferred: persistent, attachable, multi-vendor) +# 3. Claude-native? -> claude (in-session/headless; the "worse NTM") +# 4. 
always -> beads (sequential floor; always works) +set -euo pipefail + +NTM_HOST="${AGENTOPS_NTM_HOST:-bushido}" + +log() { printf ' [select] %s\n' "$*" >&2; } + +ntm_available() { + # Honors a test override so we can prove degradation without killing bushido. + [[ "${AGENTOPS_NTM_FORCE_DOWN:-0}" == "1" ]] && { log "NTM forced down (test)"; return 1; } + timeout 10 ssh -o ConnectTimeout=6 -o BatchMode=yes "$NTM_HOST" \ + 'command -v ntm >/dev/null && command -v tmux >/dev/null' >/dev/null 2>&1 +} + +claude_available() { + # In-session Claude / CI agent context. Disable-able for the floor test. + [[ "${AGENTOPS_NO_CLAUDE:-0}" == "1" ]] && return 1 + return 0 +} + +select_backend() { + local pin="${AGENTOPS_ORCHESTRATION:-${AGENTOPS_ORCH:-auto}}" + case "$pin" in + off|beads) echo "beads"; log "reason: explicit opt-out / pin=$pin"; return ;; + ntm) echo "ntm"; log "reason: explicit pin=ntm"; return ;; + claude) echo "claude"; log "reason: explicit pin=claude"; return ;; + auto) : ;; + *) log "unknown AGENTOPS_ORCH='$pin' -> auto"; ;; + esac + if ntm_available; then echo "ntm"; log "reason: NTM reachable on $NTM_HOST"; return; fi + if claude_available; then echo "claude"; log "reason: NTM absent -> Claude-native"; return; fi + echo "beads"; log "reason: no orchestration runtime -> beads floor" +} + +self_test() { + local fail=0 + check() { # desc expected; runs select_backend with current env; returns 1 on mismatch + local desc="$1" expected="$2" got + got="$(select_backend 2>/dev/null)" + if [[ "$got" == "$expected" ]]; then printf 'PASS %-46s -> %s\n' "$desc" "$got" + else printf 'FAIL %-46s -> got %s, want %s\n' "$desc" "$got" "$expected"; return 1; fi + } + echo "== degradation-ladder self-test ==" + ( unset AGENTOPS_ORCHESTRATION AGENTOPS_ORCH AGENTOPS_NTM_FORCE_DOWN AGENTOPS_NO_CLAUDE + check "default (NTM up)" "ntm" ) || fail=1 # each case runs in a subshell, so a failure must come back via exit status + ( export AGENTOPS_NTM_FORCE_DOWN=1; unset AGENTOPS_ORCHESTRATION AGENTOPS_ORCH AGENTOPS_NO_CLAUDE + check "NTM down -> degrade to Claude" "claude" ) || fail=1 + ( export AGENTOPS_NTM_FORCE_DOWN=1 AGENTOPS_NO_CLAUDE=1; unset
AGENTOPS_ORCHESTRATION AGENTOPS_ORCH + check "NTM down + Claude off -> beads floor" "beads" ) || fail=1 + ( unset AGENTOPS_ORCHESTRATION; export AGENTOPS_ORCH=off + check "explicit opt-out (AGENTOPS_ORCH=off)" "beads" ) || fail=1 + ( unset AGENTOPS_ORCHESTRATION; export AGENTOPS_ORCH=claude + check "explicit pin=claude" "claude" ) || fail=1 + ( unset AGENTOPS_ORCHESTRATION; export AGENTOPS_ORCH=beads + check "explicit pin=beads" "beads" ) || fail=1 + echo + [[ $fail -eq 0 ]] && echo "ALL PASS — ladder degrades cleanly" || echo "FAILURES present" + return $fail +} + +case "${1:-select}" in + --self-test) self_test ;; + select|"") b="$(select_backend)"; echo "selected backend: $b" ;; + *) echo "usage: $0 [select|--self-test]" >&2; exit 2 ;; +esac diff --git a/schemas/orchestration-backend.v1.schema.json b/schemas/orchestration-backend.v1.schema.json new file mode 100644 index 000000000..082b500a8 --- /dev/null +++ b/schemas/orchestration-backend.v1.schema.json @@ -0,0 +1,43 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://agentops.dev/schemas/orchestration-backend.v1.schema.json", + "title": "Orchestration Backend Selection", + "description": "Records which orchestration backend (tier) was chosen for a task and why, so degradation is auditable", + "practices": ["microservices", "team-topologies"], + "type": "object", + "required": ["schema_version", "chosen", "reason"], + "properties": { + "schema_version": { + "type": "integer", + "const": 1 + }, + "chosen": { + "type": "string", + "enum": ["ntm", "claude", "codex", "beads"], + "description": "The backend tier selected to execute the task." + }, + "reason": { + "type": "string", + "description": "Human-readable justification for the selection (e.g. availability, opt-out, pin, capability fit)." + }, + "considered": { + "type": "array", + "items": { + "type": "string", + "enum": ["ntm", "claude", "codex", "beads"] + }, + "uniqueItems": true, + "description": "Backends evaluated during selection, in preference order. The chosen backend should appear in this list."
+ }, + "opt_out": { + "type": "boolean", + "description": "True when the caller explicitly opted out of higher-capability tiers, forcing degradation." + }, + "pin": { + "type": ["string", "null"], + "enum": ["ntm", "claude", "codex", "beads", "", null], + "description": "Operator pin overriding auto-selection. Empty string or null means auto (no pin)." + } + }, + "additionalProperties": false +} diff --git a/schemas/orchestration-result.v1.schema.json b/schemas/orchestration-result.v1.schema.json new file mode 100644 index 000000000..ce8a657a2 --- /dev/null +++ b/schemas/orchestration-result.v1.schema.json @@ -0,0 +1,45 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://agentops.dev/schemas/orchestration-result.v1.schema.json", + "title": "Orchestration Result", + "description": "Output-contract parity shape every backend tier (NTM/Claude/Codex/beads) must emit so tier degradation stays correctness-preserving", + "practices": ["microservices", "design-by-contract"], + "type": "object", + "required": ["schema_version", "backend", "result_paths", "verdict"], + "properties": { + "schema_version": { + "type": "integer", + "const": 1 + }, + "backend": { + "type": "string", + "enum": ["ntm", "claude", "codex", "beads"], + "description": "The backend tier that produced this result." + }, + "result_paths": { + "type": "array", + "items": {"type": "string"}, + "description": "Paths (relative to repo root) where this run wrote its artifacts." + }, + "verdict": { + "type": "object", + "required": ["status", "confidence"], + "properties": { + "status": { + "type": "string", + "enum": ["PASS", "WARN", "FAIL"] + }, + "confidence": { + "type": "string", + "enum": ["HIGH", "MEDIUM", "LOW"] + } + }, + "additionalProperties": false + }, + "task_id": { + "type": "string", + "description": "Identifier of the task this result fulfills (e.g. bead ID)."
+ } + }, + "additionalProperties": false +} diff --git a/skills/SKILL-TIERS.md b/skills/SKILL-TIERS.md index e7d5b60ae..ddc820071 100644 --- a/skills/SKILL-TIERS.md +++ b/skills/SKILL-TIERS.md @@ -221,7 +221,7 @@ These are how skills chain in practice: ## Current Skill Tiers -### User-Facing Skills (66) +### User-Facing Skills (68) **Judgment:** @@ -323,6 +323,8 @@ These are how skills chain in practice: | **heal-skill** | meta | Detect and fix skill hygiene issues | | **skill-auditor** | meta | Two-pass audit of an existing SKILL.md against the unified template (15 checks) | | **skill-builder** | meta | Scaffold or absorb new SKILL.md files against the unified template | +| **automation-shape-routing** | meta | Front door for building automation: route to Workflow vs NTM swarm vs plain skill, then hand off | +| **workflow-builder** | meta | Scaffold a new Claude Workflow script (.claude/workflows/*.js) from the operating-loop.js template | ### Internal Skills (10) — `metadata.internal: true` diff --git a/skills/automation-shape-routing/SKILL.md b/skills/automation-shape-routing/SKILL.md new file mode 100644 index 000000000..a33e4aefd --- /dev/null +++ b/skills/automation-shape-routing/SKILL.md @@ -0,0 +1,144 @@ +--- +name: automation-shape-routing +description: 'Front door for building agent automation — decide the SHAPE (Claude Workflow vs NTM swarm vs plain skill), then hand off to the right builder. Triggers: "build a skill", "build a workflow", "build automation", "create a skill", "create a workflow", "new automation", "convert skills to workflows", or any task involving fan-out / multiple agents / iterative passes.' 
+practices: +- hexagonal-architecture +- team-topologies +- pragmatic-programmer +hexagonal_role: supporting +consumes: [] +produces: [] +context_rel: +- kind: supplier-to + with: skill-builder +- kind: supplier-to + with: workflow-builder +skill_api_version: 1 +context: + window: inherit + intent: + mode: task + intel_scope: none +metadata: + tier: meta + dependencies: [] +output_contract: 'a routing verdict — Workflow | NTM swarm | plain skill — with the deciding axis named' +--- + +# Automation Shape Routing — Workflow vs NTM vs Skill + +> **The trap this kills:** "I built a lot of skills; they should become +> workflows." Mostly false. Most orchestration-looking skills are either +> long-lived/human-attachable (stay NTM) or hard-sequential (stay skills). The +> win is the routing rule, not a migration project. + +## The three shapes + +| Shape | What it is | Mechanism | +|---|---|---| +| **Workflow** | Deterministic, reproducible orchestration of subagents | Claude `Workflow` tool — `agent({schema})`, `parallel()`, `pipeline()`, `phase()`, loop-until-budget. In-process, headless, ~16 concurrent. | +| **NTM swarm** | Long-lived, human-in-the-loop multi-agent run | `ntm` / `*-with-ntm` — persistent tmux panes, robot API, mail/locks, attach + nudge + kill/relaunch. | +| **Plain skill** | One model reasoning through a procedure or knowledge | A single `SKILL.md`. No fan-out, or a strictly sequential edit-loop. | + +## The decision rule (three axes) + +Ask in order: + +1. **Is there real orchestration at all?** (fan-out / barrier / multi-stage, OR a + loop with parallelism to exploit) — if **no** → **plain skill**. Stop. +2. **Must a human attach and steer mid-run?** Or does it run for *hours*, do + open-ended *file edits*, juggle a *fluid population* (rate limits, kill/ + relaunch, prompt-cache rounds), or relay between *cross-model* panes? — if + **yes** → **NTM swarm**. +3. 
Otherwise — fixed DAG, agents return **structured JSON** (not free-form edits + needing review), no attach needed, you want it **reproducible + headless** → + **Workflow**. + +**One-line litmus:** +> deterministic DAG + structured JSON + no human-attach + headless-wanted → **Workflow** +> long-lived + attachable + open-ended file edits / fluid population → **NTM** +> no fan-out, or hard-sequential edit loop → **plain skill** + +## Spike-validated nuances (2026-05-29) + +A live three-legged spike (`~/dev/agentops-3cat-spike/`) measured the same task on +all three backends. Two findings refine the rule: + +1. **The primary axis is control-plane vs in-session, not "parallel vs serial."** + **NTM is a control-plane** that *runs Claude/Codex/Gemini as panes* — it is not a + peer of the native runtimes, it is the supervisor tier above them. Choose NTM when + you need the control plane (attach/steer, persistence, multi-vendor); choose + in-session native (Workflow/Task) when you don't. +2. **Parallel buys quality/independence, NOT wall-clock — at small N.** Measured: a + 3-way Workflow fan-out **tied** a single sequential agent on wall-clock (191s vs + 180s) and cost **~2.7× the tokens** — because the synthesis barrier eats the + parallel gain. What it bought was depth + independent fresh-eyes (the sequential + leg self-reported "monoculture" bias). So: reach for parallel `Workflow` when you + want *independent verification / fresh eyes*, not for speed. For speed, you need + large N **and** no barrier — use `pipeline()` (no barrier), not `parallel()`. + +Degradation (NTM → Claude-native → beads floor) is governed by the +`OrchestrationPort` selector; opt out entirely with `AGENTOPS_ORCHESTRATION=off` → +beads floor, which always works. 
+ +## Two traps to avoid + +- **Don't workflow-ify a sequential edit-loop.** If each pass must see the prior + pass's edits (progressive-deepening reapply, audit-fix-rescan), there's no + concurrency to win — a Workflow wrapper adds a process boundary for nothing. + *Exception:* it graduates to a `loop-until-budget` Workflow only once each step + returns **structured output** instead of free-form edits, and you want it + headless/reproducible. +- **Don't NTM-ify a clean fan-out, and don't Workflow-ify an attach-and-steer + run.** The Workflow tool is in-process and cannot be tmux-attached; NTM is + built for exactly the live-steering Workflow can't do. Picking wrong fights the + tool the whole way. + +## Worked examples + +**→ Workflow** (deterministic fan-out / synthesize, structured returns): +`council` (N judges → consensus — near-trivial port), the **planning half** of +`rpi`, judge/refutation panels, any "fan out N analyses → triangulate" task. + +**→ Stay NTM** (long-lived, attachable, open-ended edits, fluid population): +the `*-with-ntm` family (hypothesis research, cross-model review swarms, browser +testing), plus `swarm`/`crank` in full epic-execution mode — they touch the +working tree and need wave-validity gating + human review. + +**→ Stay plain skill** (no exploitable parallelism, or knowledge/one-shot): +deliberately one-at-a-time loops (progressive reapply, multi-pass bug hunting); +all reference docs; all single-shot transforms (jargon scrub, README authoring). + +## Canonical Workflow template + +`.claude/workflows/operating-loop.js` is the worked example — a real Workflow-tool +script using `agent(prompt,{schema})` with JSON schemas, `parallel([thunks])` +barriers (framing-lenses / judges / refutation / slices), `phase()` markers, +budget-scaled `FANOUT`, and bounded re-plan/retry. 
**Start from it when porting a skill to a +Workflow.** It is also the proof that the AgentOps operating loop has *two* +conformant runtimes (skill-driven via `rpi`/`crank`/`swarm`/`council`, and +Workflow-driven via this script) — the basis of the `agentops-core-sdk` +portability thesis. See `operating-loop-workflow` for the install+run path. + +## Handoff — after the verdict, invoke the next skill + +This skill is the **front door**. It does not build; it routes. Once the shape is +decided, hand off: + +| Verdict | Next | What it does | +|---|---|---| +| **plain skill** | `skill-builder` | Scaffold a new `SKILL.md` against the unified template → then `skill-auditor` → `heal-skill`. | +| **Workflow** | `workflow-builder` | Scaffold a new `.claude/workflows/*.js` from the operating-loop.js template. | +| **NTM swarm** | `ntm` + `vibing-with-ntm` | Stand up + tend a persistent, human-attachable tmux swarm. | + +State the verdict and the deciding axis in one line, then invoke the chosen +builder. Do not scaffold here. + +## Contract note (SDK) + +A Workflow is a **composite capability** (an orchestration of sub-capabilities +with typed control flow); a skill is a **leaf**. The portable contract for this — +a `shape: skill|workflow` discriminator, a `StepGraph`, a `control_flow` enum, a +`budget`, and an `OrchestrationPort` *interface* — is net-new SDK work. Port the +**shape, not the engine**: keep concrete orchestrators (Codex subagents, swarm +dispatch, scheduler — BC4/BC5) behind adapters.
diff --git a/skills/crank/references/execution-preflight.md b/skills/crank/references/execution-preflight.md index 3d4b2257d..c7c770a44 100644 --- a/skills/crank/references/execution-preflight.md +++ b/skills/crank/references/execution-preflight.md @@ -39,9 +39,11 @@ fi | **Track retries** | `bd comments add` | Task description update | | **Epic tracking** | `bd update --append-notes` | In-memory wave counter | -### Step 0.6: Detect gc Pool Backend +### Step 0.6: Select Dispatch Backend (NTM > runtime-native > beads floor) -Check `gc status --json` for a running controller. Set `GC_POOL_AVAILABLE=true` if gc is available. When true, gc pool handles worker lifecycle and auto-scales based on `bd ready --count`. Crank simplifies to: create issues, gc scales workers, workers close issues, crank validates. See [gc-pool-dispatch.md](gc-pool-dispatch.md) for dispatch details. +Select the wave-dispatch backend per the canonical ladder in `skills/shared/SKILL.md` ("Selection policy"): if `AGENTOPS_ORCHESTRATION=off`, degrade to the **beads floor** (create issues, validate; no spawn). Otherwise prefer **NTM** (capability-probed via `ntm --robot-capabilities`), then **runtime-native** (Claude Native Teams / Codex sub-agents) via `/swarm`. Output-contract parity is unchanged on every tier: workers write `.agents/swarm/results/*.json`, crank verifies-then-trusts. + +> **gc pool is NOT selected (DEPRECATION).** gc tier removed (soc-2rtm0); retained for historical reference only — NOT selected. The Gas City (`gc`) CLI bridge was removed and `runtime=gc` is rejected by the CLI (see `agentops/CLAUDE.md`). [gc-pool-dispatch.md](gc-pool-dispatch.md) documents the old gc pool dispatch shape for archival purposes only — the top tier is **NTM**. 
### Step 1: Identify the Epic / Work Source diff --git a/skills/crank/references/gc-pool-dispatch.md b/skills/crank/references/gc-pool-dispatch.md index bbf959534..76076fc8c 100644 --- a/skills/crank/references/gc-pool-dispatch.md +++ b/skills/crank/references/gc-pool-dispatch.md @@ -1,6 +1,8 @@ -# gc Pool Dispatch +# gc Pool Dispatch — DEPRECATED, historical reference only -When `GC_POOL_AVAILABLE=true`, replace `/swarm` invocation with gc pool dispatch: +> **gc tier removed (soc-2rtm0); retained for historical reference only — NOT selected. Top tier is NTM.** The Gas City (`gc`) CLI bridge was severed and deleted; `runtime=gc` is rejected by the CLI (see `agentops/CLAUDE.md`, "Gas City (gc) bridge — REMOVED"). The crank dispatch ladder is **NTM > runtime-native > beads floor** (with the `AGENTOPS_ORCHESTRATION=off` opt-out) — see `skills/shared/SKILL.md` "Selection policy" and crank `execution-preflight.md` Step 0.6. `GC_POOL_AVAILABLE` is never set true. The content below documents the old gc pool dispatch shape for archival purposes only — do not select or invoke it. + +When `GC_POOL_AVAILABLE=true` (no longer reachable), `/swarm` invocation was replaced with gc pool dispatch: - Workers are pre-started by gc pool (no spawn overhead) - Assign work via `gc session nudge ""` - Poll completion via `gc status --json` + `bd show ` (check issue closed) diff --git a/skills/crank/references/wave-dispatch.md b/skills/crank/references/wave-dispatch.md index 7ff30b449..eec889c4c 100644 --- a/skills/crank/references/wave-dispatch.md +++ b/skills/crank/references/wave-dispatch.md @@ -128,9 +128,11 @@ fi Before spawning workers, extract cross-cutting constraints from the plan's `## Boundaries` / `## Cross-Cutting Constraints` section and inject into every TaskCreate's `metadata.validation.cross_cutting` array. Each entry has `name`, `type` (e.g., `content_check`), `file`, and `pattern`. "Ask First" boundaries are annotation-only in auto mode. 
-**gc pool dispatch (when `GC_POOL_AVAILABLE=true`):** +**Backend dispatch ladder (NTM > runtime-native > beads floor):** -When gc pool is available, replace `/swarm` with gc pool dispatch — workers are pre-started, assigned via `gc session nudge`, and gc handles crash recovery automatically. When unavailable, the existing `/swarm` path is used unchanged. See [gc-pool-dispatch.md](gc-pool-dispatch.md) for the full dispatch script. +Dispatch the wave per the canonical ladder in `skills/shared/SKILL.md` ("Selection policy"): prefer **NTM** (capability-probed via `ntm --robot-capabilities`), then **runtime-native** via `/swarm` (Claude Native Teams / Codex sub-agents), with the `AGENTOPS_ORCHESTRATION=off` opt-out degrading to the beads floor. Output-contract parity is unchanged: workers write `.agents/swarm/results/*.json`, the lead verifies-then-trusts. + +> **gc pool is NOT selected (DEPRECATION).** gc tier removed (soc-2rtm0); retained for historical reference only — NOT selected. The Gas City (`gc`) CLI bridge was removed and `runtime=gc` is rejected by the CLI (see `agentops/CLAUDE.md`). [gc-pool-dispatch.md](gc-pool-dispatch.md) documents the old gc pool dispatch shape for archival purposes only — the top tier is **NTM**. **For wave execution details (beads sync, TaskList bridging, swarm invocation), read `skills/crank/references/team-coordination.md`.** diff --git a/skills/heal-skill/SKILL.md b/skills/heal-skill/SKILL.md index 0a7822ea9..8e77cb562 100644 --- a/skills/heal-skill/SKILL.md +++ b/skills/heal-skill/SKILL.md @@ -7,7 +7,9 @@ practices: hexagonal_role: supporting consumes: [] produces: [] -context_rel: [] +context_rel: +- kind: customer-of + with: skill-auditor skill_api_version: 1 context: window: isolated diff --git a/skills/shared/SKILL.md b/skills/shared/SKILL.md index 415b17d55..d4ef730c9 100644 --- a/skills/shared/SKILL.md +++ b/skills/shared/SKILL.md @@ -107,11 +107,17 @@ Every runtime maps these capabilities to its own API. 
Skills describe WHAT to do Use capability detection at runtime, not hardcoded tool names. The same skill must work across any agent harness that provides multi-agent primitives. If no multi-agent capability is detected, degrade to single-agent inline mode (`--quick`). -**Selection policy (runtime-native first):** -1. If running in a Claude session and `TeamCreate`/`SendMessage` are available, use **Claude Native Teams** as the primary backend. -2. If running in a Codex session and `spawn_agent` is available, use **Codex sub-agents** as the primary backend. -3. If both are technically available, pick the backend native to the current runtime unless the user explicitly requests mixed/cross-vendor execution. -4. Only use background tasks when neither native backend is available. +**Selection policy (NTM > runtime-native > beads floor):** + +Global opt-out first: if `AGENTOPS_ORCHESTRATION=off` is set, skip all spawn backends and degrade to the **beads floor** (single-agent inline / `--quick`; workers' work is tracked through `bd`). This mirrors the `AGENTOPS_HOOKS_DISABLED=1` convention. Otherwise, select in this order: + +1. **NTM (top tier).** If `ntm` is on PATH, capability-probe it with `ntm --robot-capabilities`. When the probe confirms multi-agent primitives, use **NTM** as the primary backend. +2. **Runtime-native.** If NTM is unavailable: in a Claude session with `TeamCreate`/`SendMessage`, use **Claude Native Teams**; in a Codex session with `spawn_agent`, use **Codex sub-agents**. If both are technically available, pick the backend native to the current runtime unless the user explicitly requests mixed/cross-vendor execution. Only use background tasks when neither native backend is available. +3. **Beads floor.** If no multi-agent capability is detected, degrade to single-agent inline mode (`--quick`). 
+ +> **`gc` is NOT a selectable tier.** The Gas City (`gc`) CLI bridge was removed (soc-2rtm0); `runtime=gc` is rejected by the CLI (see `agentops/CLAUDE.md`, "Gas City (gc) bridge — REMOVED"). Any `gc`-based dispatch prose in the swarm/crank references is retained for historical reference only and is never selected. + +**Output-contract parity is unchanged across all tiers:** workers write results to `.agents/swarm/results/*.json`, and the lead verifies-then-trusts those artifacts. This invariant holds whether the backend is NTM, a runtime-native team, or the beads floor. | Operation | Codex Sub-Agents | Claude Native Teams | OpenCode Subagents | Inline Fallback | |-----------|------------------|---------------------|--------------------|-----------------| diff --git a/skills/skill-auditor/SKILL.md b/skills/skill-auditor/SKILL.md index 42a418b02..0ec11d9d9 100644 --- a/skills/skill-auditor/SKILL.md +++ b/skills/skill-auditor/SKILL.md @@ -10,7 +10,11 @@ hexagonal_role: supporting consumes: [] produces: - result.json -context_rel: [] +context_rel: +- kind: customer-of + with: skill-builder +- kind: supplier-to + with: heal-skill skill_api_version: 1 user-invocable: true context: diff --git a/skills/skill-builder/SKILL.md b/skills/skill-builder/SKILL.md index f5b7568f8..3a70dee15 100644 --- a/skills/skill-builder/SKILL.md +++ b/skills/skill-builder/SKILL.md @@ -1,7 +1,7 @@ --- name: skill-builder -description: 'Scaffold or absorb new SKILL.md files against the unified AgentOps template. - Triggers: "create a skill", "scaffold skill", "absorb external skill", "new skill".' +description: 'Scaffold or absorb new SKILL.md files (a leaf capability) against the unified AgentOps template. + Triggers: "create a skill", "scaffold skill", "absorb external skill", "new skill". If unsure whether the work should be a skill, a Workflow, or an NTM swarm, run automation-shape-routing first.' 
practices: - code-complete - pragmatic-programmer @@ -10,7 +10,11 @@ hexagonal_role: supporting consumes: [] produces: - converted-skill -context_rel: [] +context_rel: +- kind: customer-of + with: automation-shape-routing +- kind: supplier-to + with: skill-auditor skill_api_version: 1 context: window: fork diff --git a/skills/swarm/references/execution-steps.md b/skills/swarm/references/execution-steps.md index 03a16d8e2..1b97f7044 100644 --- a/skills/swarm/references/execution-steps.md +++ b/skills/swarm/references/execution-steps.md @@ -20,24 +20,26 @@ See `skills/shared/SKILL.md` for the capability contract. See also `local-mode.md` for swarm-specific execution details (worktrees, validation, git commit policy, wave repeat). -## Step 0.5: gc Backend Detection (Before Worker Dispatch) +## Step 0.5: Select Spawn Backend (Before Worker Dispatch) -Before spawning workers via Claude teams or Codex sub-agents, check if gc is available: +Select the worker-dispatch backend by the ladder **NTM > runtime-native > beads floor** (see `skills/shared/SKILL.md` "Selection policy" for the canonical statement): ```bash -if command -v gc &>/dev/null && gc status --json 2>/dev/null | jq -e '.controller.state == "running"' >/dev/null 2>&1; then - SWARM_BACKEND="gc" +if [[ "${AGENTOPS_ORCHESTRATION:-}" == "off" ]]; then + SWARM_BACKEND="beads" # global opt-out — single-agent inline, work tracked via bd +elif command -v ntm &>/dev/null && ntm --robot-capabilities 2>/dev/null | jq -e '.spawn == true' >/dev/null 2>&1; then + SWARM_BACKEND="ntm" # top tier — capability-probed else - SWARM_BACKEND="native" # fallback to Claude teams / Codex sub-agents + SWARM_BACKEND="native" # runtime-native: Claude teams / Codex sub-agents; beads floor if neither fi ``` -When `SWARM_BACKEND="gc"`: -- Use `gc session nudge ""` instead of `spawn_agent()` -- Monitor workers via `gc session peek --lines 50` -- Workers already use `bd` for issue tracking — no change needed -- Results still written to 
`.agents/swarm/results/` — no change needed -- gc pool auto-scaling handles worker lifecycle (based on `scale_check = "bd ready --count"`) +- `AGENTOPS_ORCHESTRATION=off` is the global opt-out (mirrors `AGENTOPS_HOOKS_DISABLED=1`): no spawn backend, degrade to the beads floor. +- `SWARM_BACKEND="ntm"`: dispatch and monitor workers through NTM's robot API. +- `SWARM_BACKEND="native"`: spawn via Claude Native Teams or Codex sub-agents per the runtime; if neither is available, fall to the beads floor (`--quick`). +- **Output-contract parity holds on every tier:** workers write results to `.agents/swarm/results/`, lead verifies-then-trusts. Workers already use `bd` for issue tracking — no change needed. + +> **`gc` is NOT a selectable backend (DEPRECATION).** The Gas City (`gc`) CLI bridge was removed (soc-2rtm0); `runtime=gc` is rejected by the CLI (see `agentops/CLAUDE.md`). The `SWARM_BACKEND="gc"` dispatch block later in this file is retained for historical reference only — it is never selected. The top tier is **NTM**. ## Step 1: Ensure Tasks Exist @@ -210,9 +212,11 @@ done > **Pre-task checks:** Inject the Quick-Reference Inject Block from `worker-pre-task-checks.md` into every worker dispatch prompt — grep-for-existing-impls, file-manifest existence, deletion-adjacent symbol verify. Prevents workers from duplicating existing utilities or operating on stale plan symbols. -### gc Worker Dispatch (when `SWARM_BACKEND="gc"`) +### gc Worker Dispatch — DEPRECATED, historical reference only + +> **gc tier removed (soc-2rtm0); retained for historical reference only — NOT selected.** The Gas City (`gc`) CLI bridge was severed and `runtime=gc` is rejected by the CLI (see `agentops/CLAUDE.md`). Step 0.5 never sets `SWARM_BACKEND="gc"`. The top tier is **NTM**. The block below documents the old gc dispatch shape for archival purposes only — do not select or invoke it. 
-When gc is the selected backend, dispatch and monitor workers through gc sessions instead of Claude teams or Codex sub-agents: +When gc was the selected backend (no longer reachable), dispatch and monitoring went through gc sessions instead of Claude teams or Codex sub-agents: ```bash # Dispatch a task to a gc-managed worker diff --git a/skills/workflow-builder/SKILL.md b/skills/workflow-builder/SKILL.md new file mode 100644 index 000000000..2bf24779e --- /dev/null +++ b/skills/workflow-builder/SKILL.md @@ -0,0 +1,101 @@ +--- +name: workflow-builder +description: 'Scaffold a new Claude Workflow script (.claude/workflows/*.js) — deterministic multi-agent orchestration — from the operating-loop.js template and the Workflow primitives. Triggers: "build a workflow", "create a workflow", "scaffold workflow", "new workflow", "author a workflow".' +practices: +- pragmatic-programmer +- hexagonal-architecture +- agile-manifesto +hexagonal_role: supporting +consumes: [] +produces: +- workflow-script +context_rel: +- kind: customer-of + with: automation-shape-routing +- kind: shared-kernel + with: operating-loop-workflow +skill_api_version: 1 +context: + window: fork + intent: + mode: questions + sections: + exclude: + - HISTORY + intel_scope: topic +metadata: + tier: meta + dependencies: + - automation-shape-routing +output_contract: 'a runnable .claude/workflows/<name>.js with a meta block and agent()/parallel()/pipeline()/phase() body' +--- + +# Workflow Builder — scaffold a Claude Workflow script + +> Counterpart to `skill-builder`. `skill-builder` authors a `SKILL.md` (a leaf +> capability); this authors a **Workflow** (a composite capability — deterministic +> orchestration of subagents). Reach this skill via `automation-shape-routing` +> once the shape is confirmed **Workflow** (deterministic DAG + structured-JSON +> returns + headless). If the shape is NTM or plain skill, you're in the wrong +> builder — go back to `automation-shape-routing`.
+ +## Confirm the shape first + +Do NOT scaffold a workflow for: an attach-and-steer run (→ NTM: `ntm` / +`vibing-with-ntm`), or a hard-sequential edit-loop with no parallelism (→ plain +skill: `skill-builder`). If unconfirmed, run `automation-shape-routing`. + +## The template + +Start from `.claude/workflows/operating-loop.js` — the canonical worked example. +Copy its skeleton, don't reinvent it. A Workflow script is plain JS: + +```js +export const meta = { // REQUIRED — pure literal, no variables + name: 'my-workflow', + description: 'one line shown in the permission dialog', + phases: [ { title: 'Find' }, { title: 'Verify' } ], // one per phase() call +} + +phase('Find') +const found = await parallel(FINDERS.map(f => () => + agent(f.prompt, { schema: FINDINGS_SCHEMA, phase: 'Find' }))) // barrier + +phase('Verify') +const verified = await pipeline(found.flat().filter(Boolean), + f => agent(`verify: ${f.title}`, { schema: VERDICT, phase: 'Verify' })) + +return { verified } +``` + +## Building blocks (pick by control-flow shape) + +| Primitive | Use when | +|---|---| +| `agent(prompt, {schema})` | one subagent; `schema` forces structured JSON back | +| `parallel([thunks])` | **barrier** — need ALL results together (dedup/merge/early-exit) | +| `pipeline(items, ...stages)` | **default** multi-stage — no barrier, each item flows independently | +| `phase(title)` | progress grouping; match `meta.phases` titles | +| `loop-until-budget` / `loop-until-dry` | unknown-size discovery; guard on `budget.total` | + +## Authoring checklist + +1. **Shape confirmed Workflow** (via `automation-shape-routing`). +2. **Schemas first** — define the JSON schema each `agent()` returns; structured + output is what makes a workflow deterministic and composable. +3. **Default to `pipeline()`**; reach for `parallel()` only when a stage genuinely + needs all prior results at once. +4. 
**Conflict-free fan-out** — if branches write files, give each a disjoint + write-scope (the wave-validity invariant) or run in worktree isolation. +5. **Budget** — for loops, gate on `budget.total && budget.remaining() > N`. +6. **Dry-run to validate** — invoke the workflow on a tiny input; confirm the + `meta` block parses and each phase returns its schema. This is the workflow + analog of `skill-auditor`. + +## Relationship to the SDK + +A workflow is a **composite capability**; the portable contract for it (a +`shape: skill|workflow` discriminator, a `StepGraph`, a `control_flow` enum, a +`budget`, an `OrchestrationPort` interface) is net-new `agentops-core-sdk` work. +Author the script here; the SDK is where the *contract* for workflow-capabilities +lives. See `operating-loop-workflow` for installing/running a finished workflow. From 75d48536ca58b585e4af80f269627649af0998f9 Mon Sep 17 00:00:00 2001 From: Boden Fuller Date: Fri, 29 May 2026 12:22:48 -0400 Subject: [PATCH 2/8] fix(ci): registry integration for the 2 new orchestration skills Resolves PR #598 CI failures from adding automation-shape-routing + workflow-builder: - trim 3 skill descriptions to <=180 chars (skill-builder hint moved to body) - resolve 2 trigger collisions (shape-routing front-door triggers made distinct) - add skills-codex/ twins for both + regen hashes (codex-parity, override-coverage) - catalog both in using-agentops (skills-integrity) - agentops-skill-domain-map + domain-evolution-bdd 76->78 + 2 rows (registry-drift) - classify 'orchestration' write-surface in agents-write-surfaces.md (correctness) ag-nk67 #orchestration-foundation --- docs/contracts/agents-write-surfaces.md | 2 + .../agentops-domain-evolution-bdd.md | 2 +- docs/reference/agentops-skill-domain-map.md | 8 +- skills-codex-overrides/catalog.json | 12 ++ skills-codex/.agentops-manifest.json | 30 +++-- skills-codex/autodev/.agentops-generated.json | 2 +- skills-codex/autodev/SKILL.md | 2 +- .../.agentops-generated.json | 
7 + .../automation-shape-routing/SKILL.md | 121 ++++++++++++++++++ .../automation-shape-routing/prompt.md | 8 ++ skills-codex/crank/.agentops-generated.json | 2 +- .../heal-skill/.agentops-generated.json | 2 +- .../.agentops-generated.json | 2 +- skills-codex/operating-loop-workflow/SKILL.md | 2 +- skills-codex/shared/.agentops-generated.json | 2 +- .../skill-auditor/.agentops-generated.json | 2 +- .../skill-builder/.agentops-generated.json | 2 +- skills-codex/swarm/.agentops-generated.json | 2 +- .../using-agentops/.agentops-generated.json | 2 +- .../workflow-builder/.agentops-generated.json | 7 + skills-codex/workflow-builder/SKILL.md | 73 +++++++++++ skills-codex/workflow-builder/prompt.md | 8 ++ skills/automation-shape-routing/SKILL.md | 2 +- skills/skill-builder/SKILL.md | 5 +- skills/using-agentops/SKILL.md | 2 + skills/workflow-builder/SKILL.md | 2 +- 26 files changed, 283 insertions(+), 28 deletions(-) create mode 100644 skills-codex/automation-shape-routing/.agentops-generated.json create mode 100644 skills-codex/automation-shape-routing/SKILL.md create mode 100644 skills-codex/automation-shape-routing/prompt.md create mode 100644 skills-codex/workflow-builder/.agentops-generated.json create mode 100644 skills-codex/workflow-builder/SKILL.md create mode 100644 skills-codex/workflow-builder/prompt.md diff --git a/docs/contracts/agents-write-surfaces.md b/docs/contracts/agents-write-surfaces.md index c6eae4b01..4e23a6141 100644 --- a/docs/contracts/agents-write-surfaces.md +++ b/docs/contracts/agents-write-surfaces.md @@ -54,6 +54,7 @@ lane must name the intended write path; it cannot be blank or placeholder text. 
| `nightly` | rolling | scripts | local-nightly-state | Private local nightly run digests, readiness snapshots, scheduler templates, and phase logs | | `opencode-tests` | regenerated | scripts, tests | test-output | Opencode runtime test fixtures and outputs | | `operator` | rolling | cli | operator-intents | Durable OperatorIntent records (halt, rescope, handoff) appended via the BC4 OperatorPort | +| `orchestration` | rolling | cli | orchestration-result | OrchestrationResult parity artifacts written by OrchestrationPort backends (beads-floor + tier results) | | `overnight` | rolling | scripts, skills | overnight-run-state | Overnight run state and morning packets | | `packets` | rolling | cli | context-packet-cache | Source manifests and promoted packets feeding the context-explain surface | | `patterns` | persistent | cli, skills | promoted-pattern | Promoted pattern artifacts | @@ -120,6 +121,7 @@ mine nightly opencode-tests operator +orchestration overnight packets patterns diff --git a/docs/reference/agentops-domain-evolution-bdd.md b/docs/reference/agentops-domain-evolution-bdd.md index 01abb5105..6d1f09828 100644 --- a/docs/reference/agentops-domain-evolution-bdd.md +++ b/docs/reference/agentops-domain-evolution-bdd.md @@ -19,7 +19,7 @@ Feature: Domain-governed AgentOps 3.0 evolution And external-corpus-derived observations are used only through the clean-room policy Scenario: Audit every skill before changing shipped behavior - Given the checked-in skill catalog contains 76 skills + Given the checked-in skill catalog contains 78 skills When the evolution bootstrap audits the catalog Then every skill is assigned exactly one primary bounded context And each skill has a preliminary keep, update, refactor, merge-review, or cut-review disposition diff --git a/docs/reference/agentops-skill-domain-map.md b/docs/reference/agentops-skill-domain-map.md index a46f54687..b7f90fef0 100644 --- a/docs/reference/agentops-skill-domain-map.md +++ 
b/docs/reference/agentops-skill-domain-map.md @@ -1,7 +1,7 @@ # AgentOps Skill Domain Map This map is the control surface for the next evolution loop. It classifies all -76 checked-in AgentOps skills before any broad rewrite, using current +78 checked-in AgentOps skills before any broad rewrite, using current `origin/main` product direction, GOALS Directive 12, the DDD/hexagonal ADR, and the `soc-y5vh` Loop epic. @@ -18,9 +18,9 @@ around small provable changes. | Signal | Result | |---|---:| -| Skills audited | 76 | +| Skills audited | 78 | | Domains classified | 5 of 5 (BC1-BC5) | -| Dispositions assigned | 76 / 76 | +| Dispositions assigned | 78 / 78 | Observed gap: the catalog has strong operational kernels but weak productized @@ -59,6 +59,7 @@ Disposition meanings: | Skill | Domain | Hex role | First disposition | Rationale | |---|---|---|---|---| +| `automation-shape-routing` | BC4 Factory | supporting | keep | Front-door router (Workflow vs NTM vs skill) feeding skill-builder/workflow-builder; keep as-is. | | `autodev` | BC3 Loop | supporting | refactor | Must compose with PROGRAM.md and RPI as one vertical-slice executor. | | `beads` | BC3 Loop | driven-adapter | update | Tracker adapter is core; add BDD/slice acceptance self-test. | | `bootstrap` | BC4 Factory | driving-adapter | update | First-run factory entrypoint; needs current 3.0/domain packet shape. | @@ -135,6 +136,7 @@ Disposition meanings: | `validate` | BC2 Validation | driving-adapter | keep | Designed-future canonical unified validator (m6v5.D Phase 1, epic soc-cp7pv); not redundant cruft — epic GO/REVERT is a separate decision (resolved KEEP 2026-05-24). | | `validation` | BC2 Validation | domain | update | Canonical post-implementation validation; strengthen self-test first. | | `vibe` | BC2 Validation | domain | update | Code-readiness validator; add self-test and tighten result contract. 
| +| `workflow-builder` | BC4 Factory | supporting | keep | Scaffolds Claude Workflow scripts from the operating-loop.js template; counterpart to skill-builder. | ## Priority Queue diff --git a/skills-codex-overrides/catalog.json b/skills-codex-overrides/catalog.json index 90dbad57e..34347024d 100644 --- a/skills-codex-overrides/catalog.json +++ b/skills-codex-overrides/catalog.json @@ -658,6 +658,18 @@ "treatment": "parity_only", "wave": "catalog-parity", "reason": "Ship operating-loop Workflow to plugin users via installer skill; Codex twin redirects to the $rpi chain since Codex lacks the Workflow tool" + }, + { + "name": "automation-shape-routing", + "treatment": "parity_only", + "wave": "catalog-parity", + "reason": "Front-door routing skill that names the deciding axis (Workflow vs NTM vs plain skill) and hands off; the prose is decision guidance with no Codex-specific runtime divergence." + }, + { + "name": "workflow-builder", + "treatment": "parity_only", + "wave": "catalog-parity", + "reason": "Scaffolds a Claude Workflow JS script from the operating-loop.js template; the authoring guidance is tool-specific reference material with no Codex-specific runtime divergence." 
} ] } diff --git a/skills-codex/.agentops-manifest.json b/skills-codex/.agentops-manifest.json index ed74293bd..28eb6bd91 100644 --- a/skills-codex/.agentops-manifest.json +++ b/skills-codex/.agentops-manifest.json @@ -656,7 +656,13 @@ "name": "autodev", "source_skill": "skills/autodev", "source_hash": "9b4ecf0399c67b1d55675b293ec382b7b7e4e6cafccec40eda2c7b6da16437b6", - "generated_hash": "14c906b0518abe580eea344c034d466efa92f138f2a85f3465dec2e70b609ff9" + "generated_hash": "ba2a82dc22e5bcead7f9e2ef899601c4235604092a1bf1362e2c546608880c7b" + }, + { + "name": "automation-shape-routing", + "source_skill": "skills/automation-shape-routing", + "source_hash": "48310b535e3a04b7ba8dd34b451029473ce21b86d10620b39d1796cd829ac4a4", + "generated_hash": "18650b12952d69e4d06aa2fae75144c1af75c9a7c47df09ff29ecabd0d1b3b80" }, { "name": "beads", @@ -715,7 +721,7 @@ { "name": "crank", "source_skill": "skills/crank", - "source_hash": "19827ed271f1a839da41c644b297872cc6072bf61c424c9e055e2bee04587a15", + "source_hash": "96897f552e614ef9a4ed36557e8287262c5000775364c9dae3f5be31a0a04ecd", "generated_hash": "d1f156a392be40cb5f72a424de9a7c165df4c1677c984a963cf20ad1cbd15c6f" }, { @@ -805,7 +811,7 @@ { "name": "heal-skill", "source_skill": "skills/heal-skill", - "source_hash": "bd0513fa1d1f40828099749757c21ae4c2ed31acfdcf0477bf403fa707d23d17", + "source_hash": "bae0dc8ce6e916ad059776627ce2f94a42ed1ef15ff9c169f07ecd3cfb2f8c5e", "generated_hash": "0a4b01de799212b1c33f718aba85c1ea0e03a033542a8e81929ddbd52425ab4b" }, { @@ -848,7 +854,7 @@ "name": "operating-loop-workflow", "source_skill": "skills/operating-loop-workflow", "source_hash": "a5101f268118143a999e8b95601e71a9fa844bf53d0b65572730c9fd928966cb", - "generated_hash": "26ea2810425effbed57406c48138ac8f906165b961f3d9ee0c593cf11b8bef42" + "generated_hash": "f76a40657c9d67ab316383a9ba5b79385e9422d17643ce1d0f9a935d5a00ff31" }, { "name": "perf", @@ -1021,7 +1027,7 @@ { "name": "shared", "source_skill": "skills/shared", - "source_hash": 
"4968fc9a47b44e9b23b1bad917fd9131b766cb8568b231fc3c85e8917e43a746", + "source_hash": "03e175d2946ab377d4fbfa884d8fb32ae83d3c71542f627cddcbc05adf9ee0e9", "generated_hash": "0758c94cf77d7fd1bde265f3bff580a57f15fe601c59f893502d14175d57b763" }, { @@ -1033,13 +1039,13 @@ { "name": "skill-auditor", "source_skill": "skills/skill-auditor", - "source_hash": "fb1130887aa6a68d37b12ab37512924c09c6075cdcf741e8a74c6b3ead12457d", + "source_hash": "5a60b4dc4163d179db4ed6eabed08d0a1bb9701ea96e3700e023a72eebb7e6f1", "generated_hash": "18b181579cf896c28e4a8372ea2cd919ad8cde7c5a6f0f11c267fe10afaa54e9" }, { "name": "skill-builder", "source_skill": "skills/skill-builder", - "source_hash": "b73e21977df95a9e8e0ec4a704b3f9262c50cf0c4b6581ab04d72ce5f8ca7ddc", + "source_hash": "6dd778f1556db4ebafd566a01a34d1f02004fe8ef10caf08242359de73b22c09", "generated_hash": "4d8b01766df9b10099f4f669e60c74dede37bd604e70d7b6a9a732cb5b2a5c05" }, { @@ -1057,7 +1063,7 @@ { "name": "swarm", "source_skill": "skills/swarm", - "source_hash": "68d19f5ee0645c1f2285718fcdc948648cb14322d7e6c1596626e7c5a54031c7", + "source_hash": "15c7dce353269b78c0d51e5eb82685e8c9d1e5bd64e3f6cabaebea67c401e4a6", "generated_hash": "093b9bb120ff3b2804b753ff69c80430d8738095ced1ef0e7a03c1fc8ca112d6" }, { @@ -1081,7 +1087,7 @@ { "name": "using-agentops", "source_skill": "skills/using-agentops", - "source_hash": "3687114d95f67041a24c2cf04bc21ea5dd8e18c8208e8ee56d3fdcf9a489f890", + "source_hash": "31e2e5a81cbf35a206a9e4d98300a779bd59fa69968cf08afec9397baea525ee", "generated_hash": "80119493d38739d5ca4d9377c97c7e4f6e5766365525c965297c1cd78029e74b" }, { @@ -1107,6 +1113,12 @@ "source_skill": "skills/vibe", "source_hash": "3de4aa35036213350fac4ea144186579ddf40753cff3247fd7e6c60d65aa80ea", "generated_hash": "c0a5605f01736725d021050b32ea966ccd1a3e6296b4e8d4995d0f2c4e95e365" + }, + { + "name": "workflow-builder", + "source_skill": "skills/workflow-builder", + "source_hash": "d7851c1a9bed2cb411e09605e85044751ebc241cdfcffe6df363b5f02aa5bd64", + 
"generated_hash": "9bb467c16e6586d58b52e45ae5003c18aa5296ba544bc74cc2b306ee50588498" } ] } diff --git a/skills-codex/autodev/.agentops-generated.json b/skills-codex/autodev/.agentops-generated.json index 568712f23..2e332fe53 100644 --- a/skills-codex/autodev/.agentops-generated.json +++ b/skills-codex/autodev/.agentops-generated.json @@ -3,5 +3,5 @@ "source_skill": "skills/autodev", "layout": "modular", "source_hash": "9b4ecf0399c67b1d55675b293ec382b7b7e4e6cafccec40eda2c7b6da16437b6", - "generated_hash": "14c906b0518abe580eea344c034d466efa92f138f2a85f3465dec2e70b609ff9" + "generated_hash": "ba2a82dc22e5bcead7f9e2ef899601c4235604092a1bf1362e2c546608880c7b" } diff --git a/skills-codex/autodev/SKILL.md b/skills-codex/autodev/SKILL.md index 81080faa2..783fc947f 100644 --- a/skills-codex/autodev/SKILL.md +++ b/skills-codex/autodev/SKILL.md @@ -1,6 +1,6 @@ --- name: autodev -description: 'Manage the PROGRAM.md/AUTODEV.md contract that drives the loop — the config layer Evolve and Factory read each tick, not a loop itself.' +description: 'Manage the PROGRAM.md/AUTODEV.md loop-driving contract.' 
--- # $autodev diff --git a/skills-codex/automation-shape-routing/.agentops-generated.json b/skills-codex/automation-shape-routing/.agentops-generated.json new file mode 100644 index 000000000..db31f8a83 --- /dev/null +++ b/skills-codex/automation-shape-routing/.agentops-generated.json @@ -0,0 +1,7 @@ +{ + "generator": "manual-maintained", + "source_skill": "skills/automation-shape-routing", + "layout": "modular", + "source_hash": "48310b535e3a04b7ba8dd34b451029473ce21b86d10620b39d1796cd829ac4a4", + "generated_hash": "18650b12952d69e4d06aa2fae75144c1af75c9a7c47df09ff29ecabd0d1b3b80" +} diff --git a/skills-codex/automation-shape-routing/SKILL.md b/skills-codex/automation-shape-routing/SKILL.md new file mode 100644 index 000000000..22459e16e --- /dev/null +++ b/skills-codex/automation-shape-routing/SKILL.md @@ -0,0 +1,121 @@ +--- +name: automation-shape-routing +description: 'Front door for agent automation: route to the right builder.' +--- +# $automation-shape-routing — Workflow vs NTM vs Skill + +> **The trap this kills:** "I built a lot of skills; they should become +> workflows." Mostly false. Most orchestration-looking skills are either +> long-lived/human-attachable (stay NTM) or hard-sequential (stay skills). The +> win is the routing rule, not a migration project. + +## The three shapes + +| Shape | What it is | Mechanism | +|---|---|---| +| **Workflow** | Deterministic, reproducible orchestration of subagents | Claude `Workflow` tool — `agent({schema})`, `parallel()`, `pipeline()`, `phase()`, loop-until-budget. In-process, headless, ~16 concurrent. | +| **NTM swarm** | Long-lived, human-in-the-loop multi-agent run | `ntm` / `*-with-ntm` — persistent tmux panes, robot API, mail/locks, attach + nudge + kill/relaunch. | +| **Plain skill** | One model reasoning through a procedure or knowledge | A single `SKILL.md`. No fan-out, or a strictly sequential edit-loop. | + +## The decision rule (three axes) + +Ask in order: + +1. 
**Is there real orchestration at all?** (fan-out / barrier / multi-stage, OR a + loop with parallelism to exploit) — if **no** → **plain skill**. Stop. +2. **Must a human attach and steer mid-run?** Or does it run for *hours*, do + open-ended *file edits*, juggle a *fluid population* (rate limits, kill/ + relaunch, prompt-cache rounds), or relay between *cross-model* panes? — if + **yes** → **NTM swarm**. +3. Otherwise — fixed DAG, agents return **structured JSON** (not free-form edits + needing review), no attach needed, you want it **reproducible + headless** → + **Workflow**. + +**One-line litmus:** +> deterministic DAG + structured JSON + no human-attach + headless-wanted → **Workflow** +> long-lived + attachable + open-ended file edits / fluid population → **NTM** +> no fan-out, or hard-sequential edit loop → **plain skill** + +## Spike-validated nuances (2026-05-29) + +A live three-legged spike (`~/dev/agentops-3cat-spike/`) measured the same task on +all three backends. Two findings refine the rule: + +1. **The primary axis is control-plane vs in-session, not "parallel vs serial."** + **NTM is a control-plane** that *runs Claude/Codex/Gemini as panes* — it is not a + peer of the native runtimes, it is the supervisor tier above them. Choose NTM when + you need the control plane (attach/steer, persistence, multi-vendor); choose + in-session native (Workflow/subagents) when you don't. +2. **Parallel buys quality/independence, NOT wall-clock — at small N.** Measured: a + 3-way Workflow fan-out **tied** a single sequential agent on wall-clock (191s vs + 180s) and cost **~2.7× the tokens** — because the synthesis barrier eats the + parallel gain. What it bought was depth + independent fresh-eyes (the sequential + leg self-reported "monoculture" bias). So: reach for parallel `Workflow` when you + want *independent verification / fresh eyes*, not for speed. For speed, you need + large N **and** no barrier — use `pipeline()` (no barrier), not `parallel()`. 
+ +Degradation (NTM → native → beads floor) is governed by the +`OrchestrationPort` selector; opt out entirely with `AGENTOPS_ORCHESTRATION=off` → +beads floor, which always works. + +## Two traps to avoid + +- **Don't workflow-ify a sequential edit-loop.** If each pass must see the prior + pass's edits (progressive-deepening reapply, audit-fix-rescan), there's no + concurrency to win — a Workflow wrapper adds a process boundary for nothing. + *Exception:* it graduates to a `loop-until-budget` Workflow only once each step + returns **structured output** instead of free-form edits, and you want it + headless/reproducible. +- **Don't NTM-ify a clean fan-out, and don't Workflow-ify an attach-and-steer + run.** The Workflow tool is in-process and cannot be tmux-attached; NTM is + built for exactly the live-steering Workflow can't do. Picking wrong fights the + tool the whole way. + +## Worked examples + +**→ Workflow** (deterministic fan-out / synthesize, structured returns): +`council` (N judges → consensus — near-trivial port), the **planning half** of +`rpi`, judge/refutation panels, any "fan out N analyses → triangulate" task. + +**→ Stay NTM** (long-lived, attachable, open-ended edits, fluid population): +the `*-with-ntm` family (hypothesis research, cross-model review swarms, browser +testing), plus `swarm`/`crank` in full epic-execution mode — they touch the +working tree and need wave-validity gating + human review. + +**→ Stay plain skill** (no exploitable parallelism, or knowledge/one-shot): +deliberately one-at-a-time loops (progressive reapply, multi-pass bug hunting); +all reference docs; all single-shot transforms (jargon scrub, README authoring). 
+ +## Canonical Workflow template + +`.claude/workflows/operating-loop.js` is the worked example — a real Workflow-tool +script using `agent(prompt,{schema})` with JSON schemas, `parallel([thunks])` +barriers (framing-lenses / judges / refutation / slices), `phase()` markers, +budget-scaled `FANOUT`, and bounded re-plan/retry. **Start from it when porting a +Workflow.** It is also the proof that the AgentOps operating loop has *two* +conformant runtimes (skill-driven via `rpi`/`crank`/`swarm`/`council`, and +Workflow-driven via this script) — the basis of the `agentops-core-sdk` +portability thesis. See `operating-loop-workflow` for the install+run path. + +## Handoff — after the verdict, invoke the next skill + +This skill is the **front door**. It does not build; it routes. Once the shape is +decided, hand off: + +| Verdict | Next | What it does | +|---|---|---| +| **plain skill** | `$skill-builder` | Scaffold a new `SKILL.md` against the unified template → then `$skill-auditor` → `$heal-skill`. | +| **Workflow** | `$workflow-builder` | Scaffold a new `.claude/workflows/*.js` from the operating-loop.js template. | +| **NTM swarm** | `ntm` + `vibing-with-ntm` | Stand up + tend a persistent, human-attachable tmux swarm. | + +State the verdict and the deciding axis in one line, then invoke the chosen +builder. Do not scaffold here. + +## Contract note (SDK) + +A Workflow is a **composite capability** (an orchestration of sub-capabilities +with typed control flow); a skill is a **leaf**. The portable contract for this — +a `shape: skill|workflow` discriminator, a `StepGraph`, a `control_flow` enum, a +`budget`, and an `OrchestrationPort` *interface* — is net-new SDK work. Port the +**shape, not the engine**: keep concrete orchestrators (Codex subagents, swarm +dispatch, scheduler — BC4/BC5) behind adapters. 
diff --git a/skills-codex/automation-shape-routing/prompt.md b/skills-codex/automation-shape-routing/prompt.md new file mode 100644 index 000000000..681773200 --- /dev/null +++ b/skills-codex/automation-shape-routing/prompt.md @@ -0,0 +1,8 @@ +# automation-shape-routing + +Front door for building agent automation — decide the SHAPE (Workflow vs NTM swarm vs plain skill), then hand off to the right builder. Triggers: "build automation", "convert skills to workflows", "which shape". + +## Instructions + +Load and follow the skill instructions from the sibling `SKILL.md` file for this skill. +Then read local files in `references/` and `scripts/` when needed. diff --git a/skills-codex/crank/.agentops-generated.json b/skills-codex/crank/.agentops-generated.json index 87c34ca62..74d04a58a 100644 --- a/skills-codex/crank/.agentops-generated.json +++ b/skills-codex/crank/.agentops-generated.json @@ -2,6 +2,6 @@ "generator": "manual-maintained", "source_skill": "skills/crank", "layout": "modular", - "source_hash": "19827ed271f1a839da41c644b297872cc6072bf61c424c9e055e2bee04587a15", + "source_hash": "96897f552e614ef9a4ed36557e8287262c5000775364c9dae3f5be31a0a04ecd", "generated_hash": "d1f156a392be40cb5f72a424de9a7c165df4c1677c984a963cf20ad1cbd15c6f" } diff --git a/skills-codex/heal-skill/.agentops-generated.json b/skills-codex/heal-skill/.agentops-generated.json index 7c617812c..5b8f4b809 100644 --- a/skills-codex/heal-skill/.agentops-generated.json +++ b/skills-codex/heal-skill/.agentops-generated.json @@ -2,6 +2,6 @@ "generator": "manual-maintained", "source_skill": "skills/heal-skill", "layout": "modular", - "source_hash": "bd0513fa1d1f40828099749757c21ae4c2ed31acfdcf0477bf403fa707d23d17", + "source_hash": "bae0dc8ce6e916ad059776627ce2f94a42ed1ef15ff9c169f07ecd3cfb2f8c5e", "generated_hash": "0a4b01de799212b1c33f718aba85c1ea0e03a033542a8e81929ddbd52425ab4b" } diff --git a/skills-codex/operating-loop-workflow/.agentops-generated.json 
b/skills-codex/operating-loop-workflow/.agentops-generated.json index 9a8ebed52..13d4c52a4 100644 --- a/skills-codex/operating-loop-workflow/.agentops-generated.json +++ b/skills-codex/operating-loop-workflow/.agentops-generated.json @@ -3,5 +3,5 @@ "source_skill": "skills/operating-loop-workflow", "layout": "modular", "source_hash": "a5101f268118143a999e8b95601e71a9fa844bf53d0b65572730c9fd928966cb", - "generated_hash": "26ea2810425effbed57406c48138ac8f906165b961f3d9ee0c593cf11b8bef42" + "generated_hash": "f76a40657c9d67ab316383a9ba5b79385e9422d17643ce1d0f9a935d5a00ff31" } diff --git a/skills-codex/operating-loop-workflow/SKILL.md b/skills-codex/operating-loop-workflow/SKILL.md index 73b085ed2..446375195 100644 --- a/skills-codex/operating-loop-workflow/SKILL.md +++ b/skills-codex/operating-loop-workflow/SKILL.md @@ -1,6 +1,6 @@ --- name: operating-loop-workflow -description: 'Install and run the operating-loop multi-agent Workflow (the seven-move loop) for AgentOps plugin users.' +description: 'Install and run the operating-loop multi-agent Workflow.' 
--- # $operating-loop-workflow diff --git a/skills-codex/shared/.agentops-generated.json b/skills-codex/shared/.agentops-generated.json index 5036b3511..8711deb3d 100644 --- a/skills-codex/shared/.agentops-generated.json +++ b/skills-codex/shared/.agentops-generated.json @@ -2,6 +2,6 @@ "generator": "manual-maintained", "source_skill": "skills/shared", "layout": "modular", - "source_hash": "4968fc9a47b44e9b23b1bad917fd9131b766cb8568b231fc3c85e8917e43a746", + "source_hash": "03e175d2946ab377d4fbfa884d8fb32ae83d3c71542f627cddcbc05adf9ee0e9", "generated_hash": "0758c94cf77d7fd1bde265f3bff580a57f15fe601c59f893502d14175d57b763" } diff --git a/skills-codex/skill-auditor/.agentops-generated.json b/skills-codex/skill-auditor/.agentops-generated.json index 89b2e8cf5..9e3980386 100644 --- a/skills-codex/skill-auditor/.agentops-generated.json +++ b/skills-codex/skill-auditor/.agentops-generated.json @@ -2,6 +2,6 @@ "generator": "manual-maintained", "source_skill": "skills/skill-auditor", "layout": "modular", - "source_hash": "fb1130887aa6a68d37b12ab37512924c09c6075cdcf741e8a74c6b3ead12457d", + "source_hash": "5a60b4dc4163d179db4ed6eabed08d0a1bb9701ea96e3700e023a72eebb7e6f1", "generated_hash": "18b181579cf896c28e4a8372ea2cd919ad8cde7c5a6f0f11c267fe10afaa54e9" } diff --git a/skills-codex/skill-builder/.agentops-generated.json b/skills-codex/skill-builder/.agentops-generated.json index 9d1c83fac..d94060903 100644 --- a/skills-codex/skill-builder/.agentops-generated.json +++ b/skills-codex/skill-builder/.agentops-generated.json @@ -2,6 +2,6 @@ "generator": "manual-maintained", "source_skill": "skills/skill-builder", "layout": "modular", - "source_hash": "b73e21977df95a9e8e0ec4a704b3f9262c50cf0c4b6581ab04d72ce5f8ca7ddc", + "source_hash": "6dd778f1556db4ebafd566a01a34d1f02004fe8ef10caf08242359de73b22c09", "generated_hash": "4d8b01766df9b10099f4f669e60c74dede37bd604e70d7b6a9a732cb5b2a5c05" } diff --git a/skills-codex/swarm/.agentops-generated.json 
b/skills-codex/swarm/.agentops-generated.json index 6fb78571c..23b77fafb 100644 --- a/skills-codex/swarm/.agentops-generated.json +++ b/skills-codex/swarm/.agentops-generated.json @@ -2,6 +2,6 @@ "generator": "manual-maintained", "source_skill": "skills/swarm", "layout": "modular", - "source_hash": "68d19f5ee0645c1f2285718fcdc948648cb14322d7e6c1596626e7c5a54031c7", + "source_hash": "15c7dce353269b78c0d51e5eb82685e8c9d1e5bd64e3f6cabaebea67c401e4a6", "generated_hash": "093b9bb120ff3b2804b753ff69c80430d8738095ced1ef0e7a03c1fc8ca112d6" } diff --git a/skills-codex/using-agentops/.agentops-generated.json b/skills-codex/using-agentops/.agentops-generated.json index 0adfd1dfa..0e4bc195a 100644 --- a/skills-codex/using-agentops/.agentops-generated.json +++ b/skills-codex/using-agentops/.agentops-generated.json @@ -2,6 +2,6 @@ "generator": "manual-maintained", "source_skill": "skills/using-agentops", "layout": "modular", - "source_hash": "3687114d95f67041a24c2cf04bc21ea5dd8e18c8208e8ee56d3fdcf9a489f890", + "source_hash": "31e2e5a81cbf35a206a9e4d98300a779bd59fa69968cf08afec9397baea525ee", "generated_hash": "80119493d38739d5ca4d9377c97c7e4f6e5766365525c965297c1cd78029e74b" } diff --git a/skills-codex/workflow-builder/.agentops-generated.json b/skills-codex/workflow-builder/.agentops-generated.json new file mode 100644 index 000000000..b1def4524 --- /dev/null +++ b/skills-codex/workflow-builder/.agentops-generated.json @@ -0,0 +1,7 @@ +{ + "generator": "manual-maintained", + "source_skill": "skills/workflow-builder", + "layout": "modular", + "source_hash": "d7851c1a9bed2cb411e09605e85044751ebc241cdfcffe6df363b5f02aa5bd64", + "generated_hash": "9bb467c16e6586d58b52e45ae5003c18aa5296ba544bc74cc2b306ee50588498" +} diff --git a/skills-codex/workflow-builder/SKILL.md b/skills-codex/workflow-builder/SKILL.md new file mode 100644 index 000000000..61ef6d6ba --- /dev/null +++ b/skills-codex/workflow-builder/SKILL.md @@ -0,0 +1,73 @@ +--- +name: workflow-builder +description: 'Scaffold a 
new Claude Workflow script.' +--- +# $workflow-builder — scaffold a Claude Workflow script + +> Counterpart to `$skill-builder`. `$skill-builder` authors a `SKILL.md` (a leaf +> capability); this authors a **Workflow** (a composite capability — deterministic +> orchestration of subagents). Reach this skill via `$automation-shape-routing` +> once the shape is confirmed **Workflow** (deterministic DAG + structured-JSON +> returns + headless). If the shape is NTM or plain skill, you're in the wrong +> builder — go back to `$automation-shape-routing`. + +## Confirm the shape first + +Do NOT scaffold a workflow for: an attach-and-steer run (→ NTM: `ntm` / +`vibing-with-ntm`), or a hard-sequential edit-loop with no parallelism (→ plain +skill: `$skill-builder`). If unconfirmed, run `$automation-shape-routing`. + +## The template + +Start from `.claude/workflows/operating-loop.js` — the canonical worked example. +Copy its skeleton, don't reinvent it. A Workflow script is plain JS: + +```js +export const meta = { // REQUIRED — pure literal, no variables + name: 'my-workflow', + description: 'one line shown in the permission dialog', + phases: [ { title: 'Find' }, { title: 'Verify' } ], // one per phase() call +} + +phase('Find') +const found = await parallel(FINDERS.map(f => () => + agent(f.prompt, { schema: FINDINGS_SCHEMA, phase: 'Find' }))) // barrier + +phase('Verify') +const verified = await pipeline(found.flat().filter(Boolean), + f => agent(`verify: ${f.title}`, { schema: VERDICT, phase: 'Verify' })) + +return { verified } +``` + +## Building blocks (pick by control-flow shape) + +| Primitive | Use when | +|---|---| +| `agent(prompt, {schema})` | one subagent; `schema` forces structured JSON back | +| `parallel([thunks])` | **barrier** — need ALL results together (dedup/merge/early-exit) | +| `pipeline(items, ...stages)` | **default** multi-stage — no barrier, each item flows independently | +| `phase(title)` | progress grouping; match `meta.phases` titles | +| 
`loop-until-budget` / `loop-until-dry` | unknown-size discovery; guard on `budget.total` | + +## Authoring checklist + +1. **Shape confirmed Workflow** (via `$automation-shape-routing`). +2. **Schemas first** — define the JSON schema each `agent()` returns; structured + output is what makes a workflow deterministic and composable. +3. **Default to `pipeline()`**; reach for `parallel()` only when a stage genuinely + needs all prior results at once. +4. **Conflict-free fan-out** — if branches write files, give each a disjoint + write-scope (the wave-validity invariant) or run in worktree isolation. +5. **Budget** — for loops, gate on `budget.total && budget.remaining() > N`. +6. **Dry-run to validate** — invoke the workflow on a tiny input; confirm the + `meta` block parses and each phase returns its schema. This is the workflow + analog of `$skill-auditor`. + +## Relationship to the SDK + +A workflow is a **composite capability**; the portable contract for it (a +`shape: skill|workflow` discriminator, a `StepGraph`, a `control_flow` enum, a +`budget`, an `OrchestrationPort` interface) is net-new `agentops-core-sdk` work. +Author the script here; the SDK is where the *contract* for workflow-capabilities +lives. See `operating-loop-workflow` for installing/running a finished workflow. diff --git a/skills-codex/workflow-builder/prompt.md b/skills-codex/workflow-builder/prompt.md new file mode 100644 index 000000000..5d3b2a66e --- /dev/null +++ b/skills-codex/workflow-builder/prompt.md @@ -0,0 +1,8 @@ +# workflow-builder + +Scaffold a new Claude Workflow script (.claude/workflows/*.js) — deterministic multi-agent orchestration — from the operating-loop.js template and the Workflow primitives. Triggers: "build a workflow", "create a workflow", "scaffold workflow", "author a workflow". + +## Instructions + +Load and follow the skill instructions from the sibling `SKILL.md` file for this skill. +Then read local files in `references/` and `scripts/` when needed. 
diff --git a/skills/automation-shape-routing/SKILL.md b/skills/automation-shape-routing/SKILL.md index a33e4aefd..9f667e2ed 100644 --- a/skills/automation-shape-routing/SKILL.md +++ b/skills/automation-shape-routing/SKILL.md @@ -1,6 +1,6 @@ --- name: automation-shape-routing -description: 'Front door for building agent automation — decide the SHAPE (Claude Workflow vs NTM swarm vs plain skill), then hand off to the right builder. Triggers: "build a skill", "build a workflow", "build automation", "create a skill", "create a workflow", "new automation", "convert skills to workflows", or any task involving fan-out / multiple agents / iterative passes.' +description: 'Front door for agent automation — decide the SHAPE (Workflow vs NTM vs skill), then hand off. Triggers: "build automation", "convert skills to workflows", "which shape".' practices: - hexagonal-architecture - team-topologies diff --git a/skills/skill-builder/SKILL.md b/skills/skill-builder/SKILL.md index 3a70dee15..8e3a24cb5 100644 --- a/skills/skill-builder/SKILL.md +++ b/skills/skill-builder/SKILL.md @@ -1,7 +1,6 @@ --- name: skill-builder -description: 'Scaffold or absorb new SKILL.md files (a leaf capability) against the unified AgentOps template. - Triggers: "create a skill", "scaffold skill", "absorb external skill", "new skill". If unsure whether the work should be a skill, a Workflow, or an NTM swarm, run automation-shape-routing first.' +description: 'Scaffold or absorb new SKILL.md files against the unified AgentOps template. Triggers: "create a skill", "scaffold skill", "absorb external skill", "new skill".' practices: - code-complete - pragmatic-programmer @@ -37,6 +36,8 @@ output_contract: skills/skill-builder/schemas/build-report.json Materializes a new skill against the unified template at `references/skill-template.md` (extracted from anthropics/financial-services). Runs `skill-auditor` on the new skill as a self-check before declaring success. 
+> **If unsure whether the work should be a skill, a Workflow, or an NTM swarm, run `/automation-shape-routing` first** — it is the front door that decides the shape and hands off to the right builder. + ## ⚠️ Critical Constraints - **Template is canonical.** All four modes produce SKILL.md files conforming to `references/skill-template.md`. Do not invent ad-hoc structures. **Why:** `skill-auditor` validates against this template; drift creates auditor false-fails. diff --git a/skills/using-agentops/SKILL.md b/skills/using-agentops/SKILL.md index 13d51c564..afb60c865 100644 --- a/skills/using-agentops/SKILL.md +++ b/skills/using-agentops/SKILL.md @@ -167,6 +167,8 @@ These are the skills every user needs first. Everything else is available when y | `/scenario` | Author and manage holdout scenarios for behavioral validation | | `/skill-auditor` | Two-pass audit of an existing SKILL.md against the unified template (15 checks) | | `/skill-builder` | Scaffold or absorb new SKILL.md files against the unified template | +| `/automation-shape-routing` | Front door for building agent automation — decide the SHAPE (Workflow vs NTM swarm vs plain skill), then hand off to the right builder | +| `/workflow-builder` | Scaffold a new Claude Workflow script (`.claude/workflows/*.js`) — deterministic multi-agent orchestration | ## Expert Skills (specialized workflows) diff --git a/skills/workflow-builder/SKILL.md b/skills/workflow-builder/SKILL.md index 2bf24779e..47bdaa1be 100644 --- a/skills/workflow-builder/SKILL.md +++ b/skills/workflow-builder/SKILL.md @@ -1,6 +1,6 @@ --- name: workflow-builder -description: 'Scaffold a new Claude Workflow script (.claude/workflows/*.js) — deterministic multi-agent orchestration — from the operating-loop.js template and the Workflow primitives. Triggers: "build a workflow", "create a workflow", "scaffold workflow", "new workflow", "author a workflow".' 
+description: 'Scaffold a new Claude Workflow script — deterministic multi-agent orchestration. Triggers: "build a workflow", "create a workflow", "scaffold workflow", "author a workflow".' practices: - pragmatic-programmer - hexagonal-architecture From d2e4d97d10fb0566ad475c0e84a3c14485ccd9a4 Mon Sep 17 00:00:00 2001 From: Boden Fuller Date: Fri, 29 May 2026 12:42:47 -0400 Subject: [PATCH 3/8] fix(ci): codex twins + dispositions + doc counts for orchestration skills Round-2 CI fixes on PR #598 (ag-nk67): - skill-dispositions.yaml: add automation-shape-routing + workflow-builder rows; regen skill-domain-map golden - codex twins reframed runtime-native (Codex spawn_agents/output_schema; drop .claude/workflows Claude-tool markers) -> validate-codex-runtime-sections 0 violations - regen-codex-hashes; PRODUCT.md Codex artifact count 76->78 (doc-release gate) ag-nk67 #orchestration-foundation --- PRODUCT.md | 2 +- docs/contracts/skill-dispositions.yaml | 10 +++ docs/reference/agentops-skill-domain-map.md | 4 +- skills-codex/.agentops-manifest.json | 4 +- .../.agentops-generated.json | 2 +- .../automation-shape-routing/SKILL.md | 71 ++++++++-------- .../workflow-builder/.agentops-generated.json | 2 +- skills-codex/workflow-builder/SKILL.md | 85 +++++++++---------- 8 files changed, 95 insertions(+), 85 deletions(-) diff --git a/PRODUCT.md b/PRODUCT.md index bace121dd..36b78c3d5 100644 --- a/PRODUCT.md +++ b/PRODUCT.md @@ -261,7 +261,7 @@ As of 2026-05-10: - GitHub repo: 341 stars, 33 forks, 2 open issues, last pushed 2026-05-10T03:24:01Z - Public surface: GitHub Pages mkdocs site live at boshu2.github.io/agentops/; doctrine site live at 12factoragentops.com -- Distribution/runtime reach: 78 shared skills, 76 checked-in Codex artifacts, and 32 Codex overrides. `/validate` and `/curate` are additive in this train; legacy validation and mining skills remain until their shim/retirement gates are resolved. 
+- Distribution/runtime reach: 78 shared skills, 78 checked-in Codex artifacts, and 32 Codex overrides. `/validate` and `/curate` are additive in this train; legacy validation and mining skills remain until their shim/retirement gates are resolved. **Measured operational proof:** diff --git a/docs/contracts/skill-dispositions.yaml b/docs/contracts/skill-dispositions.yaml index 4d826aa62..09bef82ef 100644 --- a/docs/contracts/skill-dispositions.yaml +++ b/docs/contracts/skill-dispositions.yaml @@ -342,6 +342,16 @@ dispositions: hexagonal_role: supporting disposition: update rationale: "Builder should scaffold SELF-TEST and domain metadata by default" + - skill: automation-shape-routing + domain: "BC4 Factory" + hexagonal_role: supporting + disposition: keep + rationale: "Front-door router: decides Workflow vs NTM swarm vs plain skill, hands off to the right builder" + - skill: workflow-builder + domain: "BC4 Factory" + hexagonal_role: supporting + disposition: keep + rationale: "Scaffolds Claude Workflow scripts (composite capability); counterpart to skill-builder" - skill: standards domain: "BC4 Factory" hexagonal_role: domain diff --git a/docs/reference/agentops-skill-domain-map.md b/docs/reference/agentops-skill-domain-map.md index b7f90fef0..6b8880005 100644 --- a/docs/reference/agentops-skill-domain-map.md +++ b/docs/reference/agentops-skill-domain-map.md @@ -59,8 +59,8 @@ Disposition meanings: | Skill | Domain | Hex role | First disposition | Rationale | |---|---|---|---|---| -| `automation-shape-routing` | BC4 Factory | supporting | keep | Front-door router (Workflow vs NTM vs skill) feeding skill-builder/workflow-builder; keep as-is. | | `autodev` | BC3 Loop | supporting | refactor | Must compose with PROGRAM.md and RPI as one vertical-slice executor. | +| `automation-shape-routing` | BC4 Factory | supporting | keep | Front-door router: decides Workflow vs NTM swarm vs plain skill, hands off to the right builder. 
| | `beads` | BC3 Loop | driven-adapter | update | Tracker adapter is core; add BDD/slice acceptance self-test. | | `bootstrap` | BC4 Factory | driving-adapter | update | First-run factory entrypoint; needs current 3.0/domain packet shape. | | `brainstorm` | BC3 Loop | domain | update | Intent-shaping skill; should emit BDD-ready language. | @@ -136,7 +136,7 @@ Disposition meanings: | `validate` | BC2 Validation | driving-adapter | keep | Designed-future canonical unified validator (m6v5.D Phase 1, epic soc-cp7pv); not redundant cruft — epic GO/REVERT is a separate decision (resolved KEEP 2026-05-24). | | `validation` | BC2 Validation | domain | update | Canonical post-implementation validation; strengthen self-test first. | | `vibe` | BC2 Validation | domain | update | Code-readiness validator; add self-test and tighten result contract. | -| `workflow-builder` | BC4 Factory | supporting | keep | Scaffolds Claude Workflow scripts from the operating-loop.js template; counterpart to skill-builder. | +| `workflow-builder` | BC4 Factory | supporting | keep | Scaffolds Claude Workflow scripts (composite capability); counterpart to skill-builder. 
| ## Priority Queue diff --git a/skills-codex/.agentops-manifest.json b/skills-codex/.agentops-manifest.json index 28eb6bd91..e66074bd5 100644 --- a/skills-codex/.agentops-manifest.json +++ b/skills-codex/.agentops-manifest.json @@ -662,7 +662,7 @@ "name": "automation-shape-routing", "source_skill": "skills/automation-shape-routing", "source_hash": "48310b535e3a04b7ba8dd34b451029473ce21b86d10620b39d1796cd829ac4a4", - "generated_hash": "18650b12952d69e4d06aa2fae75144c1af75c9a7c47df09ff29ecabd0d1b3b80" + "generated_hash": "9ec15ab56100ce6cb590ee64047c229b19ca0587b91448cbdbd4dbce66294c5b" }, { "name": "beads", @@ -1118,7 +1118,7 @@ "name": "workflow-builder", "source_skill": "skills/workflow-builder", "source_hash": "d7851c1a9bed2cb411e09605e85044751ebc241cdfcffe6df363b5f02aa5bd64", - "generated_hash": "9bb467c16e6586d58b52e45ae5003c18aa5296ba544bc74cc2b306ee50588498" + "generated_hash": "848f4a712e120e56a09fbd146eebbab85adc89e59fc1466a4b4e51a46be9c3dd" } ] } diff --git a/skills-codex/automation-shape-routing/.agentops-generated.json b/skills-codex/automation-shape-routing/.agentops-generated.json index db31f8a83..b58d5e65c 100644 --- a/skills-codex/automation-shape-routing/.agentops-generated.json +++ b/skills-codex/automation-shape-routing/.agentops-generated.json @@ -3,5 +3,5 @@ "source_skill": "skills/automation-shape-routing", "layout": "modular", "source_hash": "48310b535e3a04b7ba8dd34b451029473ce21b86d10620b39d1796cd829ac4a4", - "generated_hash": "18650b12952d69e4d06aa2fae75144c1af75c9a7c47df09ff29ecabd0d1b3b80" + "generated_hash": "9ec15ab56100ce6cb590ee64047c229b19ca0587b91448cbdbd4dbce66294c5b" } diff --git a/skills-codex/automation-shape-routing/SKILL.md b/skills-codex/automation-shape-routing/SKILL.md index 22459e16e..6859a9fa9 100644 --- a/skills-codex/automation-shape-routing/SKILL.md +++ b/skills-codex/automation-shape-routing/SKILL.md @@ -2,18 +2,18 @@ name: automation-shape-routing description: 'Front door for agent automation: route to the right 
builder.' --- -# $automation-shape-routing — Workflow vs NTM vs Skill +# $automation-shape-routing — Orchestration vs NTM vs Skill > **The trap this kills:** "I built a lot of skills; they should become -> workflows." Mostly false. Most orchestration-looking skills are either -> long-lived/human-attachable (stay NTM) or hard-sequential (stay skills). The -> win is the routing rule, not a migration project. +> orchestration scripts." Mostly false. Most orchestration-looking skills are +> either long-lived/human-attachable (stay NTM) or hard-sequential (stay skills). +> The win is the routing rule, not a migration project. ## The three shapes | Shape | What it is | Mechanism | |---|---|---| -| **Workflow** | Deterministic, reproducible orchestration of subagents | Claude `Workflow` tool — `agent({schema})`, `parallel()`, `pipeline()`, `phase()`, loop-until-budget. In-process, headless, ~16 concurrent. | +| **Orchestration** | Deterministic, reproducible fan-out / pipeline / loop over sub-agents, each returning structured output | Codex orchestration — `codex exec` driving `spawn_agents` (parallel fan-out), staged pipelines, and loop-until-budget, with an `output_schema` per sub-agent. Headless, reproducible, bounded concurrency. | | **NTM swarm** | Long-lived, human-in-the-loop multi-agent run | `ntm` / `*-with-ntm` — persistent tmux panes, robot API, mail/locks, attach + nudge + kill/relaunch. | | **Plain skill** | One model reasoning through a procedure or knowledge | A single `SKILL.md`. No fan-out, or a strictly sequential edit-loop. | @@ -27,12 +27,12 @@ Ask in order: open-ended *file edits*, juggle a *fluid population* (rate limits, kill/ relaunch, prompt-cache rounds), or relay between *cross-model* panes? — if **yes** → **NTM swarm**. -3. Otherwise — fixed DAG, agents return **structured JSON** (not free-form edits - needing review), no attach needed, you want it **reproducible + headless** → - **Workflow**. +3. 
Otherwise — fixed DAG, sub-agents return **structured JSON** (not free-form + edits needing review), no attach needed, you want it **reproducible + headless** + → **Orchestration**. **One-line litmus:** -> deterministic DAG + structured JSON + no human-attach + headless-wanted → **Workflow** +> deterministic DAG + structured JSON + no human-attach + headless-wanted → **Orchestration** > long-lived + attachable + open-ended file edits / fluid population → **NTM** > no fan-out, or hard-sequential edit loop → **plain skill** @@ -45,14 +45,15 @@ all three backends. Two findings refine the rule: **NTM is a control-plane** that *runs Claude/Codex/Gemini as panes* — it is not a peer of the native runtimes, it is the supervisor tier above them. Choose NTM when you need the control plane (attach/steer, persistence, multi-vendor); choose - in-session native (Workflow/subagents) when you don't. + in-session native orchestration (`codex exec` + `spawn_agents`) when you don't. 2. **Parallel buys quality/independence, NOT wall-clock — at small N.** Measured: a - 3-way Workflow fan-out **tied** a single sequential agent on wall-clock (191s vs + 3-way parallel fan-out **tied** a single sequential agent on wall-clock (191s vs 180s) and cost **~2.7× the tokens** — because the synthesis barrier eats the parallel gain. What it bought was depth + independent fresh-eyes (the sequential - leg self-reported "monoculture" bias). So: reach for parallel `Workflow` when you + leg self-reported "monoculture" bias). So: reach for a parallel fan-out when you want *independent verification / fresh eyes*, not for speed. For speed, you need - large N **and** no barrier — use `pipeline()` (no barrier), not `parallel()`. + large N **and** no barrier — use a streaming pipeline (no barrier), not a + collect-all barrier. 
Degradation (NTM → native → beads floor) is governed by the `OrchestrationPort` selector; opt out entirely with `AGENTOPS_ORCHESTRATION=off` → @@ -60,20 +61,20 @@ beads floor, which always works. ## Two traps to avoid -- **Don't workflow-ify a sequential edit-loop.** If each pass must see the prior +- **Don't orchestrate a sequential edit-loop.** If each pass must see the prior pass's edits (progressive-deepening reapply, audit-fix-rescan), there's no - concurrency to win — a Workflow wrapper adds a process boundary for nothing. - *Exception:* it graduates to a `loop-until-budget` Workflow only once each step - returns **structured output** instead of free-form edits, and you want it - headless/reproducible. -- **Don't NTM-ify a clean fan-out, and don't Workflow-ify an attach-and-steer - run.** The Workflow tool is in-process and cannot be tmux-attached; NTM is - built for exactly the live-steering Workflow can't do. Picking wrong fights the - tool the whole way. + concurrency to win — an orchestration wrapper adds a process boundary for + nothing. *Exception:* it graduates to a loop-until-budget orchestration only once + each step returns **structured output** instead of free-form edits, and you want + it headless/reproducible. +- **Don't NTM-ify a clean fan-out, and don't orchestrate an attach-and-steer run.** + Headless orchestration runs in-process and cannot be tmux-attached; NTM is built + for exactly the live-steering headless orchestration can't do. Picking wrong + fights the tool the whole way. ## Worked examples -**→ Workflow** (deterministic fan-out / synthesize, structured returns): +**→ Orchestration** (deterministic fan-out / synthesize, structured returns): `council` (N judges → consensus — near-trivial port), the **planning half** of `rpi`, judge/refutation panels, any "fan out N analyses → triangulate" task. @@ -86,16 +87,18 @@ working tree and need wave-validity gating + human review. 
deliberately one-at-a-time loops (progressive reapply, multi-pass bug hunting); all reference docs; all single-shot transforms (jargon scrub, README authoring). -## Canonical Workflow template +## Canonical orchestration shape -`.claude/workflows/operating-loop.js` is the worked example — a real Workflow-tool -script using `agent(prompt,{schema})` with JSON schemas, `parallel([thunks])` -barriers (framing-lenses / judges / refutation / slices), `phase()` markers, -budget-scaled `FANOUT`, and bounded re-plan/retry. **Start from it when porting a -Workflow.** It is also the proof that the AgentOps operating loop has *two* +The operating loop is the worked example — a deterministic orchestration that +drives `codex exec` over `spawn_agents` with a per-sub-agent `output_schema` +(structured JSON, not free-form edits), parallel fan-out barriers (framing-lenses +/ judges / refutation / slices), explicit phase markers, budget-scaled fan-out +width, and bounded re-plan/retry. **Start from this shape when authoring an +orchestration.** It is also the proof that the AgentOps operating loop has *two* conformant runtimes (skill-driven via `rpi`/`crank`/`swarm`/`council`, and -Workflow-driven via this script) — the basis of the `agentops-core-sdk` -portability thesis. See `operating-loop-workflow` for the install+run path. +orchestration-driven via `codex exec` + `spawn_agents`) — the basis of the +`agentops-core-sdk` portability thesis. Hand off to `$workflow-builder` for the +authoring path. ## Handoff — after the verdict, invoke the next skill @@ -105,7 +108,7 @@ decided, hand off: | Verdict | Next | What it does | |---|---|---| | **plain skill** | `$skill-builder` | Scaffold a new `SKILL.md` against the unified template → then `$skill-auditor` → `$heal-skill`. | -| **Workflow** | `$workflow-builder` | Scaffold a new `.claude/workflows/*.js` from the operating-loop.js template. 
| +| **Orchestration** | `$workflow-builder` | Author a deterministic `codex exec` + `spawn_agents` orchestration with per-sub-agent `output_schema`. | | **NTM swarm** | `ntm` + `vibing-with-ntm` | Stand up + tend a persistent, human-attachable tmux swarm. | State the verdict and the deciding axis in one line, then invoke the chosen @@ -113,9 +116,9 @@ builder. Do not scaffold here. ## Contract note (SDK) -A Workflow is a **composite capability** (an orchestration of sub-capabilities +An orchestration is a **composite capability** (a composition of sub-capabilities with typed control flow); a skill is a **leaf**. The portable contract for this — a `shape: skill|workflow` discriminator, a `StepGraph`, a `control_flow` enum, a `budget`, and an `OrchestrationPort` *interface* — is net-new SDK work. Port the -**shape, not the engine**: keep concrete orchestrators (Codex subagents, swarm +**shape, not the engine**: keep concrete orchestrators (Codex sub-agents, swarm dispatch, scheduler — BC4/BC5) behind adapters. diff --git a/skills-codex/workflow-builder/.agentops-generated.json b/skills-codex/workflow-builder/.agentops-generated.json index b1def4524..a362611f3 100644 --- a/skills-codex/workflow-builder/.agentops-generated.json +++ b/skills-codex/workflow-builder/.agentops-generated.json @@ -3,5 +3,5 @@ "source_skill": "skills/workflow-builder", "layout": "modular", "source_hash": "d7851c1a9bed2cb411e09605e85044751ebc241cdfcffe6df363b5f02aa5bd64", - "generated_hash": "9bb467c16e6586d58b52e45ae5003c18aa5296ba544bc74cc2b306ee50588498" + "generated_hash": "848f4a712e120e56a09fbd146eebbab85adc89e59fc1466a4b4e51a46be9c3dd" } diff --git a/skills-codex/workflow-builder/SKILL.md b/skills-codex/workflow-builder/SKILL.md index 61ef6d6ba..0a4d3b2b7 100644 --- a/skills-codex/workflow-builder/SKILL.md +++ b/skills-codex/workflow-builder/SKILL.md @@ -1,73 +1,70 @@ --- name: workflow-builder -description: 'Scaffold a new Claude Workflow script.' 
+description: 'Author a deterministic Codex orchestration (codex exec + spawn_agents).' --- -# $workflow-builder — scaffold a Claude Workflow script +# $workflow-builder — author a deterministic Codex orchestration > Counterpart to `$skill-builder`. `$skill-builder` authors a `SKILL.md` (a leaf -> capability); this authors a **Workflow** (a composite capability — deterministic -> orchestration of subagents). Reach this skill via `$automation-shape-routing` -> once the shape is confirmed **Workflow** (deterministic DAG + structured-JSON -> returns + headless). If the shape is NTM or plain skill, you're in the wrong -> builder — go back to `$automation-shape-routing`. +> capability); this authors an **orchestration** (a composite capability — +> deterministic fan-out / pipeline / loop over sub-agents). Reach this skill via +> `$automation-shape-routing` once the shape is confirmed **Orchestration** +> (deterministic DAG + structured-JSON returns + headless). If the shape is NTM or +> plain skill, you're in the wrong builder — go back to `$automation-shape-routing`. ## Confirm the shape first -Do NOT scaffold a workflow for: an attach-and-steer run (→ NTM: `ntm` / +Do NOT author an orchestration for: an attach-and-steer run (→ NTM: `ntm` / `vibing-with-ntm`), or a hard-sequential edit-loop with no parallelism (→ plain skill: `$skill-builder`). If unconfirmed, run `$automation-shape-routing`. -## The template +## The shape -Start from `.claude/workflows/operating-loop.js` — the canonical worked example. -Copy its skeleton, don't reinvent it. A Workflow script is plain JS: +A Codex orchestration is a script that drives `codex exec` to launch sub-agents +via `spawn_agents`, each constrained to return JSON against an `output_schema`, +then composes those structured results across phases. The control flow is the same +three primitives regardless of how you wire them: **fan-out barrier**, **streaming +pipeline**, and **bounded loop**. 
-```js -export const meta = { // REQUIRED — pure literal, no variables - name: 'my-workflow', - description: 'one line shown in the permission dialog', - phases: [ { title: 'Find' }, { title: 'Verify' } ], // one per phase() call -} - -phase('Find') -const found = await parallel(FINDERS.map(f => () => - agent(f.prompt, { schema: FINDINGS_SCHEMA, phase: 'Find' }))) // barrier - -phase('Verify') -const verified = await pipeline(found.flat().filter(Boolean), - f => agent(`verify: ${f.title}`, { schema: VERDICT, phase: 'Verify' })) - -return { verified } ``` +phase: Find — fan out N finders in parallel (spawn_agents), each returning + FINDINGS_SCHEMA; barrier = collect ALL before continuing. +phase: Verify — stream each finding into a verifier sub-agent returning + VERDICT_SCHEMA; no barrier, items flow independently. +return — the verified structured results. +``` + +Each sub-agent is a `codex exec` call carrying its prompt plus the +`output_schema` it must satisfy; the orchestrator owns sequencing, the barrier vs +streaming choice, the budget guard, and the merge. 
## Building blocks (pick by control-flow shape) | Primitive | Use when | |---|---| -| `agent(prompt, {schema})` | one subagent; `schema` forces structured JSON back | -| `parallel([thunks])` | **barrier** — need ALL results together (dedup/merge/early-exit) | -| `pipeline(items, ...stages)` | **default** multi-stage — no barrier, each item flows independently | -| `phase(title)` | progress grouping; match `meta.phases` titles | -| `loop-until-budget` / `loop-until-dry` | unknown-size discovery; guard on `budget.total` | +| sub-agent with `output_schema` | one `codex exec` sub-agent; the schema forces structured JSON back | +| parallel fan-out (`spawn_agents`) | **barrier** — need ALL results together (dedup/merge/early-exit) | +| streaming pipeline | **default** multi-stage — no barrier, each item flows independently | +| phase markers | progress grouping; one per orchestration stage | +| bounded loop (loop-until-budget / loop-until-dry) | unknown-size discovery; guard on remaining budget | ## Authoring checklist -1. **Shape confirmed Workflow** (via `$automation-shape-routing`). -2. **Schemas first** — define the JSON schema each `agent()` returns; structured - output is what makes a workflow deterministic and composable. -3. **Default to `pipeline()`**; reach for `parallel()` only when a stage genuinely - needs all prior results at once. +1. **Shape confirmed Orchestration** (via `$automation-shape-routing`). +2. **Schemas first** — define the `output_schema` each sub-agent returns; + structured output is what makes an orchestration deterministic and composable. +3. **Default to the streaming pipeline**; reach for the parallel fan-out barrier + only when a stage genuinely needs all prior results at once. 4. **Conflict-free fan-out** — if branches write files, give each a disjoint write-scope (the wave-validity invariant) or run in worktree isolation. -5. **Budget** — for loops, gate on `budget.total && budget.remaining() > N`. -6. 
**Dry-run to validate** — invoke the workflow on a tiny input; confirm the - `meta` block parses and each phase returns its schema. This is the workflow - analog of `$skill-auditor`. +5. **Budget** — for loops, gate on a remaining-budget check before each round. +6. **Dry-run to validate** — invoke the orchestration on a tiny input; confirm + each phase launches its sub-agents and returns its `output_schema`. This is the + orchestration analog of `$skill-auditor`. ## Relationship to the SDK -A workflow is a **composite capability**; the portable contract for it (a +An orchestration is a **composite capability**; the portable contract for it (a `shape: skill|workflow` discriminator, a `StepGraph`, a `control_flow` enum, a `budget`, an `OrchestrationPort` interface) is net-new `agentops-core-sdk` work. -Author the script here; the SDK is where the *contract* for workflow-capabilities -lives. See `operating-loop-workflow` for installing/running a finished workflow. +Author the orchestration here; the SDK is where the *contract* for +composite-capabilities lives. From 39c02a128d4290b6dc18dc6b399e241ab138c41e Mon Sep 17 00:00:00 2001 From: Boden Fuller Date: Fri, 29 May 2026 12:52:22 -0400 Subject: [PATCH 4/8] fix(ci): regenerate context-map after orchestration-skill frontmatter changes (ag-nk67) --- docs/contracts/context-map.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/contracts/context-map.md b/docs/contracts/context-map.md index abd6e9525..c987a6543 100644 --- a/docs/contracts/context-map.md +++ b/docs/contracts/context-map.md @@ -67,7 +67,7 @@ and [CDLC](https://github.com/boshu2/agentops/blob/main/docs/cdlc.md) for the ar ### supporting - `autodev` — Manage the PROGRAM.md/AUTODEV.md contract that drives the loop — the config layer Evolve and Factory read each tick, not a loop itself. 
-- `automation-shape-routing` — Front door for building agent automation — decide the SHAPE (Claude Workflow vs NTM swarm vs plain skill), then hand off to the right builder. Triggers: "build a skill", "build a workflow", "build automation", "create a skill", "create a workflow", "new automation", "convert skills to workflows", or any task involving fan-out / multiple agents / iterative passes. +- `automation-shape-routing` — Front door for agent automation — decide the SHAPE (Workflow vs NTM vs skill), then hand off. Triggers: "build automation", "convert skills to workflows", "which shape". - `codex-team` — Coordinate multiple Codex agents. - `compile` — Compile .agents knowledge wiki. - `curate` — Mine transcripts, .agents, bd, and git for skill diffs, bd updates, or rare wiki entries. @@ -87,12 +87,12 @@ and [CDLC](https://github.com/boshu2/agentops/blob/main/docs/cdlc.md) for the ar - `scaffold` — Create project, component, or boilerplate scaffolds. - `scenario` — Manage holdout scenarios. - `skill-auditor` — Audit an existing SKILL.md against the unified AgentOps template (15 checks). Triggers: "audit skill", "skill quality review", "is this skill ready". -- `skill-builder` — Scaffold or absorb new SKILL.md files (a leaf capability) against the unified AgentOps template. Triggers: "create a skill", "scaffold skill", "absorb external skill", "new skill". If unsure whether the work should be a skill, a Workflow, or an NTM swarm, run automation-shape-routing first. +- `skill-builder` — Scaffold or absorb new SKILL.md files against the unified AgentOps template. Triggers: "create a skill", "scaffold skill", "absorb external skill", "new skill". - `swarm` — Dispatch parallel agents. - `system-tuning` — Restore system responsiveness via safe, ordered process cleanup and agent-swarm hygiene. - `test` — Generate tests and coverage plans. - `trace` — Trace decisions through artifacts. 
-- `workflow-builder` — Scaffold a new Claude Workflow script (.claude/workflows/*.js) — deterministic multi-agent orchestration — from the operating-loop.js template and the Workflow primitives. Triggers: "build a workflow", "create a workflow", "scaffold workflow", "new workflow", "author a workflow". +- `workflow-builder` — Scaffold a new Claude Workflow script — deterministic multi-agent orchestration. Triggers: "build a workflow", "create a workflow", "scaffold workflow", "author a workflow". ### generic From 888d4fa266849869ee71ef55a3cc741a190c639e Mon Sep 17 00:00:00 2001 From: Boden Fuller Date: Fri, 29 May 2026 13:03:15 -0400 Subject: [PATCH 5/8] fix(ci): sync embedded skills + regenerate SKU/skill catalogs (ag-nk67) - make sync-hooks: embedded cli/embedded/skills/using-agentops in sync - generate-registry.sh: SKU capability catalog (registry.json) regenerated for 78 skills - generate-skill-catalog.sh: skill catalog refreshed ag-nk67 #orchestration-foundation --- cli/embedded/skills/using-agentops/SKILL.md | 2 + registry.json | 91 +++++++++++++++++++-- 2 files changed, 85 insertions(+), 8 deletions(-) diff --git a/cli/embedded/skills/using-agentops/SKILL.md b/cli/embedded/skills/using-agentops/SKILL.md index 13d51c564..afb60c865 100644 --- a/cli/embedded/skills/using-agentops/SKILL.md +++ b/cli/embedded/skills/using-agentops/SKILL.md @@ -167,6 +167,8 @@ These are the skills every user needs first. 
Everything else is available when y | `/scenario` | Author and manage holdout scenarios for behavioral validation | | `/skill-auditor` | Two-pass audit of an existing SKILL.md against the unified template (15 checks) | | `/skill-builder` | Scaffold or absorb new SKILL.md files against the unified template | +| `/automation-shape-routing` | Front door for building agent automation — decide the SHAPE (Workflow vs NTM swarm vs plain skill), then hand off to the right builder | +| `/workflow-builder` | Scaffold a new Claude Workflow script (`.claude/workflows/*.js`) — deterministic multi-agent orchestration | ## Expert Skills (specialized workflows) diff --git a/registry.json b/registry.json index 468a5e279..7fb30dc71 100644 --- a/registry.json +++ b/registry.json @@ -1,14 +1,14 @@ { "schema_version": 2, - "generated_at": "2026-05-29T03:55:37Z", + "generated_at": "2026-05-29T17:02:40Z", "summary": { - "skills": 76, + "skills": 78, "hooks": 0, "knowledge_stores": 5, "job_types": 0, "eval_files": 56, - "cli_commands": 67, - "capabilities": 157 + "cli_commands": 68, + "capabilities": 160 }, "surfaces": { "skills": [ @@ -452,6 +452,14 @@ "has_references": true, "reference_count": 23 }, + { + "name": "automation-shape-routing", + "tier": "meta", + "path": "skills/automation-shape-routing/", + "has_skill_md": true, + "has_references": false, + "reference_count": 0 + }, { "name": "discovery", "tier": "meta", @@ -516,6 +524,14 @@ "has_references": true, "reference_count": 13 }, + { + "name": "workflow-builder", + "tier": "meta", + "path": "skills/workflow-builder/", + "has_skill_md": true, + "has_references": false, + "reference_count": 0 + }, { "name": "doc", "tier": "product", @@ -1144,6 +1160,13 @@ "bounded_context": "BC4", "driven_by_skills": [] }, + { + "name": "ao orchestrate", + "path": "cli/cmd/ao/", + "purpose": "Resolve and inspect the orchestration backend ladder", + "bounded_context": "", + "driven_by_skills": [] + }, { "name": "ao patterns", "path": "cli/cmd/ao/", 
@@ -1350,9 +1373,9 @@ ] }, "capability_summary": { - "total": 157, - "skills": 76, - "cli_commands": 67, + "total": 160, + "skills": 78, + "cli_commands": 68, "gates": 11, "reference_impls": 3 }, @@ -1402,6 +1425,7 @@ "mine", "notebook", "operator", + "orchestrate", "patterns", "pool", "quick-start", @@ -1451,6 +1475,22 @@ "path": "skills/autodev/", "references": 0 }, + { + "sku": "skill:automation-shape-routing", + "name": "automation-shape-routing", + "type": "skill", + "bounded_context": "BC4", + "hex_role": "supporting", + "tier": "meta", + "purpose": "Front door for agent automation — decide the SHAPE (Workflow vs NTM vs skill), then hand off. Triggers: \"build automation\", \"convert skills to workflows\", \"which shape\".", + "status": "active", + "disposition": "keep", + "consumes": [], + "produces": [], + "drives_commands": [], + "path": "skills/automation-shape-routing/", + "references": 0 + }, { "sku": "skill:beads", "name": "beads", @@ -2884,7 +2924,7 @@ "bounded_context": "BC4", "hex_role": "supporting", "tier": "meta", - "purpose": "Scaffold or absorb new SKILL.md files against the unified AgentOps template.", + "purpose": "Scaffold or absorb new SKILL.md files against the unified AgentOps template. Triggers: \"create a skill\", \"scaffold skill\", \"absorb external skill\", \"new skill\".", "status": "active", "disposition": "update", "consumes": [], @@ -3170,6 +3210,24 @@ "path": "skills/vibe/", "references": 22 }, + { + "sku": "skill:workflow-builder", + "name": "workflow-builder", + "type": "skill", + "bounded_context": "BC4", + "hex_role": "supporting", + "tier": "meta", + "purpose": "Scaffold a new Claude Workflow script — deterministic multi-agent orchestration. 
Triggers: \"build a workflow\", \"create a workflow\", \"scaffold workflow\", \"author a workflow\".", + "status": "active", + "disposition": "keep", + "consumes": [], + "produces": [ + "workflow-script" + ], + "drives_commands": [], + "path": "skills/workflow-builder/", + "references": 0 + }, { "sku": "cmd:ao.agents", "name": "ao agents", @@ -4177,6 +4235,23 @@ ], "path": "cli/cmd/ao/" }, + { + "sku": "cmd:ao.orchestrate", + "name": "ao orchestrate", + "type": "cli-command", + "bounded_context": "", + "purpose": "Resolve and inspect the orchestration backend ladder", + "status": "active", + "driven_by_skills": [], + "flags": [ + "--config", + "--dry-run", + "--json", + "--output", + "--verbose" + ], + "path": "cli/cmd/ao/" + }, { "sku": "cmd:ao.patterns", "name": "ao patterns", From 3622fb7790b005f240f1daf5ddabec9bd4ee03a7 Mon Sep 17 00:00:00 2001 From: Boden Fuller Date: Fri, 29 May 2026 13:31:55 -0400 Subject: [PATCH 6/8] fix(ci): snake_case json tags on SelectionTrace + cli-surface inventory (ag-nk67) - SelectionTrace json tags (chosen/reason/considered) so 'ao orchestrate select --json' emits snake_case -> satisfies cli-json-flag-machine-contracts canary - regenerate docs/cli-surface.{md,json} to include the new orchestrate command ag-nk67 #orchestration-foundation --- cli/internal/ports/orchestration.go | 6 ++--- docs/cli-surface.json | 35 +++++++++++++++++++++++++++++ docs/cli-surface.md | 5 +++++ 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/cli/internal/ports/orchestration.go b/cli/internal/ports/orchestration.go index 2c5749971..56d73355f 100644 --- a/cli/internal/ports/orchestration.go +++ b/cli/internal/ports/orchestration.go @@ -41,9 +41,9 @@ type WorkSpec struct { // that produced it. Considered lists the backends the ladder evaluated, // in order, for auditability; it is safe for callers to mutate. 
type SelectionTrace struct { - Chosen Backend - Reason string - Considered []Backend + Chosen Backend `json:"chosen"` + Reason string `json:"reason"` + Considered []Backend `json:"considered"` } // OrchestrationPort selects an orchestration backend for a unit of diff --git a/docs/cli-surface.json b/docs/cli-surface.json index c7f38928b..ce51cbb8a 100644 --- a/docs/cli-surface.json +++ b/docs/cli-surface.json @@ -99,6 +99,20 @@ "kind": "leaf", "reason": "Covered by release smoke tests, direct command tests, or command handler tests." }, + { + "category": "public-tested", + "command": "beads scenarios extract", + "coverage_status": "covered", + "kind": "leaf", + "reason": "Covered by release smoke tests, direct command tests, or command handler tests." + }, + { + "category": "public-tested", + "command": "beads scenarios validate", + "coverage_status": "covered", + "kind": "leaf", + "reason": "Covered by release smoke tests, direct command tests, or command handler tests." + }, { "category": "public-tested", "command": "beads stale-claims", @@ -1030,6 +1044,13 @@ "kind": "leaf", "reason": "Covered by release smoke tests, direct command tests, or command handler tests." }, + { + "category": "manual-only", + "command": "orchestrate select", + "coverage_status": "missing", + "kind": "leaf", + "reason": "No smoke, direct test, or allowlist coverage found." + }, { "category": "public-tested", "command": "patterns repair-filenames", @@ -1198,6 +1219,13 @@ "kind": "leaf", "reason": "Covered by release smoke tests, direct command tests, or command handler tests." }, + { + "category": "public-tested", + "command": "reconcile", + "coverage_status": "covered", + "kind": "leaf", + "reason": "Covered by release smoke tests, direct command tests, or command handler tests." + }, { "category": "manual-only", "command": "registry list", @@ -1394,6 +1422,13 @@ "kind": "leaf", "reason": "Covered by release smoke tests, direct command tests, or command handler tests." 
}, + { + "category": "public-tested", + "command": "skills find", + "coverage_status": "covered", + "kind": "leaf", + "reason": "Covered by release smoke tests, direct command tests, or command handler tests." + }, { "category": "public-tested", "command": "status", diff --git a/docs/cli-surface.md b/docs/cli-surface.md index d8974945e..459e1cba7 100644 --- a/docs/cli-surface.md +++ b/docs/cli-surface.md @@ -18,6 +18,8 @@ | `ao beads harvest` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | | `ao beads lint` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | | `ao beads resume` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | +| `ao beads scenarios extract` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | +| `ao beads scenarios validate` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | | `ao beads stale-claims` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | | `ao beads verify` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | | `ao capabilities` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | @@ -151,6 +153,7 @@ | `ao notebook update` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | | `ao operator list` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | | `ao operator record` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. 
| +| `ao orchestrate select` | `manual-only` | `missing` | No smoke, direct test, or allowlist coverage found. | | `ao patterns repair-filenames` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | | `ao pool auto-promote` | `public-tested` | `allowlisted` | Covered by pool command tests. | | `ao pool batch-promote` | `public-tested` | `allowlisted` | Covered by batch promote tests. | @@ -175,6 +178,7 @@ | `ao ratchet status` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | | `ao ratchet trace` | `public-tested` | `allowlisted` | Covered through trace command behavior. | | `ao ratchet validate` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | +| `ao reconcile` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | | `ao registry list` | `manual-only` | `missing` | No smoke, direct test, or allowlist coverage found. | | `ao retrieval-bench` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | | `ao robot-docs` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | @@ -203,6 +207,7 @@ | `ao sessions index` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | | `ao sessions spawn` | `manual-only` | `missing` | No smoke, direct test, or allowlist coverage found. | | `ao skills check` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | +| `ao skills find` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. 
| | `ao status` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | | `ao trace` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | | `ao validate` | `public-tested` | `covered` | Covered by release smoke tests, direct command tests, or command handler tests. | From 0f7551186f44a4c3a821cd43f999107a66f5ead3 Mon Sep 17 00:00:00 2001 From: Boden Fuller Date: Fri, 29 May 2026 13:45:04 -0400 Subject: [PATCH 7/8] fix(ci): bump cli-help-matrix golden counts for orchestrate command (ag-nk67) ao orchestrate (+1 top) + select (+1 sub) -> top=70 sub=175 all=245. Update cli-command-surface-matrix.json expectation + smoke fixture thresholds. ag-nk67 #orchestration-foundation --- evals/agentops-core/cli-command-surface-matrix.json | 2 +- evals/agentops-core/fixtures/cli-command-surface-smoke.sh | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/evals/agentops-core/cli-command-surface-matrix.json b/evals/agentops-core/cli-command-surface-matrix.json index 58d88d329..c996411c2 100644 --- a/evals/agentops-core/cli-command-surface-matrix.json +++ b/evals/agentops-core/cli-command-surface-matrix.json @@ -41,7 +41,7 @@ }, "expectations": [ {"type": "exit_code", "value": 0}, - {"type": "stdout_contains", "value": "cli-command-headings: top=69 sub=174 all=243"}, + {"type": "stdout_contains", "value": "cli-command-headings: top=70 sub=175 all=245"}, {"type": "stdout_contains", "value": "cli-help-matrix-ok"} ], "dimensions": ["correctness", "runtime_compatibility", "artifact_quality"], diff --git a/evals/agentops-core/fixtures/cli-command-surface-smoke.sh b/evals/agentops-core/fixtures/cli-command-surface-smoke.sh index 2547de57d..82f418dcd 100755 --- a/evals/agentops-core/fixtures/cli-command-surface-smoke.sh +++ b/evals/agentops-core/fixtures/cli-command-surface-smoke.sh @@ -17,7 +17,7 @@ top_count="$(rg -c '^### `ao ' "$DOCS_PATH")" 
sub_count="$(rg -c '^#### `ao ' "$DOCS_PATH")" all_count="$(rg -c '^#{3,4} `ao ' "$DOCS_PATH")" -if [[ "$top_count" != "69" || "$sub_count" != "174" || "$all_count" != "243" ]]; then +if [[ "$top_count" != "70" || "$sub_count" != "175" || "$all_count" != "245" ]]; then printf 'unexpected command heading counts: top=%s sub=%s all=%s\n' "$top_count" "$sub_count" "$all_count" >&2 exit 1 fi @@ -25,7 +25,7 @@ fi # shellcheck disable=SC2016 # literal backticks delimit generated Markdown command headings. mapfile -t commands < <(rg '^#{3,4} `ao ' "$DOCS_PATH" | sed -E 's/^.*`([^`]+)`.*/\1/') -if [[ "${#commands[@]}" -ne 243 ]]; then +if [[ "${#commands[@]}" -ne 245 ]]; then printf 'unexpected command matrix size: %s\n' "${#commands[@]}" >&2 exit 1 fi From 89c900385014cc65dbdb76ed59965beeb7ec5f0c Mon Sep 17 00:00:00 2001 From: Boden Fuller Date: Fri, 29 May 2026 14:22:53 -0400 Subject: [PATCH 8/8] fix(ci): add orchestrate to cobra expectedCmds registration test (ag-nk67) --- cli/cmd/ao/cobra_commands_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cli/cmd/ao/cobra_commands_test.go b/cli/cmd/ao/cobra_commands_test.go index 07b90e569..5f9bd387b 100644 --- a/cli/cmd/ao/cobra_commands_test.go +++ b/cli/cmd/ao/cobra_commands_test.go @@ -370,7 +370,7 @@ func TestCobraCommandTreeRegistration(t *testing.T) { "defrag", "demo", "doctor", "eval", "evolve", "extract", "feedback", "feedback-loop", "findings", "flywheel", "forge", "gate", "goals", "handoff", "harness", "harvest", "index", "init", "inject", "knowledge", "lookup", "loop", "maturity", - "memory", "metrics", "migrate", "mind", "mine", "notebook", "operator", "patterns", + "memory", "metrics", "migrate", "mind", "mine", "notebook", "operator", "orchestrate", "patterns", "pool", "quick-start", "ratchet", "reconcile", "retrieval-bench", "robot-docs", "rpi", "registry", "scenario", "scope", "search", "seed", "session", "session-outcome", "sessions", "skills", "status", "store", "task-feedback", 
"task-status", "task-sync", "temper", @@ -429,7 +429,7 @@ func TestCobraExpectedCmdsMatchRegistration(t *testing.T) { "defrag", "demo", "doctor", "eval", "evolve", "extract", "feedback", "feedback-loop", "findings", "flywheel", "forge", "gate", "goals", "handoff", "harness", "harvest", "index", "init", "inject", "knowledge", "lookup", "loop", "maturity", - "memory", "metrics", "migrate", "mind", "mine", "notebook", "operator", "patterns", + "memory", "metrics", "migrate", "mind", "mine", "notebook", "operator", "orchestrate", "patterns", "pool", "quick-start", "ratchet", "reconcile", "retrieval-bench", "robot-docs", "rpi", "registry", "scenario", "scope", "search", "seed", "session", "session-outcome", "sessions", "skills", "status", "store", "task-feedback", "task-status", "task-sync", "temper",