boshu2 · boshu2 · May 29, 2026 · May 29, 2026
@@ -0,0 +1,93 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+
+	"github.com/spf13/cobra"
+)
+
+// outcomesScore is the score payload returned by an Outcomes grader (cloud
+// Managed Agent, async job, or the local Codex/NTM path). It carries only
+// aggregate + per-criterion scores — never holdout answers.
+//
+// Fields, as consumed by ingestOutcomesScore:
+//   - SourceTaskID is echoed (quoted) in the verdict's key_insight.
+//   - JudgeContentHash is decoded from the payload but not read by the
+//     ingest mapping in this file.
+//   - Aggregate is the overall score compared against Threshold to pick the
+//     verdict band; it is also emitted as satisfaction_score.
+//   - Threshold is the rubric bar; a value <= 0 (including an absent key,
+//     which decodes to 0) is replaced by 1.0 during ingest.
+//   - CriterionScores holds the per-criterion scores that become
+//     satisfaction_breakdown.
+type outcomesScore struct {
+	SourceTaskID     string             `json:"source_task_id"`
+	JudgeContentHash string             `json:"judge_content_hash"`
+	Aggregate        float64            `json:"aggregate"`
+	Threshold        float64            `json:"threshold"`
+	CriterionScores  map[string]float64 `json:"criterion_scores"`
+}
+
+// outcomesVerdict is the one verdict record (skills/council/schemas/verdict.json
+// shape). ao eval outcomes ingest emits this so an Outcomes score feeds the
+// Knowledge Flywheel exactly as a local `ao eval run` verdict does — no second
+// verdict format, no alternate bar.
+//
+// Encoding notes:
+//   - No field uses omitempty, so every key is always present in the JSON
+//     output; encoding/json writes them in declaration order.
+//   - SatisfactionScore is a pointer: a nil value encodes as JSON null.
+//   - SatisfactionBreakdown and Findings encode as null when nil and as
+//     {} / [] when empty but non-nil.
+//   - Verdict is "PASS", "WARN" or "FAIL" when produced by
+//     ingestOutcomesScore.
+type outcomesVerdict struct {
+	Verdict               string             `json:"verdict"`
+	Confidence            string             `json:"confidence"`
+	KeyInsight            string             `json:"key_insight"`
+	Recommendation        string             `json:"recommendation"`
+	SchemaVersion         int                `json:"schema_version"`
+	SatisfactionScore     *float64           `json:"satisfaction_score"`
+	SatisfactionBreakdown map[string]float64 `json:"satisfaction_breakdown"`
+	Findings              []map[string]any   `json:"findings"`
+}
+
+// ingestOutcomesScore maps an Outcomes score onto the council verdict record.
+// PASS when aggregate meets the rubric threshold, FAIL when it falls below 70%
+// of it, WARN in between. The aggregate becomes satisfaction_score and the
+// per-criterion scores become satisfaction_breakdown.
+func ingestOutcomesScore(s outcomesScore) outcomesVerdict {
+	threshold := s.Threshold
+	if threshold <= 0 {
+		threshold = 1.0
+	}
+	verdict := "WARN"
+	switch {
+	case s.Aggregate >= threshold:
+		verdict = "PASS"
+	case s.Aggregate < threshold*0.7:
+		verdict = "FAIL"
+	}
+	agg := s.Aggregate
+	return outcomesVerdict{
+		Verdict:    verdict,
+		Confidence: "HIGH",
+		KeyInsight: fmt.Sprintf("Outcomes aggregate %.4f vs threshold %.4f for task %q",
+			s.Aggregate, threshold, s.SourceTaskID),
+		Recommendation:        fmt.Sprintf("Outcomes grade ingested as %s; feeds the corpus via the eval-verdict pipeline.", verdict),
+		SchemaVersion:         4,
+		SatisfactionScore:     &agg,
+		SatisfactionBreakdown: s.CriterionScores,
+		Findings:              []map[string]any{},
+	}
+}
+
+// evalOutcomesIngestCmd is the `ingest` subcommand. It requires exactly one
+// positional argument, the path to a score.json payload, and delegates to
+// runEvalOutcomesIngest.
+var evalOutcomesIngestCmd = &cobra.Command{
+	Use:   "ingest <score.json>",
+	Short: "Ingest an Outcomes score payload into the one council verdict record",
+	Args:  cobra.ExactArgs(1),
+	RunE:  runEvalOutcomesIngest,
+}
+
+// runEvalOutcomesIngest implements `ao eval outcomes ingest <score.json>`.
+//
+// It reads the score payload from the file named by args[0], decodes it into
+// an outcomesScore, maps it through ingestOutcomesScore, and writes the
+// resulting verdict as two-space-indented JSON followed by a newline to the
+// command's output writer.
+//
+// Read, parse, encode and write failures are all returned wrapped with
+// context, so a verdict that could not be fully written is reported as an
+// error instead of being silently dropped.
+func runEvalOutcomesIngest(cmd *cobra.Command, args []string) error {
+	path := args[0]
+
+	raw, err := os.ReadFile(path)
+	if err != nil {
+		return fmt.Errorf("read %s: %w", path, err)
+	}
+
+	var score outcomesScore
+	if err := json.Unmarshal(raw, &score); err != nil {
+		return fmt.Errorf("parse %s: %w", path, err)
+	}
+
+	out, err := json.MarshalIndent(ingestOutcomesScore(score), "", "  ")
+	if err != nil {
+		return fmt.Errorf("encode verdict: %w", err)
+	}
+
+	// The verdict is this command's only output; surface a failed write.
+	if _, err := fmt.Fprintln(cmd.OutOrStdout(), string(out)); err != nil {
+		return fmt.Errorf("write verdict: %w", err)
+	}
+	return nil
+}
+
+// init registers the ingest subcommand under evalOutcomesCmd, which is
+// declared elsewhere in this package.
+func init() {
+	evalOutcomesCmd.AddCommand(evalOutcomesIngestCmd)
+}
@@ -0,0 +1,53 @@
+package main
+
+import "testing"
+
+// TestIngestOutcomesScore_ProducesVerdictRecord checks the happy path: a score
+// whose aggregate clears the rubric threshold is mapped to a PASS record in the
+// single council verdict shape (skills/council/schemas/verdict.json), with the
+// aggregate, per-criterion breakdown, schema version and findings populated.
+func TestIngestOutcomesScore_ProducesVerdictRecord(t *testing.T) {
+	criteria := map[string]float64{"accuracy": 0.95, "concision": 0.85}
+	record := ingestOutcomesScore(outcomesScore{
+		SourceTaskID:     "task-capital-cities",
+		JudgeContentHash: "sha256:abc123",
+		Aggregate:        0.92,
+		Threshold:        0.8,
+		CriterionScores:  criteria,
+	})
+
+	if got := record.Verdict; got != "PASS" {
+		t.Errorf("Verdict = %q, want PASS", got)
+	}
+	if score := record.SatisfactionScore; score == nil || *score != 0.92 {
+		t.Errorf("SatisfactionScore = %v, want 0.92", score)
+	}
+	if got := record.SatisfactionBreakdown["accuracy"]; got != 0.95 {
+		t.Errorf("breakdown[accuracy] = %v, want 0.95", got)
+	}
+	if got := record.SchemaVersion; got != 4 {
+		t.Errorf("SchemaVersion = %d, want 4", got)
+	}
+	// A nil slice would encode as JSON null rather than [].
+	if record.Findings == nil {
+		t.Error("Findings must be a non-nil (possibly empty) slice for verdict.json validity")
+	}
+}
+
+// TestIngestOutcomesScore_VerdictBands pins the three verdict bands: PASS at or
+// above the threshold (the bound is inclusive), FAIL strictly below 70% of the
+// threshold, WARN in between. It also covers the fallback to a threshold of 1.0
+// when the payload's threshold is zero or negative.
+//
+// Each case runs as a named subtest so a failure identifies the band at fault.
+func TestIngestOutcomesScore_VerdictBands(t *testing.T) {
+	cases := []struct {
+		name      string
+		agg       float64
+		threshold float64
+		want      string
+	}{
+		{"above threshold", 0.90, 0.8, "PASS"},
+		{"exactly at threshold", 0.80, 0.8, "PASS"},
+		{"below threshold but within warn band", 0.70, 0.8, "WARN"},
+		{"far below threshold", 0.40, 0.8, "FAIL"},
+		// Threshold <= 0 is replaced by 1.0, so the fail bound is 0.7.
+		{"zero threshold defaults to 1.0 and passes at 1.0", 1.00, 0, "PASS"},
+		{"zero threshold warns at the fail bound", 0.70, 0, "WARN"},
+		{"zero threshold fails just under the fail bound", 0.69, 0, "FAIL"},
+		{"negative threshold defaults to 1.0", 0.95, -1, "WARN"},
+	}
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			got := ingestOutcomesScore(outcomesScore{Aggregate: c.agg, Threshold: c.threshold}).Verdict
+			if got != c.want {
+				t.Errorf("aggregate %.2f, threshold %.2f: Verdict = %q, want %q",
+					c.agg, c.threshold, got, c.want)
+			}
+		})
+	}
+}
@@ -1448,6 +1448,14 @@ Compile a holdout-safe Outcomes rubric payload from a locked Task + criteria
 ao eval outcomes compile <input.json> [flags]
 ```
 
+##### `ao eval outcomes ingest`
+
+Ingest an Outcomes score payload into the one council verdict record
+
+```
+ao eval outcomes ingest <score.json> [flags]
+```
+
 #### `ao eval run`
 
 Run a deterministic eval suite

@@ -105,3 +105,4 @@ public-stateful-fixture-needed|overnight curator enqueue|Writes jobs into a conf
 public-stateful-fixture-needed|overnight curator event|Appends events to a configured local curator ledger.
 manual-only|overnight curator status|Depends on local curator, runner, and Ollama environment.
 public-stateful-fixture-needed|eval outcomes compile|Holdout-safe rubric projection; core logic unit-tested (eval_outcomes_test.go compileOutcomesRubric); CLI smoke needs an input.json fixture (follow-up).
+public-stateful-fixture-needed|eval outcomes ingest|Maps an Outcomes score to the council verdict record; core logic unit-tested (eval_outcomes_ingest_test.go ingestOutcomesScore); CLI smoke needs a score.json fixture (follow-up).