Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 107 additions & 87 deletions dashboard/dist/index.html

Large diffs are not rendered by default.

6 changes: 4 additions & 2 deletions dashboard/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,16 @@
"class-variance-authority": "^0.7.1",
"clsx": "^2.1.1",
"lucide-react": "^1.11.0",
"react": "^19.1.0",
"react-dom": "^19.1.0",
"react-markdown": "^10.1.0",
"recharts": "^3.8.1",
"remark-gfm": "^4.0.1",
"tailwind-merge": "^3.5.0",
"tw-animate-css": "^1.4.0"
},
"peerDependencies": {
"react": "^19.1.0",
"react-dom": "^19.1.0"
},
"devDependencies": {
"@tailwindcss/vite": "^4.2.4",
"@types/react": "^19.1.6",
Expand Down
57 changes: 46 additions & 11 deletions dashboard/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import { AveragesTable } from "./components/AveragesTable.tsx";
import { CompareView } from "./components/CompareView.tsx";
import { ConversationView } from "./components/ConversationView.tsx";
import { DetailPanel } from "./components/DetailPanel.tsx";
import { EvalScoresView, hasEvalScores } from "./components/EvalScoresView.tsx";
import { ProgressBar } from "./components/ProgressBar.tsx";
import { RubricView } from "./components/RubricView.tsx";
import { ScenarioTable } from "./components/ScenarioTable.tsx";
Expand Down Expand Up @@ -186,6 +187,14 @@ function scenarioDetail(scenario: ServerScenario): ScenarioDetail {
judge_dimension_scores: (scenario.judgeDimensionScores ?? []).map(
normalizeDimension,
),
retrieval_scores: (scenario.retrievalScores ??
[]) as unknown as ScenarioDetail["retrieval_scores"],
demotion_scores: (scenario.demotionScores ??
[]) as unknown as ScenarioDetail["demotion_scores"],
procedure_scores: (scenario.procedureScores ??
[]) as unknown as ScenarioDetail["procedure_scores"],
dedup_scores: (scenario.dedupScores ??
[]) as unknown as ScenarioDetail["dedup_scores"],
expectations: scenario.expectations,
error: scenario.error,
counts: scenario.counts
Expand Down Expand Up @@ -992,6 +1001,14 @@ function ScenarioDetailView({
<RubricView detail={detail} />
</Card>
</div>
{hasEvalScores(detail) && (
<Card className="mt-4 p-4">
<div className="text-xs uppercase tracking-wider text-muted-foreground font-semibold mb-3">
Eval scores
</div>
<EvalScoresView detail={detail} />
</Card>
)}
</>
);
}
Expand Down Expand Up @@ -1463,7 +1480,10 @@ export function StartRunView({
onChange={(e) => setRepeat(Number(e.currentTarget.value))}
/>
</Field>
<Field label="Parallel limit">
<Field
label="Parallel limit"
hint="Max concurrent scenarios when parallel is on. 2-4 is typical; higher = faster but more LLM cost spikes."
>
<TextInput
type="number"
min={1}
Expand All @@ -1475,19 +1495,34 @@ export function StartRunView({
/>
</Field>
</div>
<div className="flex flex-wrap gap-4 items-center">
<Checkbox checked={dryRun} onChange={setDryRun} label="Dry run" />
<Checkbox
checked={parallelEnabled}
onChange={setParallelEnabled}
<div className="grid grid-cols-1 sm:grid-cols-3 gap-3">
<Field
label="Dry run"
hint="Records run + scenario rows but skips the live adapter, judge, and scorers. Use to validate config without spending LLM tokens."
>
<Checkbox checked={dryRun} onChange={setDryRun} label="Enabled" />
</Field>
<Field
label="Parallel"
/>
{!presetId ? (
hint="Run multiple scenarios concurrently. Scenarios still complete in order, but several run at a time (set the limit above)."
>
<Checkbox
checked={saveAsPreset}
onChange={setSaveAsPreset}
label="Save as preset"
checked={parallelEnabled}
onChange={setParallelEnabled}
label="Enabled"
/>
</Field>
{!presetId ? (
<Field
label="Save as preset"
hint="Save this scenario selection + settings as a reusable preset visible on the Presets page."
>
<Checkbox
checked={saveAsPreset}
onChange={setSaveAsPreset}
label="Enabled"
/>
</Field>
) : null}
</div>
{saveAsPreset && !presetId ? (
Expand Down
4 changes: 4 additions & 0 deletions dashboard/src/api/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ export type ServerScenario = {
targetEvents?: Array<Record<string, unknown>>;
checkpoints?: Array<Record<string, unknown>>;
judgeDimensionScores?: Array<Record<string, unknown>>;
retrievalScores?: Array<Record<string, unknown>>;
demotionScores?: Array<Record<string, unknown>>;
procedureScores?: Array<Record<string, unknown>>;
dedupScores?: Array<Record<string, unknown>>;
expectations?: unknown;
error?: unknown;
counts?: {
Expand Down
23 changes: 17 additions & 6 deletions dashboard/src/components/DetailPanel.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@ import { useState } from "react";
import { scorePct } from "../helpers.ts";
import type { ScenarioDetail } from "../types.ts";
import { ConversationView } from "./ConversationView.tsx";
import { EvalScoresView, hasEvalScores } from "./EvalScoresView.tsx";
import { RubricView } from "./RubricView.tsx";

/** Props accepted by the DetailPanel component. */
interface Props {
/** Scenario detail that the panel reads `status` from and forwards to the conversation, rubric, and eval-score views. */
detail: ScenarioDetail;
/** Zero-argument callback; presumably invoked to dismiss the panel — call site not visible in this excerpt, confirm. */
onClose: () => void;
}

/** Keys for the DetailPanel tabs; each key selects which view (conversation, rubric, or eval scores) renders in the panel body. */
type TabKey = "conversation" | "rubric" | "evals";

export function DetailPanel({ detail, onClose }: Props) {
const [tab, setTab] = useState<"conversation" | "rubric">("conversation");
const evalsVisible = hasEvalScores(detail);
const [tab, setTab] = useState<TabKey>("conversation");

const isRunning = detail.status === "running";
const scoreLabel =
Expand Down Expand Up @@ -109,14 +113,21 @@ export function DetailPanel({ detail, onClose }: Props) {
>
Rubric
</button>
{evalsVisible && (
<button
type="button"
className={`tab-btn${tab === "evals" ? " tab-active" : ""}`}
onClick={() => setTab("evals")}
>
Eval scores
</button>
)}
</div>
</div>
<div className="detail-body">
{tab === "conversation" ? (
<ConversationView detail={detail} />
) : (
<RubricView detail={detail} />
)}
{tab === "conversation" && <ConversationView detail={detail} />}
{tab === "rubric" && <RubricView detail={detail} />}
{tab === "evals" && <EvalScoresView detail={detail} />}
</div>
</div>
</div>
Expand Down
Loading
Loading