Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -9,81 +9,138 @@ interface EvaluatorNamesCellProps {
runId: string | undefined
}

/** Identifiers carried for one evaluator reference; each of them may be absent. */
interface EvaluatorEntry {
    /** Evaluator ID, or null when not available. */
    evaluatorId: string | null
    /** Evaluator revision ID, or null when not available. */
    evaluatorRevisionId: string | null
    /** Evaluator slug, or null when not available. */
    evaluatorSlug: string | null
}

/**
 * Picks a key for an evaluator entry.
 *
 * The identifiers are checked in a fixed order (evaluatorId, then
 * evaluatorRevisionId, then evaluatorSlug) and the first one that is neither
 * null nor undefined wins. An empty string counts as present. When none is
 * set, the literal "unknown" is returned.
 */
function getEvaluatorEntryKey(entry: EvaluatorEntry) {
    const candidates = [entry.evaluatorId, entry.evaluatorRevisionId, entry.evaluatorSlug]
    for (const candidate of candidates) {
        if (candidate != null) return candidate
    }
    return "unknown"
}
Comment on lines +18 to +20
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major | ⚡ Quick win

Dedup and React keys should prioritize evaluatorRevisionId (not evaluatorId).

Current key precedence merges multiple revisions under one evaluator ID, so distinct evaluator revisions can disappear from the cell/tooltip list.

Suggested fix
 function getEvaluatorEntryKey(entry: EvaluatorEntry) {
-    return entry.evaluatorId ?? entry.evaluatorRevisionId ?? entry.evaluatorSlug ?? "unknown"
+    if (entry.evaluatorRevisionId) return `rev:${entry.evaluatorRevisionId}`
+    if (entry.evaluatorId) return `id:${entry.evaluatorId}`
+    if (entry.evaluatorSlug) return `slug:${entry.evaluatorSlug}`
+    return "unknown"
 }
@@
-        const key = col.evaluatorId ?? col.evaluatorRevisionId ?? col.evaluatorSlug
+        const key =
+            (col.evaluatorRevisionId && `rev:${col.evaluatorRevisionId}`) ||
+            (col.evaluatorId && `id:${col.evaluatorId}`) ||
+            (col.evaluatorSlug && `slug:${col.evaluatorSlug}`) ||
+            null

Also applies to: 45-46, 73-73, 95-95


/**
* Cell that renders evaluator names for a queue.
*
* Resolution chain:
* 1. Read evaluation run data via runId (batch-fetched)
* 2. Extract evaluator workflow IDs + slugs from annotation step references
* 3. Read evaluator entity data for each ID (batch-fetched)
* 4. Display evaluator names, falling back to slug from step refs then truncated ID
*/
const EvaluatorNamesCell = memo(function EvaluatorNamesCell({runId}: EvaluatorNamesCellProps) {
    // Without a run ID there is nothing to look up, so the cell renders nothing;
    // otherwise rendering is delegated to EvaluatorIdsBridge for that run.
    return runId ? <EvaluatorIdsBridge runId={runId} /> : null
})

/** Reads evaluation run → extracts evaluator IDs → delegates to name resolution */
/** Reads evaluation run → extracts evaluator IDs + slugs → delegates to name resolution */
const EvaluatorIdsBridge = memo(function EvaluatorIdsBridge({runId}: {runId: string}) {
const rawQuery = useAtomValue(evaluationRunMolecule.atoms.query(runId))
const evaluatorIds = useAtomValue(evaluationRunMolecule.selectors.evaluatorIds(runId))
const columnDefs = useAtomValue(evaluationRunMolecule.selectors.annotationColumnDefs(runId))

// Deduplicate by the first available identifier (evaluatorId, then evaluatorRevisionId, then evaluatorSlug), preserving order
const evaluatorEntries: EvaluatorEntry[] = []
const seen = new Set<string>()
for (const col of columnDefs) {
const key = col.evaluatorId ?? col.evaluatorRevisionId ?? col.evaluatorSlug
if (key && !seen.has(key)) {
seen.add(key)
evaluatorEntries.push({
evaluatorId: col.evaluatorId,
evaluatorRevisionId: col.evaluatorRevisionId,
evaluatorSlug: col.evaluatorSlug,
})
}
}

if (rawQuery.isPending && evaluatorIds.length === 0) {
if (rawQuery.isPending && evaluatorEntries.length === 0) {
return <Skeleton.Button active size="small" style={{width: 80, height: 22}} />
}

if (evaluatorIds.length === 0) return null
if (evaluatorEntries.length === 0) return null

return <EvaluatorNamesList evaluatorIds={evaluatorIds} />
return <EvaluatorNamesList evaluatorEntries={evaluatorEntries} />
})

/** Resolves evaluator names from IDs and renders tags */
/** Resolves evaluator names from IDs+slugs and renders tags */
const EvaluatorNamesList = memo(function EvaluatorNamesList({
evaluatorIds,
evaluatorEntries,
}: {
evaluatorIds: string[]
evaluatorEntries: EvaluatorEntry[]
}) {
const names = evaluatorIds.map((id) => <EvaluatorNameTag key={id} evaluatorId={id} />)
const names = evaluatorEntries.map((entry) => (
<EvaluatorNameTag
key={getEvaluatorEntryKey(entry)}
evaluatorId={entry.evaluatorId}
evaluatorRevisionId={entry.evaluatorRevisionId}
fallbackSlug={entry.evaluatorSlug}
/>
))

if (names.length <= 2) {
return <div className="flex items-center gap-1 overflow-hidden">{names}</div>
}

const visible = names.slice(0, 2)
const remainingIds = evaluatorIds.slice(2)
const remainingEntries = evaluatorEntries.slice(2)

return (
<div className="flex items-center gap-1 overflow-hidden">
{visible}
<Tooltip
title={
<div className="flex flex-col gap-1">
{remainingIds.map((id) => (
<EvaluatorNameSpan key={id} evaluatorId={id} />
{remainingEntries.map((entry) => (
<EvaluatorNameSpan
key={getEvaluatorEntryKey(entry)}
evaluatorId={entry.evaluatorId}
evaluatorRevisionId={entry.evaluatorRevisionId}
fallbackSlug={entry.evaluatorSlug}
/>
))}
</div>
}
>
<Tag className="cursor-default">+{remainingIds.length}</Tag>
<Tag className="cursor-default">+{remainingEntries.length}</Tag>
</Tooltip>
</div>
)
})

/** Single evaluator name tag — subscribes to evaluator entity for its name */
const EvaluatorNameTag = memo(function EvaluatorNameTag({evaluatorId}: {evaluatorId: string}) {
const name = useAtomValue(workflowMolecule.selectors.name(evaluatorId))
const slug = useAtomValue(workflowMolecule.selectors.slug(evaluatorId))
const EvaluatorNameTag = memo(function EvaluatorNameTag({
evaluatorId,
evaluatorRevisionId,
fallbackSlug,
}: {
evaluatorId: string | null
evaluatorRevisionId: string | null
fallbackSlug: string | null
}) {
const lookupId = evaluatorRevisionId ?? evaluatorId ?? ""
const name = useAtomValue(workflowMolecule.selectors.name(lookupId))
const slug = useAtomValue(workflowMolecule.selectors.slug(lookupId))
const fallbackId = evaluatorId ?? lookupId

return <Tag>{name || slug || evaluatorId.slice(0, 8)}</Tag>
return <Tag>{name || fallbackSlug || slug || fallbackId.slice(0, 8)}</Tag>
})

/** Single evaluator name span (for tooltip) */
const EvaluatorNameSpan = memo(function EvaluatorNameSpan({evaluatorId}: {evaluatorId: string}) {
const name = useAtomValue(workflowMolecule.selectors.name(evaluatorId))
const slug = useAtomValue(workflowMolecule.selectors.slug(evaluatorId))
const EvaluatorNameSpan = memo(function EvaluatorNameSpan({
evaluatorId,
evaluatorRevisionId,
fallbackSlug,
}: {
evaluatorId: string | null
evaluatorRevisionId: string | null
fallbackSlug: string | null
}) {
const lookupId = evaluatorRevisionId ?? evaluatorId ?? ""
const name = useAtomValue(workflowMolecule.selectors.name(lookupId))
const slug = useAtomValue(workflowMolecule.selectors.slug(lookupId))
const fallbackId = evaluatorId ?? lookupId

return <span>{name || slug || evaluatorId.slice(0, 8)}</span>
return <span>{name || fallbackSlug || slug || fallbackId.slice(0, 8)}</span>
})

export default EvaluatorNamesCell
Original file line number Diff line number Diff line change
Expand Up @@ -194,11 +194,12 @@ const StringField = memo(function StringField({
className={`flex flex-col gap-1 playground-property-control ${readOnly ? READONLY_CLASS : ""}`}
>
<Typography.Text className="playground-property-control-label">{label}</Typography.Text>
<Input
<Input.TextArea
value={value ?? ""}
onChange={(e) => onChange(e.target.value || null)}
disabled={isDisabled}
placeholder="Enter value"
autoSize={{minRows: 2, maxRows: 6}}
/>
</div>
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -397,12 +397,14 @@ const AnnotationColumnHeader = memo(function AnnotationColumnHeader({
}: {
def: AnnotationColumnDef
}) {
const name = useAtomValue(workflowMolecule.selectors.name(def.evaluatorId ?? ""))
const slug = useAtomValue(workflowMolecule.selectors.slug(def.evaluatorId ?? ""))
const displayName = name || slug || def.evaluatorSlug || def.columnName || def.stepKey
const evaluatorLookupId = def.evaluatorRevisionId ?? def.evaluatorId ?? ""
const name = useAtomValue(workflowMolecule.selectors.name(evaluatorLookupId))
const slug = useAtomValue(workflowMolecule.selectors.slug(evaluatorLookupId))
const displaySlug = def.evaluatorSlug || slug
const displayName = name || displaySlug || def.columnName || def.stepKey

return (
<Tooltip title={slug ? `${displayName} (${slug})` : displayName}>
<Tooltip title={displaySlug ? `${displayName} (${displaySlug})` : displayName}>
<span className="truncate">{displayName}</span>
</Tooltip>
)
Expand All @@ -423,9 +425,11 @@ const AnnotationGroupHeader = memo(function AnnotationGroupHeader({
isCollapsed: boolean
onToggle: () => void
}) {
const name = useAtomValue(workflowMolecule.selectors.name(def.evaluatorId ?? ""))
const slug = useAtomValue(workflowMolecule.selectors.slug(def.evaluatorId ?? ""))
const displayName = name || slug || def.evaluatorSlug || def.columnName || def.stepKey
const evaluatorLookupId = def.evaluatorRevisionId ?? def.evaluatorId ?? ""
const name = useAtomValue(workflowMolecule.selectors.name(evaluatorLookupId))
const slug = useAtomValue(workflowMolecule.selectors.slug(evaluatorLookupId))
const displaySlug = def.evaluatorSlug || slug
const displayName = name || displaySlug || def.columnName || def.stepKey
Comment thread
ashrafchowdury marked this conversation as resolved.

const handleClick = useCallback(
(e: React.MouseEvent) => {
Expand Down Expand Up @@ -1166,16 +1170,17 @@ function resolveExportColumnLabel(
if (def) {
if (def.columnType === "annotation") {
const annotationDef = def.annotationDef
const name = annotationDef.evaluatorId
? store.get(workflowMolecule.selectors.name(annotationDef.evaluatorId))
const evaluatorLookupId = annotationDef.evaluatorRevisionId ?? annotationDef.evaluatorId
const name = evaluatorLookupId
? store.get(workflowMolecule.selectors.name(evaluatorLookupId))
: null
const slug = annotationDef.evaluatorId
? store.get(workflowMolecule.selectors.slug(annotationDef.evaluatorId))
const slug = evaluatorLookupId
? store.get(workflowMolecule.selectors.slug(evaluatorLookupId))
: null
return (
name ||
slug ||
annotationDef.evaluatorSlug ||
slug ||
annotationDef.columnName ||
annotationDef.stepKey
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@ const AnnotationSession = ({
try {
await addScenariosToTestset({
targetMode: params.mode === "new" ? "new" : "existing",
commitMessage: params.message,
commitMessage: params.message ?? "",
newTestsetName: params.entityName,
newTestsetSlug: params.entitySlug,
})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -683,7 +683,7 @@ function normalizeResolvedEvaluator(ref: EvaluatorStepRef, evaluator: Workflow):
const variantId = evaluator.workflow_variant_id ?? evaluator.variant_id ?? ref.variantId ?? null
return {
...evaluator,
slug: evaluator.slug ?? ref.slug ?? null,
slug: ref.slug ?? evaluator.slug ?? null,
workflow_id: evaluator.workflow_id ?? ref.workflowId ?? null,
workflow_variant_id: variantId,
variant_id: variantId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,12 @@ const evaluatorRevisionIdsAtom = atom<string[]>((get) => {
return get(evaluationRunMolecule.selectors.evaluatorRevisionIds(runId))
})

/**
 * Derives a slug candidate from a dot-separated step key.
 *
 * The key is split on "." and the last non-empty segment is returned.
 *
 * @param stepKey - Step key to inspect; may be null, undefined, or empty.
 * @returns The last non-empty segment, or null when the key is missing, empty,
 * or made up only of dots.
 */
function deriveEvaluatorSlugFromStepKey(stepKey: string | null | undefined): string | null {
    if (stepKey == null || stepKey === "") return null

    const segments = stepKey.split(".")
    // Walk backwards so empty segments from trailing dots are skipped.
    for (let index = segments.length - 1; index >= 0; index--) {
        const segment = segments[index]
        if (segment) return segment
    }
    return null
}

/**
* Ordered evaluator references from annotation steps.
* Each entry preserves the queue's pinned evaluator revision while keeping the
Expand All @@ -524,6 +530,8 @@ const evaluatorStepRefsAtom = atom<EvaluatorStepRef[]>((get) => {
revisionId: step.references?.evaluator_revision?.id ?? null,
slug:
step.references?.evaluator?.slug ??
step.references?.evaluator_variant?.slug ??
deriveEvaluatorSlugFromStepKey(step.key) ??
step.references?.evaluator_revision?.slug ??
null,
stepKey: step.key ?? null,
Expand All @@ -545,6 +553,8 @@ const testsetSyncEvaluatorsAtom = atom<TestsetSyncEvaluator[]>((get) => {
const name = evaluatorEntity?.name?.trim() || null
const slug =
step.references?.evaluator?.slug ??
step.references?.evaluator_variant?.slug ??
deriveEvaluatorSlugFromStepKey(step.key) ??
evaluatorEntity?.slug ??
step.references?.evaluator_revision?.slug ??
workflowId
Expand Down Expand Up @@ -753,11 +763,14 @@ const META_KEYS = new Set(["tags", "meta"])
type TestcaseColumnGroup = "input" | "output" | "expected"

function getAnnotationDisplayTitle(get: Getter, def: AnnotationColumnDef): string {
const evaluator = def.evaluatorId ? get(workflowMolecule.selectors.data(def.evaluatorId)) : null
const evaluatorLookupId = def.evaluatorRevisionId ?? def.evaluatorId
const evaluator = evaluatorLookupId
? get(workflowMolecule.selectors.data(evaluatorLookupId))
: null
return (
evaluator?.name?.trim() ||
evaluator?.slug?.trim() ||
def.evaluatorSlug?.trim() ||
evaluator?.slug?.trim() ||
def.columnName?.trim() ||
def.stepKey?.trim() ||
""
Expand Down Expand Up @@ -2444,20 +2457,29 @@ async function waitForStoreAtomValue<T>(

function resolveScenarioIdsForAddToTestset(get: Getter): string[] {
const scope = get(addToTestsetScopeAtom)
const queueKind = get(queueKindAtom)

if (scope === "all") {
return get(scenarioIdsAtom)
}

if (scope === "complete") {
if (queueKind === "testcases" && (scope === "all" || scope === "complete")) {
const completed = get(completedScenarioIdsAtom)
const records = get(scenarioRecordsAtom)
return get(scenarioIdsAtom).filter((id) => isScenarioCompleted(id, completed, records))
}

if (scope === "all" || scope === "complete") {
return get(scenarioIdsAtom)
}
return get(addToTestsetScenarioIdsAtom)
}

/**
 * Collects the subset of the given scenario IDs that count as completed.
 *
 * Reads `completedScenarioIdsAtom` and `scenarioRecordsAtom` through `get`, then
 * keeps every ID for which `isScenarioCompleted` returns a truthy result.
 *
 * @param get - Store getter used to read the two atoms.
 * @param scenarioIds - Candidate scenario IDs, checked in the order given.
 * @returns A set containing only the IDs judged completed.
 */
function resolveCompletedScenarioIdsForAnnotationExport(
    get: Getter,
    scenarioIds: string[],
): Set<string> {
    const completedIds = get(completedScenarioIdsAtom)
    const scenarioRecords = get(scenarioRecordsAtom)

    const completedScenarioIds = new Set<string>()
    for (const scenarioId of scenarioIds) {
        if (isScenarioCompleted(scenarioId, completedIds, scenarioRecords)) {
            completedScenarioIds.add(scenarioId)
        }
    }
    return completedScenarioIds
}

function extractExistingColumns(
rows: {data?: Record<string, unknown> | null}[] | null | undefined,
): Set<string> {
Expand Down Expand Up @@ -2988,7 +3010,8 @@ const addScenariosToTestsetAtom = atom(
}),
queueId,
evaluators,
requireAnnotationOutputScenarioIds: new Set(),
requireAnnotationOutputScenarioIds:
resolveCompletedScenarioIdsForAnnotationExport(get, scenarioIds),
setProcessed,
})
: await prepareTestcaseExportRows({
Expand Down Expand Up @@ -3096,8 +3119,14 @@ const canSyncToTestsetAtom = atom<boolean>((get) => {
})

const canAddToTestsetAtom = atom<boolean>((get) => {
const queueKind = get(queueKindAtom)
const ids = get(scenarioIdsAtom)
return ids.length > 0
if (ids.length === 0) return false
if (queueKind === "traces") return true

const completed = get(completedScenarioIdsAtom)
const records = get(scenarioRecordsAtom)
return ids.some((id) => isScenarioCompleted(id, completed, records))
})

async function buildTestsetSyncPreviewForSession(get: Getter) {
Expand Down
1 change: 1 addition & 0 deletions web/packages/agenta-annotation/src/state/testsetSync.ts
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,7 @@ export function buildTestcaseExportRows(params: TestcaseExportRowBuilderParams):
evaluators: params.evaluators,
queueId: params.queueId,
})
if (entries.length === 0) continue

applyAnnotationOutputEntries(data, entries)

Expand Down
4 changes: 3 additions & 1 deletion web/packages/agenta-annotation/src/state/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,9 @@ export interface AnnotationColumnDef {
path: string | null
/** Evaluator workflow ID from the annotation step's references */
evaluatorId: string | null
/** Evaluator slug from the annotation step's references */
/** Evaluator revision ID from the annotation step's references */
evaluatorRevisionId: string | null
/** Evaluator slug from step refs, step key, or mapping column fallback */
evaluatorSlug: string | null
}

Expand Down
Loading
Loading