From 7745852fd540ccd415f95ccb0dcf571b3f2c0310 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Tue, 14 Apr 2026 16:46:02 -0500 Subject: [PATCH 1/7] fix: Add support for graph metric tracking (#1269) --- .../__tests__/LDAIConfigTrackerImpl.test.ts | 145 ++++++ .../__tests__/LDGraphTrackerImpl.test.ts | 418 ++++++++++++++++++ .../server-ai/src/LDAIConfigTrackerImpl.ts | 106 +++-- .../sdk/server-ai/src/LDGraphTrackerImpl.ts | 119 +++++ .../src/api/config/LDAIConfigTracker.ts | 54 ++- .../server-ai/src/api/graph/LDGraphTracker.ts | 126 ++++++ packages/sdk/server-ai/src/api/graph/index.ts | 1 + packages/sdk/server-ai/src/api/index.ts | 1 + packages/sdk/server-ai/src/index.ts | 1 + 9 files changed, 928 insertions(+), 43 deletions(-) create mode 100644 packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts create mode 100644 packages/sdk/server-ai/src/LDGraphTrackerImpl.ts create mode 100644 packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts create mode 100644 packages/sdk/server-ai/src/api/graph/index.ts diff --git a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts index 1fbe25538c..5ea65c4c93 100644 --- a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts @@ -877,3 +877,148 @@ describe('trackJudgeResponse', () => { ); }); }); + +describe('trackToolCall', () => { + it('tracks a single tool call', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackToolCall('my-tool'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), toolKey: 'my-tool' }, + 1, + ); + }); + + it('includes graphKey when provided', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + 
providerName, + testContext, + ); + + tracker.trackToolCall('my-tool', 'my-graph'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), graphKey: 'my-graph', toolKey: 'my-tool' }, + 1, + ); + }); +}); + +describe('trackToolCalls', () => { + it('tracks multiple tool calls', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackToolCalls(['tool-a', 'tool-b', 'tool-c']); + + expect(mockTrack).toHaveBeenCalledTimes(3); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), toolKey: 'tool-a' }, + 1, + ); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), toolKey: 'tool-b' }, + 1, + ); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), toolKey: 'tool-c' }, + 1, + ); + }); +}); + +describe('graphKey parameter support', () => { + it('includes graphKey in trackDuration event', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackDuration(1000, 'my-graph'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:duration:total', + testContext, + { ...getExpectedTrackData(), graphKey: 'my-graph' }, + 1000, + ); + }); + + it('includes graphKey in trackSuccess event', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackSuccess('my-graph'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:generation:success', + testContext, + { ...getExpectedTrackData(), graphKey: 'my-graph' }, + 1, + ); + }); + + it('does not include graphKey when not provided', () => { + const tracker = new 
LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackSuccess(); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:generation:success', + testContext, + getExpectedTrackData(), + 1, + ); + }); +}); diff --git a/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts new file mode 100644 index 0000000000..fe42bf4e4d --- /dev/null +++ b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts @@ -0,0 +1,418 @@ +import { LDContext } from '@launchdarkly/js-server-sdk-common'; + +import { LDClientMin } from '../src/LDClientMin'; +import { LDGraphTrackerImpl } from '../src/LDGraphTrackerImpl'; + +const mockTrack = jest.fn(); +const mockLdClient: LDClientMin = { + track: mockTrack, + variation: jest.fn(), +}; + +const testContext: LDContext = { kind: 'user', key: 'test-user' }; +const graphKey = 'test-graph'; +const variationKey = 'v1'; +const version = 2; + +const getExpectedTrackData = () => ({ + graphKey, + variationKey, + version, +}); + +beforeEach(() => { + jest.clearAllMocks(); +}); + +it('returns track data', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + + expect(tracker.getTrackData()).toEqual(getExpectedTrackData()); +}); + +it('tracks invocation success', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackInvocationSuccess(); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:invocation_success', + testContext, + getExpectedTrackData(), + 1, + ); +}); + +it('tracks invocation failure', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackInvocationFailure(); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:invocation_failure', + 
testContext, + getExpectedTrackData(), + 1, + ); +}); + +it('tracks latency', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackLatency(1500); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:latency', + testContext, + getExpectedTrackData(), + 1500, + ); +}); + +it('tracks total tokens', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackTotalTokens({ total: 200, input: 80, output: 120 }); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:total_tokens', + testContext, + getExpectedTrackData(), + 200, + ); +}); + +it('does not track total tokens when total is zero', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackTotalTokens({ total: 0, input: 0, output: 0 }); + + expect(mockTrack).not.toHaveBeenCalled(); +}); + +it('tracks path', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + const path = ['node-a', 'node-b', 'node-c']; + tracker.trackPath(path); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:path', + testContext, + { ...getExpectedTrackData(), path }, + 1, + ); +}); + +it('tracks judge response', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + const response = { + judgeConfigKey: 'my-judge', + evals: { + relevance: { score: 0.9, reasoning: 'Relevant' }, + accuracy: { score: 0.85, reasoning: 'Accurate' }, + }, + success: true, + }; + tracker.trackJudgeResponse(response); + + expect(mockTrack).toHaveBeenCalledWith( + 'relevance', + testContext, + { ...getExpectedTrackData(), judgeConfigKey: 'my-judge' }, + 0.9, + ); + expect(mockTrack).toHaveBeenCalledWith( + 'accuracy', + testContext, + { 
...getExpectedTrackData(), judgeConfigKey: 'my-judge' }, + 0.85, + ); +}); + +it('tracks judge response without judgeConfigKey', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + const response = { + evals: { relevance: { score: 0.7, reasoning: 'Somewhat relevant' } }, + success: true, + }; + tracker.trackJudgeResponse(response); + + expect(mockTrack).toHaveBeenCalledWith('relevance', testContext, getExpectedTrackData(), 0.7); +}); + +it('tracks redirect', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackRedirect('agent-a', 'agent-b'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:redirect', + testContext, + { ...getExpectedTrackData(), sourceKey: 'agent-a', redirectedTarget: 'agent-b' }, + 1, + ); +}); + +it('tracks handoff success', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackHandoffSuccess('agent-a', 'agent-b'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:handoff_success', + testContext, + { ...getExpectedTrackData(), sourceKey: 'agent-a', targetKey: 'agent-b' }, + 1, + ); +}); + +it('tracks handoff failure', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackHandoffFailure('agent-a', 'agent-b'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:handoff_failure', + testContext, + { ...getExpectedTrackData(), sourceKey: 'agent-a', targetKey: 'agent-b' }, + 1, + ); +}); + +it('returns empty summary when no metrics tracked', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + + expect(tracker.getSummary()).toEqual({}); +}); + +it('summarizes tracked graph metrics', () => { + const tracker = new 
LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + + tracker.trackInvocationSuccess(); + tracker.trackLatency(2000); + tracker.trackTotalTokens({ total: 300, input: 100, output: 200 }); + tracker.trackPath(['node-a', 'node-b']); + + expect(tracker.getSummary()).toEqual({ + success: true, + durationMs: 2000, + tokens: { total: 300, input: 100, output: 200 }, + path: ['node-a', 'node-b'], + }); +}); + +describe('at-most-once semantics for graph-level metrics', () => { + it('drops duplicate trackInvocationSuccess calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackInvocationSuccess(); + tracker.trackInvocationSuccess(); + + expect(mockTrack).toHaveBeenCalledTimes(1); + }); + + it('drops trackInvocationFailure after trackInvocationSuccess', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackInvocationSuccess(); + tracker.trackInvocationFailure(); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:invocation_success', + expect.anything(), + expect.anything(), + expect.anything(), + ); + }); + + it('drops duplicate trackLatency calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackLatency(1000); + tracker.trackLatency(2000); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:latency', + testContext, + getExpectedTrackData(), + 1000, + ); + }); + + it('drops duplicate trackTotalTokens calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackTotalTokens({ total: 100, input: 40, output: 60 }); + tracker.trackTotalTokens({ total: 200, input: 80, output: 120 
}); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:total_tokens', + testContext, + getExpectedTrackData(), + 100, + ); + }); + + it('drops duplicate trackPath calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackPath(['node-a']); + tracker.trackPath(['node-b', 'node-c']); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:path', + testContext, + { ...getExpectedTrackData(), path: ['node-a'] }, + 1, + ); + }); +}); + +describe('edge-level methods can be called multiple times', () => { + it('allows multiple trackRedirect calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackRedirect('a', 'b'); + tracker.trackRedirect('b', 'c'); + + expect(mockTrack).toHaveBeenCalledTimes(2); + }); + + it('allows multiple trackHandoffSuccess calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackHandoffSuccess('a', 'b'); + tracker.trackHandoffSuccess('b', 'c'); + + expect(mockTrack).toHaveBeenCalledTimes(2); + }); + + it('allows multiple trackHandoffFailure calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackHandoffFailure('a', 'b'); + tracker.trackHandoffFailure('b', 'c'); + + expect(mockTrack).toHaveBeenCalledTimes(2); + }); +}); diff --git a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts index a81f8e0721..adda7c97c8 100644 --- a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts @@ -26,12 +26,13 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { private _context: 
LDContext, ) {} - getTrackData(): { + getTrackData(graphKey?: string): { variationKey: string; configKey: string; version: number; modelName: string; providerName: string; + graphKey?: string; } { return { variationKey: this._variationKey, @@ -39,15 +40,21 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { version: this._version, modelName: this._modelName, providerName: this._providerName, + ...(graphKey !== undefined ? { graphKey } : {}), }; } - trackDuration(duration: number): void { + trackDuration(duration: number, graphKey?: string): void { this._trackedMetrics.durationMs = duration; - this._ldClient.track('$ld:ai:duration:total', this._context, this.getTrackData(), duration); + this._ldClient.track( + '$ld:ai:duration:total', + this._context, + this.getTrackData(graphKey), + duration, + ); } - async trackDurationOf(func: () => Promise): Promise { + async trackDurationOf(func: () => Promise, graphKey?: string): Promise { const startTime = Date.now(); try { // Be sure to await here so that we can track the duration of the function and also handle errors. 
@@ -56,66 +63,97 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } finally { const endTime = Date.now(); const duration = endTime - startTime; // duration in milliseconds - this.trackDuration(duration); + this.trackDuration(duration, graphKey); } } - trackTimeToFirstToken(timeToFirstTokenMs: number) { + trackTimeToFirstToken(timeToFirstTokenMs: number, graphKey?: string) { this._trackedMetrics.timeToFirstTokenMs = timeToFirstTokenMs; this._ldClient.track( '$ld:ai:tokens:ttf', this._context, - this.getTrackData(), + this.getTrackData(graphKey), timeToFirstTokenMs, ); } - trackEvalScores(scores: Record) { + trackEvalScores(scores: Record, graphKey?: string) { Object.entries(scores).forEach(([metricKey, evalScore]) => { - this._ldClient.track(metricKey, this._context, this.getTrackData(), evalScore.score); + this._ldClient.track(metricKey, this._context, this.getTrackData(graphKey), evalScore.score); }); } - trackJudgeResponse(response: JudgeResponse) { + trackJudgeResponse(response: JudgeResponse, graphKey?: string) { Object.entries(response.evals).forEach(([metricKey, evalScore]) => { this._ldClient.track( metricKey, this._context, - { ...this.getTrackData(), judgeConfigKey: response.judgeConfigKey }, + { ...this.getTrackData(graphKey), judgeConfigKey: response.judgeConfigKey }, evalScore.score, ); }); } - trackFeedback(feedback: { kind: LDFeedbackKind }): void { + trackToolCall(toolKey: string, graphKey?: string): void { + this._ldClient.track( + '$ld:ai:tool_call', + this._context, + { ...this.getTrackData(graphKey), toolKey }, + 1, + ); + } + + trackToolCalls(toolKeys: string[], graphKey?: string): void { + toolKeys.forEach((toolKey) => { + this.trackToolCall(toolKey, graphKey); + }); + } + + trackFeedback(feedback: { kind: LDFeedbackKind }, graphKey?: string): void { this._trackedMetrics.feedback = feedback; if (feedback.kind === LDFeedbackKind.Positive) { - this._ldClient.track('$ld:ai:feedback:user:positive', this._context, 
this.getTrackData(), 1); + this._ldClient.track( + '$ld:ai:feedback:user:positive', + this._context, + this.getTrackData(graphKey), + 1, + ); } else if (feedback.kind === LDFeedbackKind.Negative) { - this._ldClient.track('$ld:ai:feedback:user:negative', this._context, this.getTrackData(), 1); + this._ldClient.track( + '$ld:ai:feedback:user:negative', + this._context, + this.getTrackData(graphKey), + 1, + ); } } - trackSuccess(): void { + trackSuccess(graphKey?: string): void { this._trackedMetrics.success = true; - this._ldClient.track('$ld:ai:generation:success', this._context, this.getTrackData(), 1); + this._ldClient.track( + '$ld:ai:generation:success', + this._context, + this.getTrackData(graphKey), + 1, + ); } - trackError(): void { + trackError(graphKey?: string): void { this._trackedMetrics.success = false; - this._ldClient.track('$ld:ai:generation:error', this._context, this.getTrackData(), 1); + this._ldClient.track('$ld:ai:generation:error', this._context, this.getTrackData(graphKey), 1); } async trackMetricsOf( metricsExtractor: (result: TRes) => LDAIMetrics, func: () => Promise, + graphKey?: string, ): Promise { let result: TRes; try { - result = await this.trackDurationOf(func); + result = await this.trackDurationOf(func, graphKey); } catch (err) { - this.trackError(); + this.trackError(graphKey); throw err; } @@ -124,14 +162,14 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { // Track success/error based on metrics if (metrics.success) { - this.trackSuccess(); + this.trackSuccess(graphKey); } else { - this.trackError(); + this.trackError(graphKey); } // Track token usage if available if (metrics.usage) { - this.trackTokens(metrics.usage); + this.trackTokens(metrics.usage, graphKey); } return result; @@ -140,6 +178,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { trackStreamMetricsOf( streamCreator: () => TStream, metricsExtractor: (stream: TStream) => Promise, + graphKey?: string, ): TStream { const startTime 
= Date.now(); @@ -148,14 +187,14 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { const stream = streamCreator(); // Start background metrics tracking (fire and forget) - this._trackStreamMetricsInBackground(stream, metricsExtractor, startTime); + this._trackStreamMetricsInBackground(stream, metricsExtractor, startTime, graphKey); // Return stream immediately for consumption return stream; } catch (error) { // Track error if stream creation fails - this.trackDuration(Date.now() - startTime); - this.trackError(); + this.trackDuration(Date.now() - startTime, graphKey); + this.trackError(graphKey); throw error; } } @@ -164,6 +203,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { stream: TStream, metricsExtractor: (stream: TStream) => Promise, startTime: number, + graphKey?: string, ): Promise { try { // Wait for metrics to be available @@ -171,21 +211,21 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { // Track success/error based on metrics if (metrics.success) { - this.trackSuccess(); + this.trackSuccess(graphKey); } else { - this.trackError(); + this.trackError(graphKey); } // Track token usage if available if (metrics.usage) { - this.trackTokens(metrics.usage); + this.trackTokens(metrics.usage, graphKey); } } catch (error) { // If metrics extraction fails, track error - this.trackError(); + this.trackError(graphKey); } finally { // Track duration regardless of success/error - this.trackDuration(Date.now() - startTime); + this.trackDuration(Date.now() - startTime, graphKey); } } @@ -260,9 +300,9 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } } - trackTokens(tokens: LDTokenUsage): void { + trackTokens(tokens: LDTokenUsage, graphKey?: string): void { this._trackedMetrics.tokens = tokens; - const trackData = this.getTrackData(); + const trackData = this.getTrackData(graphKey); if (tokens.total > 0) { this._ldClient.track('$ld:ai:tokens:total', this._context, trackData, tokens.total); 
} diff --git a/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts new file mode 100644 index 0000000000..4c08e26a58 --- /dev/null +++ b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts @@ -0,0 +1,119 @@ +import { LDContext } from '@launchdarkly/js-server-sdk-common'; + +import { LDGraphMetricSummary, LDGraphTracker } from './api/graph/LDGraphTracker'; +import { JudgeResponse } from './api/judge/types'; +import { LDTokenUsage } from './api/metrics'; +import { LDClientMin } from './LDClientMin'; + +export class LDGraphTrackerImpl implements LDGraphTracker { + private _trackedMetrics: LDGraphMetricSummary = {}; + + constructor( + private _ldClient: LDClientMin, + private _graphKey: string, + private _variationKey: string, + private _version: number, + private _context: LDContext, + ) {} + + getTrackData(): { + variationKey: string; + graphKey: string; + version: number; + } { + return { + variationKey: this._variationKey, + graphKey: this._graphKey, + version: this._version, + }; + } + + trackInvocationSuccess(): void { + if (this._trackedMetrics.success !== undefined) { + return; + } + this._trackedMetrics.success = true; + this._ldClient.track('$ld:ai:graph:invocation_success', this._context, this.getTrackData(), 1); + } + + trackInvocationFailure(): void { + if (this._trackedMetrics.success !== undefined) { + return; + } + this._trackedMetrics.success = false; + this._ldClient.track('$ld:ai:graph:invocation_failure', this._context, this.getTrackData(), 1); + } + + trackLatency(durationMs: number): void { + if (this._trackedMetrics.durationMs !== undefined) { + return; + } + this._trackedMetrics.durationMs = durationMs; + this._ldClient.track('$ld:ai:graph:latency', this._context, this.getTrackData(), durationMs); + } + + trackTotalTokens(tokens: LDTokenUsage): void { + if (this._trackedMetrics.tokens !== undefined) { + return; + } + if (tokens.total <= 0) { + return; + } + this._trackedMetrics.tokens = tokens; + 
this._ldClient.track( + '$ld:ai:graph:total_tokens', + this._context, + this.getTrackData(), + tokens.total, + ); + } + + trackPath(path: string[]): void { + if (this._trackedMetrics.path !== undefined) { + return; + } + this._trackedMetrics.path = path; + this._ldClient.track('$ld:ai:graph:path', this._context, { ...this.getTrackData(), path }, 1); + } + + trackJudgeResponse(response: JudgeResponse): void { + const trackData = response.judgeConfigKey + ? { ...this.getTrackData(), judgeConfigKey: response.judgeConfigKey } + : this.getTrackData(); + + Object.entries(response.evals).forEach(([metricKey, evalScore]) => { + this._ldClient.track(metricKey, this._context, trackData, evalScore.score); + }); + } + + trackRedirect(sourceKey: string, redirectedTarget: string): void { + this._ldClient.track( + '$ld:ai:graph:redirect', + this._context, + { ...this.getTrackData(), sourceKey, redirectedTarget }, + 1, + ); + } + + trackHandoffSuccess(sourceKey: string, targetKey: string): void { + this._ldClient.track( + '$ld:ai:graph:handoff_success', + this._context, + { ...this.getTrackData(), sourceKey, targetKey }, + 1, + ); + } + + trackHandoffFailure(sourceKey: string, targetKey: string): void { + this._ldClient.track( + '$ld:ai:graph:handoff_failure', + this._context, + { ...this.getTrackData(), sourceKey, targetKey }, + 1, + ); + } + + getSummary(): LDGraphMetricSummary { + return { ...this._trackedMetrics }; + } +} diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts index 41ff0e20a1..3a40fd3c6d 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts @@ -37,13 +37,16 @@ export interface LDAIMetricSummary { export interface LDAIConfigTracker { /** * Get the data for tracking. + * + * @param graphKey When provided, associates this metric with the specified agent graph key. 
*/ - getTrackData(): { + getTrackData(graphKey?: string): { variationKey: string; configKey: string; version: number; modelName: string; providerName: string; + graphKey?: string; }; /** * Track the duration of generation. @@ -51,53 +54,79 @@ export interface LDAIConfigTracker { * Ideally this would not include overhead time such as network communication. * * @param durationMs The duration in milliseconds. + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackDuration(durationMs: number): void; + trackDuration(durationMs: number, graphKey?: string): void; /** * Track information about token usage. * * @param tokens Token usage information. + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackTokens(tokens: LDTokenUsage): void; + trackTokens(tokens: LDTokenUsage, graphKey?: string): void; /** * Generation was successful. + * + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackSuccess(): void; + trackSuccess(graphKey?: string): void; /** * An error was encountered during generation. + * + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackError(): void; + trackError(graphKey?: string): void; /** * Track sentiment about the generation. * * @param feedback Feedback about the generation. + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackFeedback(feedback: { kind: LDFeedbackKind }): void; + trackFeedback(feedback: { kind: LDFeedbackKind }, graphKey?: string): void; /** * Track the time to first token for this generation. * * @param timeToFirstTokenMs The duration in milliseconds. + * @param graphKey When provided, associates this metric with the specified agent graph key. 
*/ - trackTimeToFirstToken(timeToFirstTokenMs: number): void; + trackTimeToFirstToken(timeToFirstTokenMs: number, graphKey?: string): void; /** * Track evaluation scores for multiple metrics. * * @param scores Record mapping metric keys to their evaluation scores + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackEvalScores(scores: Record): void; + trackEvalScores(scores: Record, graphKey?: string): void; /** * Track a judge response containing evaluation scores and judge configuration key. * * @param response Judge response containing evaluation scores and judge configuration key + * @param graphKey When provided, associates this metric with the specified agent graph key. + */ + trackJudgeResponse(response: JudgeResponse, graphKey?: string): void; + + /** + * Track a single tool invocation. + * + * @param toolKey The identifier of the tool that was invoked. + * @param graphKey When provided, associates this metric with the specified agent graph key. + */ + trackToolCall(toolKey: string, graphKey?: string): void; + + /** + * Track multiple tool invocations. + * + * @param toolKeys The identifiers of the tools that were invoked. + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackJudgeResponse(response: JudgeResponse): void; + trackToolCalls(toolKeys: string[], graphKey?: string): void; /** * Track the duration of execution of the provided function. @@ -108,9 +137,10 @@ export interface LDAIConfigTracker { * This function does not automatically record an error when the function throws. * * @param func The function to track the duration of. + * @param graphKey When provided, associates this metric with the specified agent graph key. * @returns The result of the function. */ - trackDurationOf(func: () => Promise): Promise; + trackDurationOf(func: () => Promise, graphKey?: string): Promise; /** * Track metrics for a generic AI operation. 
@@ -124,11 +154,13 @@ export interface LDAIConfigTracker { * * @param metricsExtractor Function that extracts LDAIMetrics from the operation result * @param func Function which executes the operation + * @param graphKey When provided, associates this metric with the specified agent graph key. * @returns The result of the operation */ trackMetricsOf( metricsExtractor: (result: TRes) => LDAIMetrics, func: () => Promise, + graphKey?: string, ): Promise; /** @@ -150,11 +182,13 @@ export interface LDAIConfigTracker { * * @param streamCreator Function that creates and returns the stream (synchronous) * @param metricsExtractor Function that asynchronously extracts metrics from the stream + * @param graphKey When provided, associates this metric with the specified agent graph key. * @returns The stream result (returned immediately, not a Promise) */ trackStreamMetricsOf( streamCreator: () => TStream, metricsExtractor: (stream: TStream) => Promise, + graphKey?: string, ): TStream; /** diff --git a/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts new file mode 100644 index 0000000000..94cf30658f --- /dev/null +++ b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts @@ -0,0 +1,126 @@ +import { JudgeResponse } from '../judge/types'; +import { LDTokenUsage } from '../metrics'; + +/** + * Metrics tracked at the graph level. + */ +export interface LDGraphMetricSummary { + /** + * True if the graph invocation succeeded, false if it failed, absent if not tracked. + */ + success?: boolean; + + /** + * Total graph execution duration in milliseconds, if tracked. + */ + durationMs?: number; + + /** + * Aggregated token usage across the entire graph invocation, if tracked. + */ + tokens?: LDTokenUsage; + + /** + * Execution path through the graph as an array of config keys, if tracked. + */ + path?: string[]; +} + +/** + * Tracker for graph-level and edge-level metrics in AI agent graph operations. 
+ * + * Node-level metrics are tracked via each node's {@link LDAIConfigTracker}. + */ +export interface LDGraphTracker { + /** + * Get the data for tracking. + */ + getTrackData(): { + variationKey: string; + graphKey: string; + version: number; + }; + + /** + * Track a successful graph invocation. + * + * At-most-once per tracker instance. Subsequent calls are dropped. + */ + trackInvocationSuccess(): void; + + /** + * Track an unsuccessful graph invocation. + * + * At-most-once per tracker instance. Subsequent calls are dropped. + */ + trackInvocationFailure(): void; + + /** + * Track the total latency of graph execution. + * + * At-most-once per tracker instance. Subsequent calls are dropped. + * + * @param durationMs Duration in milliseconds. + */ + trackLatency(durationMs: number): void; + + /** + * Track aggregated token usage across the entire graph invocation. + * + * At-most-once per tracker instance. Subsequent calls are dropped. + * + * @param tokens Token usage information. + */ + trackTotalTokens(tokens: LDTokenUsage): void; + + /** + * Track the execution path through the graph. + * + * At-most-once per tracker instance. Subsequent calls are dropped. + * + * @param path Array of config keys representing the sequence of nodes executed. + */ + trackPath(path: string[]): void; + + /** + * Track judge responses for the final graph output. + * + * @param response Judge response containing evaluation scores. + */ + trackJudgeResponse(response: JudgeResponse): void; + + /** + * Track when a node redirects to a different target than originally specified. + * + * May be called multiple times. + * + * @param sourceKey Config key of the source node. + * @param redirectedTarget Config key of the target node that was redirected to. + */ + trackRedirect(sourceKey: string, redirectedTarget: string): void; + + /** + * Track a successful handoff between nodes. + * + * May be called multiple times. + * + * @param sourceKey Config key of the source node. 
+ * @param targetKey Config key of the target node. + */ + trackHandoffSuccess(sourceKey: string, targetKey: string): void; + + /** + * Track a failed handoff between nodes. + * + * May be called multiple times. + * + * @param sourceKey Config key of the source node. + * @param targetKey Config key of the target node. + */ + trackHandoffFailure(sourceKey: string, targetKey: string): void; + + /** + * Get a summary of the tracked graph-level metrics. + */ + getSummary(): LDGraphMetricSummary; +} diff --git a/packages/sdk/server-ai/src/api/graph/index.ts b/packages/sdk/server-ai/src/api/graph/index.ts new file mode 100644 index 0000000000..536e630115 --- /dev/null +++ b/packages/sdk/server-ai/src/api/graph/index.ts @@ -0,0 +1 @@ +export * from './LDGraphTracker'; diff --git a/packages/sdk/server-ai/src/api/index.ts b/packages/sdk/server-ai/src/api/index.ts index 2f289b8356..7470ef740c 100644 --- a/packages/sdk/server-ai/src/api/index.ts +++ b/packages/sdk/server-ai/src/api/index.ts @@ -1,5 +1,6 @@ export * from './config'; export * from './chat'; +export * from './graph'; export * from './judge'; export * from './metrics'; export * from './LDAIClient'; diff --git a/packages/sdk/server-ai/src/index.ts b/packages/sdk/server-ai/src/index.ts index 7c1bb54b3d..8bb6c11808 100644 --- a/packages/sdk/server-ai/src/index.ts +++ b/packages/sdk/server-ai/src/index.ts @@ -26,3 +26,4 @@ export function initAi(ldClient: LDClientMin): LDAIClient { export type LDLogger = common.LDLogger; export * from './api'; +export { LDGraphTrackerImpl } from './LDGraphTrackerImpl'; From d640d8e0b9542dce2a1dfac404c78648aa3d05c5 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Wed, 15 Apr 2026 07:38:28 -0500 Subject: [PATCH 2/7] feat!: Add per-execution runId, at-most-once tracking, and cross-process tracker resumption (#1270) --- .../sdk/server-ai/__tests__/Judge.test.ts | 74 ++--- .../__tests__/LDAIClientImpl.test.ts | 66 ++-- .../__tests__/LDAIConfigTrackerImpl.test.ts | 290 ++++++++++++++++++ 
.../server-ai/__tests__/TrackedChat.test.ts | 26 +- .../server-ai/examples/bedrock/src/index.ts | 5 +- .../server-ai/examples/openai/src/index.ts | 19 +- .../server-ai/examples/vercel-ai/src/index.ts | 11 +- packages/sdk/server-ai/src/LDAIClientImpl.ts | 40 ++- .../server-ai/src/LDAIConfigTrackerImpl.ts | 72 ++++- packages/sdk/server-ai/src/api/LDAIClient.ts | 12 + .../sdk/server-ai/src/api/chat/TrackedChat.ts | 39 +-- .../src/api/config/LDAIConfigTracker.ts | 31 +- .../src/api/config/LDAIConfigUtils.ts | 38 ++- .../sdk/server-ai/src/api/config/types.ts | 7 +- packages/sdk/server-ai/src/api/judge/Judge.ts | 29 +- 15 files changed, 572 insertions(+), 187 deletions(-) diff --git a/packages/sdk/server-ai/__tests__/Judge.test.ts b/packages/sdk/server-ai/__tests__/Judge.test.ts index 6c8985b914..c0def31740 100644 --- a/packages/sdk/server-ai/__tests__/Judge.test.ts +++ b/packages/sdk/server-ai/__tests__/Judge.test.ts @@ -47,14 +47,14 @@ describe('Judge', () => { ], model: { name: 'gpt-4' }, provider: { name: 'openai' }, - tracker: mockTracker, + createTracker: () => mockTracker, evaluationMetricKey: 'relevance', }; }); describe('constructor', () => { it('initializes with proper configuration', () => { - const judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + const judge = new Judge(judgeConfig, mockProvider, mockLogger); expect(judge).toBeDefined(); }); @@ -64,7 +64,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, mockLogger); }); it('evaluates AI response successfully', async () => { @@ -209,12 +209,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: [], }; - const judgeWithoutMetrics = new Judge( - configWithoutMetrics, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithoutMetrics = new Judge(configWithoutMetrics, mockProvider, mockLogger); const result = await 
judgeWithoutMetrics.evaluate('test input', 'test output'); @@ -231,12 +226,7 @@ describe('Judge', () => { evaluationMetricKey: 'relevance', evaluationMetricKeys: undefined, }; - const judgeWithSingleKey = new Judge( - configWithSingleKey, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithSingleKey = new Judge(configWithSingleKey, mockProvider, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -275,12 +265,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: ['relevance', 'accuracy'], }; - const judgeWithLegacyKeys = new Judge( - configWithLegacyKeys, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithLegacyKeys = new Judge(configWithLegacyKeys, mockProvider, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -319,12 +304,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: ['', ' ', 'relevance', 'accuracy'], }; - const judgeWithInvalidKeys = new Judge( - configWithInvalidKeys, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithInvalidKeys = new Judge(configWithInvalidKeys, mockProvider, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -364,7 +344,7 @@ describe('Judge', () => { evaluationMetricKey: 'helpfulness', evaluationMetricKeys: ['relevance', 'accuracy'], }; - const judgeWithBoth = new Judge(configWithBoth, mockTracker, mockProvider, mockLogger); + const judgeWithBoth = new Judge(configWithBoth, mockProvider, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -402,12 +382,7 @@ describe('Judge', () => { ...judgeConfig, messages: undefined, }; - const judgeWithoutMessages = new Judge( - configWithoutMessages, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithoutMessages = new Judge(configWithoutMessages, mockProvider, mockLogger); const result = await judgeWithoutMessages.evaluate('test input', 'test output'); @@ -511,7 +486,7 @@ 
describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, mockLogger); }); it('evaluates messages and response successfully', async () => { @@ -596,7 +571,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, mockLogger); }); it('constructs evaluation messages correctly', () => { @@ -621,7 +596,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, mockLogger); }); it('parses valid evaluation response correctly', () => { @@ -633,7 +608,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({ relevance: { score: 0.8, reasoning: 'Good' }, @@ -647,7 +622,7 @@ describe('Judge', () => { relevance: { score: 0.8, reasoning: 'Good' }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); }); @@ -661,7 +636,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); }); @@ -675,7 +650,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( @@ -693,7 +668,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); 
expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( @@ -711,7 +686,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( @@ -729,7 +704,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( @@ -744,12 +719,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: [], }; - const judgeWithEmptyKeys = new Judge( - configWithEmptyKeys, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithEmptyKeys = new Judge(configWithEmptyKeys, mockProvider, mockLogger); const result = await judgeWithEmptyKeys.evaluate('test input', 'test output'); @@ -769,7 +739,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( @@ -787,7 +757,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( diff --git a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts index 35e8b671a2..9695c1f815 100644 --- a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts @@ -85,7 +85,7 @@ describe('config evaluation', () => { { role: 'system', content: 'Hello John' }, { role: 'user', content: 'Score: 42' }, ]); - expect(result.tracker).toBeDefined(); + 
expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -143,7 +143,7 @@ describe('config evaluation', () => { expect(result.instructions).toBe( 'You are a helpful assistant. Your name is John and your score is 42', ); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -176,7 +176,7 @@ describe('config evaluation', () => { expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); // Should use first value from evaluationMetricKeys expect(result.evaluationMetricKey).toBe('relevance'); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -208,7 +208,7 @@ describe('config evaluation', () => { expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); expect(result.evaluationMetricKey).toBe('relevance'); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -241,7 +241,7 @@ describe('config evaluation', () => { expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); expect(result.evaluationMetricKey).toBe('helpfulness'); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -275,7 +275,7 @@ describe('config evaluation', () => { expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); // Empty string should be treated as invalid, so should fall back to first value in evaluationMetricKeys expect(result.evaluationMetricKey).toBe('relevance'); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); 
evaluateSpy.mockRestore(); }); @@ -308,7 +308,7 @@ describe('config evaluation', () => { expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); // Should skip empty and whitespace strings, use first valid value expect(result.evaluationMetricKey).toBe('relevance'); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -331,7 +331,7 @@ describe('config evaluation', () => { const result = await client.completionConfig(key, testContext, defaultValue); expect(result.enabled).toBe(false); - expect(result.tracker).toBeUndefined(); + expect(result.createTracker).toBeUndefined(); }); it('handles missing metadata mode by defaulting to completion mode', async () => { @@ -352,7 +352,7 @@ describe('config evaluation', () => { const result = await client.completionConfig(key, testContext, defaultValue); expect(result.enabled).toBe(false); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.messages).toEqual([{ role: 'system', content: 'Hello' }]); expect(result.model).toEqual({ name: 'example-provider', parameters: { name: 'imagination' } }); }); @@ -381,7 +381,7 @@ describe('config evaluation', () => { expect(result.model).toEqual(defaultValue.model); expect(result.messages).toEqual(defaultValue.messages); expect(result.provider).toEqual(defaultValue.provider); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(defaultValue.enabled); expect(mockLdClient.variation).toHaveBeenCalledWith( key, @@ -408,7 +408,7 @@ describe('completionConfig method', () => { const mockConfig = { model: { name: 'test-model' }, messages: [], - tracker: {} as any, + createTracker: () => ({}) as any, enabled: true, }; @@ -449,7 +449,7 @@ describe('agentConfig method', () => { const mockConfig = { model: { name: 'test-model' }, instructions: 'You are a 
helpful assistant.', - tracker: {} as any, + createTracker: () => ({}) as any, enabled: true, }; @@ -527,7 +527,7 @@ describe('agents method', () => { }, provider: { name: 'openai' }, instructions: 'You are a research assistant specializing in climate change.', - tracker: expect.any(Object), + createTracker: expect.any(Function), enabled: true, }, 'writing-agent': { @@ -538,7 +538,7 @@ describe('agents method', () => { }, provider: { name: 'anthropic' }, instructions: 'You are a writing assistant with academic style.', - tracker: expect.any(Object), + createTracker: expect.any(Function), enabled: true, }, }); @@ -582,7 +582,7 @@ describe('judgeConfig method', () => { provider: { name: 'openai' }, evaluationMetricKeys: ['relevance'], messages: [{ role: 'system' as const, content: 'You are a judge for {{metric}}.' }], - tracker: {} as any, + createTracker: () => ({}) as any, toVercelAISDK: jest.fn(), }; @@ -631,6 +631,7 @@ describe('createJudge method', () => { enabled: false, }; + const mockTrackerInstance = {} as any; const mockJudgeConfig = { key: 'test-judge', enabled: true, @@ -638,7 +639,7 @@ describe('createJudge method', () => { provider: { name: 'openai' }, evaluationMetricKeys: ['relevance', 'accuracy'], messages: [{ role: 'system' as const, content: 'You are a judge.' 
}], - tracker: {} as any, + createTracker: () => mockTrackerInstance, toVercelAISDK: jest.fn(), }; @@ -658,12 +659,7 @@ describe('createJudge method', () => { response_to_evaluate: '{{response_to_evaluate}}', }); expect(AIProviderFactory.create).toHaveBeenCalledWith(mockJudgeConfig, undefined, undefined); - expect(Judge).toHaveBeenCalledWith( - mockJudgeConfig, - mockJudgeConfig.tracker, - mockProvider, - undefined, - ); + expect(Judge).toHaveBeenCalledWith(mockJudgeConfig, mockProvider, undefined); expect(result).toBe(mockJudge); judgeConfigSpy.mockRestore(); }); @@ -706,7 +702,7 @@ describe('createJudge method', () => { provider: { name: 'openai' }, evaluationMetricKeys: ['relevance'], messages: [{ role: 'system' as const, content: 'You are a judge.' }], - tracker: {} as any, + createTracker: () => ({}) as any, toVercelAISDK: jest.fn(), }; @@ -741,6 +737,30 @@ describe('createJudge method', () => { }); }); +describe('createTracker method', () => { + it('reconstructs a tracker from a resumption token', () => { + const client = new LDAIClientImpl(mockLdClient); + + // Build a token manually: { runId, configKey, variationKey, version } + const payload = JSON.stringify({ + runId: 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11', + configKey: 'my-config', + variationKey: 'v1', + version: 3, + }); + const token = Buffer.from(payload).toString('base64url'); + + const tracker = client.createTracker(token, testContext); + + expect(tracker.getTrackData()).toMatchObject({ + runId: 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11', + configKey: 'my-config', + variationKey: 'v1', + version: 3, + }); + }); +}); + describe('optional default values', () => { it('uses a disabled completion config when no default is provided', async () => { const client = new LDAIClientImpl(mockLdClient); diff --git a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts index 5ea65c4c93..e644eff377 100644 --- 
a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts @@ -4,11 +4,18 @@ import { LDFeedbackKind } from '../src/api/metrics'; import { LDAIConfigTrackerImpl } from '../src/LDAIConfigTrackerImpl'; import { LDClientMin } from '../src/LDClientMin'; +const testRunId = 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'; +jest.mock('node:crypto', () => ({ + randomUUID: jest.fn(() => testRunId), +})); + const mockTrack = jest.fn(); const mockVariation = jest.fn(); +const mockWarn = jest.fn(); const mockLdClient: LDClientMin = { track: mockTrack, variation: mockVariation, + logger: { warn: mockWarn, error: jest.fn(), info: jest.fn(), debug: jest.fn() } as any, }; const testContext: LDContext = { kind: 'user', key: 'test-user' }; @@ -24,6 +31,7 @@ const getExpectedTrackData = () => ({ version, modelName, providerName, + runId: testRunId, }); beforeEach(() => { @@ -33,6 +41,7 @@ beforeEach(() => { it('tracks duration', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -53,6 +62,7 @@ it('tracks duration', () => { it('tracks duration of async function', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -76,6 +86,7 @@ it('tracks duration of async function', async () => { it('tracks time to first token', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -96,6 +107,7 @@ it('tracks time to first token', () => { it('tracks positive feedback', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -116,6 +128,7 @@ it('tracks positive feedback', () => { it('tracks negative feedback', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -136,6 +149,7 @@ it('tracks negative feedback', () => { it('tracks 
success', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -156,6 +170,7 @@ it('tracks success', () => { it('tracks OpenAI usage', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -223,6 +238,7 @@ it('tracks OpenAI usage', async () => { it('tracks error when OpenAI metrics function throws', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -264,6 +280,7 @@ it('tracks error when OpenAI metrics function throws', async () => { it('tracks Bedrock conversation with successful response', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -334,6 +351,7 @@ it('tracks Bedrock conversation with successful response', () => { it('tracks Bedrock conversation with error response', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -369,6 +387,7 @@ describe('Vercel AI SDK generateText', () => { it('tracks Vercel AI SDK usage', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -436,6 +455,7 @@ describe('Vercel AI SDK generateText', () => { it('tracks error when Vercel AI SDK metrics function throws', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -478,6 +498,7 @@ describe('Vercel AI SDK generateText', () => { it('tracks tokens', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -521,6 +542,7 @@ it('tracks tokens', () => { it('only tracks non-zero token counts', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -560,6 +582,7 @@ it('only tracks non-zero token counts', () => { 
it('returns empty summary when no metrics tracked', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -576,6 +599,7 @@ it('returns empty summary when no metrics tracked', () => { it('summarizes tracked metrics', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -612,6 +636,7 @@ it('summarizes tracked metrics', () => { it('tracks duration when async function throws', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -639,6 +664,7 @@ it('tracks duration when async function throws', async () => { it('tracks error', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -660,6 +686,7 @@ describe('trackMetricsOf', () => { it('tracks success and token usage from metrics', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -715,6 +742,7 @@ describe('trackMetricsOf', () => { it('tracks failure when metrics indicate failure', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -745,6 +773,7 @@ describe('trackMetricsOf', () => { it('tracks failure when operation throws', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -774,6 +803,7 @@ describe('trackMetricsOf', () => { it('tracks metrics without token usage', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -815,6 +845,7 @@ describe('trackJudgeResponse', () => { it('tracks evaluation metric key with score', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -844,6 +875,7 @@ describe('trackJudgeResponse', () => { it('tracks 
multiple evaluation metrics when present', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -882,6 +914,7 @@ describe('trackToolCall', () => { it('tracks a single tool call', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -903,6 +936,7 @@ describe('trackToolCall', () => { it('includes graphKey when provided', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -926,6 +960,7 @@ describe('trackToolCalls', () => { it('tracks multiple tool calls', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -962,6 +997,7 @@ describe('graphKey parameter support', () => { it('includes graphKey in trackDuration event', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -983,6 +1019,7 @@ describe('graphKey parameter support', () => { it('includes graphKey in trackSuccess event', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -1004,6 +1041,7 @@ describe('graphKey parameter support', () => { it('does not include graphKey when not provided', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -1022,3 +1060,255 @@ describe('graphKey parameter support', () => { ); }); }); + +describe('at-most-once semantics', () => { + it('drops duplicate trackDuration call with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackDuration(1000); + tracker.trackDuration(2000); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledTimes(1); + 
expect(mockWarn).toHaveBeenCalledWith(expect.stringContaining('Duration')); + }); + + it('drops duplicate trackSuccess call with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackSuccess(); + tracker.trackSuccess(); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledTimes(1); + }); + + it('drops trackError call after trackSuccess with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackSuccess(); + tracker.trackError(); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledTimes(1); + }); + + it('drops duplicate trackFeedback call with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackFeedback({ kind: LDFeedbackKind.Positive }); + tracker.trackFeedback({ kind: LDFeedbackKind.Negative }); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledTimes(1); + }); + + it('drops duplicate trackTokens call with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackTokens({ total: 100, input: 50, output: 50 }); + tracker.trackTokens({ total: 200, input: 100, output: 100 }); + + // First call tracks 3 events (total, input, output), second is dropped + expect(mockTrack).toHaveBeenCalledTimes(3); + expect(mockWarn).toHaveBeenCalledTimes(1); + }); + + it('drops duplicate trackTimeToFirstToken call with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + 
version, + modelName, + providerName, + testContext, + ); + tracker.trackTimeToFirstToken(100); + tracker.trackTimeToFirstToken(200); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledTimes(1); + }); +}); + +describe('resumptionToken', () => { + it('encodes runId, configKey, variationKey, and version as URL-safe Base64 JSON', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const token = tracker.resumptionToken; + const decoded = JSON.parse(Buffer.from(token, 'base64url').toString('utf8')); + + expect(decoded).toEqual({ + runId: testRunId, + configKey, + variationKey, + version, + }); + }); + + it('includes empty variationKey explicitly when not set', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + '', + version, + modelName, + providerName, + testContext, + ); + + const token = tracker.resumptionToken; + const decoded = JSON.parse(Buffer.from(token, 'base64url').toString('utf8')); + + expect(decoded).toEqual({ + runId: testRunId, + configKey, + variationKey: '', + version, + }); + expect('variationKey' in decoded).toBe(true); + }); + + it('uses URL-safe Base64 encoding (no + / or = characters)', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const token = tracker.resumptionToken; + expect(token).not.toMatch(/[+/=]/); + }); +}); + +describe('fromResumptionToken', () => { + it('reconstructs tracker with original runId', () => { + const original = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const reconstructed = LDAIConfigTrackerImpl.fromResumptionToken( + original.resumptionToken, + mockLdClient, + testContext, + ); + + 
expect(reconstructed.getTrackData().runId).toBe(testRunId); + expect(reconstructed.getTrackData().configKey).toBe(configKey); + expect(reconstructed.getTrackData().variationKey).toBe(variationKey); + expect(reconstructed.getTrackData().version).toBe(version); + }); + + it('reconstructs tracker with empty variationKey when none was set', () => { + const original = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + '', + version, + modelName, + providerName, + testContext, + ); + + const reconstructed = LDAIConfigTrackerImpl.fromResumptionToken( + original.resumptionToken, + mockLdClient, + testContext, + ); + + expect(reconstructed.getTrackData().variationKey).toBe(''); + }); + + it('reconstructed tracker emits track events with original runId', () => { + const original = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const reconstructed = LDAIConfigTrackerImpl.fromResumptionToken( + original.resumptionToken, + mockLdClient, + testContext, + ); + + reconstructed.trackSuccess(); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:generation:success', + testContext, + expect.objectContaining({ runId: testRunId }), + 1, + ); + }); +}); diff --git a/packages/sdk/server-ai/__tests__/TrackedChat.test.ts b/packages/sdk/server-ai/__tests__/TrackedChat.test.ts index d750a47e65..75681b0f83 100644 --- a/packages/sdk/server-ai/__tests__/TrackedChat.test.ts +++ b/packages/sdk/server-ai/__tests__/TrackedChat.test.ts @@ -38,13 +38,13 @@ describe('TrackedChat', () => { messages: [{ role: 'system', content: 'You are a helpful assistant.' 
}], model: { name: 'gpt-4' }, provider: { name: 'openai' }, - tracker: mockTracker, + createTracker: () => mockTracker, }; }); describe('appendMessages', () => { it('appends messages to the conversation history', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); const messagesToAppend: LDMessage[] = [ { role: 'user', content: 'Hello' }, @@ -60,7 +60,7 @@ describe('TrackedChat', () => { }); it('appends multiple message batches sequentially', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([{ role: 'user', content: 'First message' }]); chat.appendMessages([{ role: 'assistant', content: 'Second message' }]); @@ -74,7 +74,7 @@ describe('TrackedChat', () => { }); it('handles empty message array', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([]); @@ -85,7 +85,7 @@ describe('TrackedChat', () => { describe('getMessages', () => { it('returns only conversation history when includeConfigMessages is false', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([ { role: 'user', content: 'User message' }, @@ -100,7 +100,7 @@ describe('TrackedChat', () => { }); it('returns only conversation history when includeConfigMessages is omitted (defaults to false)', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([{ role: 'user', content: 'User message' }]); @@ -111,7 +111,7 @@ describe('TrackedChat', () => { }); it('returns config messages prepended when includeConfigMessages is true', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new 
TrackedChat(aiConfig, mockProvider); chat.appendMessages([ { role: 'user', content: 'User message' }, @@ -127,7 +127,7 @@ describe('TrackedChat', () => { }); it('returns only config messages when no conversation history exists and includeConfigMessages is true', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); const messages = chat.getMessages(true); @@ -140,7 +140,7 @@ describe('TrackedChat', () => { ...aiConfig, messages: [], }; - const chat = new TrackedChat(configWithoutMessages, mockTracker, mockProvider); + const chat = new TrackedChat(configWithoutMessages, mockProvider); const messages = chat.getMessages(false); @@ -148,7 +148,7 @@ describe('TrackedChat', () => { }); it('returns a copy of the messages array (not a reference)', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([{ role: 'user', content: 'Original message' }]); @@ -171,7 +171,7 @@ describe('TrackedChat', () => { ...aiConfig, messages: undefined, }; - const chat = new TrackedChat(configWithoutMessages, mockTracker, mockProvider); + const chat = new TrackedChat(configWithoutMessages, mockProvider); chat.appendMessages([{ role: 'user', content: 'User message' }]); @@ -196,7 +196,7 @@ describe('TrackedChat', () => { mockProvider.invokeModel.mockResolvedValue(mockResponse); - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); await chat.invoke('Hello'); @@ -216,7 +216,7 @@ describe('TrackedChat', () => { mockProvider.invokeModel.mockResolvedValue(mockResponse); - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([{ role: 'user', content: 'Pre-appended message' }]); await chat.invoke('New user input'); diff --git 
a/packages/sdk/server-ai/examples/bedrock/src/index.ts b/packages/sdk/server-ai/examples/bedrock/src/index.ts index ac331d23e8..e1cbf93e06 100644 --- a/packages/sdk/server-ai/examples/bedrock/src/index.ts +++ b/packages/sdk/server-ai/examples/bedrock/src/index.ts @@ -62,12 +62,13 @@ async function main() { }, ); - if (!aiConfig.enabled || !aiConfig.tracker) { + if (!aiConfig.enabled) { console.log('*** AI configuration is not enabled'); process.exit(0); } - const completion = aiConfig.tracker.trackBedrockConverseMetrics( + const tracker = aiConfig.createTracker!(); + const completion = tracker.trackBedrockConverseMetrics( await awsClient.send( new ConverseCommand({ modelId: aiConfig.model?.name ?? 'no-model', diff --git a/packages/sdk/server-ai/examples/openai/src/index.ts b/packages/sdk/server-ai/examples/openai/src/index.ts index 8b501e9cb7..e16643d6c5 100644 --- a/packages/sdk/server-ai/examples/openai/src/index.ts +++ b/packages/sdk/server-ai/examples/openai/src/index.ts @@ -60,20 +60,19 @@ async function main() { myVariable: 'My User Defined Variable', }); - if (!aiConfig.enabled || !aiConfig.tracker) { + if (!aiConfig.enabled) { console.log('*** AI configuration is not enabled'); process.exit(0); } - const completion = await aiConfig.tracker.trackMetricsOf( - OpenAIProvider.createAIMetrics, - async () => - client.chat.completions.create({ - messages: aiConfig.messages || [], - model: aiConfig.model?.name || 'gpt-4', - temperature: (aiConfig.model?.parameters?.temperature as number) ?? 0.5, - max_tokens: (aiConfig.model?.parameters?.maxTokens as number) ?? 4096, - }), + const tracker = aiConfig.createTracker!(); + const completion = await tracker.trackMetricsOf(OpenAIProvider.createAIMetrics, async () => + client.chat.completions.create({ + messages: aiConfig.messages || [], + model: aiConfig.model?.name || 'gpt-4', + temperature: (aiConfig.model?.parameters?.temperature as number) ?? 0.5, + max_tokens: (aiConfig.model?.parameters?.maxTokens as number) ?? 
4096, + }), ); console.log('AI Response:', completion.choices[0]?.message.content); diff --git a/packages/sdk/server-ai/examples/vercel-ai/src/index.ts b/packages/sdk/server-ai/examples/vercel-ai/src/index.ts index 5f138a5dd2..af1db3e918 100644 --- a/packages/sdk/server-ai/examples/vercel-ai/src/index.ts +++ b/packages/sdk/server-ai/examples/vercel-ai/src/index.ts @@ -52,7 +52,7 @@ async function main() { // const aiConfig = await aiClient.completionConfig(aiConfigKey, context, defaultValue); const aiConfig = await aiClient.completionConfig(aiConfigKey, context); - if (!aiConfig.enabled || !aiConfig.tracker) { + if (!aiConfig.enabled) { console.log('*** AI configuration is not enabled'); process.exit(0); } @@ -74,9 +74,9 @@ async function main() { }); // Call the model and track metrics for the ai config - const result = await aiConfig.tracker.trackMetricsOf( - VercelProvider.getAIMetricsFromResponse, - () => generateText({ ...vercelConfig, messages: vercelConfig.messages ?? [] }), + const tracker = aiConfig.createTracker!(); + const result = await tracker.trackMetricsOf(VercelProvider.getAIMetricsFromResponse, () => + generateText({ ...vercelConfig, messages: vercelConfig.messages ?? [] }), ); console.log('Response:', result.text); @@ -99,7 +99,8 @@ async function main() { }); // Stream is returned immediately (synchronously), metrics tracked in background - const streamResult = aiConfig.tracker.trackStreamMetricsOf( + const streamTracker = aiConfig.createTracker!(); + const streamResult = streamTracker.trackStreamMetricsOf( () => streamText({ ...vercelConfig, messages: vercelConfig.messages ?? 
[] }), VercelProvider.getAIMetricsFromStream, ); diff --git a/packages/sdk/server-ai/src/LDAIClientImpl.ts b/packages/sdk/server-ai/src/LDAIClientImpl.ts index 37ac4e8f10..209c0ce860 100644 --- a/packages/sdk/server-ai/src/LDAIClientImpl.ts +++ b/packages/sdk/server-ai/src/LDAIClientImpl.ts @@ -1,4 +1,5 @@ import Mustache from 'mustache'; +import { randomUUID } from 'node:crypto'; import { LDContext, LDLogger } from '@launchdarkly/js-server-sdk-common'; @@ -13,6 +14,7 @@ import { LDAIConfigDefaultKind, LDAIConfigKind, LDAIConfigMode, + LDAIConfigTracker, LDAIJudgeConfig, LDAIJudgeConfigDefault, LDJudge, @@ -87,19 +89,21 @@ export class LDAIClientImpl implements LDAIClient { return LDAIConfigUtils.createDisabledConfig(key, mode); } - const tracker = new LDAIConfigTrackerImpl( - this._ldClient, - key, - // eslint-disable-next-line no-underscore-dangle - value._ldMeta?.variationKey ?? '', - // eslint-disable-next-line no-underscore-dangle - value._ldMeta?.version ?? 1, - value.model?.name ?? '', - value.provider?.name ?? '', - context, - ); + const trackerFactory = () => + new LDAIConfigTrackerImpl( + this._ldClient, + randomUUID(), + key, + // eslint-disable-next-line no-underscore-dangle + value._ldMeta?.variationKey ?? '', + // eslint-disable-next-line no-underscore-dangle + value._ldMeta?.version ?? 1, + value.model?.name ?? '', + value.provider?.name ?? 
'', + context, + ); - const config = LDAIConfigUtils.fromFlagValue(key, value, tracker); + const config = LDAIConfigUtils.fromFlagValue(key, value, trackerFactory); // Apply variable interpolation (always needed for ldctx) return this._applyInterpolation(config, context, variables); @@ -296,7 +300,7 @@ export class LDAIClientImpl implements LDAIClient { variables, ); - if (!config.enabled || !config.tracker) { + if (!config.enabled) { this._logger?.info(`Chat configuration is disabled: ${key}`); return undefined; } @@ -313,7 +317,7 @@ export class LDAIClientImpl implements LDAIClient { defaultAiProvider, ); - return new TrackedChat(config, config.tracker, provider, judges, this._logger); + return new TrackedChat(config, provider, judges, this._logger); } async createJudge( @@ -351,7 +355,7 @@ export class LDAIClientImpl implements LDAIClient { extendedVariables, ); - if (!judgeConfig.enabled || !judgeConfig.tracker) { + if (!judgeConfig.enabled) { this._logger?.info(`Judge configuration is disabled: ${key}`); return undefined; } @@ -361,7 +365,7 @@ export class LDAIClientImpl implements LDAIClient { return undefined; } - return new Judge(judgeConfig, judgeConfig.tracker, provider, this._logger); + return new Judge(judgeConfig, provider, this._logger); } catch (error) { this._logger?.error(`Failed to initialize judge ${key}:`, error); return undefined; @@ -380,4 +384,8 @@ export class LDAIClientImpl implements LDAIClient { ): Promise { return this.createChat(key, context, defaultValue, variables, defaultAiProvider); } + + createTracker(token: string, context: LDContext): LDAIConfigTracker { + return LDAIConfigTrackerImpl.fromResumptionToken(token, this._ldClient, context); + } } diff --git a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts index adda7c97c8..151a3c1d97 100644 --- a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts @@ -18,6 +18,7 @@ 
export class LDAIConfigTrackerImpl implements LDAIConfigTracker { constructor( private _ldClient: LDClientMin, + private _runId: string, private _configKey: string, private _variationKey: string, private _version: number, @@ -27,16 +28,18 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { ) {} getTrackData(graphKey?: string): { - variationKey: string; + runId: string; configKey: string; + variationKey: string; version: number; modelName: string; providerName: string; graphKey?: string; } { return { - variationKey: this._variationKey, + runId: this._runId, configKey: this._configKey, + variationKey: this._variationKey, version: this._version, modelName: this._modelName, providerName: this._providerName, @@ -44,7 +47,42 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { }; } + get resumptionToken(): string { + const json = JSON.stringify({ + runId: this._runId, + configKey: this._configKey, + variationKey: this._variationKey, + version: this._version, + }); + return Buffer.from(json).toString('base64url'); + } + + static fromResumptionToken( + token: string, + ldClient: LDClientMin, + context: LDContext, + ): LDAIConfigTrackerImpl { + const json = Buffer.from(token, 'base64url').toString('utf8'); + const payload = JSON.parse(json); + return new LDAIConfigTrackerImpl( + ldClient, + payload.runId, + payload.configKey, + payload.variationKey ?? '', + payload.version, + '', + '', + context, + ); + } + trackDuration(duration: number, graphKey?: string): void { + if (this._trackedMetrics.durationMs !== undefined) { + this._ldClient.logger?.warn( + 'Duration has already been tracked for this execution. 
Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.durationMs = duration; this._ldClient.track( '$ld:ai:duration:total', @@ -68,6 +106,12 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } trackTimeToFirstToken(timeToFirstTokenMs: number, graphKey?: string) { + if (this._trackedMetrics.timeToFirstTokenMs !== undefined) { + this._ldClient.logger?.warn( + 'Time to first token has already been tracked for this execution. Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.timeToFirstTokenMs = timeToFirstTokenMs; this._ldClient.track( '$ld:ai:tokens:ttf', @@ -110,6 +154,12 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } trackFeedback(feedback: { kind: LDFeedbackKind }, graphKey?: string): void { + if (this._trackedMetrics.feedback !== undefined) { + this._ldClient.logger?.warn( + 'Feedback has already been tracked for this execution. Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.feedback = feedback; if (feedback.kind === LDFeedbackKind.Positive) { this._ldClient.track( @@ -129,6 +179,12 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } trackSuccess(graphKey?: string): void { + if (this._trackedMetrics.success !== undefined) { + this._ldClient.logger?.warn( + 'Generation result has already been tracked for this execution. Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.success = true; this._ldClient.track( '$ld:ai:generation:success', @@ -139,6 +195,12 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } trackError(graphKey?: string): void { + if (this._trackedMetrics.success !== undefined) { + this._ldClient.logger?.warn( + 'Generation result has already been tracked for this execution. 
Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.success = false; this._ldClient.track('$ld:ai:generation:error', this._context, this.getTrackData(graphKey), 1); } @@ -301,6 +363,12 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } trackTokens(tokens: LDTokenUsage, graphKey?: string): void { + if (this._trackedMetrics.tokens !== undefined) { + this._ldClient.logger?.warn( + 'Token usage has already been tracked for this execution. Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.tokens = tokens; const trackData = this.getTrackData(graphKey); if (tokens.total > 0) { diff --git a/packages/sdk/server-ai/src/api/LDAIClient.ts b/packages/sdk/server-ai/src/api/LDAIClient.ts index 3e4ceca864..fd93ca92a5 100644 --- a/packages/sdk/server-ai/src/api/LDAIClient.ts +++ b/packages/sdk/server-ai/src/api/LDAIClient.ts @@ -7,6 +7,7 @@ import { LDAIAgentRequestConfig, LDAICompletionConfig, LDAICompletionConfigDefault, + LDAIConfigTracker, LDAIJudgeConfig, LDAIJudgeConfigDefault, } from './config'; @@ -325,4 +326,15 @@ export interface LDAIClient { variables?: Record, defaultAiProvider?: SupportedAIProvider, ): Promise; + + /** + * Reconstructs an AIConfigTracker from a resumption token string previously + * obtained from a tracker's `resumptionToken` property. Use this to associate + * deferred events (such as user feedback) with the original invocation's runId. + * + * @param token A URL-safe Base64-encoded resumption token string. + * @param context The evaluation context to use for subsequent track calls. + * @returns A reconstructed AIConfigTracker with the original runId preserved. 
+ */ + createTracker(token: string, context: LDContext): LDAIConfigTracker; } diff --git a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts b/packages/sdk/server-ai/src/api/chat/TrackedChat.ts index 542547bffc..054969dc3d 100644 --- a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts +++ b/packages/sdk/server-ai/src/api/chat/TrackedChat.ts @@ -1,6 +1,5 @@ import { LDLogger } from '@launchdarkly/js-server-sdk-common'; -import { LDAIConfigTracker } from '../config/LDAIConfigTracker'; import { LDAICompletionConfig, LDMessage } from '../config/types'; import { Judge } from '../judge/Judge'; import { JudgeResponse } from '../judge/types'; @@ -18,7 +17,6 @@ export class TrackedChat { constructor( protected readonly aiConfig: LDAICompletionConfig, - protected readonly tracker: LDAIConfigTracker, protected readonly provider: AIProvider, protected readonly judges: Record = {}, private readonly _logger?: LDLogger, @@ -31,6 +29,8 @@ export class TrackedChat { * This method handles conversation management and tracking, delegating to the provider's invokeModel method. 
*/ async invoke(prompt: string): Promise { + const tracker = this.aiConfig.createTracker!(); + // Convert prompt string to LDMessage with role 'user' and add to conversation history const userMessage: LDMessage = { role: 'user', @@ -43,7 +43,7 @@ export class TrackedChat { const allMessages = [...configMessages, ...this.messages]; // Delegate to provider-specific implementation with tracking - const response = await this.tracker.trackMetricsOf( + const response = await tracker.trackMetricsOf( (result: ChatResponse) => result.metrics, () => this.provider.invokeModel(allMessages), ); @@ -52,7 +52,16 @@ export class TrackedChat { this.aiConfig.judgeConfiguration?.judges && this.aiConfig.judgeConfiguration.judges.length > 0 ) { - response.evaluations = this._evaluateWithJudges(this.messages, response); + response.evaluations = this._evaluateWithJudges(this.messages, response).then( + (evaluations) => { + evaluations.forEach((judgeResponse) => { + if (judgeResponse?.success) { + tracker.trackJudgeResponse(judgeResponse); + } + }); + return evaluations; + }, + ); } this.messages.push(response.message); @@ -78,23 +87,12 @@ export class TrackedChat { const judge = this.judges[judgeConfig.key]; if (!judge) { this._logger?.warn( - `Judge configuration is not enabled: ${judgeConfig.key}`, - this.tracker.getTrackData(), + `Judge configuration is not enabled for ${judgeConfig.key} in ${this.aiConfig.key}`, ); return undefined; } - const judgeResponse = await judge.evaluateMessages( - messages, - response, - judgeConfig.samplingRate, - ); - - if (judgeResponse && judgeResponse.success) { - this.tracker.trackJudgeResponse(judgeResponse); - } - - return judgeResponse; + return judge.evaluateMessages(messages, response, judgeConfig.samplingRate); }); // ensure all evaluations complete even if some fail @@ -110,13 +108,6 @@ export class TrackedChat { return this.aiConfig; } - /** - * Get the underlying AI configuration tracker used to initialize this TrackedChat. 
- */ - getTracker(): LDAIConfigTracker { - return this.tracker; - } - /** * Get the underlying AI provider instance. * This provides direct access to the provider for advanced use cases. diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts index 3a40fd3c6d..18b243d94b 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts @@ -41,16 +41,30 @@ export interface LDAIConfigTracker { * @param graphKey When provided, associates this metric with the specified agent graph key. */ getTrackData(graphKey?: string): { - variationKey: string; + runId: string; configKey: string; + variationKey: string; version: number; modelName: string; providerName: string; graphKey?: string; }; + + /** + * A URL-safe Base64-encoded token that encodes the tracker's runId, configKey, + * variationKey, and version. Pass this to AIClient.createTracker() to reconstruct + * the tracker across process boundaries (e.g. for associating deferred feedback + * with the original invocation). + */ + readonly resumptionToken: string; + /** * Track the duration of generation. * + * At-most-once per execution: subsequent calls on the same tracker are dropped + * with a warning. Use createTracker() on the config result to obtain a fresh + * tracker for a new execution. + * * Ideally this would not include overhead time such as network communication. * * @param durationMs The duration in milliseconds. @@ -61,6 +75,9 @@ export interface LDAIConfigTracker { /** * Track information about token usage. * + * At-most-once per execution: subsequent calls on the same tracker are dropped + * with a warning. + * * @param tokens Token usage information. * @param graphKey When provided, associates this metric with the specified agent graph key. */ @@ -69,6 +86,9 @@ export interface LDAIConfigTracker { /** * Generation was successful. 
* + * At-most-once per execution: subsequent calls (including trackError) on the + * same tracker are dropped with a warning. + * * @param graphKey When provided, associates this metric with the specified agent graph key. */ trackSuccess(graphKey?: string): void; @@ -76,6 +96,9 @@ export interface LDAIConfigTracker { /** * An error was encountered during generation. * + * At-most-once per execution: subsequent calls (including trackSuccess) on the + * same tracker are dropped with a warning. + * * @param graphKey When provided, associates this metric with the specified agent graph key. */ trackError(graphKey?: string): void; @@ -83,6 +106,9 @@ export interface LDAIConfigTracker { /** * Track sentiment about the generation. * + * At-most-once per execution: subsequent calls on the same tracker are dropped + * with a warning. + * * @param feedback Feedback about the generation. * @param graphKey When provided, associates this metric with the specified agent graph key. */ @@ -91,6 +117,9 @@ export interface LDAIConfigTracker { /** * Track the time to first token for this generation. * + * At-most-once per execution: subsequent calls on the same tracker are dropped + * with a warning. + * * @param timeToFirstTokenMs The duration in milliseconds. * @param graphKey When provided, associates this metric with the specified agent graph key. */ diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts index 2a926f1c87..74ab8ee30a 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts @@ -82,14 +82,15 @@ export class LDAIConfigUtils { /** * Converts a LaunchDarkly flag value to the appropriate AI configuration type. 
* + * @param key The configuration key * @param flagValue The flag value from LaunchDarkly - * @param tracker The tracker to add to the config + * @param trackerFactory A factory function that creates a new tracker for each execution * @returns The appropriate AI configuration type */ static fromFlagValue( key: string, flagValue: LDAIConfigFlagValue, - tracker: LDAIConfigTracker, + trackerFactory: () => LDAIConfigTracker, ): LDAIConfigKind { // Determine the actual mode from flag value // eslint-disable-next-line no-underscore-dangle @@ -97,12 +98,12 @@ export class LDAIConfigUtils { switch (flagValueMode) { case 'agent': - return this.toAgentConfig(key, flagValue, tracker); + return this.toAgentConfig(key, flagValue, trackerFactory); case 'judge': - return this.toJudgeConfig(key, flagValue, tracker); + return this.toJudgeConfig(key, flagValue, trackerFactory); case 'completion': default: - return this.toCompletionConfig(key, flagValue, tracker); + return this.toCompletionConfig(key, flagValue, trackerFactory); } } @@ -118,13 +119,13 @@ export class LDAIConfigUtils { return { key, enabled: false, - tracker: undefined, + createTracker: undefined, } as LDAIAgentConfig; case 'judge': return { key, enabled: false, - tracker: undefined, + createTracker: undefined, } as LDAIJudgeConfig; case 'completion': default: @@ -132,7 +133,7 @@ export class LDAIConfigUtils { return { key, enabled: false, - tracker: undefined, + createTracker: undefined, } as LDAICompletionConfig; } } @@ -156,18 +157,19 @@ export class LDAIConfigUtils { /** * Creates a completion config from flag value data. 
* + * @param key The configuration key * @param flagValue The flag value from LaunchDarkly - * @param tracker The tracker to add to the config + * @param trackerFactory A factory function that creates a new tracker for each execution * @returns A completion configuration */ static toCompletionConfig( key: string, flagValue: LDAIConfigFlagValue, - tracker: LDAIConfigTracker, + trackerFactory: () => LDAIConfigTracker, ): LDAICompletionConfig { return { ...this._toBaseConfig(key, flagValue), - tracker, + createTracker: trackerFactory, messages: flagValue.messages, judgeConfiguration: flagValue.judgeConfiguration, }; @@ -176,18 +178,19 @@ export class LDAIConfigUtils { /** * Creates an agent config from flag value data. * + * @param key The configuration key * @param flagValue The flag value from LaunchDarkly - * @param tracker The tracker to add to the config + * @param trackerFactory A factory function that creates a new tracker for each execution * @returns An agent configuration */ static toAgentConfig( key: string, flagValue: LDAIConfigFlagValue, - tracker: LDAIConfigTracker, + trackerFactory: () => LDAIConfigTracker, ): LDAIAgentConfig { return { ...this._toBaseConfig(key, flagValue), - tracker, + createTracker: trackerFactory, instructions: flagValue.instructions, judgeConfiguration: flagValue.judgeConfiguration, }; @@ -196,14 +199,15 @@ export class LDAIConfigUtils { /** * Creates a judge config from flag value data. 
* + * @param key The configuration key * @param flagValue The flag value from LaunchDarkly - * @param tracker The tracker to add to the config + * @param trackerFactory A factory function that creates a new tracker for each execution * @returns A judge configuration */ static toJudgeConfig( key: string, flagValue: LDAIConfigFlagValue, - tracker: LDAIConfigTracker, + trackerFactory: () => LDAIConfigTracker, ): LDAIJudgeConfig { // Prioritize evaluationMetricKey, fallback to first valid (non-empty, non-whitespace) value in evaluationMetricKeys let evaluationMetricKey: string | undefined; @@ -218,7 +222,7 @@ export class LDAIConfigUtils { return { ...this._toBaseConfig(key, flagValue), - tracker, + createTracker: trackerFactory, messages: flagValue.messages, evaluationMetricKey, }; diff --git a/packages/sdk/server-ai/src/api/config/types.ts b/packages/sdk/server-ai/src/api/config/types.ts index 44b89160c8..56a54d1ca2 100644 --- a/packages/sdk/server-ai/src/api/config/types.ts +++ b/packages/sdk/server-ai/src/api/config/types.ts @@ -105,10 +105,11 @@ export interface LDAIConfig extends Omit { enabled: boolean; /** - * A tracker which can be used to generate analytics. - * Undefined for disabled configs. + * Creates a new tracker for this AI Config invocation. Each call returns a + * new tracker with a fresh runId. Use createTracker() at the start of each + * execution to obtain a tracker, then use it to record metrics for that run. 
*/ - tracker?: LDAIConfigTracker; + createTracker?: () => LDAIConfigTracker; } // ============================================================================ diff --git a/packages/sdk/server-ai/src/api/judge/Judge.ts b/packages/sdk/server-ai/src/api/judge/Judge.ts index 382addc632..1bab8d1a12 100644 --- a/packages/sdk/server-ai/src/api/judge/Judge.ts +++ b/packages/sdk/server-ai/src/api/judge/Judge.ts @@ -21,7 +21,6 @@ export class Judge { constructor( private readonly _aiConfig: LDAIJudgeConfig, - private readonly _aiConfigTracker: LDAIConfigTracker, private readonly _aiProvider: AIProvider, logger?: LDLogger, ) { @@ -65,21 +64,19 @@ export class Judge { output: string, samplingRate: number = 1, ): Promise { + const tracker = this._aiConfig.createTracker!(); try { const evaluationMetricKey = this._getEvaluationMetricKey(); if (!evaluationMetricKey) { this._logger?.warn( 'Judge configuration is missing required evaluation metric key', - this._aiConfigTracker.getTrackData(), + tracker.getTrackData(), ); return undefined; } if (!this._aiConfig.messages) { - this._logger?.warn( - 'Judge configuration must include messages', - this._aiConfigTracker.getTrackData(), - ); + this._logger?.warn('Judge configuration must include messages', tracker.getTrackData()); return undefined; } @@ -90,19 +87,19 @@ export class Judge { const messages = this._constructEvaluationMessages(input, output); - const response = await this._aiConfigTracker.trackMetricsOf( + const response = await tracker.trackMetricsOf( (result: StructuredResponse) => result.metrics, () => this._aiProvider.invokeStructuredModel(messages, this._evaluationResponseStructure), ); let { success } = response.metrics; - const evals = this._parseEvaluationResponse(response.data, evaluationMetricKey); + const evals = this._parseEvaluationResponse(response.data, evaluationMetricKey, tracker); if (!evals[evaluationMetricKey]) { this._logger?.warn( 'Judge evaluation did not return the expected evaluation', - 
this._aiConfigTracker.getTrackData(), + tracker.getTrackData(), ); success = false; } @@ -149,13 +146,6 @@ export class Judge { return this._aiConfig; } - /** - * Returns the tracker associated with this judge. - */ - getTracker(): LDAIConfigTracker { - return this._aiConfigTracker; - } - /** * Returns the AI provider used by this judge. */ @@ -191,6 +181,7 @@ export class Judge { private _parseEvaluationResponse( data: Record, evaluationMetricKey: string, + tracker: LDAIConfigTracker, ): Record { const evaluations = data.evaluations as Record; const results: Record = {}; @@ -205,7 +196,7 @@ export class Judge { if (!evaluation || typeof evaluation !== 'object') { this._logger?.warn( `Missing evaluation for metric key: ${evaluationMetricKey}`, - this._aiConfigTracker.getTrackData(), + tracker.getTrackData(), ); return results; } @@ -215,7 +206,7 @@ export class Judge { if (typeof evalData.score !== 'number' || evalData.score < 0 || evalData.score > 1) { this._logger?.warn( `Invalid score evaluated for ${evaluationMetricKey}: ${evalData.score}. Score must be a number between 0 and 1 inclusive`, - this._aiConfigTracker.getTrackData(), + tracker.getTrackData(), ); return results; } @@ -223,7 +214,7 @@ export class Judge { if (typeof evalData.reasoning !== 'string') { this._logger?.warn( `Invalid reasoning evaluated for ${evaluationMetricKey}: ${evalData.reasoning}. 
Reasoning must be a string`, - this._aiConfigTracker.getTrackData(), + tracker.getTrackData(), ); return results; } From 367d369d7b55e94488472f367e00d49c3c0003fc Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Thu, 16 Apr 2026 10:11:37 -0500 Subject: [PATCH 3/7] chore: Move graphKey to LDAIConfigTracker constructor (#1279) --- .../__tests__/LDAIClientImpl.test.ts | 18 ++- .../__tests__/LDAIConfigTrackerImpl.test.ts | 146 +++++++++++++++++- packages/sdk/server-ai/src/LDAIClientImpl.ts | 27 ++-- .../server-ai/src/LDAIConfigTrackerImpl.ts | 107 +++++-------- .../src/api/config/LDAIConfigTracker.ts | 43 ++---- 5 files changed, 223 insertions(+), 118 deletions(-) diff --git a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts index 9695c1f815..77af66a0b5 100644 --- a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts @@ -139,7 +139,14 @@ describe('config evaluation', () => { const evaluateSpy = jest.spyOn(client as any, '_evaluate'); const result = await client.agentConfig(key, testContext, defaultValue, variables); - expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'agent', variables); + expect(evaluateSpy).toHaveBeenCalledWith( + key, + testContext, + defaultValue, + 'agent', + variables, + undefined, + ); expect(result.instructions).toBe( 'You are a helpful assistant. 
Your name is John and your score is 42', ); @@ -464,7 +471,14 @@ describe('agentConfig method', () => { key, 1, ); - expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'agent', variables); + expect(evaluateSpy).toHaveBeenCalledWith( + key, + testContext, + defaultValue, + 'agent', + variables, + undefined, + ); expect(result).toBe(mockConfig); evaluateSpy.mockRestore(); }); diff --git a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts index e644eff377..a4b40b62cb 100644 --- a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts @@ -933,7 +933,7 @@ describe('trackToolCall', () => { ); }); - it('includes graphKey when provided', () => { + it('includes graphKey when set on constructor', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, testRunId, @@ -943,9 +943,10 @@ describe('trackToolCall', () => { modelName, providerName, testContext, + 'my-graph', ); - tracker.trackToolCall('my-tool', 'my-graph'); + tracker.trackToolCall('my-tool'); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:tool_call', @@ -993,8 +994,8 @@ describe('trackToolCalls', () => { }); }); -describe('graphKey parameter support', () => { - it('includes graphKey in trackDuration event', () => { +describe('graphKey constructor support', () => { + it('includes graphKey in trackDuration event when set on constructor', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, testRunId, @@ -1004,9 +1005,10 @@ describe('graphKey parameter support', () => { modelName, providerName, testContext, + 'my-graph', ); - tracker.trackDuration(1000, 'my-graph'); + tracker.trackDuration(1000); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:duration:total', @@ -1016,7 +1018,7 @@ describe('graphKey parameter support', () => { ); }); - it('includes graphKey in trackSuccess event', () => { + it('includes graphKey in 
trackSuccess event when set on constructor', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, testRunId, @@ -1026,9 +1028,10 @@ describe('graphKey parameter support', () => { modelName, providerName, testContext, + 'my-graph', ); - tracker.trackSuccess('my-graph'); + tracker.trackSuccess(); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:generation:success', @@ -1038,7 +1041,7 @@ describe('graphKey parameter support', () => { ); }); - it('does not include graphKey when not provided', () => { + it('does not include graphKey when not set on constructor', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, testRunId, @@ -1059,6 +1062,41 @@ describe('graphKey parameter support', () => { 1, ); }); + + it('includes graphKey in getTrackData when set on constructor', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + 'my-graph', + ); + + expect(tracker.getTrackData()).toEqual({ + ...getExpectedTrackData(), + graphKey: 'my-graph', + }); + }); + + it('does not include graphKey in getTrackData when not set', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + expect(tracker.getTrackData()).toEqual(getExpectedTrackData()); + expect('graphKey' in tracker.getTrackData()).toBe(false); + }); }); describe('at-most-once semantics', () => { @@ -1311,4 +1349,96 @@ describe('fromResumptionToken', () => { 1, ); }); + + it('includes graphKey in resumption token when set on constructor', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + 'my-graph', + ); + + const token = tracker.resumptionToken; + const decoded = JSON.parse(Buffer.from(token, 'base64url').toString('utf8')); + + expect(decoded).toEqual({ + 
runId: testRunId, + configKey, + variationKey, + version, + graphKey: 'my-graph', + }); + }); + + it('does not include graphKey in resumption token when not set', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const token = tracker.resumptionToken; + const decoded = JSON.parse(Buffer.from(token, 'base64url').toString('utf8')); + + expect(decoded).toEqual({ + runId: testRunId, + configKey, + variationKey, + version, + }); + expect('graphKey' in decoded).toBe(false); + }); + + it('reconstructs tracker with graphKey from resumption token', () => { + const original = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + 'my-graph', + ); + + const reconstructed = LDAIConfigTrackerImpl.fromResumptionToken( + original.resumptionToken, + mockLdClient, + testContext, + ); + + expect(reconstructed.getTrackData().graphKey).toBe('my-graph'); + }); + + it('reconstructed tracker without graphKey does not include graphKey in track data', () => { + const original = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const reconstructed = LDAIConfigTrackerImpl.fromResumptionToken( + original.resumptionToken, + mockLdClient, + testContext, + ); + + expect('graphKey' in reconstructed.getTrackData()).toBe(false); + }); }); diff --git a/packages/sdk/server-ai/src/LDAIClientImpl.ts b/packages/sdk/server-ai/src/LDAIClientImpl.ts index 209c0ce860..65eb87a1a9 100644 --- a/packages/sdk/server-ai/src/LDAIClientImpl.ts +++ b/packages/sdk/server-ai/src/LDAIClientImpl.ts @@ -74,6 +74,7 @@ export class LDAIClientImpl implements LDAIClient { defaultValue: LDAIConfigDefaultKind, mode: LDAIConfigMode, variables?: Record, + graphKey?: string, ): Promise { const ldFlagValue = 
LDAIConfigUtils.toFlagValue(defaultValue, mode); @@ -101,6 +102,7 @@ export class LDAIClientImpl implements LDAIClient { value.model?.name ?? '', value.provider?.name ?? '', context, + graphKey, ); const config = LDAIConfigUtils.fromFlagValue(key, value, trackerFactory); @@ -217,6 +219,17 @@ export class LDAIClientImpl implements LDAIClient { return this._judgeConfig(key, context, defaultValue ?? disabledAIConfig, variables); } + private async _agentConfig( + key: string, + context: LDContext, + defaultValue: LDAIAgentConfigDefault, + variables?: Record, + graphKey?: string, + ): Promise { + const config = await this._evaluate(key, context, defaultValue, 'agent', variables, graphKey); + return config as LDAIAgentConfig; + } + async agentConfig( key: string, context: LDContext, @@ -224,14 +237,7 @@ export class LDAIClientImpl implements LDAIClient { variables?: Record, ): Promise { this._ldClient.track(TRACK_USAGE_AGENT_CONFIG, context, key, 1); - const config = await this._evaluate( - key, - context, - defaultValue ?? disabledAIConfig, - 'agent', - variables, - ); - return config as LDAIAgentConfig; + return this._agentConfig(key, context, defaultValue ?? disabledAIConfig, variables); } /** @@ -261,14 +267,13 @@ export class LDAIClientImpl implements LDAIClient { await Promise.all( agentConfigs.map(async (config) => { - const agent = await this._evaluate( + const agent = await this._agentConfig( config.key, context, config.defaultValue ?? 
disabledAIConfig, - 'agent', config.variables, ); - agents[config.key as T[number]['key']] = agent as LDAIAgentConfig; + agents[config.key as T[number]['key']] = agent; }), ); diff --git a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts index 151a3c1d97..d87729c14f 100644 --- a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts @@ -25,9 +25,10 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { private _modelName: string, private _providerName: string, private _context: LDContext, + private _graphKey?: string, ) {} - getTrackData(graphKey?: string): { + getTrackData(): { runId: string; configKey: string; variationKey: string; @@ -43,7 +44,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { version: this._version, modelName: this._modelName, providerName: this._providerName, - ...(graphKey !== undefined ? { graphKey } : {}), + ...(this._graphKey !== undefined ? { graphKey: this._graphKey } : {}), }; } @@ -53,6 +54,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { configKey: this._configKey, variationKey: this._variationKey, version: this._version, + ...(this._graphKey !== undefined ? { graphKey: this._graphKey } : {}), }); return Buffer.from(json).toString('base64url'); } @@ -73,10 +75,11 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { '', '', context, + payload.graphKey, ); } - trackDuration(duration: number, graphKey?: string): void { + trackDuration(duration: number): void { if (this._trackedMetrics.durationMs !== undefined) { this._ldClient.logger?.warn( 'Duration has already been tracked for this execution. 
Use createTracker() for a new execution.', @@ -84,15 +87,10 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { return; } this._trackedMetrics.durationMs = duration; - this._ldClient.track( - '$ld:ai:duration:total', - this._context, - this.getTrackData(graphKey), - duration, - ); + this._ldClient.track('$ld:ai:duration:total', this._context, this.getTrackData(), duration); } - async trackDurationOf(func: () => Promise, graphKey?: string): Promise { + async trackDurationOf(func: () => Promise): Promise { const startTime = Date.now(); try { // Be sure to await here so that we can track the duration of the function and also handle errors. @@ -101,11 +99,11 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } finally { const endTime = Date.now(); const duration = endTime - startTime; // duration in milliseconds - this.trackDuration(duration, graphKey); + this.trackDuration(duration); } } - trackTimeToFirstToken(timeToFirstTokenMs: number, graphKey?: string) { + trackTimeToFirstToken(timeToFirstTokenMs: number) { if (this._trackedMetrics.timeToFirstTokenMs !== undefined) { this._ldClient.logger?.warn( 'Time to first token has already been tracked for this execution. 
Use createTracker() for a new execution.', @@ -116,44 +114,39 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { this._ldClient.track( '$ld:ai:tokens:ttf', this._context, - this.getTrackData(graphKey), + this.getTrackData(), timeToFirstTokenMs, ); } - trackEvalScores(scores: Record, graphKey?: string) { + trackEvalScores(scores: Record) { Object.entries(scores).forEach(([metricKey, evalScore]) => { - this._ldClient.track(metricKey, this._context, this.getTrackData(graphKey), evalScore.score); + this._ldClient.track(metricKey, this._context, this.getTrackData(), evalScore.score); }); } - trackJudgeResponse(response: JudgeResponse, graphKey?: string) { + trackJudgeResponse(response: JudgeResponse) { Object.entries(response.evals).forEach(([metricKey, evalScore]) => { this._ldClient.track( metricKey, this._context, - { ...this.getTrackData(graphKey), judgeConfigKey: response.judgeConfigKey }, + { ...this.getTrackData(), judgeConfigKey: response.judgeConfigKey }, evalScore.score, ); }); } - trackToolCall(toolKey: string, graphKey?: string): void { - this._ldClient.track( - '$ld:ai:tool_call', - this._context, - { ...this.getTrackData(graphKey), toolKey }, - 1, - ); + trackToolCall(toolKey: string): void { + this._ldClient.track('$ld:ai:tool_call', this._context, { ...this.getTrackData(), toolKey }, 1); } - trackToolCalls(toolKeys: string[], graphKey?: string): void { + trackToolCalls(toolKeys: string[]): void { toolKeys.forEach((toolKey) => { - this.trackToolCall(toolKey, graphKey); + this.trackToolCall(toolKey); }); } - trackFeedback(feedback: { kind: LDFeedbackKind }, graphKey?: string): void { + trackFeedback(feedback: { kind: LDFeedbackKind }): void { if (this._trackedMetrics.feedback !== undefined) { this._ldClient.logger?.warn( 'Feedback has already been tracked for this execution. 
Use createTracker() for a new execution.', @@ -162,23 +155,13 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } this._trackedMetrics.feedback = feedback; if (feedback.kind === LDFeedbackKind.Positive) { - this._ldClient.track( - '$ld:ai:feedback:user:positive', - this._context, - this.getTrackData(graphKey), - 1, - ); + this._ldClient.track('$ld:ai:feedback:user:positive', this._context, this.getTrackData(), 1); } else if (feedback.kind === LDFeedbackKind.Negative) { - this._ldClient.track( - '$ld:ai:feedback:user:negative', - this._context, - this.getTrackData(graphKey), - 1, - ); + this._ldClient.track('$ld:ai:feedback:user:negative', this._context, this.getTrackData(), 1); } } - trackSuccess(graphKey?: string): void { + trackSuccess(): void { if (this._trackedMetrics.success !== undefined) { this._ldClient.logger?.warn( 'Generation result has already been tracked for this execution. Use createTracker() for a new execution.', @@ -186,15 +169,10 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { return; } this._trackedMetrics.success = true; - this._ldClient.track( - '$ld:ai:generation:success', - this._context, - this.getTrackData(graphKey), - 1, - ); + this._ldClient.track('$ld:ai:generation:success', this._context, this.getTrackData(), 1); } - trackError(graphKey?: string): void { + trackError(): void { if (this._trackedMetrics.success !== undefined) { this._ldClient.logger?.warn( 'Generation result has already been tracked for this execution. 
Use createTracker() for a new execution.', @@ -202,20 +180,19 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { return; } this._trackedMetrics.success = false; - this._ldClient.track('$ld:ai:generation:error', this._context, this.getTrackData(graphKey), 1); + this._ldClient.track('$ld:ai:generation:error', this._context, this.getTrackData(), 1); } async trackMetricsOf( metricsExtractor: (result: TRes) => LDAIMetrics, func: () => Promise, - graphKey?: string, ): Promise { let result: TRes; try { - result = await this.trackDurationOf(func, graphKey); + result = await this.trackDurationOf(func); } catch (err) { - this.trackError(graphKey); + this.trackError(); throw err; } @@ -224,14 +201,14 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { // Track success/error based on metrics if (metrics.success) { - this.trackSuccess(graphKey); + this.trackSuccess(); } else { - this.trackError(graphKey); + this.trackError(); } // Track token usage if available if (metrics.usage) { - this.trackTokens(metrics.usage, graphKey); + this.trackTokens(metrics.usage); } return result; @@ -240,7 +217,6 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { trackStreamMetricsOf( streamCreator: () => TStream, metricsExtractor: (stream: TStream) => Promise, - graphKey?: string, ): TStream { const startTime = Date.now(); @@ -249,14 +225,14 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { const stream = streamCreator(); // Start background metrics tracking (fire and forget) - this._trackStreamMetricsInBackground(stream, metricsExtractor, startTime, graphKey); + this._trackStreamMetricsInBackground(stream, metricsExtractor, startTime); // Return stream immediately for consumption return stream; } catch (error) { // Track error if stream creation fails - this.trackDuration(Date.now() - startTime, graphKey); - this.trackError(graphKey); + this.trackDuration(Date.now() - startTime); + this.trackError(); throw error; } } @@ 
-265,7 +241,6 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { stream: TStream, metricsExtractor: (stream: TStream) => Promise, startTime: number, - graphKey?: string, ): Promise { try { // Wait for metrics to be available @@ -273,21 +248,21 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { // Track success/error based on metrics if (metrics.success) { - this.trackSuccess(graphKey); + this.trackSuccess(); } else { - this.trackError(graphKey); + this.trackError(); } // Track token usage if available if (metrics.usage) { - this.trackTokens(metrics.usage, graphKey); + this.trackTokens(metrics.usage); } } catch (error) { // If metrics extraction fails, track error - this.trackError(graphKey); + this.trackError(); } finally { // Track duration regardless of success/error - this.trackDuration(Date.now() - startTime, graphKey); + this.trackDuration(Date.now() - startTime); } } @@ -362,7 +337,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } } - trackTokens(tokens: LDTokenUsage, graphKey?: string): void { + trackTokens(tokens: LDTokenUsage): void { if (this._trackedMetrics.tokens !== undefined) { this._ldClient.logger?.warn( 'Token usage has already been tracked for this execution. 
Use createTracker() for a new execution.', @@ -370,7 +345,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { return; } this._trackedMetrics.tokens = tokens; - const trackData = this.getTrackData(graphKey); + const trackData = this.getTrackData(); if (tokens.total > 0) { this._ldClient.track('$ld:ai:tokens:total', this._context, trackData, tokens.total); } diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts index 18b243d94b..883177becb 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts @@ -37,10 +37,8 @@ export interface LDAIMetricSummary { export interface LDAIConfigTracker { /** * Get the data for tracking. - * - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - getTrackData(graphKey?: string): { + getTrackData(): { runId: string; configKey: string; variationKey: string; @@ -68,9 +66,8 @@ export interface LDAIConfigTracker { * Ideally this would not include overhead time such as network communication. * * @param durationMs The duration in milliseconds. - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackDuration(durationMs: number, graphKey?: string): void; + trackDuration(durationMs: number): void; /** * Track information about token usage. @@ -79,29 +76,24 @@ export interface LDAIConfigTracker { * with a warning. * * @param tokens Token usage information. - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackTokens(tokens: LDTokenUsage, graphKey?: string): void; + trackTokens(tokens: LDTokenUsage): void; /** * Generation was successful. * * At-most-once per execution: subsequent calls (including trackError) on the * same tracker are dropped with a warning. 
- * - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackSuccess(graphKey?: string): void; + trackSuccess(): void; /** * An error was encountered during generation. * * At-most-once per execution: subsequent calls (including trackSuccess) on the * same tracker are dropped with a warning. - * - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackError(graphKey?: string): void; + trackError(): void; /** * Track sentiment about the generation. @@ -110,9 +102,8 @@ export interface LDAIConfigTracker { * with a warning. * * @param feedback Feedback about the generation. - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackFeedback(feedback: { kind: LDFeedbackKind }, graphKey?: string): void; + trackFeedback(feedback: { kind: LDFeedbackKind }): void; /** * Track the time to first token for this generation. @@ -121,41 +112,36 @@ export interface LDAIConfigTracker { * with a warning. * * @param timeToFirstTokenMs The duration in milliseconds. - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackTimeToFirstToken(timeToFirstTokenMs: number, graphKey?: string): void; + trackTimeToFirstToken(timeToFirstTokenMs: number): void; /** * Track evaluation scores for multiple metrics. * * @param scores Record mapping metric keys to their evaluation scores - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackEvalScores(scores: Record, graphKey?: string): void; + trackEvalScores(scores: Record): void; /** * Track a judge response containing evaluation scores and judge configuration key. * * @param response Judge response containing evaluation scores and judge configuration key - * @param graphKey When provided, associates this metric with the specified agent graph key. 
*/ - trackJudgeResponse(response: JudgeResponse, graphKey?: string): void; + trackJudgeResponse(response: JudgeResponse): void; /** * Track a single tool invocation. * * @param toolKey The identifier of the tool that was invoked. - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackToolCall(toolKey: string, graphKey?: string): void; + trackToolCall(toolKey: string): void; /** * Track multiple tool invocations. * * @param toolKeys The identifiers of the tools that were invoked. - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackToolCalls(toolKeys: string[], graphKey?: string): void; + trackToolCalls(toolKeys: string[]): void; /** * Track the duration of execution of the provided function. @@ -166,10 +152,9 @@ export interface LDAIConfigTracker { * This function does not automatically record an error when the function throws. * * @param func The function to track the duration of. - * @param graphKey When provided, associates this metric with the specified agent graph key. * @returns The result of the function. */ - trackDurationOf(func: () => Promise, graphKey?: string): Promise; + trackDurationOf(func: () => Promise): Promise; /** * Track metrics for a generic AI operation. @@ -183,13 +168,11 @@ export interface LDAIConfigTracker { * * @param metricsExtractor Function that extracts LDAIMetrics from the operation result * @param func Function which executes the operation - * @param graphKey When provided, associates this metric with the specified agent graph key. 
* @returns The result of the operation */ trackMetricsOf( metricsExtractor: (result: TRes) => LDAIMetrics, func: () => Promise, - graphKey?: string, ): Promise; /** @@ -211,13 +194,11 @@ export interface LDAIConfigTracker { * * @param streamCreator Function that creates and returns the stream (synchronous) * @param metricsExtractor Function that asynchronously extracts metrics from the stream - * @param graphKey When provided, associates this metric with the specified agent graph key. * @returns The stream result (returned immediately, not a Promise) */ trackStreamMetricsOf( streamCreator: () => TStream, metricsExtractor: (stream: TStream) => Promise, - graphKey?: string, ): TStream; /** From dd49a79747301208ea89b6713b7a0513fc1c1520 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Thu, 16 Apr 2026 12:32:45 -0500 Subject: [PATCH 4/7] feat!: Flatten JudgeResponse and EvalScore into new LDJudgeResult (#1284) --- .../sdk/server-ai/__tests__/Judge.test.ts | 136 ++++++++++-------- .../__tests__/LDAIConfigTrackerImpl.test.ts | 64 +++++---- .../__tests__/LDGraphTrackerImpl.test.ts | 69 ++++++--- .../examples/direct-judge/src/index.ts | 8 +- .../server-ai/src/LDAIConfigTrackerImpl.ts | 23 ++- .../sdk/server-ai/src/LDGraphTrackerImpl.ts | 19 +-- .../sdk/server-ai/src/api/chat/TrackedChat.ts | 29 ++-- packages/sdk/server-ai/src/api/chat/types.ts | 4 +- .../src/api/config/LDAIConfigTracker.ts | 15 +- .../server-ai/src/api/graph/LDGraphTracker.ts | 10 +- packages/sdk/server-ai/src/api/judge/Judge.ts | 75 +++++----- packages/sdk/server-ai/src/api/judge/index.ts | 2 +- packages/sdk/server-ai/src/api/judge/types.ts | 28 ++-- 13 files changed, 277 insertions(+), 205 deletions(-) diff --git a/packages/sdk/server-ai/__tests__/Judge.test.ts b/packages/sdk/server-ai/__tests__/Judge.test.ts index c0def31740..ee9ff0351f 100644 --- a/packages/sdk/server-ai/__tests__/Judge.test.ts +++ b/packages/sdk/server-ai/__tests__/Judge.test.ts @@ -98,13 +98,11 @@ describe('Judge', () => { ); 
expect(result).toEqual({ - evals: { - relevance: { - score: 0.8, - reasoning: 'The response is relevant to the question', - }, - }, + score: 0.8, + reasoning: 'The response is relevant to the question', + metricKey: 'relevance', success: true, + sampled: true, judgeConfigKey: 'test-judge', }); @@ -148,12 +146,11 @@ describe('Judge', () => { const result = await judge.evaluate('test input', 'test output'); expect(result).toBeDefined(); - expect(result?.evals).toHaveProperty('relevance'); - expect(result?.evals.relevance.score).toBe(0.85); - expect(result?.judgeConfigKey).toBe('test-judge'); - expect(result?.success).toBe(true); - // Verify the evaluationMetricKey from config is used in the result - expect(Object.keys(result?.evals || {})).toContain(judgeConfig.evaluationMetricKey); + expect(result.score).toBe(0.85); + expect(result.metricKey).toBe('relevance'); + expect(result.judgeConfigKey).toBe('test-judge'); + expect(result.success).toBe(true); + expect(result.sampled).toBe(true); }); it('handles sampling rate correctly', async () => { @@ -183,18 +180,23 @@ describe('Judge', () => { const result = await judge.evaluate('test input', 'test output', 0.5); expect(result).toBeDefined(); + expect(result.sampled).toBe(true); expect(mockProvider.invokeStructuredModel).toHaveBeenCalled(); Math.random = originalRandom; }); - it('returns undefined when not sampled', async () => { + it('returns unsampled result when skipped by sampling', async () => { const originalRandom = Math.random; Math.random = jest.fn().mockReturnValue(0.8); const result = await judge.evaluate('test input', 'test output', 0.5); - expect(result).toBeUndefined(); + expect(result).toEqual({ + success: false, + sampled: false, + judgeConfigKey: 'test-judge', + }); expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); expect(mockLogger.debug).toHaveBeenCalledWith( 'Judge evaluation skipped due to sampling rate: 0.5', @@ -203,7 +205,7 @@ describe('Judge', () => { Math.random = originalRandom; 
}); - it('returns undefined when evaluationMetricKey and evaluationMetricKeys are both missing', async () => { + it('returns error result when evaluationMetricKey and evaluationMetricKeys are both missing', async () => { const configWithoutMetrics: LDAIJudgeConfig = { ...judgeConfig, evaluationMetricKey: undefined, @@ -213,7 +215,12 @@ describe('Judge', () => { const result = await judgeWithoutMetrics.evaluate('test input', 'test output'); - expect(result).toBeUndefined(); + expect(result).toEqual({ + success: false, + sampled: true, + errorMessage: 'Judge configuration is missing required evaluation metric key', + judgeConfigKey: 'test-judge', + }); expect(mockLogger.warn).toHaveBeenCalledWith( 'Judge configuration is missing required evaluation metric key', mockTrackData, @@ -251,10 +258,11 @@ describe('Judge', () => { const result = await judgeWithSingleKey.evaluate('test input', 'test output'); expect(result).toEqual({ - evals: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', + metricKey: 'relevance', success: true, + sampled: true, judgeConfigKey: 'test-judge', }); }); @@ -290,10 +298,11 @@ describe('Judge', () => { const result = await judgeWithLegacyKeys.evaluate('test input', 'test output'); expect(result).toEqual({ - evals: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', + metricKey: 'relevance', success: true, + sampled: true, judgeConfigKey: 'test-judge', }); }); @@ -330,10 +339,11 @@ describe('Judge', () => { // Should skip empty and whitespace strings, use first valid value expect(result).toEqual({ - evals: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', + metricKey: 'relevance', success: true, + sampled: true, judgeConfigKey: 'test-judge', }); }); @@ -369,15 +379,16 @@ describe('Judge', () => { const result = 
await judgeWithBoth.evaluate('test input', 'test output'); expect(result).toEqual({ - evals: { - helpfulness: { score: 0.7, reasoning: 'The response is helpful' }, - }, + score: 0.7, + reasoning: 'The response is helpful', + metricKey: 'helpfulness', success: true, + sampled: true, judgeConfigKey: 'test-judge', }); }); - it('returns undefined when messages are missing', async () => { + it('returns error result when messages are missing', async () => { const configWithoutMessages: LDAIJudgeConfig = { ...judgeConfig, messages: undefined, @@ -386,14 +397,19 @@ describe('Judge', () => { const result = await judgeWithoutMessages.evaluate('test input', 'test output'); - expect(result).toBeUndefined(); + expect(result).toEqual({ + success: false, + sampled: true, + errorMessage: 'Judge configuration must include messages', + judgeConfigKey: 'test-judge', + }); expect(mockLogger.warn).toHaveBeenCalledWith( 'Judge configuration must include messages', mockTrackData, ); }); - it('returns empty evaluations with success false when expected metric is missing', async () => { + it('returns result with success false when expected metric is missing', async () => { const mockStructuredResponse: StructuredResponse = { data: { evaluations: { @@ -417,13 +433,13 @@ describe('Judge', () => { const result = await judge.evaluate('test input', 'test output'); expect(result).toEqual({ - evals: {}, success: false, + sampled: true, judgeConfigKey: 'test-judge', }); }); - it('returns empty evaluations when response structure is malformed', async () => { + it('returns result with success false when response structure is malformed', async () => { const mockStructuredResponse: StructuredResponse = { data: { relevance: { score: 0.8, reasoning: 'Good' }, @@ -447,8 +463,8 @@ describe('Judge', () => { const result = await judge.evaluate('test input', 'test output'); expect(result).toEqual({ - evals: {}, success: false, + sampled: true, judgeConfigKey: 'test-judge', }); }); @@ -460,9 +476,9 @@ 
describe('Judge', () => { const result = await judge.evaluate('test input', 'test output'); expect(result).toEqual({ - evals: {}, success: false, - error: 'Provider error', + sampled: true, + errorMessage: 'Provider error', judgeConfigKey: 'test-judge', }); expect(mockLogger.error).toHaveBeenCalledWith('Judge evaluation failed:', error); @@ -474,9 +490,9 @@ describe('Judge', () => { const result = await judge.evaluate('test input', 'test output'); expect(result).toEqual({ - evals: {}, success: false, - error: 'Unknown error', + sampled: true, + errorMessage: 'Unknown error', judgeConfigKey: 'test-judge', }); }); @@ -522,13 +538,11 @@ describe('Judge', () => { const result = await judge.evaluateMessages(messages, response); expect(result).toEqual({ - evals: { - relevance: { - score: 0.8, - reasoning: 'The response is relevant to the question', - }, - }, + score: 0.8, + reasoning: 'The response is relevant to the question', + metricKey: 'relevance', success: true, + sampled: true, judgeConfigKey: 'test-judge', }); @@ -560,7 +574,11 @@ describe('Judge', () => { const result = await judge.evaluateMessages(messages, response, 0.5); - expect(result).toBeUndefined(); + expect(result).toEqual({ + success: false, + sampled: false, + judgeConfigKey: 'test-judge', + }); expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); Math.random = originalRandom; @@ -611,11 +629,12 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({ - relevance: { score: 0.8, reasoning: 'Good' }, + score: 0.8, + reasoning: 'Good', }); }); - it('returns empty object for invalid response data', () => { + it('returns undefined for invalid response data', () => { // eslint-disable-next-line no-underscore-dangle const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); const responseData = { @@ -624,7 +643,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - 
expect(result).toEqual({}); + expect(result).toBeUndefined(); }); it('handles missing score or reasoning fields', () => { @@ -638,7 +657,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - expect(result).toEqual({}); + expect(result).toBeUndefined(); }); it('handles invalid score values out of range', () => { @@ -652,7 +671,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - expect(result).toEqual({}); + expect(result).toBeUndefined(); expect(mockLogger.warn).toHaveBeenCalledWith( expect.stringContaining('Invalid score evaluated for relevance: 1.5'), mockTrackData, @@ -670,7 +689,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - expect(result).toEqual({}); + expect(result).toBeUndefined(); expect(mockLogger.warn).toHaveBeenCalledWith( expect.stringContaining('Invalid score evaluated for relevance: -0.1'), mockTrackData, @@ -688,7 +707,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - expect(result).toEqual({}); + expect(result).toBeUndefined(); expect(mockLogger.warn).toHaveBeenCalledWith( expect.stringContaining('Invalid reasoning evaluated for relevance: 123'), mockTrackData, @@ -706,7 +725,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - expect(result).toEqual({}); + expect(result).toBeUndefined(); expect(mockLogger.warn).toHaveBeenCalledWith( 'Missing evaluation for metric key: relevance', mockTrackData, @@ -723,7 +742,12 @@ describe('Judge', () => { const result = await judgeWithEmptyKeys.evaluate('test input', 'test output'); - expect(result).toBeUndefined(); + expect(result).toEqual({ + success: false, + sampled: true, + errorMessage: 'Judge configuration is missing required evaluation metric key', + judgeConfigKey: 'test-judge', + }); expect(mockLogger.warn).toHaveBeenCalledWith( 'Judge configuration 
is missing required evaluation metric key', mockTrackData, @@ -741,7 +765,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - expect(result).toEqual({}); + expect(result).toBeUndefined(); expect(mockLogger.warn).toHaveBeenCalledWith( 'Missing evaluation for metric key: relevance', mockTrackData, @@ -759,7 +783,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - expect(result).toEqual({}); + expect(result).toBeUndefined(); expect(mockLogger.warn).toHaveBeenCalledWith( 'Missing evaluation for metric key: relevance', mockTrackData, diff --git a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts index a4b40b62cb..4263bc3048 100644 --- a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts @@ -841,8 +841,8 @@ describe('trackMetricsOf', () => { }); }); -describe('trackJudgeResponse', () => { - it('tracks evaluation metric key with score', () => { +describe('trackJudgeResult', () => { + it('tracks metric key with score', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, testRunId, @@ -854,15 +854,14 @@ describe('trackJudgeResponse', () => { testContext, ); - const judgeResponse = { + tracker.trackJudgeResult({ judgeConfigKey: 'test-judge', - evals: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, success: true, - }; - - tracker.trackJudgeResponse(judgeResponse); + sampled: true, + score: 0.8, + reasoning: 'The response is relevant', + metricKey: 'relevance', + }); expect(mockTrack).toHaveBeenCalledWith( 'relevance', @@ -872,7 +871,7 @@ describe('trackJudgeResponse', () => { ); }); - it('tracks multiple evaluation metrics when present', () => { + it('does not track when sampled is false', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, testRunId, @@ -884,29 
+883,38 @@ describe('trackJudgeResponse', () => { testContext, ); - const judgeResponse = { + tracker.trackJudgeResult({ judgeConfigKey: 'test-judge', - evals: { - relevance: { score: 0.8, reasoning: 'Relevant' }, - accuracy: { score: 0.9, reasoning: 'Accurate' }, - }, - success: true, - }; + success: false, + sampled: false, + score: 0.8, + metricKey: 'relevance', + }); - tracker.trackJudgeResponse(judgeResponse); + expect(mockTrack).not.toHaveBeenCalled(); + }); - expect(mockTrack).toHaveBeenCalledWith( - 'relevance', - testContext, - { ...getExpectedTrackData(), judgeConfigKey: 'test-judge' }, - 0.8, - ); - expect(mockTrack).toHaveBeenCalledWith( - 'accuracy', + it('does not track when success is false', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, testContext, - { ...getExpectedTrackData(), judgeConfigKey: 'test-judge' }, - 0.9, ); + + tracker.trackJudgeResult({ + judgeConfigKey: 'test-judge', + success: false, + sampled: true, + score: 0.8, + metricKey: 'relevance', + }); + + expect(mockTrack).not.toHaveBeenCalled(); }); }); diff --git a/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts index fe42bf4e4d..77af551302 100644 --- a/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts @@ -140,7 +140,7 @@ it('tracks path', () => { ); }); -it('tracks judge response', () => { +it('tracks judge result', () => { const tracker = new LDGraphTrackerImpl( mockLdClient, graphKey, @@ -148,15 +148,14 @@ it('tracks judge response', () => { version, testContext, ); - const response = { + tracker.trackJudgeResult({ judgeConfigKey: 'my-judge', - evals: { - relevance: { score: 0.9, reasoning: 'Relevant' }, - accuracy: { score: 0.85, reasoning: 'Accurate' }, - }, success: true, - }; - tracker.trackJudgeResponse(response); + sampled: 
true, + score: 0.9, + reasoning: 'Relevant', + metricKey: 'relevance', + }); expect(mockTrack).toHaveBeenCalledWith( 'relevance', @@ -164,15 +163,9 @@ it('tracks judge response', () => { { ...getExpectedTrackData(), judgeConfigKey: 'my-judge' }, 0.9, ); - expect(mockTrack).toHaveBeenCalledWith( - 'accuracy', - testContext, - { ...getExpectedTrackData(), judgeConfigKey: 'my-judge' }, - 0.85, - ); }); -it('tracks judge response without judgeConfigKey', () => { +it('tracks judge result without judgeConfigKey', () => { const tracker = new LDGraphTrackerImpl( mockLdClient, graphKey, @@ -180,15 +173,53 @@ it('tracks judge response without judgeConfigKey', () => { version, testContext, ); - const response = { - evals: { relevance: { score: 0.7, reasoning: 'Somewhat relevant' } }, + tracker.trackJudgeResult({ success: true, - }; - tracker.trackJudgeResponse(response); + sampled: true, + score: 0.7, + reasoning: 'Somewhat relevant', + metricKey: 'relevance', + }); expect(mockTrack).toHaveBeenCalledWith('relevance', testContext, getExpectedTrackData(), 0.7); }); +it('does not track judge result when not sampled', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackJudgeResult({ + judgeConfigKey: 'my-judge', + success: false, + sampled: false, + }); + + expect(mockTrack).not.toHaveBeenCalled(); +}); + +it('does not track judge result when success is false', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackJudgeResult({ + judgeConfigKey: 'my-judge', + success: false, + sampled: true, + score: 0.9, + metricKey: 'relevance', + }); + + expect(mockTrack).not.toHaveBeenCalled(); +}); + it('tracks redirect', () => { const tracker = new LDGraphTrackerImpl( mockLdClient, diff --git a/packages/sdk/server-ai/examples/direct-judge/src/index.ts b/packages/sdk/server-ai/examples/direct-judge/src/index.ts index 
349b72f1a9..0be897e32c 100644 --- a/packages/sdk/server-ai/examples/direct-judge/src/index.ts +++ b/packages/sdk/server-ai/examples/direct-judge/src/index.ts @@ -65,13 +65,13 @@ async function main() { console.log('Input:', input); console.log('Output:', output); - const judgeResponse = await judge.evaluate(input, output); + const judgeResult = await judge.evaluate(input, output); - // Track the judge evaluation scores on the tracker for the aiConfig you are evaluating. + // Track the judge result on the tracker for the aiConfig you are evaluating. // Example: - // aiConfig.tracker.trackEvalScores(judgeResponse?.evals); + // aiConfig.tracker.trackJudgeResult(judgeResult); - console.log('Judge Response:', judgeResponse); + console.log('Judge Result:', judgeResult); console.log('Success.'); } catch (err) { diff --git a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts index d87729c14f..b3ed3ae9f1 100644 --- a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts @@ -2,7 +2,7 @@ import { LDContext } from '@launchdarkly/js-server-sdk-common'; import { LDAIConfigTracker } from './api/config'; import { LDAIMetricSummary } from './api/config/LDAIConfigTracker'; -import { EvalScore, JudgeResponse } from './api/judge/types'; +import { LDJudgeResult } from './api/judge/types'; import { createBedrockTokenUsage, createOpenAiUsage, @@ -119,21 +119,18 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { ); } - trackEvalScores(scores: Record) { - Object.entries(scores).forEach(([metricKey, evalScore]) => { - this._ldClient.track(metricKey, this._context, this.getTrackData(), evalScore.score); - }); - } - - trackJudgeResponse(response: JudgeResponse) { - Object.entries(response.evals).forEach(([metricKey, evalScore]) => { + trackJudgeResult(result: LDJudgeResult) { + if (!result.sampled || !result.success) { + return; + } + if (result.metricKey !== 
undefined && result.score !== undefined) { this._ldClient.track( - metricKey, + result.metricKey, this._context, - { ...this.getTrackData(), judgeConfigKey: response.judgeConfigKey }, - evalScore.score, + { ...this.getTrackData(), judgeConfigKey: result.judgeConfigKey }, + result.score, ); - }); + } } trackToolCall(toolKey: string): void { diff --git a/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts index 4c08e26a58..d1f0602f50 100644 --- a/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts @@ -1,7 +1,7 @@ import { LDContext } from '@launchdarkly/js-server-sdk-common'; import { LDGraphMetricSummary, LDGraphTracker } from './api/graph/LDGraphTracker'; -import { JudgeResponse } from './api/judge/types'; +import { LDJudgeResult } from './api/judge/types'; import { LDTokenUsage } from './api/metrics'; import { LDClientMin } from './LDClientMin'; @@ -76,14 +76,17 @@ export class LDGraphTrackerImpl implements LDGraphTracker { this._ldClient.track('$ld:ai:graph:path', this._context, { ...this.getTrackData(), path }, 1); } - trackJudgeResponse(response: JudgeResponse): void { - const trackData = response.judgeConfigKey - ? { ...this.getTrackData(), judgeConfigKey: response.judgeConfigKey } - : this.getTrackData(); + trackJudgeResult(result: LDJudgeResult): void { + if (!result.sampled || !result.success) { + return; + } + if (result.metricKey !== undefined && result.score !== undefined) { + const trackData = result.judgeConfigKey + ? 
{ ...this.getTrackData(), judgeConfigKey: result.judgeConfigKey } + : this.getTrackData(); - Object.entries(response.evals).forEach(([metricKey, evalScore]) => { - this._ldClient.track(metricKey, this._context, trackData, evalScore.score); - }); + this._ldClient.track(result.metricKey, this._context, trackData, result.score); + } } trackRedirect(sourceKey: string, redirectedTarget: string): void { diff --git a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts b/packages/sdk/server-ai/src/api/chat/TrackedChat.ts index 054969dc3d..2d5b21a85f 100644 --- a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts +++ b/packages/sdk/server-ai/src/api/chat/TrackedChat.ts @@ -2,7 +2,7 @@ import { LDLogger } from '@launchdarkly/js-server-sdk-common'; import { LDAICompletionConfig, LDMessage } from '../config/types'; import { Judge } from '../judge/Judge'; -import { JudgeResponse } from '../judge/types'; +import { LDJudgeResult } from '../judge/types'; import { AIProvider } from '../providers/AIProvider'; import { ChatResponse } from './types'; @@ -54,10 +54,8 @@ export class TrackedChat { ) { response.evaluations = this._evaluateWithJudges(this.messages, response).then( (evaluations) => { - evaluations.forEach((judgeResponse) => { - if (judgeResponse?.success) { - tracker.trackJudgeResponse(judgeResponse); - } + evaluations.forEach((judgeResult) => { + tracker.trackJudgeResult(judgeResult); }); return evaluations; }, @@ -79,7 +77,7 @@ export class TrackedChat { private async _evaluateWithJudges( messages: LDMessage[], response: ChatResponse, - ): Promise> { + ): Promise { const judgeConfigs = this.aiConfig.judgeConfiguration!.judges; // Start all judge evaluations in parallel @@ -89,7 +87,12 @@ export class TrackedChat { this._logger?.warn( `Judge configuration is not enabled for ${judgeConfig.key} in ${this.aiConfig.key}`, ); - return undefined; + const result: LDJudgeResult = { + success: false, + sampled: true, + errorMessage: `Judge configuration is not enabled for 
${judgeConfig.key}`, + }; + return result; } return judge.evaluateMessages(messages, response, judgeConfig.samplingRate); @@ -98,7 +101,17 @@ export class TrackedChat { // ensure all evaluations complete even if some fail const results = await Promise.allSettled(evaluationPromises); - return results.map((result) => (result.status === 'fulfilled' ? result.value : undefined)); + return results.map((settled) => { + if (settled.status === 'fulfilled') { + return settled.value; + } + const result: LDJudgeResult = { + success: false, + sampled: true, + errorMessage: 'Judge evaluation failed', + }; + return result; + }); } /** diff --git a/packages/sdk/server-ai/src/api/chat/types.ts b/packages/sdk/server-ai/src/api/chat/types.ts index 5b32109fcf..19173e30f8 100644 --- a/packages/sdk/server-ai/src/api/chat/types.ts +++ b/packages/sdk/server-ai/src/api/chat/types.ts @@ -1,5 +1,5 @@ import { LDMessage } from '../config/types'; -import { JudgeResponse } from '../judge/types'; +import { LDJudgeResult } from '../judge/types'; import { LDAIMetrics } from '../metrics/LDAIMetrics'; /** @@ -20,5 +20,5 @@ export interface ChatResponse { * Promise that resolves to judge evaluation results. * Only present when judges are configured for evaluation. */ - evaluations?: Promise>; + evaluations?: Promise; } diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts index 883177becb..e0aff2c6b5 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts @@ -1,4 +1,4 @@ -import { EvalScore, JudgeResponse } from '../judge/types'; +import { LDJudgeResult } from '../judge/types'; import { LDAIMetrics, LDFeedbackKind, LDTokenUsage } from '../metrics'; /** @@ -116,18 +116,13 @@ export interface LDAIConfigTracker { trackTimeToFirstToken(timeToFirstTokenMs: number): void; /** - * Track evaluation scores for multiple metrics. 
+ * Track a judge evaluation result. * - * @param scores Record mapping metric keys to their evaluation scores - */ - trackEvalScores(scores: Record): void; - - /** - * Track a judge response containing evaluation scores and judge configuration key. + * No event is emitted when the result was not sampled (result.sampled is false). * - * @param response Judge response containing evaluation scores and judge configuration key + * @param result Judge result containing score, reasoning, and metadata */ - trackJudgeResponse(response: JudgeResponse): void; + trackJudgeResult(result: LDJudgeResult): void; /** * Track a single tool invocation. diff --git a/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts index 94cf30658f..9ce432d1db 100644 --- a/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts +++ b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts @@ -1,4 +1,4 @@ -import { JudgeResponse } from '../judge/types'; +import { LDJudgeResult } from '../judge/types'; import { LDTokenUsage } from '../metrics'; /** @@ -83,11 +83,13 @@ export interface LDGraphTracker { trackPath(path: string[]): void; /** - * Track judge responses for the final graph output. + * Track a judge evaluation result for the final graph output. * - * @param response Judge response containing evaluation scores. + * No event is emitted when the result was not sampled (result.sampled is false). + * + * @param result Judge result containing score, reasoning, and metadata. */ - trackJudgeResponse(response: JudgeResponse): void; + trackJudgeResult(result: LDJudgeResult): void; /** * Track when a node redirects to a different target than originally specified. 
diff --git a/packages/sdk/server-ai/src/api/judge/Judge.ts b/packages/sdk/server-ai/src/api/judge/Judge.ts index 1bab8d1a12..e36ab138cd 100644 --- a/packages/sdk/server-ai/src/api/judge/Judge.ts +++ b/packages/sdk/server-ai/src/api/judge/Judge.ts @@ -7,7 +7,7 @@ import { LDAIConfigTracker } from '../config/LDAIConfigTracker'; import { LDAIJudgeConfig, LDMessage } from '../config/types'; import { AIProvider } from '../providers/AIProvider'; import { EvaluationSchemaBuilder } from './EvaluationSchemaBuilder'; -import { EvalScore, JudgeResponse, StructuredResponse } from './types'; +import { LDJudgeResult, StructuredResponse } from './types'; /** * Judge implementation that handles evaluation functionality and conversation management. @@ -57,13 +57,15 @@ export class Judge { * @param input The input prompt or question that was provided to the AI * @param output The AI-generated response to be evaluated * @param samplingRate Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1) - * @returns Promise that resolves to evaluation results or undefined if not sampled + * @returns Promise that resolves to evaluation results */ - async evaluate( - input: string, - output: string, - samplingRate: number = 1, - ): Promise { + async evaluate(input: string, output: string, samplingRate: number = 1): Promise { + const result: LDJudgeResult = { + success: false, + sampled: false, + judgeConfigKey: this._aiConfig.key, + }; + const tracker = this._aiConfig.createTracker!(); try { const evaluationMetricKey = this._getEvaluationMetricKey(); @@ -72,51 +74,54 @@ export class Judge { 'Judge configuration is missing required evaluation metric key', tracker.getTrackData(), ); - return undefined; + result.sampled = true; + result.errorMessage = 'Judge configuration is missing required evaluation metric key'; + return result; } if (!this._aiConfig.messages) { this._logger?.warn('Judge configuration must include messages', tracker.getTrackData()); - return 
undefined; + result.sampled = true; + result.errorMessage = 'Judge configuration must include messages'; + return result; } if (Math.random() > samplingRate) { this._logger?.debug(`Judge evaluation skipped due to sampling rate: ${samplingRate}`); - return undefined; + return result; } + result.sampled = true; + const messages = this._constructEvaluationMessages(input, output); const response = await tracker.trackMetricsOf( - (result: StructuredResponse) => result.metrics, + (r: StructuredResponse) => r.metrics, () => this._aiProvider.invokeStructuredModel(messages, this._evaluationResponseStructure), ); - let { success } = response.metrics; - - const evals = this._parseEvaluationResponse(response.data, evaluationMetricKey, tracker); + const evalResult = this._parseEvaluationResponse(response.data, evaluationMetricKey, tracker); - if (!evals[evaluationMetricKey]) { + if (!evalResult) { this._logger?.warn( 'Judge evaluation did not return the expected evaluation', tracker.getTrackData(), ); - success = false; + return result; } return { - evals, - success, - judgeConfigKey: this._aiConfig.key, + ...result, + success: response.metrics.success, + score: evalResult.score, + reasoning: evalResult.reasoning, + metricKey: evaluationMetricKey, }; } catch (error) { this._logger?.error('Judge evaluation failed:', error); - return { - evals: {}, - success: false, - error: error instanceof Error ? error.message : 'Unknown error', - judgeConfigKey: this._aiConfig.key, - }; + result.sampled = true; + result.errorMessage = error instanceof Error ? 
error.message : 'Unknown error'; + return result; } } @@ -126,13 +131,13 @@ export class Judge { * @param messages Array of messages representing the conversation history * @param response The AI response to be evaluated * @param samplingRatio Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1) - * @returns Promise that resolves to evaluation results or undefined if not sampled + * @returns Promise that resolves to evaluation results */ async evaluateMessages( messages: LDMessage[], response: ChatResponse, samplingRatio: number = 1, - ): Promise { + ): Promise { const input = messages.length === 0 ? '' : messages.map((msg) => msg.content).join('\r\n'); const output = response.message.content; @@ -177,18 +182,18 @@ export class Judge { /** * Parses the structured evaluation response from the AI provider. + * Returns score and reasoning, or undefined if parsing fails. */ private _parseEvaluationResponse( data: Record, evaluationMetricKey: string, tracker: LDAIConfigTracker, - ): Record { + ): { score: number; reasoning: string } | undefined { const evaluations = data.evaluations as Record; - const results: Record = {}; if (!data.evaluations || typeof data.evaluations !== 'object') { this._logger?.warn('Invalid response: missing or invalid evaluations object'); - return results; + return undefined; } const evaluation = evaluations[evaluationMetricKey]; @@ -198,7 +203,7 @@ export class Judge { `Missing evaluation for metric key: ${evaluationMetricKey}`, tracker.getTrackData(), ); - return results; + return undefined; } const evalData = evaluation as Record; @@ -208,7 +213,7 @@ export class Judge { `Invalid score evaluated for ${evaluationMetricKey}: ${evalData.score}. 
Score must be a number between 0 and 1 inclusive`, tracker.getTrackData(), ); - return results; + return undefined; } if (typeof evalData.reasoning !== 'string') { @@ -216,14 +221,12 @@ export class Judge { `Invalid reasoning evaluated for ${evaluationMetricKey}: ${evalData.reasoning}. Reasoning must be a string`, tracker.getTrackData(), ); - return results; + return undefined; } - results[evaluationMetricKey] = { + return { score: evalData.score, reasoning: evalData.reasoning, }; - - return results; } } diff --git a/packages/sdk/server-ai/src/api/judge/index.ts b/packages/sdk/server-ai/src/api/judge/index.ts index 912ec47fb0..ca86630278 100644 --- a/packages/sdk/server-ai/src/api/judge/index.ts +++ b/packages/sdk/server-ai/src/api/judge/index.ts @@ -1,2 +1,2 @@ export { Judge } from './Judge'; -export type { EvalScore, JudgeResponse, StructuredResponse } from './types'; +export type { LDJudgeResult, StructuredResponse } from './types'; diff --git a/packages/sdk/server-ai/src/api/judge/types.ts b/packages/sdk/server-ai/src/api/judge/types.ts index 68ad141c89..b9d8a05a46 100644 --- a/packages/sdk/server-ai/src/api/judge/types.ts +++ b/packages/sdk/server-ai/src/api/judge/types.ts @@ -17,25 +17,21 @@ export interface StructuredResponse { } /** - * Score and reasoning for a single evaluation metric. + * Result from a judge evaluation containing score, reasoning, and metadata. */ -export interface EvalScore { - /** Score between 0.0 and 1.0 indicating the evaluation result for this metric */ - score: number; - /** Reasoning behind the provided score for this metric */ - reasoning: string; -} - -/** - * Response from a judge evaluation containing scores and reasoning for multiple metrics. 
- */ -export interface JudgeResponse { - /** The key of the judge configuration that was used to generate this response */ +export interface LDJudgeResult { + /** The key of the judge configuration that was used to generate this result */ judgeConfigKey?: string; - /** Dictionary where keys are metric names and values contain score and reasoning */ - evals: Record; /** Whether the evaluation completed successfully */ success: boolean; /** Error message if evaluation failed */ - error?: string; + errorMessage?: string; + /** Whether this evaluation was sampled (i.e. actually run). False when skipped by sampling. */ + sampled: boolean; + /** The metric key for this evaluation */ + metricKey?: string; + /** Score between 0.0 and 1.0 indicating the evaluation result */ + score?: number; + /** Reasoning behind the provided score */ + reasoning?: string; } From 524c99e60b1bd1621bf49452e00ab9a240819c8e Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Fri, 17 Apr 2026 11:42:59 -0500 Subject: [PATCH 5/7] feat: simplify evaluation schema to flat score/reasoning shape (#1286) --- .../sdk/server-ai/__tests__/Judge.test.ts | 229 +++++------------- .../src/api/judge/EvaluationSchemaBuilder.ts | 49 ---- packages/sdk/server-ai/src/api/judge/Judge.ts | 66 +++-- 3 files changed, 89 insertions(+), 255 deletions(-) delete mode 100644 packages/sdk/server-ai/src/api/judge/EvaluationSchemaBuilder.ts diff --git a/packages/sdk/server-ai/__tests__/Judge.test.ts b/packages/sdk/server-ai/__tests__/Judge.test.ts index ee9ff0351f..43ea75e0ab 100644 --- a/packages/sdk/server-ai/__tests__/Judge.test.ts +++ b/packages/sdk/server-ai/__tests__/Judge.test.ts @@ -70,14 +70,12 @@ describe('Judge', () => { it('evaluates AI response successfully', async () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, - }, + score: 0.8, + reasoning: 'The response is relevant to the question', }, 
rawResponse: JSON.stringify({ - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, - }, + score: 0.8, + reasoning: 'The response is relevant to the question', }), metrics: { success: true, @@ -125,14 +123,12 @@ describe('Judge', () => { it('returns evaluation result with correct evaluationMetricKey for tracker integration', async () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - relevance: { score: 0.85, reasoning: 'Highly relevant response' }, - }, + score: 0.85, + reasoning: 'Highly relevant response', }, rawResponse: JSON.stringify({ - evaluations: { - relevance: { score: 0.85, reasoning: 'Highly relevant response' }, - }, + score: 0.85, + reasoning: 'Highly relevant response', }), metrics: { success: true, @@ -159,14 +155,12 @@ describe('Judge', () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - relevance: { score: 0.8, reasoning: 'Good' }, - }, + score: 0.8, + reasoning: 'Good', }, rawResponse: JSON.stringify({ - evaluations: { - relevance: { score: 0.8, reasoning: 'Good' }, - }, + score: 0.8, + reasoning: 'Good', }), metrics: { success: true, @@ -237,14 +231,12 @@ describe('Judge', () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', }, rawResponse: JSON.stringify({ - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', }), metrics: { success: true, @@ -277,14 +269,12 @@ describe('Judge', () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', }, rawResponse: JSON.stringify({ - evaluations: { - relevance: { score: 0.8, reasoning: 'The 
response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', }), metrics: { success: true, @@ -317,14 +307,12 @@ describe('Judge', () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', }, rawResponse: JSON.stringify({ - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', }), metrics: { success: true, @@ -358,14 +346,12 @@ describe('Judge', () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - helpfulness: { score: 0.7, reasoning: 'The response is helpful' }, - }, + score: 0.7, + reasoning: 'The response is helpful', }, rawResponse: JSON.stringify({ - evaluations: { - helpfulness: { score: 0.7, reasoning: 'The response is helpful' }, - }, + score: 0.7, + reasoning: 'The response is helpful', }), metrics: { success: true, @@ -409,18 +395,10 @@ describe('Judge', () => { ); }); - it('returns result with success false when expected metric is missing', async () => { + it('returns result with success false when response has no score or reasoning', async () => { const mockStructuredResponse: StructuredResponse = { - data: { - evaluations: { - accuracy: { score: 0.9, reasoning: 'Accurate' }, - }, - }, - rawResponse: JSON.stringify({ - evaluations: { - accuracy: { score: 0.9, reasoning: 'Accurate' }, - }, - }), + data: {}, + rawResponse: '{}', metrics: { success: true, usage: { total: 100, input: 50, output: 50 }, @@ -437,19 +415,23 @@ describe('Judge', () => { sampled: true, judgeConfigKey: 'test-judge', }); + expect(mockLogger.warn).toHaveBeenCalledWith( + 'Could not parse evaluation response: {}', + mockTrackData, + ); }); it('returns result with success false when response structure is malformed', async () => { const mockStructuredResponse: StructuredResponse = { 
data: { - relevance: { score: 0.8, reasoning: 'Good' }, - accuracy: { score: 0.9, reasoning: 'Accurate' }, - helpfulness: { score: 0.7, reasoning: 'Helpful' }, + evaluations: { + relevance: { score: 0.8, reasoning: 'Good' }, + }, }, rawResponse: JSON.stringify({ - relevance: { score: 0.8, reasoning: 'Good' }, - accuracy: { score: 0.9, reasoning: 'Accurate' }, - helpfulness: { score: 0.7, reasoning: 'Helpful' }, + evaluations: { + relevance: { score: 0.8, reasoning: 'Good' }, + }, }), metrics: { success: true, @@ -467,6 +449,10 @@ describe('Judge', () => { sampled: true, judgeConfigKey: 'test-judge', }); + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.stringContaining('Could not parse evaluation response:'), + mockTrackData, + ); }); it('handles provider errors gracefully', async () => { @@ -517,14 +503,12 @@ describe('Judge', () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, - }, + score: 0.8, + reasoning: 'The response is relevant to the question', }, rawResponse: JSON.stringify({ - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, - }, + score: 0.8, + reasoning: 'The response is relevant to the question', }), metrics: { success: true, @@ -620,13 +604,9 @@ describe('Judge', () => { it('parses valid evaluation response correctly', () => { // eslint-disable-next-line no-underscore-dangle const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - relevance: { score: 0.8, reasoning: 'Good' }, - }, - }; + const responseData = { score: 0.8, reasoning: 'Good' }; - const result = parseResponse(responseData, 'relevance', mockTracker); + const result = parseResponse(responseData); expect(result).toEqual({ score: 0.8, @@ -634,28 +614,21 @@ describe('Judge', () => { }); }); - it('returns undefined for invalid response data', () => { + 
it('returns undefined for empty response data', () => { // eslint-disable-next-line no-underscore-dangle const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - relevance: { score: 0.8, reasoning: 'Good' }, - }; - const result = parseResponse(responseData, 'relevance', mockTracker); + const result = parseResponse({}); expect(result).toBeUndefined(); }); - it('handles missing score or reasoning fields', () => { + it('handles missing reasoning field', () => { // eslint-disable-next-line no-underscore-dangle const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - relevance: { score: 0.8 }, - }, - }; + const responseData = { score: 0.8 }; - const result = parseResponse(responseData, 'relevance', mockTracker); + const result = parseResponse(responseData); expect(result).toBeUndefined(); }); @@ -663,73 +636,31 @@ describe('Judge', () => { it('handles invalid score values out of range', () => { // eslint-disable-next-line no-underscore-dangle const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - relevance: { score: 1.5, reasoning: 'Good' }, - }, - }; + const responseData = { score: 1.5, reasoning: 'Good' }; - const result = parseResponse(responseData, 'relevance', mockTracker); + const result = parseResponse(responseData); expect(result).toBeUndefined(); - expect(mockLogger.warn).toHaveBeenCalledWith( - expect.stringContaining('Invalid score evaluated for relevance: 1.5'), - mockTrackData, - ); }); it('handles negative score values', () => { // eslint-disable-next-line no-underscore-dangle const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - relevance: { score: -0.1, reasoning: 'Good' }, - }, - }; + const responseData = { score: -0.1, reasoning: 'Good' }; - const result = parseResponse(responseData, 'relevance', mockTracker); + const 
result = parseResponse(responseData); expect(result).toBeUndefined(); - expect(mockLogger.warn).toHaveBeenCalledWith( - expect.stringContaining('Invalid score evaluated for relevance: -0.1'), - mockTrackData, - ); }); it('handles invalid reasoning type', () => { // eslint-disable-next-line no-underscore-dangle const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - relevance: { score: 0.8, reasoning: 123 }, - }, - }; + const responseData = { score: 0.8, reasoning: 123 }; - const result = parseResponse(responseData, 'relevance', mockTracker); + const result = parseResponse(responseData); expect(result).toBeUndefined(); - expect(mockLogger.warn).toHaveBeenCalledWith( - expect.stringContaining('Invalid reasoning evaluated for relevance: 123'), - mockTrackData, - ); - }); - - it('handles missing evaluation when key does not exist in response', () => { - // eslint-disable-next-line no-underscore-dangle - const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - accuracy: { score: 0.9, reasoning: 'Accurate' }, - }, - }; - - const result = parseResponse(responseData, 'relevance', mockTracker); - - expect(result).toBeUndefined(); - expect(mockLogger.warn).toHaveBeenCalledWith( - 'Missing evaluation for metric key: relevance', - mockTrackData, - ); }); it('handles empty evaluationMetricKeys array fallback', async () => { @@ -753,41 +684,5 @@ describe('Judge', () => { mockTrackData, ); }); - - it('handles evaluation value that is not an object', () => { - // eslint-disable-next-line no-underscore-dangle - const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - relevance: 'not an object', - }, - }; - - const result = parseResponse(responseData, 'relevance', mockTracker); - - expect(result).toBeUndefined(); - expect(mockLogger.warn).toHaveBeenCalledWith( - 'Missing evaluation for metric key: 
relevance', - mockTrackData, - ); - }); - - it('handles null evaluation value', () => { - // eslint-disable-next-line no-underscore-dangle - const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - relevance: null, - }, - }; - - const result = parseResponse(responseData, 'relevance', mockTracker); - - expect(result).toBeUndefined(); - expect(mockLogger.warn).toHaveBeenCalledWith( - 'Missing evaluation for metric key: relevance', - mockTrackData, - ); - }); }); }); diff --git a/packages/sdk/server-ai/src/api/judge/EvaluationSchemaBuilder.ts b/packages/sdk/server-ai/src/api/judge/EvaluationSchemaBuilder.ts deleted file mode 100644 index 06f745a418..0000000000 --- a/packages/sdk/server-ai/src/api/judge/EvaluationSchemaBuilder.ts +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Internal class for building dynamic evaluation response schemas. - * Not exported - only used internally by TrackedJudge. - */ -class EvaluationSchemaBuilder { - static build(evaluationMetricKey?: string): Record { - if (!evaluationMetricKey) { - return {}; - } - return { - type: 'object', - properties: { - evaluations: { - type: 'object', - description: `Object containing evaluation results for ${evaluationMetricKey} metric`, - properties: { - [evaluationMetricKey]: this._buildKeySchema(evaluationMetricKey), - }, - required: [evaluationMetricKey], - additionalProperties: false, - }, - }, - required: ['evaluations'], - additionalProperties: false, - } as const; - } - - private static _buildKeySchema(key: string) { - return { - type: 'object', - properties: { - score: { - type: 'number', - minimum: 0, - maximum: 1, - description: `Score between 0.0 and 1.0 for ${key}`, - }, - reasoning: { - type: 'string', - description: `Reasoning behind the score for ${key}`, - }, - }, - required: ['score', 'reasoning'], - additionalProperties: false, - }; - } -} - -export { EvaluationSchemaBuilder }; diff --git a/packages/sdk/server-ai/src/api/judge/Judge.ts 
b/packages/sdk/server-ai/src/api/judge/Judge.ts index e36ab138cd..ef49e3b723 100644 --- a/packages/sdk/server-ai/src/api/judge/Judge.ts +++ b/packages/sdk/server-ai/src/api/judge/Judge.ts @@ -3,12 +3,28 @@ import Mustache from 'mustache'; import { LDLogger } from '@launchdarkly/js-server-sdk-common'; import { ChatResponse } from '../chat/types'; -import { LDAIConfigTracker } from '../config/LDAIConfigTracker'; import { LDAIJudgeConfig, LDMessage } from '../config/types'; import { AIProvider } from '../providers/AIProvider'; -import { EvaluationSchemaBuilder } from './EvaluationSchemaBuilder'; import { LDJudgeResult, StructuredResponse } from './types'; +const EVALUATION_SCHEMA = { + type: 'object', + properties: { + score: { + type: 'number', + minimum: 0, + maximum: 1, + description: 'Score between 0.0 and 1.0.', + }, + reasoning: { + type: 'string', + description: 'Reasoning behind the score.', + }, + }, + required: ['score', 'reasoning'], + additionalProperties: false, +} as const; + /** * Judge implementation that handles evaluation functionality and conversation management. 
* @@ -17,7 +33,6 @@ import { LDJudgeResult, StructuredResponse } from './types'; */ export class Judge { private readonly _logger?: LDLogger; - private readonly _evaluationResponseStructure: Record; constructor( private readonly _aiConfig: LDAIJudgeConfig, @@ -25,8 +40,6 @@ export class Judge { logger?: LDLogger, ) { this._logger = logger; - const evaluationMetricKey = this._getEvaluationMetricKey(); - this._evaluationResponseStructure = EvaluationSchemaBuilder.build(evaluationMetricKey); } /** @@ -97,14 +110,14 @@ export class Judge { const response = await tracker.trackMetricsOf( (r: StructuredResponse) => r.metrics, - () => this._aiProvider.invokeStructuredModel(messages, this._evaluationResponseStructure), + () => this._aiProvider.invokeStructuredModel(messages, EVALUATION_SCHEMA), ); - const evalResult = this._parseEvaluationResponse(response.data, evaluationMetricKey, tracker); + const evalResult = this._parseEvaluationResponse(response.data); if (!evalResult) { this._logger?.warn( - 'Judge evaluation did not return the expected evaluation', + `Could not parse evaluation response: ${JSON.stringify(response.data)}`, tracker.getTrackData(), ); return result; @@ -181,52 +194,27 @@ export class Judge { } /** - * Parses the structured evaluation response from the AI provider. + * Parses the structured evaluation response. Expects top-level {score, reasoning}. * Returns score and reasoning, or undefined if parsing fails. 
*/ private _parseEvaluationResponse( data: Record, - evaluationMetricKey: string, - tracker: LDAIConfigTracker, ): { score: number; reasoning: string } | undefined { - const evaluations = data.evaluations as Record; - - if (!data.evaluations || typeof data.evaluations !== 'object') { - this._logger?.warn('Invalid response: missing or invalid evaluations object'); - return undefined; - } - - const evaluation = evaluations[evaluationMetricKey]; - - if (!evaluation || typeof evaluation !== 'object') { - this._logger?.warn( - `Missing evaluation for metric key: ${evaluationMetricKey}`, - tracker.getTrackData(), - ); + if (!data || typeof data !== 'object' || Array.isArray(data)) { return undefined; } - const evalData = evaluation as Record; - - if (typeof evalData.score !== 'number' || evalData.score < 0 || evalData.score > 1) { - this._logger?.warn( - `Invalid score evaluated for ${evaluationMetricKey}: ${evalData.score}. Score must be a number between 0 and 1 inclusive`, - tracker.getTrackData(), - ); + if (typeof data.score !== 'number' || data.score < 0 || data.score > 1) { return undefined; } - if (typeof evalData.reasoning !== 'string') { - this._logger?.warn( - `Invalid reasoning evaluated for ${evaluationMetricKey}: ${evalData.reasoning}. 
Reasoning must be a string`, - tracker.getTrackData(), - ); + if (typeof data.reasoning !== 'string') { return undefined; } return { - score: evalData.score, - reasoning: evalData.reasoning, + score: data.score, + reasoning: data.reasoning, }; } } From 092e38a4c34bdbcdee8dfecb89925fb1c9606350 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Mon, 20 Apr 2026 13:05:56 -0500 Subject: [PATCH 6/7] feat: Implement agent graph definitions (#1282) --- package.json | 1 + .../__tests__/AgentGraphDefinition.test.ts | 418 ++++++++++++++ .../__tests__/LDGraphTrackerImpl.test.ts | 545 ++++++++---------- .../server-ai/__tests__/agentGraph.test.ts | 200 +++++++ .../examples/agent-graph-traversal/README.md | 106 ++++ .../agent-graph-traversal/package.json | 19 + .../agent-graph-traversal/src/index.ts | 134 +++++ .../agent-graph-traversal/tsconfig.json | 18 + packages/sdk/server-ai/src/LDAIClientImpl.ts | 118 ++++ .../sdk/server-ai/src/LDGraphTrackerImpl.ts | 133 +++-- packages/sdk/server-ai/src/api/LDAIClient.ts | 52 ++ .../src/api/graph/AgentGraphDefinition.ts | 253 ++++++++ .../server-ai/src/api/graph/AgentGraphNode.ts | 46 ++ .../server-ai/src/api/graph/LDGraphTracker.ts | 132 +++-- packages/sdk/server-ai/src/api/graph/index.ts | 3 + packages/sdk/server-ai/src/api/graph/types.ts | 88 +++ release-please-config.json | 10 + 17 files changed, 1857 insertions(+), 419 deletions(-) create mode 100644 packages/sdk/server-ai/__tests__/AgentGraphDefinition.test.ts create mode 100644 packages/sdk/server-ai/__tests__/agentGraph.test.ts create mode 100644 packages/sdk/server-ai/examples/agent-graph-traversal/README.md create mode 100644 packages/sdk/server-ai/examples/agent-graph-traversal/package.json create mode 100644 packages/sdk/server-ai/examples/agent-graph-traversal/src/index.ts create mode 100644 packages/sdk/server-ai/examples/agent-graph-traversal/tsconfig.json create mode 100644 packages/sdk/server-ai/src/api/graph/AgentGraphDefinition.ts create mode 100644 
packages/sdk/server-ai/src/api/graph/AgentGraphNode.ts create mode 100644 packages/sdk/server-ai/src/api/graph/types.ts diff --git a/package.json b/package.json index 0de39840af..ffbee84446 100644 --- a/package.json +++ b/package.json @@ -50,6 +50,7 @@ "packages/sdk/server-ai/examples/chat-observability", "packages/sdk/server-ai/examples/openai-observability", "packages/sdk/server-ai/examples/vercel-ai", + "packages/sdk/server-ai/examples/agent-graph-traversal", "packages/telemetry/browser-telemetry", "packages/sdk/combined-browser", "packages/sdk/shopify-oxygen", diff --git a/packages/sdk/server-ai/__tests__/AgentGraphDefinition.test.ts b/packages/sdk/server-ai/__tests__/AgentGraphDefinition.test.ts new file mode 100644 index 0000000000..8839a3474a --- /dev/null +++ b/packages/sdk/server-ai/__tests__/AgentGraphDefinition.test.ts @@ -0,0 +1,418 @@ +import { randomUUID } from 'crypto'; + +import { LDContext } from '@launchdarkly/js-server-sdk-common'; + +import { LDAIAgentConfig } from '../src/api/config'; +import { AgentGraphDefinition } from '../src/api/graph/AgentGraphDefinition'; +import { LDAgentGraphFlagValue, LDGraphEdge } from '../src/api/graph/types'; +import { LDClientMin } from '../src/LDClientMin'; +import { LDGraphTrackerImpl } from '../src/LDGraphTrackerImpl'; + +const mockLdClient: LDClientMin = { + track: jest.fn(), + variation: jest.fn(), +}; + +const testContext: LDContext = { kind: 'user', key: 'test-user' }; + +// --------------------------------------------------------------------------- +// Helper builders +// --------------------------------------------------------------------------- + +function makeAgentConfig(key: string, enabled = true): LDAIAgentConfig { + return { key, enabled, instructions: `You are ${key}.` } as LDAIAgentConfig; +} + +function makeGraph( + root: string, + edges: Record = {}, + variationKey?: string, + version = 1, +): LDAgentGraphFlagValue { + return { + _ldMeta: { variationKey, version }, + root, + edges, + }; +} + 
+function makeDefinition( + graph: LDAgentGraphFlagValue, + agentConfigs: Record, + enabled = true, +): AgentGraphDefinition { + const nodes = AgentGraphDefinition.buildNodes(graph, agentConfigs); + return new AgentGraphDefinition( + graph, + nodes, + enabled, + () => + new LDGraphTrackerImpl( + mockLdClient, + randomUUID(), + graph.root, + // eslint-disable-next-line no-underscore-dangle + graph._ldMeta?.variationKey, + // eslint-disable-next-line no-underscore-dangle + graph._ldMeta?.version ?? 1, + testContext, + ), + ); +} + +// --------------------------------------------------------------------------- +// buildNodes +// --------------------------------------------------------------------------- + +it('buildNodes creates a node for every unique key in the graph', () => { + const graph = makeGraph('root', { + root: [{ key: 'child-a' }, { key: 'child-b' }], + 'child-a': [{ key: 'leaf' }], + }); + const configs: Record = { + root: makeAgentConfig('root'), + 'child-a': makeAgentConfig('child-a'), + 'child-b': makeAgentConfig('child-b'), + leaf: makeAgentConfig('leaf'), + }; + + const nodes = AgentGraphDefinition.buildNodes(graph, configs); + expect(Object.keys(nodes).sort()).toEqual(['child-a', 'child-b', 'leaf', 'root']); +}); + +it('buildNodes skips keys whose agent config is missing', () => { + const graph = makeGraph('root', { root: [{ key: 'orphan' }] }); + const nodes = AgentGraphDefinition.buildNodes(graph, { root: makeAgentConfig('root') }); + expect(nodes.root).toBeDefined(); + expect(nodes.orphan).toBeUndefined(); +}); + +it('buildNodes assigns correct edges to each node', () => { + const graph = makeGraph('root', { + root: [{ key: 'child', handoff: { someOption: true } }], + }); + const configs = { + root: makeAgentConfig('root'), + child: makeAgentConfig('child'), + }; + const nodes = AgentGraphDefinition.buildNodes(graph, configs); + expect(nodes.root.getEdges()).toEqual([{ key: 'child', handoff: { someOption: true } }]); + 
expect(nodes.child.getEdges()).toEqual([]); +}); + +// --------------------------------------------------------------------------- +// collectAllKeys +// --------------------------------------------------------------------------- + +it('collectAllKeys includes root, edge sources, and edge targets', () => { + const graph = makeGraph('root', { + root: [{ key: 'a' }, { key: 'b' }], + a: [{ key: 'c' }], + }); + const keys = AgentGraphDefinition.collectAllKeys(graph); + expect([...keys].sort()).toEqual(['a', 'b', 'c', 'root']); +}); + +it('collectAllKeys works for a graph with no edges', () => { + const graph = makeGraph('solo'); + const keys = AgentGraphDefinition.collectAllKeys(graph); + expect([...keys]).toEqual(['solo']); +}); + +// --------------------------------------------------------------------------- +// enabled +// --------------------------------------------------------------------------- + +it('enabled reflects the value passed at construction', () => { + const graph = makeGraph('r'); + const enabled = makeDefinition(graph, { r: makeAgentConfig('r') }, true); + expect(enabled.enabled).toBe(true); + + const disabled = makeDefinition(graph, { r: makeAgentConfig('r') }, false); + expect(disabled.enabled).toBe(false); +}); + +// --------------------------------------------------------------------------- +// rootNode / getNode / terminalNodes +// --------------------------------------------------------------------------- + +it('rootNode returns the root node', () => { + const graph = makeGraph('root', { root: [{ key: 'leaf' }] }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + leaf: makeAgentConfig('leaf'), + }); + expect(def.rootNode().getKey()).toBe('root'); +}); + +it('getNode returns null for unknown key', () => { + const graph = makeGraph('root'); + const def = makeDefinition(graph, { root: makeAgentConfig('root') }); + expect(def.getNode('nonexistent')).toBeNull(); +}); + +it('terminalNodes returns nodes with no outgoing edges', 
() => { + const graph = makeGraph('root', { + root: [{ key: 'mid' }], + mid: [{ key: 'leaf-a' }, { key: 'leaf-b' }], + }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + mid: makeAgentConfig('mid'), + 'leaf-a': makeAgentConfig('leaf-a'), + 'leaf-b': makeAgentConfig('leaf-b'), + }); + const terminalKeys = def + .terminalNodes() + .map((n) => n.getKey()) + .sort(); + expect(terminalKeys).toEqual(['leaf-a', 'leaf-b']); +}); + +// --------------------------------------------------------------------------- +// getChildNodes / getParentNodes +// --------------------------------------------------------------------------- + +it('getChildNodes returns direct children', () => { + const graph = makeGraph('root', { + root: [{ key: 'a' }, { key: 'b' }], + }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + a: makeAgentConfig('a'), + b: makeAgentConfig('b'), + }); + const childKeys = def + .getChildNodes('root') + .map((n) => n.getKey()) + .sort(); + expect(childKeys).toEqual(['a', 'b']); +}); + +it('getChildNodes returns empty array for terminal node', () => { + const graph = makeGraph('root'); + const def = makeDefinition(graph, { root: makeAgentConfig('root') }); + expect(def.getChildNodes('root')).toEqual([]); +}); + +it('getChildNodes returns empty array for unknown key', () => { + const graph = makeGraph('root'); + const def = makeDefinition(graph, { root: makeAgentConfig('root') }); + expect(def.getChildNodes('unknown')).toEqual([]); +}); + +it('getParentNodes returns nodes that have direct edges to the given key', () => { + const graph = makeGraph('root', { + root: [{ key: 'child' }], + sibling: [{ key: 'child' }], + }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + sibling: makeAgentConfig('sibling'), + child: makeAgentConfig('child'), + }); + const parentKeys = def + .getParentNodes('child') + .map((n) => n.getKey()) + .sort(); + expect(parentKeys).toEqual(['root', 'sibling']); +}); + 
+it('getParentNodes returns empty array for root node', () => { + const graph = makeGraph('root', { root: [{ key: 'child' }] }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + child: makeAgentConfig('child'), + }); + expect(def.getParentNodes('root')).toEqual([]); +}); + +// --------------------------------------------------------------------------- +// traverse +// --------------------------------------------------------------------------- + +it('traverse calls fn for every node in BFS order (root first)', () => { + // root + // / \ + // a b + // | + // c + const graph = makeGraph('root', { + root: [{ key: 'a' }, { key: 'b' }], + a: [{ key: 'c' }], + }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + a: makeAgentConfig('a'), + b: makeAgentConfig('b'), + c: makeAgentConfig('c'), + }); + + const order: string[] = []; + def.traverse((node) => { + order.push(node.getKey()); + }); + + expect(order[0]).toBe('root'); + // a and b must both appear before c + const aIdx = order.indexOf('a'); + const bIdx = order.indexOf('b'); + const cIdx = order.indexOf('c'); + expect(aIdx).toBeLessThan(cIdx); + expect(bIdx).toBeLessThan(cIdx); + expect(order).toHaveLength(4); +}); + +it('traverse stores fn return values in execution context', () => { + const graph = makeGraph('root', { root: [{ key: 'child' }] }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + child: makeAgentConfig('child'), + }); + + const contextCaptures: Record[] = []; + def.traverse((node, ctx) => { + contextCaptures.push({ ...ctx }); + return `result-of-${node.getKey()}`; + }); + + // After root is processed, the child's context should contain root's result + expect(contextCaptures[1]).toHaveProperty('root', 'result-of-root'); +}); + +it('traverse accepts and uses initial execution context', () => { + const graph = makeGraph('root'); + const def = makeDefinition(graph, { root: makeAgentConfig('root') }); + + const captured: Record[] = []; 
+ def.traverse( + (node, ctx) => { + captured.push({ ...ctx }); + }, + { initialKey: 'initialValue' }, + ); + + expect(captured[0]).toHaveProperty('initialKey', 'initialValue'); +}); + +it('traverse handles a single-node graph', () => { + const graph = makeGraph('solo'); + const def = makeDefinition(graph, { solo: makeAgentConfig('solo') }); + const visited: string[] = []; + def.traverse((node) => { + visited.push(node.getKey()); + }); + expect(visited).toEqual(['solo']); +}); + +// --------------------------------------------------------------------------- +// reverseTraverse +// --------------------------------------------------------------------------- + +it('reverseTraverse processes terminal nodes before their parents, root last', () => { + // root + // / \ + // a b ← mid-level + // | + // c ← terminal (deepest) + const graph = makeGraph('root', { + root: [{ key: 'a' }, { key: 'b' }], + a: [{ key: 'c' }], + }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + a: makeAgentConfig('a'), + b: makeAgentConfig('b'), + c: makeAgentConfig('c'), + }); + + const order: string[] = []; + def.reverseTraverse((node) => { + order.push(node.getKey()); + }); + + expect(order[order.length - 1]).toBe('root'); // root always last + // c must appear before a (c is a descendant of a) + expect(order.indexOf('c')).toBeLessThan(order.indexOf('a')); + // all four nodes visited + expect(order.sort()).toEqual(['a', 'b', 'c', 'root']); +}); + +it('reverseTraverse stores fn return values in execution context', () => { + const graph = makeGraph('root', { root: [{ key: 'child' }] }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + child: makeAgentConfig('child'), + }); + + const contextWhenRootRuns: Record[] = []; + def.reverseTraverse((node, ctx) => { + if (node.getKey() === 'root') { + contextWhenRootRuns.push({ ...ctx }); + } + return `result-of-${node.getKey()}`; + }); + + // root runs last; at that point, child's result should be in 
context + expect(contextWhenRootRuns[0]).toHaveProperty('child', 'result-of-child'); +}); + +it('reverseTraverse visits a node with multiple parents only once', () => { + // root → a → d → c + // root → b → c ← c has two parents + const graph = makeGraph('root', { + root: [{ key: 'a' }, { key: 'b' }], + a: [{ key: 'd' }], + b: [{ key: 'c' }], + d: [{ key: 'c' }], + }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + a: makeAgentConfig('a'), + b: makeAgentConfig('b'), + c: makeAgentConfig('c'), + d: makeAgentConfig('d'), + }); + + const order: string[] = []; + def.reverseTraverse((node) => { + order.push(node.getKey()); + }); + + // c is the only terminal — it goes first + expect(order[0]).toBe('c'); + // root is always last + expect(order[order.length - 1]).toBe('root'); + // every node visited exactly once + expect(order.sort()).toEqual(['a', 'b', 'c', 'd', 'root']); +}); + +it('reverseTraverse visits each node once on a cyclic graph', () => { + // A → B → A (no terminals) + const graph = makeGraph('a', { + a: [{ key: 'b' }], + b: [{ key: 'a' }], + }); + const def = makeDefinition(graph, { + a: makeAgentConfig('a'), + b: makeAgentConfig('b'), + }); + + const visited: string[] = []; + def.reverseTraverse((node) => { + visited.push(node.getKey()); + }); + + // No terminals → returns without visiting anything (same as Python) + expect(visited).toEqual([]); +}); + +// --------------------------------------------------------------------------- +// getConfig +// --------------------------------------------------------------------------- + +it('getConfig returns the raw flag value', () => { + const graph = makeGraph('root', {}, 'var-key', 5); + const def = makeDefinition(graph, { root: makeAgentConfig('root') }); + expect(def.getConfig()).toBe(graph); +}); diff --git a/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts index 77af551302..9f734eb5d0 100644 --- 
a/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts @@ -4,446 +4,351 @@ import { LDClientMin } from '../src/LDClientMin'; import { LDGraphTrackerImpl } from '../src/LDGraphTrackerImpl'; const mockTrack = jest.fn(); +const mockWarn = jest.fn(); const mockLdClient: LDClientMin = { track: mockTrack, variation: jest.fn(), + logger: { warn: mockWarn, error: jest.fn(), info: jest.fn(), debug: jest.fn() }, }; const testContext: LDContext = { kind: 'user', key: 'test-user' }; -const graphKey = 'test-graph'; +const graphKey = 'my-agent-graph'; const variationKey = 'v1'; const version = 2; -const getExpectedTrackData = () => ({ - graphKey, - variationKey, - version, -}); +const makeTracker = (runId = 'test-run-id') => + new LDGraphTrackerImpl(mockLdClient, runId, graphKey, variationKey, version, testContext); beforeEach(() => { jest.clearAllMocks(); }); -it('returns track data', () => { +// --------------------------------------------------------------------------- +// getTrackData +// --------------------------------------------------------------------------- + +it('returns correct track data with variationKey', () => { + const tracker = makeTracker('fixed-run-id'); + expect(tracker.getTrackData()).toEqual({ + runId: 'fixed-run-id', + graphKey, + version, + variationKey, + }); +}); + +it('omits variationKey when not provided', () => { const tracker = new LDGraphTrackerImpl( mockLdClient, + 'some-run-id', graphKey, - variationKey, + undefined, version, testContext, ); + const data = tracker.getTrackData(); + expect(data.variationKey).toBeUndefined(); + expect(data.graphKey).toBe(graphKey); + expect(data.version).toBe(version); + expect(data.runId).toBe('some-run-id'); +}); - expect(tracker.getTrackData()).toEqual(getExpectedTrackData()); +it('uses provided runId', () => { + const tracker = makeTracker('my-custom-run-id'); + expect(tracker.getTrackData().runId).toBe('my-custom-run-id'); }); 
-it('tracks invocation success', () => { +// --------------------------------------------------------------------------- +// resumptionToken round-trip +// --------------------------------------------------------------------------- + +it('encodes a resumption token with correct field order', () => { + const tracker = makeTracker('550e8400-e29b-41d4-a716-446655440000'); + const token = tracker.resumptionToken; + const decoded = Buffer.from(token, 'base64url').toString('utf8'); + expect(decoded).toBe( + '{"runId":"550e8400-e29b-41d4-a716-446655440000","graphKey":"my-agent-graph","variationKey":"v1","version":2}', + ); +}); + +it('omits variationKey from token when not set', () => { const tracker = new LDGraphTrackerImpl( mockLdClient, + 'run-abc', graphKey, - variationKey, + undefined, version, testContext, ); + const token = tracker.resumptionToken; + const decoded = Buffer.from(token, 'base64url').toString('utf8'); + expect(decoded).toBe('{"runId":"run-abc","graphKey":"my-agent-graph","version":2}'); +}); + +it('fromResumptionToken reconstructs the tracker with original runId', () => { + const original = makeTracker('orig-run-id'); + const token = original.resumptionToken; + + const reconstructed = LDGraphTrackerImpl.fromResumptionToken(token, mockLdClient, testContext); + expect(reconstructed.getTrackData()).toEqual({ + runId: 'orig-run-id', + graphKey, + version, + variationKey, + }); +}); + +// --------------------------------------------------------------------------- +// getSummary +// --------------------------------------------------------------------------- + +it('returns an empty summary initially', () => { + const tracker = makeTracker('r'); + expect(tracker.getSummary()).toEqual({}); +}); + +it('returns a copy of the summary (not a reference)', () => { + const tracker = makeTracker('r'); tracker.trackInvocationSuccess(); + const summary1 = tracker.getSummary(); + const summary2 = tracker.getSummary(); + expect(summary1).not.toBe(summary2); + 
expect(summary1).toEqual(summary2); +}); + +// --------------------------------------------------------------------------- +// trackInvocationSuccess / trackInvocationFailure – at-most-once +// --------------------------------------------------------------------------- +it('trackInvocationSuccess sets success=true and emits event', () => { + const tracker = makeTracker('r'); + tracker.trackInvocationSuccess(); + expect(tracker.getSummary().success).toBe(true); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:invocation_success', testContext, - getExpectedTrackData(), + tracker.getTrackData(), 1, ); }); -it('tracks invocation failure', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); +it('trackInvocationFailure sets success=false and emits event', () => { + const tracker = makeTracker('r'); tracker.trackInvocationFailure(); - + expect(tracker.getSummary().success).toBe(false); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:invocation_failure', testContext, - getExpectedTrackData(), + tracker.getTrackData(), 1, ); }); -it('tracks latency', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, +it('drops second trackInvocationSuccess call and warns', () => { + const tracker = makeTracker('r'); + tracker.trackInvocationSuccess(); + tracker.trackInvocationSuccess(); + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledWith( + expect.stringContaining('invocation success/failure already recorded for this run'), + ); +}); + +it('drops trackInvocationFailure after trackInvocationSuccess and warns', () => { + const tracker = makeTracker('r'); + tracker.trackInvocationSuccess(); + tracker.trackInvocationFailure(); + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledWith( + expect.stringContaining('invocation success/failure already recorded for this run'), ); - 
tracker.trackLatency(1500); +}); +// --------------------------------------------------------------------------- +// trackLatency – at-most-once +// --------------------------------------------------------------------------- + +it('trackLatency sets durationMs and emits event', () => { + const tracker = makeTracker('r'); + tracker.trackLatency(1234); + expect(tracker.getSummary().durationMs).toBe(1234); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:latency', testContext, - getExpectedTrackData(), - 1500, + tracker.getTrackData(), + 1234, ); }); -it('tracks total tokens', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackTotalTokens({ total: 200, input: 80, output: 120 }); +it('drops second trackLatency call and warns', () => { + const tracker = makeTracker('r'); + tracker.trackLatency(100); + tracker.trackLatency(200); + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(tracker.getSummary().durationMs).toBe(100); + expect(mockWarn).toHaveBeenCalled(); +}); +// --------------------------------------------------------------------------- +// trackTotalTokens – at-most-once +// --------------------------------------------------------------------------- + +it('trackTotalTokens sets tokens and emits event with total as metric value', () => { + const tracker = makeTracker('r'); + const tokens = { total: 500, input: 200, output: 300 }; + tracker.trackTotalTokens(tokens); + expect(tracker.getSummary().tokens).toEqual(tokens); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:total_tokens', testContext, - getExpectedTrackData(), - 200, + tracker.getTrackData(), + 500, ); }); -it('does not track total tokens when total is zero', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackTotalTokens({ total: 0, input: 0, output: 0 }); - - expect(mockTrack).not.toHaveBeenCalled(); +it('drops 
second trackTotalTokens call and warns', () => { + const tracker = makeTracker('r'); + tracker.trackTotalTokens({ total: 100, input: 50, output: 50 }); + tracker.trackTotalTokens({ total: 200, input: 100, output: 100 }); + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(tracker.getSummary().tokens?.total).toBe(100); + expect(mockWarn).toHaveBeenCalled(); }); -it('tracks path', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - const path = ['node-a', 'node-b', 'node-c']; - tracker.trackPath(path); +// --------------------------------------------------------------------------- +// trackPath – at-most-once +// --------------------------------------------------------------------------- +it('trackPath sets path and emits event with path in data payload', () => { + const tracker = makeTracker('r'); + const path = ['root-agent', 'research-agent', 'write-agent']; + tracker.trackPath(path); + expect(tracker.getSummary().path).toEqual(path); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:path', testContext, - { ...getExpectedTrackData(), path }, + { ...tracker.getTrackData(), path }, 1, ); }); -it('tracks judge result', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); +it('drops second trackPath call and warns', () => { + const tracker = makeTracker('r'); + tracker.trackPath(['a', 'b']); + tracker.trackPath(['c', 'd']); + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(tracker.getSummary().path).toEqual(['a', 'b']); + expect(mockWarn).toHaveBeenCalled(); +}); + +// --------------------------------------------------------------------------- +// trackJudgeResult – NOT at-most-once +// --------------------------------------------------------------------------- + +it('trackJudgeResult emits an event for a sampled, successful result', () => { + const tracker = makeTracker('r'); tracker.trackJudgeResult({ - 
judgeConfigKey: 'my-judge', + judgeConfigKey: 'judge-1', + metricKey: 'relevance-score', + score: 0.9, + reasoning: 'good', success: true, sampled: true, - score: 0.9, - reasoning: 'Relevant', - metricKey: 'relevance', }); - + expect(mockTrack).toHaveBeenCalledTimes(1); expect(mockTrack).toHaveBeenCalledWith( - 'relevance', + 'relevance-score', testContext, - { ...getExpectedTrackData(), judgeConfigKey: 'my-judge' }, + { ...tracker.getTrackData(), judgeConfigKey: 'judge-1' }, 0.9, ); }); -it('tracks judge result without judgeConfigKey', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); +it('trackJudgeResult emits event without judgeConfigKey', () => { + const tracker = makeTracker('r'); tracker.trackJudgeResult({ + metricKey: 'relevance-score', + score: 0.7, success: true, sampled: true, - score: 0.7, - reasoning: 'Somewhat relevant', - metricKey: 'relevance', }); - - expect(mockTrack).toHaveBeenCalledWith('relevance', testContext, getExpectedTrackData(), 0.7); -}); - -it('does not track judge result when not sampled', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, + expect(mockTrack).toHaveBeenCalledWith( + 'relevance-score', testContext, + tracker.getTrackData(), + 0.7, ); - tracker.trackJudgeResult({ - judgeConfigKey: 'my-judge', - success: false, - sampled: false, - }); +}); + +it('trackJudgeResult can fire multiple times', () => { + const tracker = makeTracker('r'); + tracker.trackJudgeResult({ metricKey: 'relevance', score: 0.5, success: true, sampled: true }); + tracker.trackJudgeResult({ metricKey: 'relevance', score: 0.7, success: true, sampled: true }); + expect(mockTrack).toHaveBeenCalledTimes(2); + expect(mockWarn).not.toHaveBeenCalled(); +}); +it('trackJudgeResult does not emit when not sampled', () => { + const tracker = makeTracker('r'); + tracker.trackJudgeResult({ judgeConfigKey: 'j', success: false, sampled: false 
}); expect(mockTrack).not.toHaveBeenCalled(); }); -it('does not track judge result when success is false', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); +it('trackJudgeResult does not emit when success is false', () => { + const tracker = makeTracker('r'); tracker.trackJudgeResult({ - judgeConfigKey: 'my-judge', + judgeConfigKey: 'j', + metricKey: 'relevance', + score: 0.9, success: false, sampled: true, - score: 0.9, - metricKey: 'relevance', }); - expect(mockTrack).not.toHaveBeenCalled(); }); -it('tracks redirect', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackRedirect('agent-a', 'agent-b'); +// --------------------------------------------------------------------------- +// Edge-level methods – multi-fire, NOT at-most-once +// --------------------------------------------------------------------------- +it('trackRedirect emits event with sourceKey and redirectedTarget', () => { + const tracker = makeTracker('r'); + tracker.trackRedirect('source-agent', 'redirected-agent'); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:redirect', testContext, - { ...getExpectedTrackData(), sourceKey: 'agent-a', redirectedTarget: 'agent-b' }, + { ...tracker.getTrackData(), sourceKey: 'source-agent', redirectedTarget: 'redirected-agent' }, 1, ); }); -it('tracks handoff success', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); +it('trackHandoffSuccess emits event with sourceKey and targetKey', () => { + const tracker = makeTracker('r'); tracker.trackHandoffSuccess('agent-a', 'agent-b'); - expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:handoff_success', testContext, - { ...getExpectedTrackData(), sourceKey: 'agent-a', targetKey: 'agent-b' }, + { ...tracker.getTrackData(), sourceKey: 'agent-a', targetKey: 'agent-b' }, 1, ); 
}); -it('tracks handoff failure', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); +it('trackHandoffFailure emits event with sourceKey and targetKey', () => { + const tracker = makeTracker('r'); tracker.trackHandoffFailure('agent-a', 'agent-b'); - expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:handoff_failure', testContext, - { ...getExpectedTrackData(), sourceKey: 'agent-a', targetKey: 'agent-b' }, + { ...tracker.getTrackData(), sourceKey: 'agent-a', targetKey: 'agent-b' }, 1, ); }); -it('returns empty summary when no metrics tracked', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - - expect(tracker.getSummary()).toEqual({}); -}); - -it('summarizes tracked graph metrics', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - - tracker.trackInvocationSuccess(); - tracker.trackLatency(2000); - tracker.trackTotalTokens({ total: 300, input: 100, output: 200 }); - tracker.trackPath(['node-a', 'node-b']); - - expect(tracker.getSummary()).toEqual({ - success: true, - durationMs: 2000, - tokens: { total: 300, input: 100, output: 200 }, - path: ['node-a', 'node-b'], - }); -}); - -describe('at-most-once semantics for graph-level metrics', () => { - it('drops duplicate trackInvocationSuccess calls', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackInvocationSuccess(); - tracker.trackInvocationSuccess(); - - expect(mockTrack).toHaveBeenCalledTimes(1); - }); - - it('drops trackInvocationFailure after trackInvocationSuccess', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackInvocationSuccess(); - tracker.trackInvocationFailure(); - - 
expect(mockTrack).toHaveBeenCalledTimes(1); - expect(mockTrack).toHaveBeenCalledWith( - '$ld:ai:graph:invocation_success', - expect.anything(), - expect.anything(), - expect.anything(), - ); - }); - - it('drops duplicate trackLatency calls', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackLatency(1000); - tracker.trackLatency(2000); - - expect(mockTrack).toHaveBeenCalledTimes(1); - expect(mockTrack).toHaveBeenCalledWith( - '$ld:ai:graph:latency', - testContext, - getExpectedTrackData(), - 1000, - ); - }); - - it('drops duplicate trackTotalTokens calls', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackTotalTokens({ total: 100, input: 40, output: 60 }); - tracker.trackTotalTokens({ total: 200, input: 80, output: 120 }); - - expect(mockTrack).toHaveBeenCalledTimes(1); - expect(mockTrack).toHaveBeenCalledWith( - '$ld:ai:graph:total_tokens', - testContext, - getExpectedTrackData(), - 100, - ); - }); - - it('drops duplicate trackPath calls', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackPath(['node-a']); - tracker.trackPath(['node-b', 'node-c']); - - expect(mockTrack).toHaveBeenCalledTimes(1); - expect(mockTrack).toHaveBeenCalledWith( - '$ld:ai:graph:path', - testContext, - { ...getExpectedTrackData(), path: ['node-a'] }, - 1, - ); - }); -}); - -describe('edge-level methods can be called multiple times', () => { - it('allows multiple trackRedirect calls', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackRedirect('a', 'b'); - tracker.trackRedirect('b', 'c'); - - expect(mockTrack).toHaveBeenCalledTimes(2); - }); - - it('allows multiple trackHandoffSuccess calls', () => { - const tracker = new 
LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackHandoffSuccess('a', 'b'); - tracker.trackHandoffSuccess('b', 'c'); - - expect(mockTrack).toHaveBeenCalledTimes(2); - }); - - it('allows multiple trackHandoffFailure calls', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackHandoffFailure('a', 'b'); - tracker.trackHandoffFailure('b', 'c'); - - expect(mockTrack).toHaveBeenCalledTimes(2); - }); +it('edge-level methods can fire multiple times without warning', () => { + const tracker = makeTracker('r'); + tracker.trackHandoffSuccess('a', 'b'); + tracker.trackHandoffSuccess('a', 'b'); + tracker.trackRedirect('a', 'c'); + tracker.trackHandoffFailure('x', 'y'); + expect(mockTrack).toHaveBeenCalledTimes(4); + expect(mockWarn).not.toHaveBeenCalled(); }); diff --git a/packages/sdk/server-ai/__tests__/agentGraph.test.ts b/packages/sdk/server-ai/__tests__/agentGraph.test.ts new file mode 100644 index 0000000000..e5a52d836f --- /dev/null +++ b/packages/sdk/server-ai/__tests__/agentGraph.test.ts @@ -0,0 +1,200 @@ +import { LDContext } from '@launchdarkly/js-server-sdk-common'; + +import { AgentGraphDefinition } from '../src/api/graph/AgentGraphDefinition'; +import { LDAIClientImpl } from '../src/LDAIClientImpl'; +import { LDClientMin } from '../src/LDClientMin'; + +// --------------------------------------------------------------------------- +// Mocks +// --------------------------------------------------------------------------- + +const mockTrack = jest.fn(); +const mockVariation = jest.fn(); +const mockDebug = jest.fn(); + +const mockLdClient: LDClientMin = { + track: mockTrack, + variation: mockVariation, + logger: { + debug: mockDebug, + info: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + }, +}; + +const testContext: LDContext = { kind: 'user', key: 'test-user' }; + +const makeClient = () => new 
LDAIClientImpl(mockLdClient); +
+beforeEach(() => {
+  jest.clearAllMocks();
+});
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+function makeGraphFlagValue(
+  root: string,
+  edges: Record<string, Array<{ key: string }>> = {},
+  variationKey = 'v1',
+  version = 1,
+) {
+  return { _ldMeta: { variationKey, version }, root, edges };
+}
+
+function makeAgentFlagValue(key: string, enabled = true) {
+  return {
+    _ldMeta: { variationKey: `${key}-v1`, enabled, version: 1, mode: 'agent' },
+    instructions: `Instructions for ${key}`,
+  };
+}
+
+// ---------------------------------------------------------------------------
+// agentGraph – disabled / validation failures
+// ---------------------------------------------------------------------------
+
+it('returns a disabled graph when _ldMeta.enabled is false', async () => {
+  const client = makeClient();
+  mockVariation.mockResolvedValueOnce({ _ldMeta: { enabled: false }, root: 'root' });
+  const graph = await client.agentGraph('my-graph', testContext);
+  expect(graph).toBeInstanceOf(AgentGraphDefinition);
+  expect(graph.enabled).toBe(false);
+});
+
+it('logs debug when graph is disabled via _ldMeta.enabled', async () => {
+  const client = makeClient();
+  mockVariation.mockResolvedValueOnce({ _ldMeta: { enabled: false }, root: 'root' });
+  await client.agentGraph('my-graph', testContext);
+  expect(mockDebug).toHaveBeenCalledWith(expect.stringContaining('disabled'));
+});
+
+it('returns a disabled graph when graph flag has no root', async () => {
+  const client = makeClient();
+  mockVariation.mockResolvedValueOnce({ root: '' });
+  const graph = await client.agentGraph('my-graph', testContext);
+  expect(graph).toBeInstanceOf(AgentGraphDefinition);
+  expect(graph.enabled).toBe(false);
+});
+
+it('logs debug when graph has no root', async () => {
+  const client = makeClient();
+  mockVariation.mockResolvedValueOnce({ root: '' });
+  
await client.agentGraph('my-graph', testContext); + expect(mockDebug).toHaveBeenCalledWith(expect.stringContaining('not fetchable')); +}); + +it('returns a disabled graph when a node is unconnected (not reachable from root)', async () => { + const client = makeClient(); + const graphValue = makeGraphFlagValue('root', { + root: [{ key: 'child' }], + orphan: [{ key: 'other' }], + }); + mockVariation.mockResolvedValueOnce(graphValue); + const graph = await client.agentGraph('my-graph', testContext); + expect(graph).toBeInstanceOf(AgentGraphDefinition); + expect(graph.enabled).toBe(false); + expect(mockDebug).toHaveBeenCalledWith(expect.stringContaining('unconnected node')); +}); + +it('returns an enabled graph and traverses a cyclic graph (each node visited once)', async () => { + const client = makeClient(); + const graphValue = makeGraphFlagValue('a', { + a: [{ key: 'b' }], + b: [{ key: 'a' }], + }); + mockVariation + .mockResolvedValueOnce(graphValue) + .mockResolvedValue(makeAgentFlagValue('agent', true)); + + const graph = await client.agentGraph('my-graph', testContext); + expect(graph.enabled).toBe(true); + + const visited: string[] = []; + graph.traverse((node) => { + visited.push(node.getKey()); + }); + expect(visited.sort()).toEqual(['a', 'b']); +}); + +it('returns a disabled graph when a child agent config is disabled', async () => { + const client = makeClient(); + const graphValue = makeGraphFlagValue('root', { root: [{ key: 'child' }] }); + mockVariation + .mockResolvedValueOnce(graphValue) + .mockResolvedValueOnce(makeAgentFlagValue('root', true)) + .mockResolvedValueOnce(makeAgentFlagValue('child', false)); + const graph = await client.agentGraph('my-graph', testContext); + expect(graph).toBeInstanceOf(AgentGraphDefinition); + expect(graph.enabled).toBe(false); + expect(mockDebug).toHaveBeenCalledWith(expect.stringContaining('not enabled')); +}); + +// --------------------------------------------------------------------------- +// agentGraph – success 
path +// --------------------------------------------------------------------------- + +it('returns an enabled graph for a valid graph with a single node', async () => { + const client = makeClient(); + const graphValue = makeGraphFlagValue('solo-agent'); + mockVariation + .mockResolvedValueOnce(graphValue) + .mockResolvedValueOnce(makeAgentFlagValue('solo-agent')); + const graph = await client.agentGraph('my-graph', testContext); + expect(graph).toBeInstanceOf(AgentGraphDefinition); + expect(graph.enabled).toBe(true); + expect(graph.rootNode().getKey()).toBe('solo-agent'); +}); + +it('returns an enabled graph with correct nodes for multi-node graph', async () => { + const client = makeClient(); + const graphValue = makeGraphFlagValue('root', { + root: [{ key: 'child-a' }, { key: 'child-b' }], + 'child-a': [{ key: 'leaf' }], + }); + mockVariation + .mockResolvedValueOnce(graphValue) + .mockResolvedValue(makeAgentFlagValue('agent', true)); + + const graph = await client.agentGraph('my-graph', testContext); + expect(graph.enabled).toBe(true); + expect(graph.rootNode().getKey()).toBe('root'); + expect( + graph + .getChildNodes('root') + .map((n) => n.getKey()) + .sort(), + ).toEqual(['child-a', 'child-b']); + expect( + graph + .terminalNodes() + .map((n) => n.getKey()) + .sort(), + ).toEqual(['child-b', 'leaf']); +}); + +it('tracks usage event when agentGraph is called', async () => { + const client = makeClient(); + mockVariation.mockResolvedValue({ root: '' }); + await client.agentGraph('my-graph', testContext); + expect(mockTrack).toHaveBeenCalledWith('$ld:ai:usage:agent-graph', testContext, 'my-graph', 1); +}); + +// --------------------------------------------------------------------------- +// createGraphTracker +// --------------------------------------------------------------------------- + +it('createGraphTracker reconstructs a tracker from a resumption token', () => { + const client = makeClient(); + const token = Buffer.from( + 
'{"runId":"run-1","graphKey":"g-key","variationKey":"v99","version":7}', + ).toString('base64url'); + + const tracker = client.createGraphTracker(token, testContext); + + expect(tracker.getTrackData().graphKey).toBe('g-key'); + expect(tracker.getTrackData().version).toBe(7); + expect(tracker.getTrackData().variationKey).toBe('v99'); + expect(tracker.getTrackData().runId).toBe('run-1'); +}); diff --git a/packages/sdk/server-ai/examples/agent-graph-traversal/README.md b/packages/sdk/server-ai/examples/agent-graph-traversal/README.md new file mode 100644 index 0000000000..2281901f1b --- /dev/null +++ b/packages/sdk/server-ai/examples/agent-graph-traversal/README.md @@ -0,0 +1,106 @@ +# Agent Graph Traversal Example + +Demonstrates how to fetch an agent graph from LaunchDarkly and wire it into +an AI framework using forward or reverse traversal. + +## Setup + +```bash +export LAUNCHDARKLY_SDK_KEY= +export LAUNCHDARKLY_GRAPH_KEY=sample-graph # optional, this is the default +yarn start +``` + +## What it does + +1. Fetches the graph flag and validates that it is enabled. +2. Runs a **forward traversal** (root → terminals), simulating how you would + build agents in a framework that constructs parents before children. +3. Runs a **reverse traversal** (terminals → root), simulating how you would + build agents in a framework that constructs children before parents. +4. Creates a tracker and records a successful invocation. + +## Choosing a traversal direction + +Both methods visit every node exactly once and pass an `executionContext` map +to each callback. The return value of your callback is stored under the node's +key, making it available to all subsequent nodes in that traversal. + +### Forward traversal (`graph.traverse`) + +Processes nodes from the root down to the terminals (BFS order). Use this when +your framework requires a **parent to be defined first** so that child agents +can be registered as handoff targets on it afterward. 
+ +``` +orchestrator-agent → specialist-agent-a → summarizer-agent + ↘ specialist-agent-b ↗ +``` + +When `specialist-agent-a` runs, `orchestrator-agent` is already in +`executionContext`. When `summarizer-agent` runs, both specialists are there. + +Typical frameworks: **OpenAI Agents SDK** — you create the orchestrator agent +first and then attach child agents as handoff targets. + +### Reverse traversal (`graph.reverseTraverse`) + +Processes nodes from the terminals up to the root (upward BFS). Use this when +your framework requires **children to be defined first** so they can be +attached to their parent as tools or sub-graphs. + +``` +summarizer-agent → specialist-agent-a → orchestrator-agent + ↗ specialist-agent-b +``` + +When `specialist-agent-a` runs, `summarizer-agent` is already in +`executionContext`. When `orchestrator-agent` runs, both specialists are there. + +Typical frameworks: **LangGraph** — you define leaf nodes first, then compose +them into parent nodes by attaching them as edges in the graph. + +### Cyclic graphs + +Both traversal methods are cycle-safe via a visited set. For `reverseTraverse`, +a graph with no terminal nodes (every node has at least one outgoing edge) +produces no iterations — there is no starting point for upward BFS. +`traverse` handles cycles normally; the cycle back-edge is simply skipped once +the target node has already been visited. + +## Tracking + +### Graph-level tracker + +Call `graph.createTracker()` once per invocation. The tracker groups all +telemetry events (latency, tokens, success/failure) under a shared `runId` +that appears in LaunchDarkly's AI metrics. + +```typescript +const tracker = graph.createTracker(); +try { + // ... execute graph ... + tracker.trackInvocationSuccess(); +} catch { + tracker.trackInvocationFailure(); +} +``` + +If you need to record tracking events across multiple requests (e.g. 
streaming), +use `tracker.resumptionToken` to serialize the tracker and reconstruct it later +via `aiClient.createGraphTracker(token, context)`. + +### Node-level tracker + +Each node also carries its own `LDAIConfigTracker` for recording metrics +against the underlying agent config (tokens, latency, model usage). Access it +inside your traversal callback via `node.getConfig().createTracker?.()`. + +```typescript +graph.traverse((node, executionContext) => { + const nodeTracker = node.getConfig().createTracker?.(); + // ... invoke the node's agent ... + nodeTracker?.trackSuccess({ totalTokens: 120, inputTokens: 80, outputTokens: 40 }); + return result; +}); +``` diff --git a/packages/sdk/server-ai/examples/agent-graph-traversal/package.json b/packages/sdk/server-ai/examples/agent-graph-traversal/package.json new file mode 100644 index 0000000000..7a3fddc707 --- /dev/null +++ b/packages/sdk/server-ai/examples/agent-graph-traversal/package.json @@ -0,0 +1,19 @@ +{ + "name": "@launchdarkly/server-sdk-ai-agent-graph-traversal", + "private": true, + "version": "1.0.0", + "description": "Example demonstrating LaunchDarkly AI SDK agent graph traversal", + "type": "module", + "scripts": { + "build": "tsc", + "start": "yarn build && node ./dist/index.js" + }, + "dependencies": { + "@launchdarkly/node-server-sdk": "9.10.11", + "@launchdarkly/server-sdk-ai": "0.16.8" + }, + "devDependencies": { + "@tsconfig/node20": "20.1.4", + "typescript": "^5.5.3" + } +} diff --git a/packages/sdk/server-ai/examples/agent-graph-traversal/src/index.ts b/packages/sdk/server-ai/examples/agent-graph-traversal/src/index.ts new file mode 100644 index 0000000000..6bf1d1cf42 --- /dev/null +++ b/packages/sdk/server-ai/examples/agent-graph-traversal/src/index.ts @@ -0,0 +1,134 @@ +/* eslint-disable no-console */ +import { init, type LDContext } from '@launchdarkly/node-server-sdk'; +import { initAi } from '@launchdarkly/server-sdk-ai'; +import type { AgentGraphNode } from 
'@launchdarkly/server-sdk-ai'; + +const GRAPH_KEY = process.env.LAUNCHDARKLY_GRAPH_KEY || 'sample-graph'; + +const sdkKey = process.env.LAUNCHDARKLY_SDK_KEY; +if (!sdkKey) { + console.error('*** Please set the LAUNCHDARKLY_SDK_KEY env first'); + process.exit(1); +} + +const ldClient = init(sdkKey); + +const context: LDContext = { + kind: 'user', + key: 'example-user-key', + name: 'Sandy', +}; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +// Build a provider-specific agent for this node. +// In a real implementation you would use node.getConfig() to read the agent's +// instructions/model and wire them into your framework (e.g. OpenAI Agents SDK, +// LangGraph, CrewAI). +function buildAgent(node: AgentGraphNode): string { + return ``; +} + +// --------------------------------------------------------------------------- +// Forward traversal — use when your framework builds parents before children. +// +// Each node receives the agents built by its ancestors via executionContext, +// so a parent can be passed to its children as a handoff target. +// +// Example frameworks: OpenAI Agents SDK (register tools/handoffs on the +// parent, then attach child agents). +// --------------------------------------------------------------------------- +function forwardTraversalExample(graph: ReturnType): void { + console.log('\n--- Forward traversal (root → terminals) ---'); + + graph.traverse((node: AgentGraphNode, executionContext: Record) => { + const agent = buildAgent(node); + + // Edges leaving this node tell you which agents this one can hand off to. + // Those child agents will be built in subsequent iterations and available + // in executionContext by the time they run. 
+ const childKeys = node.getEdges().map((e) => e.key); + const ready = childKeys.filter((k) => executionContext[k]); + console.log( + ` built ${agent} children: [${childKeys.join(', ') || 'none'}] pre-built: [${ready.join(', ') || 'none'}]`, + ); + + // Store the built agent so descendants can reference it. + return agent; + }); +} + +// --------------------------------------------------------------------------- +// Reverse traversal — use when your framework builds children before parents. +// +// Each node receives already-built descendant agents via executionContext, +// so a child can be attached to its parent as a tool or sub-agent. +// +// Example frameworks: LangGraph (define leaf nodes first, then compose them +// into parent nodes as edges in the graph). +// --------------------------------------------------------------------------- +function reverseTraversalExample(graph: ReturnType): void { + console.log('\n--- Reverse traversal (terminals → root) ---'); + + graph.reverseTraverse((node: AgentGraphNode, executionContext: Record) => { + const agent = buildAgent(node); + + // Children of this node are guaranteed to already be in executionContext. 
+ const childKeys = node.getEdges().map((e) => e.key); + const builtChildren = childKeys.map((k) => executionContext[k]).filter(Boolean); + console.log(` built ${agent} attaching children: [${builtChildren.join(', ') || 'none'}]`); + + return agent; + }); +} + +// --------------------------------------------------------------------------- +// Main +// --------------------------------------------------------------------------- + +async function main() { + try { + await ldClient.waitForInitialization({ timeout: 10 }); + console.log('*** SDK successfully initialized'); + } catch (error) { + console.log(`*** SDK failed to initialize: ${error}`); + process.exit(1); + } + + const aiClient = initAi(ldClient); + + const graph = await aiClient.agentGraph(GRAPH_KEY, context); + + if (!graph.enabled) { + console.log(`\n*** Graph "${GRAPH_KEY}" is not enabled or could not be fetched.`); + process.exit(0); + } + + console.log(`\n=== Graph: ${GRAPH_KEY} ===`); + console.log(`Root : ${graph.rootNode().getKey()}`); + console.log( + `Terminals: ${ + graph + .terminalNodes() + .map((n) => n.getKey()) + .join(', ') || '(none — cyclic graph)' + }`, + ); + + forwardTraversalExample(graph); + reverseTraversalExample(graph); + + // Create a tracker to record this graph invocation in LaunchDarkly. + // Call trackInvocationSuccess() or trackInvocationFailure() when done. 
+ const tracker = graph.createTracker(); + tracker.trackInvocationSuccess(); + + await ldClient.close(); +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); diff --git a/packages/sdk/server-ai/examples/agent-graph-traversal/tsconfig.json b/packages/sdk/server-ai/examples/agent-graph-traversal/tsconfig.json new file mode 100644 index 0000000000..6916599c7d --- /dev/null +++ b/packages/sdk/server-ai/examples/agent-graph-traversal/tsconfig.json @@ -0,0 +1,18 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "node", + "esModuleInterop": true, + "allowSyntheticDefaultImports": true, + "strict": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "outDir": "./dist", + "rootDir": "./src", + "declaration": true, + "sourceMap": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} diff --git a/packages/sdk/server-ai/src/LDAIClientImpl.ts b/packages/sdk/server-ai/src/LDAIClientImpl.ts index 65eb87a1a9..9bf9c2ffc3 100644 --- a/packages/sdk/server-ai/src/LDAIClientImpl.ts +++ b/packages/sdk/server-ai/src/LDAIClientImpl.ts @@ -21,11 +21,13 @@ import { LDMessage, } from './api/config'; import { LDAIConfigFlagValue, LDAIConfigUtils } from './api/config/LDAIConfigUtils'; +import { AgentGraphDefinition, LDAgentGraphFlagValue, LDGraphTracker } from './api/graph'; import { Judge } from './api/judge/Judge'; import { LDAIClient } from './api/LDAIClient'; import { AIProviderFactory, SupportedAIProvider } from './api/providers'; import { LDAIConfigTrackerImpl } from './LDAIConfigTrackerImpl'; import { LDClientMin } from './LDClientMin'; +import { LDGraphTrackerImpl } from './LDGraphTrackerImpl'; import { aiSdkLanguage, aiSdkName, aiSdkVersion } from './sdkInfo'; /** @@ -38,6 +40,7 @@ const TRACK_USAGE_JUDGE_CONFIG = '$ld:ai:usage:judge-config'; const TRACK_USAGE_CREATE_JUDGE = '$ld:ai:usage:create-judge'; const TRACK_USAGE_AGENT_CONFIG = '$ld:ai:usage:agent-config'; const 
TRACK_USAGE_AGENT_CONFIGS = '$ld:ai:usage:agent-configs';
+const TRACK_USAGE_AGENT_GRAPH = '$ld:ai:usage:agent-graph';
 
 const INIT_TRACK_CONTEXT: LDContext = {
   kind: 'ld_ai',
@@ -393,4 +396,119 @@ export class LDAIClientImpl implements LDAIClient {
   createTracker(token: string, context: LDContext): LDAIConfigTracker {
     return LDAIConfigTrackerImpl.fromResumptionToken(token, this._ldClient, context);
   }
+
+  async agentGraph(
+    graphKey: string,
+    context: LDContext,
+    variables?: Record<string, unknown>,
+  ): Promise<AgentGraphDefinition> {
+    this._ldClient.track(TRACK_USAGE_AGENT_GRAPH, context, graphKey, 1);
+
+    const defaultGraphValue: LDAgentGraphFlagValue = { root: '' };
+    const graphFlagValue = (await this._ldClient.variation(
+      graphKey,
+      context,
+      defaultGraphValue,
+    )) as LDAgentGraphFlagValue;
+
+    // eslint-disable-next-line no-underscore-dangle
+    const variationKey = graphFlagValue._ldMeta?.variationKey;
+    // eslint-disable-next-line no-underscore-dangle
+    const version = graphFlagValue._ldMeta?.version ?? 1;
+    const ldClient = this._ldClient;
+    const trackerFactory = () =>
+      new LDGraphTrackerImpl(ldClient, randomUUID(), graphKey, variationKey, version, context);
+
+    const disabled = new AgentGraphDefinition(graphFlagValue, {}, false, trackerFactory);
+
+    // eslint-disable-next-line no-underscore-dangle
+    if (graphFlagValue._ldMeta?.enabled === false) {
+      this._logger?.debug(`agentGraph: graph "${graphKey}" is disabled.`);
+      return disabled;
+    }
+
+    if (!graphFlagValue.root) {
+      this._logger?.debug(`agentGraph: graph "${graphKey}" is not fetchable or has no root node.`);
+      return disabled;
+    }
+
+    const allKeys = AgentGraphDefinition.collectAllKeys(graphFlagValue);
+    const reachableKeys = this._collectReachableKeys(graphFlagValue);
+
+    const unreachableKey = [...allKeys].find((key) => !reachableKeys.has(key));
+    if (unreachableKey) {
+      this._logger?.debug(
+        `agentGraph: graph "${graphKey}" has unconnected node "${unreachableKey}" that is not reachable from the root.`,
+      );
+      return disabled; 
+ } + + const agentConfigs: Record = {}; + const fetchResults = await Promise.all( + [...allKeys].map(async (key) => { + const config = await this._agentConfigInternal(key, context, graphKey, variables); + return { key, config }; + }), + ); + + const disabledResult = fetchResults.find(({ config }) => !config.enabled); + if (disabledResult) { + this._logger?.debug( + `agentGraph: agent config "${disabledResult.key}" in graph "${graphKey}" is not enabled or could not be fetched.`, + ); + return disabled; + } + fetchResults.forEach(({ key, config }) => { + agentConfigs[key] = config; + }); + + const nodes = AgentGraphDefinition.buildNodes(graphFlagValue, agentConfigs); + return new AgentGraphDefinition(graphFlagValue, nodes, true, trackerFactory); + } + + createGraphTracker(token: string, context: LDContext): LDGraphTracker { + return LDGraphTrackerImpl.fromResumptionToken(token, this._ldClient, context); + } + + /** + * Fetches a single agent config without tracking usage (used internally by agentGraph). + */ + private async _agentConfigInternal( + key: string, + context: LDContext, + graphKey?: string, + variables?: Record, + ): Promise { + const config = await this._evaluate( + key, + context, + disabledAIConfig, + 'agent', + variables, + graphKey, + ); + return config as LDAIAgentConfig; + } + + /** + * Returns the set of all node keys reachable from the root via BFS. + */ + private _collectReachableKeys(graph: LDAgentGraphFlagValue): Set { + const visited = new Set(); + const queue: string[] = [graph.root]; + visited.add(graph.root); + + while (queue.length > 0) { + const key = queue.shift()!; + const edges = graph.edges?.[key] ?? 
[]; + edges.forEach((edge) => { + if (!visited.has(edge.key)) { + visited.add(edge.key); + queue.push(edge.key); + } + }); + } + + return visited; + } } diff --git a/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts index d1f0602f50..6accab1959 100644 --- a/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts @@ -1,65 +1,129 @@ -import { LDContext } from '@launchdarkly/js-server-sdk-common'; +import type { LDContext } from '@launchdarkly/js-server-sdk-common'; -import { LDGraphMetricSummary, LDGraphTracker } from './api/graph/LDGraphTracker'; -import { LDJudgeResult } from './api/judge/types'; -import { LDTokenUsage } from './api/metrics'; -import { LDClientMin } from './LDClientMin'; +import type { LDGraphTracker } from './api/graph/LDGraphTracker'; +import type { LDGraphMetricSummary, LDGraphTrackData } from './api/graph/types'; +import type { LDJudgeResult } from './api/judge/types'; +import type { LDTokenUsage } from './api/metrics'; +import type { LDClientMin } from './LDClientMin'; +/** + * Concrete implementation of {@link LDGraphTracker}. + * + * Construct directly or reconstruct from a resumption token via + * {@link LDGraphTrackerImpl.fromResumptionToken}. 
+ */ export class LDGraphTrackerImpl implements LDGraphTracker { - private _trackedMetrics: LDGraphMetricSummary = {}; + private _summary: LDGraphMetricSummary = {}; constructor( - private _ldClient: LDClientMin, - private _graphKey: string, - private _variationKey: string, - private _version: number, - private _context: LDContext, + private readonly _ldClient: LDClientMin, + private readonly _runId: string, + private readonly _graphKey: string, + private readonly _variationKey: string | undefined, + private readonly _version: number, + private readonly _context: LDContext, ) {} - getTrackData(): { - variationKey: string; - graphKey: string; - version: number; - } { - return { - variationKey: this._variationKey, + /** + * Reconstructs an {@link LDGraphTrackerImpl} from a resumption token, preserving + * the original `runId` so all events continue to be correlated under the same run. + * + * **Security note:** The token contains the flag variation key and version. + * Do not pass the raw token to untrusted clients. + * + * @param token URL-safe Base64-encoded token produced by {@link LDGraphTrackerImpl.resumptionToken}. + * @param ldClient LaunchDarkly client instance. + * @param context LDContext for the new tracker. 
+ */ + static fromResumptionToken( + token: string, + ldClient: LDClientMin, + context: LDContext, + ): LDGraphTrackerImpl { + const json = Buffer.from(token, 'base64url').toString('utf8'); + const data = JSON.parse(json) as LDGraphTrackData; + return new LDGraphTrackerImpl( + ldClient, + data.runId, + data.graphKey, + data.variationKey, + data.version, + context, + ); + } + + getTrackData(): LDGraphTrackData { + const data: LDGraphTrackData = { + runId: this._runId, graphKey: this._graphKey, version: this._version, }; + if (this._variationKey !== undefined) { + data.variationKey = this._variationKey; + } + return data; + } + + getSummary(): LDGraphMetricSummary { + return { ...this._summary }; + } + + get resumptionToken(): string { + // Keys must appear in exact spec-defined order: + // runId, graphKey, variationKey (omitted if absent), version + const parts: string[] = [ + `"runId":${JSON.stringify(this._runId)}`, + `"graphKey":${JSON.stringify(this._graphKey)}`, + ]; + if (this._variationKey !== undefined) { + parts.push(`"variationKey":${JSON.stringify(this._variationKey)}`); + } + parts.push(`"version":${this._version}`); + const json = `{${parts.join(',')}}`; + return Buffer.from(json).toString('base64url'); } trackInvocationSuccess(): void { - if (this._trackedMetrics.success !== undefined) { + if (this._summary.success !== undefined) { + this._ldClient.logger?.warn( + 'LDGraphTracker: invocation success/failure already recorded for this run — dropping duplicate call.', + ); return; } - this._trackedMetrics.success = true; + this._summary.success = true; this._ldClient.track('$ld:ai:graph:invocation_success', this._context, this.getTrackData(), 1); } trackInvocationFailure(): void { - if (this._trackedMetrics.success !== undefined) { + if (this._summary.success !== undefined) { + this._ldClient.logger?.warn( + 'LDGraphTracker: invocation success/failure already recorded for this run — dropping duplicate call.', + ); return; } - this._trackedMetrics.success 
= false; + this._summary.success = false; this._ldClient.track('$ld:ai:graph:invocation_failure', this._context, this.getTrackData(), 1); } trackLatency(durationMs: number): void { - if (this._trackedMetrics.durationMs !== undefined) { + if (this._summary.durationMs !== undefined) { + this._ldClient.logger?.warn( + 'LDGraphTracker: trackLatency already called for this run — dropping duplicate call.', + ); return; } - this._trackedMetrics.durationMs = durationMs; + this._summary.durationMs = durationMs; this._ldClient.track('$ld:ai:graph:latency', this._context, this.getTrackData(), durationMs); } trackTotalTokens(tokens: LDTokenUsage): void { - if (this._trackedMetrics.tokens !== undefined) { - return; - } - if (tokens.total <= 0) { + if (this._summary.tokens !== undefined) { + this._ldClient.logger?.warn( + 'LDGraphTracker: trackTotalTokens already called for this run — dropping duplicate call.', + ); return; } - this._trackedMetrics.tokens = tokens; + this._summary.tokens = { ...tokens }; this._ldClient.track( '$ld:ai:graph:total_tokens', this._context, @@ -69,10 +133,13 @@ export class LDGraphTrackerImpl implements LDGraphTracker { } trackPath(path: string[]): void { - if (this._trackedMetrics.path !== undefined) { + if (this._summary.path !== undefined) { + this._ldClient.logger?.warn( + 'LDGraphTracker: trackPath already called for this run — dropping duplicate call.', + ); return; } - this._trackedMetrics.path = path; + this._summary.path = [...path]; this._ldClient.track('$ld:ai:graph:path', this._context, { ...this.getTrackData(), path }, 1); } @@ -115,8 +182,4 @@ export class LDGraphTrackerImpl implements LDGraphTracker { 1, ); } - - getSummary(): LDGraphMetricSummary { - return { ...this._trackedMetrics }; - } } diff --git a/packages/sdk/server-ai/src/api/LDAIClient.ts b/packages/sdk/server-ai/src/api/LDAIClient.ts index fd93ca92a5..5dfec98072 100644 --- a/packages/sdk/server-ai/src/api/LDAIClient.ts +++ b/packages/sdk/server-ai/src/api/LDAIClient.ts @@ 
-11,6 +11,7 @@ import { LDAIJudgeConfig, LDAIJudgeConfigDefault, } from './config'; +import { AgentGraphDefinition, LDGraphTracker } from './graph'; import { Judge } from './judge/Judge'; import { SupportedAIProvider } from './providers'; @@ -337,4 +338,55 @@ export interface LDAIClient { * @returns A reconstructed AIConfigTracker with the original runId preserved. */ createTracker(token: string, context: LDContext): LDAIConfigTracker; + + /** + * Fetches an agent graph configuration from LaunchDarkly and returns an + * {@link AgentGraphDefinition}. + * + * When the graph is enabled the method validates that: + * - The graph flag can be evaluated. + * - A single root node is present. + * - All nodes in the graph are reachable from the root (no disconnected nodes). + * - Every referenced agent config can be fetched and is enabled. + * + * If any validation check fails, the returned definition has + * {@link AgentGraphDefinition.enabled | enabled} set to `false` with an empty + * node collection. When the logger level is DEBUG, a message describing the + * failure is emitted. + * + * @param graphKey The LaunchDarkly flag key for the agent graph configuration. + * @param context The LaunchDarkly context used for flag evaluation and tracking. + * @param variables Optional key-value pairs used for Mustache template interpolation + * in each node's agent config instructions. Applied uniformly to all nodes. + * + * @returns A promise that resolves to an {@link AgentGraphDefinition}. Check + * {@link AgentGraphDefinition.enabled | enabled} before traversing. 
+   *
+   * @example
+   * ```typescript
+   * const graph = await aiClient.agentGraph('my-agent-graph', context, { userName: 'Sandy' });
+   * if (graph.enabled) {
+   *   graph.traverse((node, ctx) => {
+   *     // build your provider-specific node here
+   *   });
+   * }
+   * ```
+   */
+  agentGraph(
+    graphKey: string,
+    context: LDContext,
+    variables?: Record<string, unknown>,
+  ): Promise<AgentGraphDefinition>;
+
+  /**
+   * Reconstructs an {@link LDGraphTracker} from a resumption token, preserving
+   * the original `runId` so events from a resumed session are correlated correctly.
+   *
+   * **Security note:** The token encodes the flag variation key and version.
+   * Keep it server-side; do not expose it to untrusted clients.
+   *
+   * @param token URL-safe Base64-encoded token from {@link LDGraphTracker.resumptionToken}.
+   * @param context LDContext to associate with the reconstructed tracker.
+   */
+  createGraphTracker(token: string, context: LDContext): LDGraphTracker;
 }
diff --git a/packages/sdk/server-ai/src/api/graph/AgentGraphDefinition.ts b/packages/sdk/server-ai/src/api/graph/AgentGraphDefinition.ts
new file mode 100644
index 0000000000..c5113b53a5
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/graph/AgentGraphDefinition.ts
@@ -0,0 +1,253 @@
+import type { LDAIAgentConfig } from '../config';
+import { AgentGraphNode } from './AgentGraphNode';
+import type { LDGraphTracker } from './LDGraphTracker';
+import type { LDAgentGraphFlagValue, LDGraphEdge } from './types';
+
+/**
+ * Callback function signature for graph traversal methods.
+ */
+export type TraversalFn = (
+  node: AgentGraphNode,
+  executionContext: Record<string, unknown>,
+) => unknown;
+
+/**
+ * Encapsulates an agent graph configuration and its pre-built node collection.
+ *
+ * Provides graph-level orchestration including relationship queries (parent/child),
+ * breadth-first traversal in both forward and reverse directions, and graph tracker creation.
+ *
+ * Obtain an instance via {@link LDAIClient.agentGraph}. When the graph is disabled
+ * or invalid, the returned instance has {@link enabled} set to `false` and an
+ * empty node collection.
+ */
+export class AgentGraphDefinition {
+  constructor(
+    private readonly _agentGraph: LDAgentGraphFlagValue,
+    private readonly _nodes: Record<string, AgentGraphNode>,
+    readonly enabled: boolean,
+    private readonly _createTracker: () => LDGraphTracker,
+  ) {}
+
+  /**
+   * Builds a node map from a raw agent graph flag value and a map of pre-fetched agent configs.
+   *
+   * @param graph Raw graph flag value from LaunchDarkly.
+   * @param agentConfigs Map of agent config key to resolved LDAIAgentConfig.
+   * @returns Record mapping agent config keys to AgentGraphNode instances.
+   */
+  static buildNodes(
+    graph: LDAgentGraphFlagValue,
+    agentConfigs: Record<string, LDAIAgentConfig>,
+  ): Record<string, AgentGraphNode> {
+    const nodes: Record<string, AgentGraphNode> = {};
+    const allKeys = AgentGraphDefinition.collectAllKeys(graph);
+
+    allKeys.forEach((key) => {
+      const config = agentConfigs[key];
+      if (!config) {
+        return;
+      }
+      const outgoingEdges: LDGraphEdge[] = graph.edges?.[key] ?? [];
+      nodes[key] = new AgentGraphNode(key, config, outgoingEdges);
+    });
+
+    return nodes;
+  }
+
+  /**
+   * Returns the children of the node identified by `nodeKey`.
+   *
+   * @param nodeKey The agent config key of the parent node.
+   */
+  getChildNodes(nodeKey: string): AgentGraphNode[] {
+    const node = this._nodes[nodeKey];
+    if (!node) {
+      return [];
+    }
+    return node
+      .getEdges()
+      .map((edge) => this._nodes[edge.key])
+      .filter((n): n is AgentGraphNode => n !== undefined);
+  }
+
+  /**
+   * Returns all nodes that have a direct edge to the node identified by `nodeKey`.
+   *
+   * @param nodeKey The agent config key of the child node.
+   */
+  getParentNodes(nodeKey: string): AgentGraphNode[] {
+    return Object.values(this._nodes).filter((node) =>
+      node.getEdges().some((edge) => edge.key === nodeKey),
+    );
+  }
+
+  /**
+   * Returns all terminal nodes (nodes with no outgoing edges).
+ */ + terminalNodes(): AgentGraphNode[] { + return Object.values(this._nodes).filter((node) => node.isTerminal()); + } + + /** + * Returns the root node of the graph. + */ + rootNode(): AgentGraphNode { + return this._nodes[this._agentGraph.root]; + } + + /** + * Returns the node with the given key, or `null` if not found. + * + * @param nodeKey The agent config key to look up. + */ + getNode(nodeKey: string): AgentGraphNode | null { + return this._nodes[nodeKey] ?? null; + } + + /** + * Returns the underlying raw graph configuration from LaunchDarkly. + */ + getConfig(): LDAgentGraphFlagValue { + return this._agentGraph; + } + + /** + * Returns a new {@link LDGraphTracker} for this graph invocation. + * + * Call this once per invocation. Each call produces a tracker with a fresh `runId` + * that groups all events for that invocation. + */ + createTracker(): LDGraphTracker { + return this._createTracker(); + } + + /** + * Traverses the graph breadth-first from the root to all terminal nodes. + * + * Nodes at the same depth are processed before advancing to the next depth. + * The value returned by `fn` is stored in the mutable `executionContext` under + * the node's key, making upstream results available to downstream nodes. + * + * Cyclic graphs are handled safely — each node is visited at most once. + * + * @param fn Callback invoked for each node. Its return value is added to + * `executionContext` keyed by the node's config key. + * @param initialExecutionContext Optional initial context to seed the traversal. 
+   */
+  traverse(fn: TraversalFn, initialExecutionContext: Record<string, unknown> = {}): void {
+    const root = this.rootNode();
+    if (!root) {
+      return;
+    }
+
+    const executionContext = { ...initialExecutionContext };
+    const visited = new Set<string>();
+    const queue: AgentGraphNode[] = [root];
+    visited.add(root.getKey());
+
+    while (queue.length > 0) {
+      const node = queue.shift()!;
+      const result = fn(node, executionContext);
+      executionContext[node.getKey()] = result;
+
+      node.getEdges().forEach((edge) => {
+        if (!visited.has(edge.key)) {
+          const child = this._nodes[edge.key];
+          if (child) {
+            visited.add(edge.key);
+            queue.push(child);
+          }
+        }
+      });
+    }
+  }
+
+  /**
+   * Traverses the graph from terminal nodes up to the root.
+   *
+   * Uses BFS upward via parent edges so that each node is processed only after
+   * all of its reachable descendants have been processed. The root is always
+   * visited last. Cyclic graphs are handled safely — each node is visited at
+   * most once; if the graph has no terminal nodes, this method returns without
+   * invoking `fn`.
+   *
+   * **Ordering note:** Within a single BFS level (nodes at the same depth from a
+   * terminal) the visit order is not strictly guaranteed. The guarantee is only
+   * that a node is visited before any of its ancestors — not that siblings at the
+   * same depth are visited in a specific order relative to each other.
+   *
+   * The value returned by `fn` is stored in the mutable `executionContext` under
+   * the node's key.
+   *
+   * @param fn Callback invoked for each node. Its return value is added to
+   * `executionContext` keyed by the node's config key.
+   * @param initialExecutionContext Optional initial context to seed the traversal.
+   */
+  reverseTraverse(fn: TraversalFn, initialExecutionContext: Record<string, unknown> = {}): void {
+    const terminals = this.terminalNodes();
+    if (terminals.length === 0) {
+      return;
+    }
+
+    const executionContext = { ...initialExecutionContext };
+    const rootKey = this._agentGraph.root;
+    const visited = new Set<string>();
+    let queue: AgentGraphNode[] = terminals;
+
+    while (queue.length > 0) {
+      const nextQueue: AgentGraphNode[] = [];
+
+      queue.forEach((node) => {
+        const key = node.getKey();
+        if (visited.has(key)) {
+          return;
+        }
+        visited.add(key);
+
+        // Defer the root so it is always processed last
+        if (key === rootKey) {
+          return;
+        }
+
+        const result = fn(node, executionContext);
+        executionContext[key] = result;
+
+        this.getParentNodes(key).forEach((parent) => {
+          if (!visited.has(parent.getKey())) {
+            nextQueue.push(parent);
+          }
+        });
+      });
+
+      queue = nextQueue;
+    }
+
+    // Root is always last — only invoke if it was reached during traversal
+    const root = this._nodes[rootKey];
+    if (root && visited.has(rootKey)) {
+      const result = fn(root, executionContext);
+      executionContext[rootKey] = result;
+    }
+  }
+
+  /**
+   * Collects every unique node key referenced in the graph (root + all edge sources
+   * and targets).
+   */
+  static collectAllKeys(graph: LDAgentGraphFlagValue): Set<string> {
+    const keys = new Set<string>();
+    keys.add(graph.root);
+
+    if (graph.edges) {
+      Object.entries(graph.edges).forEach(([sourceKey, edges]) => {
+        keys.add(sourceKey);
+        edges.forEach((edge) => {
+          keys.add(edge.key);
+        });
+      });
+    }
+
+    return keys;
+  }
+}
diff --git a/packages/sdk/server-ai/src/api/graph/AgentGraphNode.ts b/packages/sdk/server-ai/src/api/graph/AgentGraphNode.ts
new file mode 100644
index 0000000000..598bfbf0c1
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/graph/AgentGraphNode.ts
@@ -0,0 +1,46 @@
+import type { LDAIAgentConfig } from '../config';
+import type { LDGraphEdge } from './types';
+
+/**
+ * Represents a single node within an agent graph.
+ * + * Each node wraps an {@link LDAIAgentConfig} and carries the outgoing edges + * to its children. Use the node's tracker (via `getConfig().tracker`) to record + * node-level metrics against the underlying agent config. + */ +export class AgentGraphNode { + constructor( + private readonly _key: string, + private readonly _config: LDAIAgentConfig, + private readonly _edges: LDGraphEdge[], + ) {} + + /** + * Returns the agent config key that identifies this node in the graph. + */ + getKey(): string { + return this._key; + } + + /** + * Returns the underlying AIAgentConfig for this node. + * Use `getConfig().tracker` to record node-level metrics. + */ + getConfig(): LDAIAgentConfig { + return this._config; + } + + /** + * Returns the outgoing edges from this node to its children. + */ + getEdges(): LDGraphEdge[] { + return this._edges; + } + + /** + * Returns `true` if this node has no outgoing edges (i.e., it is a terminal/leaf node). + */ + isTerminal(): boolean { + return this._edges.length === 0; + } +} diff --git a/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts index 9ce432d1db..25afc9b2ce 100644 --- a/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts +++ b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts @@ -1,110 +1,120 @@ -import { LDJudgeResult } from '../judge/types'; -import { LDTokenUsage } from '../metrics'; +import type { LDJudgeResult } from '../judge/types'; +import type { LDTokenUsage } from '../metrics'; +import type { LDGraphMetricSummary, LDGraphTrackData } from './types'; /** - * Metrics tracked at the graph level. + * Tracks graph-level and edge-level metrics for an agent graph invocation. + * + * Graph-level methods enforce at-most-once semantics: calling the same method + * twice on a tracker instance drops the second call and emits a warning. 
+ * Edge-level methods (trackRedirect, trackHandoffSuccess, trackHandoffFailure) + * are multi-fire and are not subject to this constraint. + * + * @example + * ```typescript + * const tracker = graphDefinition.createTracker(); + * try { + * // ... execute graph ... + * tracker.trackInvocationSuccess(); + * tracker.trackLatency(durationMs); + * } catch { + * tracker.trackInvocationFailure(); + * } + * ``` */ -export interface LDGraphMetricSummary { +export interface LDGraphTracker { /** - * True if the graph invocation succeeded, false if it failed, absent if not tracked. + * Returns tracking metadata to be included in every LDClient.track call. */ - success?: boolean; + getTrackData(): LDGraphTrackData; /** - * Total graph execution duration in milliseconds, if tracked. + * Returns a snapshot of all graph-level metrics tracked so far. */ - durationMs?: number; + getSummary(): LDGraphMetricSummary; /** - * Aggregated token usage across the entire graph invocation, if tracked. + * A URL-safe Base64-encoded (RFC 4648, no padding) token encoding the tracker's + * identity. Pass this token to {@link LDGraphTrackerImpl.fromResumptionToken} to + * reconstruct the tracker across process boundaries, preserving the original runId. + * + * **Security note:** The token contains the flag variation key and version. If passed + * to an untrusted client (e.g., a browser) this could expose feature-flag targeting + * details. Keep the token server-side and use an opaque reference in client-facing APIs. */ - tokens?: LDTokenUsage; + readonly resumptionToken: string; - /** - * Execution path through the graph as an array of config keys, if tracked. - */ - path?: string[]; -} + // ------------------------------------------------------------------------- + // Graph-level tracking methods (at-most-once per tracker instance) + // ------------------------------------------------------------------------- -/** - * Tracker for graph-level and edge-level metrics in AI agent graph operations. 
- * - * Node-level metrics are tracked via each node's {@link LDAIConfigTracker}. - */ -export interface LDGraphTracker { /** - * Get the data for tracking. - */ - getTrackData(): { - variationKey: string; - graphKey: string; - version: number; - }; - - /** - * Track a successful graph invocation. - * - * At-most-once per tracker instance. Subsequent calls are dropped. + * Tracks a successful graph invocation. + * Emits event `$ld:ai:graph:invocation_success` with metric value `1`. + * At-most-once: subsequent calls are dropped with a warning. */ trackInvocationSuccess(): void; /** - * Track an unsuccessful graph invocation. - * - * At-most-once per tracker instance. Subsequent calls are dropped. + * Tracks an unsuccessful graph invocation. + * Emits event `$ld:ai:graph:invocation_failure` with metric value `1`. + * At-most-once: subsequent calls are dropped with a warning. */ trackInvocationFailure(): void; /** - * Track the total latency of graph execution. - * - * At-most-once per tracker instance. Subsequent calls are dropped. + * Tracks the total latency of the graph execution in milliseconds. + * Emits event `$ld:ai:graph:latency` with the duration as the metric value. + * At-most-once: subsequent calls are dropped with a warning. * * @param durationMs Duration in milliseconds. */ trackLatency(durationMs: number): void; /** - * Track aggregated token usage across the entire graph invocation. - * - * At-most-once per tracker instance. Subsequent calls are dropped. + * Tracks aggregate token usage across the entire graph invocation. + * Emits event `$ld:ai:graph:total_tokens` with the total token count as the metric value. + * At-most-once: subsequent calls are dropped with a warning. * * @param tokens Token usage information. */ trackTotalTokens(tokens: LDTokenUsage): void; /** - * Track the execution path through the graph. + * Tracks the execution path through the graph. + * Emits event `$ld:ai:graph:path` with metric value `1`. 
+ * The data payload includes the path array in addition to standard track data. + * At-most-once: subsequent calls are dropped with a warning. * - * At-most-once per tracker instance. Subsequent calls are dropped. - * - * @param path Array of config keys representing the sequence of nodes executed. + * @param path An ordered array of agent config keys representing the execution path. */ trackPath(path: string[]): void; /** - * Track a judge evaluation result for the final graph output. - * - * No event is emitted when the result was not sampled (result.sampled is false). + * Tracks a judge evaluation result for the final graph output. + * Emits one LDClient.track call when the result was sampled and successful. + * Not subject to at-most-once constraints. * * @param result Judge result containing score, reasoning, and metadata. */ trackJudgeResult(result: LDJudgeResult): void; + // ------------------------------------------------------------------------- + // Edge-level tracking methods (multi-fire, not at-most-once) + // ------------------------------------------------------------------------- + /** - * Track when a node redirects to a different target than originally specified. - * - * May be called multiple times. + * Tracks when a node redirects to a different target than originally specified. + * Emits event `$ld:ai:graph:redirect` with metric value `1`. * * @param sourceKey Config key of the source node. - * @param redirectedTarget Config key of the target node that was redirected to. + * @param redirectedTarget Config key of the actual target node. */ trackRedirect(sourceKey: string, redirectedTarget: string): void; /** - * Track a successful handoff between nodes. - * - * May be called multiple times. + * Tracks a successful handoff between two nodes. + * Emits event `$ld:ai:graph:handoff_success` with metric value `1`. * * @param sourceKey Config key of the source node. * @param targetKey Config key of the target node. 
@@ -112,17 +122,11 @@ export interface LDGraphTracker {
   trackHandoffSuccess(sourceKey: string, targetKey: string): void;
 
   /**
-   * Track a failed handoff between nodes.
-   *
-   * May be called multiple times.
+   * Tracks a failed handoff between two nodes.
+   * Emits event `$ld:ai:graph:handoff_failure` with metric value `1`.
    *
    * @param sourceKey Config key of the source node.
    * @param targetKey Config key of the target node.
    */
   trackHandoffFailure(sourceKey: string, targetKey: string): void;
-
-  /**
-   * Get a summary of the tracked graph-level metrics.
-   */
-  getSummary(): LDGraphMetricSummary;
 }
diff --git a/packages/sdk/server-ai/src/api/graph/index.ts b/packages/sdk/server-ai/src/api/graph/index.ts
index 536e630115..9d899029d5 100644
--- a/packages/sdk/server-ai/src/api/graph/index.ts
+++ b/packages/sdk/server-ai/src/api/graph/index.ts
@@ -1 +1,4 @@
+export * from './types';
 export * from './LDGraphTracker';
+export * from './AgentGraphNode';
+export * from './AgentGraphDefinition';
diff --git a/packages/sdk/server-ai/src/api/graph/types.ts b/packages/sdk/server-ai/src/api/graph/types.ts
new file mode 100644
index 0000000000..1b578fecba
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/graph/types.ts
@@ -0,0 +1,88 @@
+import { LDTokenUsage } from '../metrics';
+
+/**
+ * Represents a directed edge in an agent graph, connecting a source node to a target node.
+ */
+export interface LDGraphEdge {
+  /**
+   * The key of the target AIAgentConfig node.
+   */
+  key: string;
+
+  /**
+   * Optional handoff options that customize how data flows between nodes.
+   */
+  handoff?: Record<string, unknown>;
+}
+
+/**
+ * Raw flag value for an agent graph configuration as returned by LaunchDarkly.
+ * This represents the data structure delivered by LaunchDarkly for graph configurations.
+ */
+export interface LDAgentGraphFlagValue {
+  _ldMeta?: {
+    variationKey?: string;
+    version?: number;
+    enabled?: boolean;
+  };
+
+  /**
+   * The key of the root AIAgentConfig in the graph.
+   */
+  root: string;
+
+  /**
+   * Object mapping source agent config keys to arrays of target edges.
+   */
+  edges?: Record<string, LDGraphEdge[]>;
+}
+
+/**
+ * Accumulated graph-level metrics collected by an LDGraphTracker.
+ */
+export interface LDGraphMetricSummary {
+  /**
+   * Whether the graph invocation succeeded. Absent if not yet tracked.
+   */
+  success?: boolean;
+
+  /**
+   * Total graph execution duration in milliseconds. Absent if not yet tracked.
+   */
+  durationMs?: number;
+
+  /**
+   * Aggregate token usage across the entire graph invocation. Absent if not yet tracked.
+   */
+  tokens?: LDTokenUsage;
+
+  /**
+   * Execution path through the graph as an array of config keys. Absent if not yet tracked.
+   */
+  path?: string[];
+}
+
+/**
+ * Tracking metadata returned by {@link LDGraphTracker.getTrackData}.
+ */
+export interface LDGraphTrackData {
+  /**
+   * UUID v4 uniquely identifying this tracker and all events it emits.
+   */
+  runId: string;
+
+  /**
+   * The graph configuration key.
+   */
+  graphKey: string;
+
+  /**
+   * The variation key. Absent when a default config was used rather than a real flag evaluation.
+   */
+  variationKey?: string;
+
+  /**
+   * The version of the flag variation.
+ */ + version: number; +} diff --git a/release-please-config.json b/release-please-config.json index a667b4b38c..8aada61d70 100644 --- a/release-please-config.json +++ b/release-please-config.json @@ -167,6 +167,11 @@ "type": "json", "path": "/packages/sdk/server-ai/examples/chat-observability/package.json", "jsonpath": "$.dependencies['@launchdarkly/node-server-sdk']" + }, + { + "type": "json", + "path": "/packages/sdk/server-ai/examples/agent-graph-traversal/package.json", + "jsonpath": "$.dependencies['@launchdarkly/node-server-sdk']" } ] }, @@ -256,6 +261,11 @@ "type": "json", "path": "examples/chat-observability/package.json", "jsonpath": "$.dependencies['@launchdarkly/server-sdk-ai']" + }, + { + "type": "json", + "path": "examples/agent-graph-traversal/package.json", + "jsonpath": "$.dependencies['@launchdarkly/server-sdk-ai']" } ] }, From 400d97162e6e6ddc651bc4a73fd340f3e47110e0 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Mon, 20 Apr 2026 13:06:34 -0500 Subject: [PATCH 7/7] chore: update deprecated API usage in server-ai examples (#1294) --- .../server-ai/examples/bedrock/src/index.ts | 2 +- .../openai-observability/src/index.ts | 19 +++++++++---------- .../server-ai/examples/openai/src/index.ts | 16 +++++++++------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/packages/sdk/server-ai/examples/bedrock/src/index.ts b/packages/sdk/server-ai/examples/bedrock/src/index.ts index e1cbf93e06..3efe003a31 100644 --- a/packages/sdk/server-ai/examples/bedrock/src/index.ts +++ b/packages/sdk/server-ai/examples/bedrock/src/index.ts @@ -48,7 +48,7 @@ async function main() { const aiClient = initAi(ldClient); - const aiConfig = await aiClient.config( + const aiConfig = await aiClient.completionConfig( aiConfigKey!, context, { diff --git a/packages/sdk/server-ai/examples/openai-observability/src/index.ts b/packages/sdk/server-ai/examples/openai-observability/src/index.ts index eb8d1f2470..d13c01349f 100644 --- 
a/packages/sdk/server-ai/examples/openai-observability/src/index.ts +++ b/packages/sdk/server-ai/examples/openai-observability/src/index.ts @@ -68,7 +68,7 @@ async function main() { { example_type: 'provider_observability_demo' }, ); - if (!aiConfig.enabled || !aiConfig.tracker) { + if (!aiConfig.enabled) { console.log('*** AI configuration is not enabled'); ldClient.close(); process.exit(0); @@ -76,15 +76,14 @@ async function main() { try { // ── 4. Call OpenAI and track metrics with the provider's extractor ── - const completion = await aiConfig.tracker.trackMetricsOf( - OpenAIProvider.getAIMetricsFromResponse, - () => - openai.chat.completions.create({ - messages: aiConfig.messages || [], - model: aiConfig.model?.name || 'gpt-4', - temperature: (aiConfig.model?.parameters?.temperature as number) ?? 0.5, - max_tokens: (aiConfig.model?.parameters?.maxTokens as number) ?? 4096, - }), + const tracker = aiConfig.createTracker!(); + const completion = await tracker.trackMetricsOf(OpenAIProvider.getAIMetricsFromResponse, () => + openai.chat.completions.create({ + messages: aiConfig.messages || [], + model: aiConfig.model?.name || 'gpt-4', + temperature: (aiConfig.model?.parameters?.temperature as number) ?? 0.5, + max_tokens: (aiConfig.model?.parameters?.maxTokens as number) ?? 
4096, + }), ); console.log('AI Response:', completion.choices[0]?.message.content); diff --git a/packages/sdk/server-ai/examples/openai/src/index.ts b/packages/sdk/server-ai/examples/openai/src/index.ts index e16643d6c5..c5d348ad4b 100644 --- a/packages/sdk/server-ai/examples/openai/src/index.ts +++ b/packages/sdk/server-ai/examples/openai/src/index.ts @@ -66,13 +66,15 @@ async function main() { } const tracker = aiConfig.createTracker!(); - const completion = await tracker.trackMetricsOf(OpenAIProvider.createAIMetrics, async () => - client.chat.completions.create({ - messages: aiConfig.messages || [], - model: aiConfig.model?.name || 'gpt-4', - temperature: (aiConfig.model?.parameters?.temperature as number) ?? 0.5, - max_tokens: (aiConfig.model?.parameters?.maxTokens as number) ?? 4096, - }), + const completion = await tracker.trackMetricsOf( + OpenAIProvider.getAIMetricsFromResponse, + async () => + client.chat.completions.create({ + messages: aiConfig.messages || [], + model: aiConfig.model?.name || 'gpt-4', + temperature: (aiConfig.model?.parameters?.temperature as number) ?? 0.5, + max_tokens: (aiConfig.model?.parameters?.maxTokens as number) ?? 4096, + }), ); console.log('AI Response:', completion.choices[0]?.message.content);