From 7745852fd540ccd415f95ccb0dcf571b3f2c0310 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Tue, 14 Apr 2026 16:46:02 -0500 Subject: [PATCH 1/7] fix: Add support for graph metric tracking (#1269) --- .../__tests__/LDAIConfigTrackerImpl.test.ts | 145 ++++++ .../__tests__/LDGraphTrackerImpl.test.ts | 418 ++++++++++++++++++ .../server-ai/src/LDAIConfigTrackerImpl.ts | 106 +++-- .../sdk/server-ai/src/LDGraphTrackerImpl.ts | 119 +++++ .../src/api/config/LDAIConfigTracker.ts | 54 ++- .../server-ai/src/api/graph/LDGraphTracker.ts | 126 ++++++ packages/sdk/server-ai/src/api/graph/index.ts | 1 + packages/sdk/server-ai/src/api/index.ts | 1 + packages/sdk/server-ai/src/index.ts | 1 + 9 files changed, 928 insertions(+), 43 deletions(-) create mode 100644 packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts create mode 100644 packages/sdk/server-ai/src/LDGraphTrackerImpl.ts create mode 100644 packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts create mode 100644 packages/sdk/server-ai/src/api/graph/index.ts diff --git a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts index 1fbe25538c..5ea65c4c93 100644 --- a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts @@ -877,3 +877,148 @@ describe('trackJudgeResponse', () => { ); }); }); + +describe('trackToolCall', () => { + it('tracks a single tool call', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackToolCall('my-tool'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), toolKey: 'my-tool' }, + 1, + ); + }); + + it('includes graphKey when provided', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + 
providerName, + testContext, + ); + + tracker.trackToolCall('my-tool', 'my-graph'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), graphKey: 'my-graph', toolKey: 'my-tool' }, + 1, + ); + }); +}); + +describe('trackToolCalls', () => { + it('tracks multiple tool calls', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackToolCalls(['tool-a', 'tool-b', 'tool-c']); + + expect(mockTrack).toHaveBeenCalledTimes(3); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), toolKey: 'tool-a' }, + 1, + ); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), toolKey: 'tool-b' }, + 1, + ); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:tool_call', + testContext, + { ...getExpectedTrackData(), toolKey: 'tool-c' }, + 1, + ); + }); +}); + +describe('graphKey parameter support', () => { + it('includes graphKey in trackDuration event', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackDuration(1000, 'my-graph'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:duration:total', + testContext, + { ...getExpectedTrackData(), graphKey: 'my-graph' }, + 1000, + ); + }); + + it('includes graphKey in trackSuccess event', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackSuccess('my-graph'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:generation:success', + testContext, + { ...getExpectedTrackData(), graphKey: 'my-graph' }, + 1, + ); + }); + + it('does not include graphKey when not provided', () => { + const tracker = new 
LDAIConfigTrackerImpl( + mockLdClient, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + tracker.trackSuccess(); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:generation:success', + testContext, + getExpectedTrackData(), + 1, + ); + }); +}); diff --git a/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts new file mode 100644 index 0000000000..fe42bf4e4d --- /dev/null +++ b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts @@ -0,0 +1,418 @@ +import { LDContext } from '@launchdarkly/js-server-sdk-common'; + +import { LDClientMin } from '../src/LDClientMin'; +import { LDGraphTrackerImpl } from '../src/LDGraphTrackerImpl'; + +const mockTrack = jest.fn(); +const mockLdClient: LDClientMin = { + track: mockTrack, + variation: jest.fn(), +}; + +const testContext: LDContext = { kind: 'user', key: 'test-user' }; +const graphKey = 'test-graph'; +const variationKey = 'v1'; +const version = 2; + +const getExpectedTrackData = () => ({ + graphKey, + variationKey, + version, +}); + +beforeEach(() => { + jest.clearAllMocks(); +}); + +it('returns track data', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + + expect(tracker.getTrackData()).toEqual(getExpectedTrackData()); +}); + +it('tracks invocation success', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackInvocationSuccess(); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:invocation_success', + testContext, + getExpectedTrackData(), + 1, + ); +}); + +it('tracks invocation failure', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackInvocationFailure(); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:invocation_failure', + 
testContext, + getExpectedTrackData(), + 1, + ); +}); + +it('tracks latency', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackLatency(1500); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:latency', + testContext, + getExpectedTrackData(), + 1500, + ); +}); + +it('tracks total tokens', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackTotalTokens({ total: 200, input: 80, output: 120 }); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:total_tokens', + testContext, + getExpectedTrackData(), + 200, + ); +}); + +it('does not track total tokens when total is zero', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackTotalTokens({ total: 0, input: 0, output: 0 }); + + expect(mockTrack).not.toHaveBeenCalled(); +}); + +it('tracks path', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + const path = ['node-a', 'node-b', 'node-c']; + tracker.trackPath(path); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:path', + testContext, + { ...getExpectedTrackData(), path }, + 1, + ); +}); + +it('tracks judge response', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + const response = { + judgeConfigKey: 'my-judge', + evals: { + relevance: { score: 0.9, reasoning: 'Relevant' }, + accuracy: { score: 0.85, reasoning: 'Accurate' }, + }, + success: true, + }; + tracker.trackJudgeResponse(response); + + expect(mockTrack).toHaveBeenCalledWith( + 'relevance', + testContext, + { ...getExpectedTrackData(), judgeConfigKey: 'my-judge' }, + 0.9, + ); + expect(mockTrack).toHaveBeenCalledWith( + 'accuracy', + testContext, + { 
...getExpectedTrackData(), judgeConfigKey: 'my-judge' }, + 0.85, + ); +}); + +it('tracks judge response without judgeConfigKey', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + const response = { + evals: { relevance: { score: 0.7, reasoning: 'Somewhat relevant' } }, + success: true, + }; + tracker.trackJudgeResponse(response); + + expect(mockTrack).toHaveBeenCalledWith('relevance', testContext, getExpectedTrackData(), 0.7); +}); + +it('tracks redirect', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackRedirect('agent-a', 'agent-b'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:redirect', + testContext, + { ...getExpectedTrackData(), sourceKey: 'agent-a', redirectedTarget: 'agent-b' }, + 1, + ); +}); + +it('tracks handoff success', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackHandoffSuccess('agent-a', 'agent-b'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:handoff_success', + testContext, + { ...getExpectedTrackData(), sourceKey: 'agent-a', targetKey: 'agent-b' }, + 1, + ); +}); + +it('tracks handoff failure', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackHandoffFailure('agent-a', 'agent-b'); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:handoff_failure', + testContext, + { ...getExpectedTrackData(), sourceKey: 'agent-a', targetKey: 'agent-b' }, + 1, + ); +}); + +it('returns empty summary when no metrics tracked', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + + expect(tracker.getSummary()).toEqual({}); +}); + +it('summarizes tracked graph metrics', () => { + const tracker = new 
LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + + tracker.trackInvocationSuccess(); + tracker.trackLatency(2000); + tracker.trackTotalTokens({ total: 300, input: 100, output: 200 }); + tracker.trackPath(['node-a', 'node-b']); + + expect(tracker.getSummary()).toEqual({ + success: true, + durationMs: 2000, + tokens: { total: 300, input: 100, output: 200 }, + path: ['node-a', 'node-b'], + }); +}); + +describe('at-most-once semantics for graph-level metrics', () => { + it('drops duplicate trackInvocationSuccess calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackInvocationSuccess(); + tracker.trackInvocationSuccess(); + + expect(mockTrack).toHaveBeenCalledTimes(1); + }); + + it('drops trackInvocationFailure after trackInvocationSuccess', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackInvocationSuccess(); + tracker.trackInvocationFailure(); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:invocation_success', + expect.anything(), + expect.anything(), + expect.anything(), + ); + }); + + it('drops duplicate trackLatency calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackLatency(1000); + tracker.trackLatency(2000); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:latency', + testContext, + getExpectedTrackData(), + 1000, + ); + }); + + it('drops duplicate trackTotalTokens calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackTotalTokens({ total: 100, input: 40, output: 60 }); + tracker.trackTotalTokens({ total: 200, input: 80, output: 120 
}); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:total_tokens', + testContext, + getExpectedTrackData(), + 100, + ); + }); + + it('drops duplicate trackPath calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackPath(['node-a']); + tracker.trackPath(['node-b', 'node-c']); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:graph:path', + testContext, + { ...getExpectedTrackData(), path: ['node-a'] }, + 1, + ); + }); +}); + +describe('edge-level methods can be called multiple times', () => { + it('allows multiple trackRedirect calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackRedirect('a', 'b'); + tracker.trackRedirect('b', 'c'); + + expect(mockTrack).toHaveBeenCalledTimes(2); + }); + + it('allows multiple trackHandoffSuccess calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackHandoffSuccess('a', 'b'); + tracker.trackHandoffSuccess('b', 'c'); + + expect(mockTrack).toHaveBeenCalledTimes(2); + }); + + it('allows multiple trackHandoffFailure calls', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackHandoffFailure('a', 'b'); + tracker.trackHandoffFailure('b', 'c'); + + expect(mockTrack).toHaveBeenCalledTimes(2); + }); +}); diff --git a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts index a81f8e0721..adda7c97c8 100644 --- a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts @@ -26,12 +26,13 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { private _context: 
LDContext, ) {} - getTrackData(): { + getTrackData(graphKey?: string): { variationKey: string; configKey: string; version: number; modelName: string; providerName: string; + graphKey?: string; } { return { variationKey: this._variationKey, @@ -39,15 +40,21 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { version: this._version, modelName: this._modelName, providerName: this._providerName, + ...(graphKey !== undefined ? { graphKey } : {}), }; } - trackDuration(duration: number): void { + trackDuration(duration: number, graphKey?: string): void { this._trackedMetrics.durationMs = duration; - this._ldClient.track('$ld:ai:duration:total', this._context, this.getTrackData(), duration); + this._ldClient.track( + '$ld:ai:duration:total', + this._context, + this.getTrackData(graphKey), + duration, + ); } - async trackDurationOf(func: () => Promise): Promise { + async trackDurationOf(func: () => Promise, graphKey?: string): Promise { const startTime = Date.now(); try { // Be sure to await here so that we can track the duration of the function and also handle errors. 
@@ -56,66 +63,97 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } finally { const endTime = Date.now(); const duration = endTime - startTime; // duration in milliseconds - this.trackDuration(duration); + this.trackDuration(duration, graphKey); } } - trackTimeToFirstToken(timeToFirstTokenMs: number) { + trackTimeToFirstToken(timeToFirstTokenMs: number, graphKey?: string) { this._trackedMetrics.timeToFirstTokenMs = timeToFirstTokenMs; this._ldClient.track( '$ld:ai:tokens:ttf', this._context, - this.getTrackData(), + this.getTrackData(graphKey), timeToFirstTokenMs, ); } - trackEvalScores(scores: Record) { + trackEvalScores(scores: Record, graphKey?: string) { Object.entries(scores).forEach(([metricKey, evalScore]) => { - this._ldClient.track(metricKey, this._context, this.getTrackData(), evalScore.score); + this._ldClient.track(metricKey, this._context, this.getTrackData(graphKey), evalScore.score); }); } - trackJudgeResponse(response: JudgeResponse) { + trackJudgeResponse(response: JudgeResponse, graphKey?: string) { Object.entries(response.evals).forEach(([metricKey, evalScore]) => { this._ldClient.track( metricKey, this._context, - { ...this.getTrackData(), judgeConfigKey: response.judgeConfigKey }, + { ...this.getTrackData(graphKey), judgeConfigKey: response.judgeConfigKey }, evalScore.score, ); }); } - trackFeedback(feedback: { kind: LDFeedbackKind }): void { + trackToolCall(toolKey: string, graphKey?: string): void { + this._ldClient.track( + '$ld:ai:tool_call', + this._context, + { ...this.getTrackData(graphKey), toolKey }, + 1, + ); + } + + trackToolCalls(toolKeys: string[], graphKey?: string): void { + toolKeys.forEach((toolKey) => { + this.trackToolCall(toolKey, graphKey); + }); + } + + trackFeedback(feedback: { kind: LDFeedbackKind }, graphKey?: string): void { this._trackedMetrics.feedback = feedback; if (feedback.kind === LDFeedbackKind.Positive) { - this._ldClient.track('$ld:ai:feedback:user:positive', this._context, 
this.getTrackData(), 1); + this._ldClient.track( + '$ld:ai:feedback:user:positive', + this._context, + this.getTrackData(graphKey), + 1, + ); } else if (feedback.kind === LDFeedbackKind.Negative) { - this._ldClient.track('$ld:ai:feedback:user:negative', this._context, this.getTrackData(), 1); + this._ldClient.track( + '$ld:ai:feedback:user:negative', + this._context, + this.getTrackData(graphKey), + 1, + ); } } - trackSuccess(): void { + trackSuccess(graphKey?: string): void { this._trackedMetrics.success = true; - this._ldClient.track('$ld:ai:generation:success', this._context, this.getTrackData(), 1); + this._ldClient.track( + '$ld:ai:generation:success', + this._context, + this.getTrackData(graphKey), + 1, + ); } - trackError(): void { + trackError(graphKey?: string): void { this._trackedMetrics.success = false; - this._ldClient.track('$ld:ai:generation:error', this._context, this.getTrackData(), 1); + this._ldClient.track('$ld:ai:generation:error', this._context, this.getTrackData(graphKey), 1); } async trackMetricsOf( metricsExtractor: (result: TRes) => LDAIMetrics, func: () => Promise, + graphKey?: string, ): Promise { let result: TRes; try { - result = await this.trackDurationOf(func); + result = await this.trackDurationOf(func, graphKey); } catch (err) { - this.trackError(); + this.trackError(graphKey); throw err; } @@ -124,14 +162,14 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { // Track success/error based on metrics if (metrics.success) { - this.trackSuccess(); + this.trackSuccess(graphKey); } else { - this.trackError(); + this.trackError(graphKey); } // Track token usage if available if (metrics.usage) { - this.trackTokens(metrics.usage); + this.trackTokens(metrics.usage, graphKey); } return result; @@ -140,6 +178,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { trackStreamMetricsOf( streamCreator: () => TStream, metricsExtractor: (stream: TStream) => Promise, + graphKey?: string, ): TStream { const startTime 
= Date.now(); @@ -148,14 +187,14 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { const stream = streamCreator(); // Start background metrics tracking (fire and forget) - this._trackStreamMetricsInBackground(stream, metricsExtractor, startTime); + this._trackStreamMetricsInBackground(stream, metricsExtractor, startTime, graphKey); // Return stream immediately for consumption return stream; } catch (error) { // Track error if stream creation fails - this.trackDuration(Date.now() - startTime); - this.trackError(); + this.trackDuration(Date.now() - startTime, graphKey); + this.trackError(graphKey); throw error; } } @@ -164,6 +203,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { stream: TStream, metricsExtractor: (stream: TStream) => Promise, startTime: number, + graphKey?: string, ): Promise { try { // Wait for metrics to be available @@ -171,21 +211,21 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { // Track success/error based on metrics if (metrics.success) { - this.trackSuccess(); + this.trackSuccess(graphKey); } else { - this.trackError(); + this.trackError(graphKey); } // Track token usage if available if (metrics.usage) { - this.trackTokens(metrics.usage); + this.trackTokens(metrics.usage, graphKey); } } catch (error) { // If metrics extraction fails, track error - this.trackError(); + this.trackError(graphKey); } finally { // Track duration regardless of success/error - this.trackDuration(Date.now() - startTime); + this.trackDuration(Date.now() - startTime, graphKey); } } @@ -260,9 +300,9 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } } - trackTokens(tokens: LDTokenUsage): void { + trackTokens(tokens: LDTokenUsage, graphKey?: string): void { this._trackedMetrics.tokens = tokens; - const trackData = this.getTrackData(); + const trackData = this.getTrackData(graphKey); if (tokens.total > 0) { this._ldClient.track('$ld:ai:tokens:total', this._context, trackData, tokens.total); 
} diff --git a/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts new file mode 100644 index 0000000000..4c08e26a58 --- /dev/null +++ b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts @@ -0,0 +1,119 @@ +import { LDContext } from '@launchdarkly/js-server-sdk-common'; + +import { LDGraphMetricSummary, LDGraphTracker } from './api/graph/LDGraphTracker'; +import { JudgeResponse } from './api/judge/types'; +import { LDTokenUsage } from './api/metrics'; +import { LDClientMin } from './LDClientMin'; + +export class LDGraphTrackerImpl implements LDGraphTracker { + private _trackedMetrics: LDGraphMetricSummary = {}; + + constructor( + private _ldClient: LDClientMin, + private _graphKey: string, + private _variationKey: string, + private _version: number, + private _context: LDContext, + ) {} + + getTrackData(): { + variationKey: string; + graphKey: string; + version: number; + } { + return { + variationKey: this._variationKey, + graphKey: this._graphKey, + version: this._version, + }; + } + + trackInvocationSuccess(): void { + if (this._trackedMetrics.success !== undefined) { + return; + } + this._trackedMetrics.success = true; + this._ldClient.track('$ld:ai:graph:invocation_success', this._context, this.getTrackData(), 1); + } + + trackInvocationFailure(): void { + if (this._trackedMetrics.success !== undefined) { + return; + } + this._trackedMetrics.success = false; + this._ldClient.track('$ld:ai:graph:invocation_failure', this._context, this.getTrackData(), 1); + } + + trackLatency(durationMs: number): void { + if (this._trackedMetrics.durationMs !== undefined) { + return; + } + this._trackedMetrics.durationMs = durationMs; + this._ldClient.track('$ld:ai:graph:latency', this._context, this.getTrackData(), durationMs); + } + + trackTotalTokens(tokens: LDTokenUsage): void { + if (this._trackedMetrics.tokens !== undefined) { + return; + } + if (tokens.total <= 0) { + return; + } + this._trackedMetrics.tokens = tokens; + 
this._ldClient.track( + '$ld:ai:graph:total_tokens', + this._context, + this.getTrackData(), + tokens.total, + ); + } + + trackPath(path: string[]): void { + if (this._trackedMetrics.path !== undefined) { + return; + } + this._trackedMetrics.path = path; + this._ldClient.track('$ld:ai:graph:path', this._context, { ...this.getTrackData(), path }, 1); + } + + trackJudgeResponse(response: JudgeResponse): void { + const trackData = response.judgeConfigKey + ? { ...this.getTrackData(), judgeConfigKey: response.judgeConfigKey } + : this.getTrackData(); + + Object.entries(response.evals).forEach(([metricKey, evalScore]) => { + this._ldClient.track(metricKey, this._context, trackData, evalScore.score); + }); + } + + trackRedirect(sourceKey: string, redirectedTarget: string): void { + this._ldClient.track( + '$ld:ai:graph:redirect', + this._context, + { ...this.getTrackData(), sourceKey, redirectedTarget }, + 1, + ); + } + + trackHandoffSuccess(sourceKey: string, targetKey: string): void { + this._ldClient.track( + '$ld:ai:graph:handoff_success', + this._context, + { ...this.getTrackData(), sourceKey, targetKey }, + 1, + ); + } + + trackHandoffFailure(sourceKey: string, targetKey: string): void { + this._ldClient.track( + '$ld:ai:graph:handoff_failure', + this._context, + { ...this.getTrackData(), sourceKey, targetKey }, + 1, + ); + } + + getSummary(): LDGraphMetricSummary { + return { ...this._trackedMetrics }; + } +} diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts index 41ff0e20a1..3a40fd3c6d 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts @@ -37,13 +37,16 @@ export interface LDAIMetricSummary { export interface LDAIConfigTracker { /** * Get the data for tracking. + * + * @param graphKey When provided, associates this metric with the specified agent graph key. 
*/ - getTrackData(): { + getTrackData(graphKey?: string): { variationKey: string; configKey: string; version: number; modelName: string; providerName: string; + graphKey?: string; }; /** * Track the duration of generation. @@ -51,53 +54,79 @@ export interface LDAIConfigTracker { * Ideally this would not include overhead time such as network communication. * * @param durationMs The duration in milliseconds. + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackDuration(durationMs: number): void; + trackDuration(durationMs: number, graphKey?: string): void; /** * Track information about token usage. * * @param tokens Token usage information. + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackTokens(tokens: LDTokenUsage): void; + trackTokens(tokens: LDTokenUsage, graphKey?: string): void; /** * Generation was successful. + * + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackSuccess(): void; + trackSuccess(graphKey?: string): void; /** * An error was encountered during generation. + * + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackError(): void; + trackError(graphKey?: string): void; /** * Track sentiment about the generation. * * @param feedback Feedback about the generation. + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackFeedback(feedback: { kind: LDFeedbackKind }): void; + trackFeedback(feedback: { kind: LDFeedbackKind }, graphKey?: string): void; /** * Track the time to first token for this generation. * * @param timeToFirstTokenMs The duration in milliseconds. + * @param graphKey When provided, associates this metric with the specified agent graph key. 
*/ - trackTimeToFirstToken(timeToFirstTokenMs: number): void; + trackTimeToFirstToken(timeToFirstTokenMs: number, graphKey?: string): void; /** * Track evaluation scores for multiple metrics. * * @param scores Record mapping metric keys to their evaluation scores + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackEvalScores(scores: Record): void; + trackEvalScores(scores: Record, graphKey?: string): void; /** * Track a judge response containing evaluation scores and judge configuration key. * * @param response Judge response containing evaluation scores and judge configuration key + * @param graphKey When provided, associates this metric with the specified agent graph key. + */ + trackJudgeResponse(response: JudgeResponse, graphKey?: string): void; + + /** + * Track a single tool invocation. + * + * @param toolKey The identifier of the tool that was invoked. + * @param graphKey When provided, associates this metric with the specified agent graph key. + */ + trackToolCall(toolKey: string, graphKey?: string): void; + + /** + * Track multiple tool invocations. + * + * @param toolKeys The identifiers of the tools that were invoked. + * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackJudgeResponse(response: JudgeResponse): void; + trackToolCalls(toolKeys: string[], graphKey?: string): void; /** * Track the duration of execution of the provided function. @@ -108,9 +137,10 @@ export interface LDAIConfigTracker { * This function does not automatically record an error when the function throws. * * @param func The function to track the duration of. + * @param graphKey When provided, associates this metric with the specified agent graph key. * @returns The result of the function. */ - trackDurationOf(func: () => Promise): Promise; + trackDurationOf(func: () => Promise, graphKey?: string): Promise; /** * Track metrics for a generic AI operation. 
@@ -124,11 +154,13 @@ export interface LDAIConfigTracker { * * @param metricsExtractor Function that extracts LDAIMetrics from the operation result * @param func Function which executes the operation + * @param graphKey When provided, associates this metric with the specified agent graph key. * @returns The result of the operation */ trackMetricsOf( metricsExtractor: (result: TRes) => LDAIMetrics, func: () => Promise, + graphKey?: string, ): Promise; /** @@ -150,11 +182,13 @@ export interface LDAIConfigTracker { * * @param streamCreator Function that creates and returns the stream (synchronous) * @param metricsExtractor Function that asynchronously extracts metrics from the stream + * @param graphKey When provided, associates this metric with the specified agent graph key. * @returns The stream result (returned immediately, not a Promise) */ trackStreamMetricsOf( streamCreator: () => TStream, metricsExtractor: (stream: TStream) => Promise, + graphKey?: string, ): TStream; /** diff --git a/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts new file mode 100644 index 0000000000..94cf30658f --- /dev/null +++ b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts @@ -0,0 +1,126 @@ +import { JudgeResponse } from '../judge/types'; +import { LDTokenUsage } from '../metrics'; + +/** + * Metrics tracked at the graph level. + */ +export interface LDGraphMetricSummary { + /** + * True if the graph invocation succeeded, false if it failed, absent if not tracked. + */ + success?: boolean; + + /** + * Total graph execution duration in milliseconds, if tracked. + */ + durationMs?: number; + + /** + * Aggregated token usage across the entire graph invocation, if tracked. + */ + tokens?: LDTokenUsage; + + /** + * Execution path through the graph as an array of config keys, if tracked. + */ + path?: string[]; +} + +/** + * Tracker for graph-level and edge-level metrics in AI agent graph operations. 
+ * + * Node-level metrics are tracked via each node's {@link LDAIConfigTracker}. + */ +export interface LDGraphTracker { + /** + * Get the data for tracking. + */ + getTrackData(): { + variationKey: string; + graphKey: string; + version: number; + }; + + /** + * Track a successful graph invocation. + * + * At-most-once per tracker instance. Subsequent calls are dropped. + */ + trackInvocationSuccess(): void; + + /** + * Track an unsuccessful graph invocation. + * + * At-most-once per tracker instance. Subsequent calls are dropped. + */ + trackInvocationFailure(): void; + + /** + * Track the total latency of graph execution. + * + * At-most-once per tracker instance. Subsequent calls are dropped. + * + * @param durationMs Duration in milliseconds. + */ + trackLatency(durationMs: number): void; + + /** + * Track aggregated token usage across the entire graph invocation. + * + * At-most-once per tracker instance. Subsequent calls are dropped. + * + * @param tokens Token usage information. + */ + trackTotalTokens(tokens: LDTokenUsage): void; + + /** + * Track the execution path through the graph. + * + * At-most-once per tracker instance. Subsequent calls are dropped. + * + * @param path Array of config keys representing the sequence of nodes executed. + */ + trackPath(path: string[]): void; + + /** + * Track judge responses for the final graph output. + * + * @param response Judge response containing evaluation scores. + */ + trackJudgeResponse(response: JudgeResponse): void; + + /** + * Track when a node redirects to a different target than originally specified. + * + * May be called multiple times. + * + * @param sourceKey Config key of the source node. + * @param redirectedTarget Config key of the target node that was redirected to. + */ + trackRedirect(sourceKey: string, redirectedTarget: string): void; + + /** + * Track a successful handoff between nodes. + * + * May be called multiple times. + * + * @param sourceKey Config key of the source node. 
+ * @param targetKey Config key of the target node. + */ + trackHandoffSuccess(sourceKey: string, targetKey: string): void; + + /** + * Track a failed handoff between nodes. + * + * May be called multiple times. + * + * @param sourceKey Config key of the source node. + * @param targetKey Config key of the target node. + */ + trackHandoffFailure(sourceKey: string, targetKey: string): void; + + /** + * Get a summary of the tracked graph-level metrics. + */ + getSummary(): LDGraphMetricSummary; +} diff --git a/packages/sdk/server-ai/src/api/graph/index.ts b/packages/sdk/server-ai/src/api/graph/index.ts new file mode 100644 index 0000000000..536e630115 --- /dev/null +++ b/packages/sdk/server-ai/src/api/graph/index.ts @@ -0,0 +1 @@ +export * from './LDGraphTracker'; diff --git a/packages/sdk/server-ai/src/api/index.ts b/packages/sdk/server-ai/src/api/index.ts index 2f289b8356..7470ef740c 100644 --- a/packages/sdk/server-ai/src/api/index.ts +++ b/packages/sdk/server-ai/src/api/index.ts @@ -1,5 +1,6 @@ export * from './config'; export * from './chat'; +export * from './graph'; export * from './judge'; export * from './metrics'; export * from './LDAIClient'; diff --git a/packages/sdk/server-ai/src/index.ts b/packages/sdk/server-ai/src/index.ts index 7c1bb54b3d..8bb6c11808 100644 --- a/packages/sdk/server-ai/src/index.ts +++ b/packages/sdk/server-ai/src/index.ts @@ -26,3 +26,4 @@ export function initAi(ldClient: LDClientMin): LDAIClient { export type LDLogger = common.LDLogger; export * from './api'; +export { LDGraphTrackerImpl } from './LDGraphTrackerImpl'; From d640d8e0b9542dce2a1dfac404c78648aa3d05c5 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Wed, 15 Apr 2026 07:38:28 -0500 Subject: [PATCH 2/7] feat!: Add per-execution runId, at-most-once tracking, and cross-process tracker resumption (#1270) --- .../sdk/server-ai/__tests__/Judge.test.ts | 74 ++--- .../__tests__/LDAIClientImpl.test.ts | 66 ++-- .../__tests__/LDAIConfigTrackerImpl.test.ts | 290 ++++++++++++++++++ 
.../server-ai/__tests__/TrackedChat.test.ts | 26 +- .../server-ai/examples/bedrock/src/index.ts | 5 +- .../server-ai/examples/openai/src/index.ts | 19 +- .../server-ai/examples/vercel-ai/src/index.ts | 11 +- packages/sdk/server-ai/src/LDAIClientImpl.ts | 40 ++- .../server-ai/src/LDAIConfigTrackerImpl.ts | 72 ++++- packages/sdk/server-ai/src/api/LDAIClient.ts | 12 + .../sdk/server-ai/src/api/chat/TrackedChat.ts | 39 +-- .../src/api/config/LDAIConfigTracker.ts | 31 +- .../src/api/config/LDAIConfigUtils.ts | 38 ++- .../sdk/server-ai/src/api/config/types.ts | 7 +- packages/sdk/server-ai/src/api/judge/Judge.ts | 29 +- 15 files changed, 572 insertions(+), 187 deletions(-) diff --git a/packages/sdk/server-ai/__tests__/Judge.test.ts b/packages/sdk/server-ai/__tests__/Judge.test.ts index 6c8985b914..c0def31740 100644 --- a/packages/sdk/server-ai/__tests__/Judge.test.ts +++ b/packages/sdk/server-ai/__tests__/Judge.test.ts @@ -47,14 +47,14 @@ describe('Judge', () => { ], model: { name: 'gpt-4' }, provider: { name: 'openai' }, - tracker: mockTracker, + createTracker: () => mockTracker, evaluationMetricKey: 'relevance', }; }); describe('constructor', () => { it('initializes with proper configuration', () => { - const judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + const judge = new Judge(judgeConfig, mockProvider, mockLogger); expect(judge).toBeDefined(); }); @@ -64,7 +64,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, mockLogger); }); it('evaluates AI response successfully', async () => { @@ -209,12 +209,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: [], }; - const judgeWithoutMetrics = new Judge( - configWithoutMetrics, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithoutMetrics = new Judge(configWithoutMetrics, mockProvider, mockLogger); const result = await 
judgeWithoutMetrics.evaluate('test input', 'test output'); @@ -231,12 +226,7 @@ describe('Judge', () => { evaluationMetricKey: 'relevance', evaluationMetricKeys: undefined, }; - const judgeWithSingleKey = new Judge( - configWithSingleKey, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithSingleKey = new Judge(configWithSingleKey, mockProvider, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -275,12 +265,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: ['relevance', 'accuracy'], }; - const judgeWithLegacyKeys = new Judge( - configWithLegacyKeys, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithLegacyKeys = new Judge(configWithLegacyKeys, mockProvider, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -319,12 +304,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: ['', ' ', 'relevance', 'accuracy'], }; - const judgeWithInvalidKeys = new Judge( - configWithInvalidKeys, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithInvalidKeys = new Judge(configWithInvalidKeys, mockProvider, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -364,7 +344,7 @@ describe('Judge', () => { evaluationMetricKey: 'helpfulness', evaluationMetricKeys: ['relevance', 'accuracy'], }; - const judgeWithBoth = new Judge(configWithBoth, mockTracker, mockProvider, mockLogger); + const judgeWithBoth = new Judge(configWithBoth, mockProvider, mockLogger); const mockStructuredResponse: StructuredResponse = { data: { @@ -402,12 +382,7 @@ describe('Judge', () => { ...judgeConfig, messages: undefined, }; - const judgeWithoutMessages = new Judge( - configWithoutMessages, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithoutMessages = new Judge(configWithoutMessages, mockProvider, mockLogger); const result = await judgeWithoutMessages.evaluate('test input', 'test output'); @@ -511,7 +486,7 @@ 
describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, mockLogger); }); it('evaluates messages and response successfully', async () => { @@ -596,7 +571,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, mockLogger); }); it('constructs evaluation messages correctly', () => { @@ -621,7 +596,7 @@ describe('Judge', () => { let judge: Judge; beforeEach(() => { - judge = new Judge(judgeConfig, mockTracker, mockProvider, mockLogger); + judge = new Judge(judgeConfig, mockProvider, mockLogger); }); it('parses valid evaluation response correctly', () => { @@ -633,7 +608,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({ relevance: { score: 0.8, reasoning: 'Good' }, @@ -647,7 +622,7 @@ describe('Judge', () => { relevance: { score: 0.8, reasoning: 'Good' }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); }); @@ -661,7 +636,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); }); @@ -675,7 +650,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( @@ -693,7 +668,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); 
expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( @@ -711,7 +686,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( @@ -729,7 +704,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( @@ -744,12 +719,7 @@ describe('Judge', () => { evaluationMetricKey: undefined, evaluationMetricKeys: [], }; - const judgeWithEmptyKeys = new Judge( - configWithEmptyKeys, - mockTracker, - mockProvider, - mockLogger, - ); + const judgeWithEmptyKeys = new Judge(configWithEmptyKeys, mockProvider, mockLogger); const result = await judgeWithEmptyKeys.evaluate('test input', 'test output'); @@ -769,7 +739,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( @@ -787,7 +757,7 @@ describe('Judge', () => { }, }; - const result = parseResponse(responseData, 'relevance'); + const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({}); expect(mockLogger.warn).toHaveBeenCalledWith( diff --git a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts index 35e8b671a2..9695c1f815 100644 --- a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts @@ -85,7 +85,7 @@ describe('config evaluation', () => { { role: 'system', content: 'Hello John' }, { role: 'user', content: 'Score: 42' }, ]); - expect(result.tracker).toBeDefined(); + 
expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -143,7 +143,7 @@ describe('config evaluation', () => { expect(result.instructions).toBe( 'You are a helpful assistant. Your name is John and your score is 42', ); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -176,7 +176,7 @@ describe('config evaluation', () => { expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); // Should use first value from evaluationMetricKeys expect(result.evaluationMetricKey).toBe('relevance'); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -208,7 +208,7 @@ describe('config evaluation', () => { expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); expect(result.evaluationMetricKey).toBe('relevance'); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -241,7 +241,7 @@ describe('config evaluation', () => { expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); expect(result.evaluationMetricKey).toBe('helpfulness'); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -275,7 +275,7 @@ describe('config evaluation', () => { expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); // Empty string should be treated as invalid, so should fall back to first value in evaluationMetricKeys expect(result.evaluationMetricKey).toBe('relevance'); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); 
evaluateSpy.mockRestore(); }); @@ -308,7 +308,7 @@ describe('config evaluation', () => { expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined); // Should skip empty and whitespace strings, use first valid value expect(result.evaluationMetricKey).toBe('relevance'); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(true); evaluateSpy.mockRestore(); }); @@ -331,7 +331,7 @@ describe('config evaluation', () => { const result = await client.completionConfig(key, testContext, defaultValue); expect(result.enabled).toBe(false); - expect(result.tracker).toBeUndefined(); + expect(result.createTracker).toBeUndefined(); }); it('handles missing metadata mode by defaulting to completion mode', async () => { @@ -352,7 +352,7 @@ describe('config evaluation', () => { const result = await client.completionConfig(key, testContext, defaultValue); expect(result.enabled).toBe(false); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.messages).toEqual([{ role: 'system', content: 'Hello' }]); expect(result.model).toEqual({ name: 'example-provider', parameters: { name: 'imagination' } }); }); @@ -381,7 +381,7 @@ describe('config evaluation', () => { expect(result.model).toEqual(defaultValue.model); expect(result.messages).toEqual(defaultValue.messages); expect(result.provider).toEqual(defaultValue.provider); - expect(result.tracker).toBeDefined(); + expect(result.createTracker).toBeDefined(); expect(result.enabled).toBe(defaultValue.enabled); expect(mockLdClient.variation).toHaveBeenCalledWith( key, @@ -408,7 +408,7 @@ describe('completionConfig method', () => { const mockConfig = { model: { name: 'test-model' }, messages: [], - tracker: {} as any, + createTracker: () => ({}) as any, enabled: true, }; @@ -449,7 +449,7 @@ describe('agentConfig method', () => { const mockConfig = { model: { name: 'test-model' }, instructions: 'You are a 
helpful assistant.', - tracker: {} as any, + createTracker: () => ({}) as any, enabled: true, }; @@ -527,7 +527,7 @@ describe('agents method', () => { }, provider: { name: 'openai' }, instructions: 'You are a research assistant specializing in climate change.', - tracker: expect.any(Object), + createTracker: expect.any(Function), enabled: true, }, 'writing-agent': { @@ -538,7 +538,7 @@ describe('agents method', () => { }, provider: { name: 'anthropic' }, instructions: 'You are a writing assistant with academic style.', - tracker: expect.any(Object), + createTracker: expect.any(Function), enabled: true, }, }); @@ -582,7 +582,7 @@ describe('judgeConfig method', () => { provider: { name: 'openai' }, evaluationMetricKeys: ['relevance'], messages: [{ role: 'system' as const, content: 'You are a judge for {{metric}}.' }], - tracker: {} as any, + createTracker: () => ({}) as any, toVercelAISDK: jest.fn(), }; @@ -631,6 +631,7 @@ describe('createJudge method', () => { enabled: false, }; + const mockTrackerInstance = {} as any; const mockJudgeConfig = { key: 'test-judge', enabled: true, @@ -638,7 +639,7 @@ describe('createJudge method', () => { provider: { name: 'openai' }, evaluationMetricKeys: ['relevance', 'accuracy'], messages: [{ role: 'system' as const, content: 'You are a judge.' 
}], - tracker: {} as any, + createTracker: () => mockTrackerInstance, toVercelAISDK: jest.fn(), }; @@ -658,12 +659,7 @@ describe('createJudge method', () => { response_to_evaluate: '{{response_to_evaluate}}', }); expect(AIProviderFactory.create).toHaveBeenCalledWith(mockJudgeConfig, undefined, undefined); - expect(Judge).toHaveBeenCalledWith( - mockJudgeConfig, - mockJudgeConfig.tracker, - mockProvider, - undefined, - ); + expect(Judge).toHaveBeenCalledWith(mockJudgeConfig, mockProvider, undefined); expect(result).toBe(mockJudge); judgeConfigSpy.mockRestore(); }); @@ -706,7 +702,7 @@ describe('createJudge method', () => { provider: { name: 'openai' }, evaluationMetricKeys: ['relevance'], messages: [{ role: 'system' as const, content: 'You are a judge.' }], - tracker: {} as any, + createTracker: () => ({}) as any, toVercelAISDK: jest.fn(), }; @@ -741,6 +737,30 @@ describe('createJudge method', () => { }); }); +describe('createTracker method', () => { + it('reconstructs a tracker from a resumption token', () => { + const client = new LDAIClientImpl(mockLdClient); + + // Build a token manually: { runId, configKey, variationKey, version } + const payload = JSON.stringify({ + runId: 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11', + configKey: 'my-config', + variationKey: 'v1', + version: 3, + }); + const token = Buffer.from(payload).toString('base64url'); + + const tracker = client.createTracker(token, testContext); + + expect(tracker.getTrackData()).toMatchObject({ + runId: 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11', + configKey: 'my-config', + variationKey: 'v1', + version: 3, + }); + }); +}); + describe('optional default values', () => { it('uses a disabled completion config when no default is provided', async () => { const client = new LDAIClientImpl(mockLdClient); diff --git a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts index 5ea65c4c93..e644eff377 100644 --- 
a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts @@ -4,11 +4,18 @@ import { LDFeedbackKind } from '../src/api/metrics'; import { LDAIConfigTrackerImpl } from '../src/LDAIConfigTrackerImpl'; import { LDClientMin } from '../src/LDClientMin'; +const testRunId = 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11'; +jest.mock('node:crypto', () => ({ + randomUUID: jest.fn(() => testRunId), +})); + const mockTrack = jest.fn(); const mockVariation = jest.fn(); +const mockWarn = jest.fn(); const mockLdClient: LDClientMin = { track: mockTrack, variation: mockVariation, + logger: { warn: mockWarn, error: jest.fn(), info: jest.fn(), debug: jest.fn() } as any, }; const testContext: LDContext = { kind: 'user', key: 'test-user' }; @@ -24,6 +31,7 @@ const getExpectedTrackData = () => ({ version, modelName, providerName, + runId: testRunId, }); beforeEach(() => { @@ -33,6 +41,7 @@ beforeEach(() => { it('tracks duration', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -53,6 +62,7 @@ it('tracks duration', () => { it('tracks duration of async function', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -76,6 +86,7 @@ it('tracks duration of async function', async () => { it('tracks time to first token', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -96,6 +107,7 @@ it('tracks time to first token', () => { it('tracks positive feedback', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -116,6 +128,7 @@ it('tracks positive feedback', () => { it('tracks negative feedback', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -136,6 +149,7 @@ it('tracks negative feedback', () => { it('tracks 
success', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -156,6 +170,7 @@ it('tracks success', () => { it('tracks OpenAI usage', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -223,6 +238,7 @@ it('tracks OpenAI usage', async () => { it('tracks error when OpenAI metrics function throws', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -264,6 +280,7 @@ it('tracks error when OpenAI metrics function throws', async () => { it('tracks Bedrock conversation with successful response', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -334,6 +351,7 @@ it('tracks Bedrock conversation with successful response', () => { it('tracks Bedrock conversation with error response', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -369,6 +387,7 @@ describe('Vercel AI SDK generateText', () => { it('tracks Vercel AI SDK usage', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -436,6 +455,7 @@ describe('Vercel AI SDK generateText', () => { it('tracks error when Vercel AI SDK metrics function throws', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -478,6 +498,7 @@ describe('Vercel AI SDK generateText', () => { it('tracks tokens', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -521,6 +542,7 @@ it('tracks tokens', () => { it('only tracks non-zero token counts', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -560,6 +582,7 @@ it('only tracks non-zero token counts', () => { 
it('returns empty summary when no metrics tracked', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -576,6 +599,7 @@ it('returns empty summary when no metrics tracked', () => { it('summarizes tracked metrics', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -612,6 +636,7 @@ it('summarizes tracked metrics', () => { it('tracks duration when async function throws', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -639,6 +664,7 @@ it('tracks duration when async function throws', async () => { it('tracks error', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -660,6 +686,7 @@ describe('trackMetricsOf', () => { it('tracks success and token usage from metrics', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -715,6 +742,7 @@ describe('trackMetricsOf', () => { it('tracks failure when metrics indicate failure', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -745,6 +773,7 @@ describe('trackMetricsOf', () => { it('tracks failure when operation throws', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -774,6 +803,7 @@ describe('trackMetricsOf', () => { it('tracks metrics without token usage', async () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -815,6 +845,7 @@ describe('trackJudgeResponse', () => { it('tracks evaluation metric key with score', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -844,6 +875,7 @@ describe('trackJudgeResponse', () => { it('tracks 
multiple evaluation metrics when present', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -882,6 +914,7 @@ describe('trackToolCall', () => { it('tracks a single tool call', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -903,6 +936,7 @@ describe('trackToolCall', () => { it('includes graphKey when provided', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -926,6 +960,7 @@ describe('trackToolCalls', () => { it('tracks multiple tool calls', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -962,6 +997,7 @@ describe('graphKey parameter support', () => { it('includes graphKey in trackDuration event', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -983,6 +1019,7 @@ describe('graphKey parameter support', () => { it('includes graphKey in trackSuccess event', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -1004,6 +1041,7 @@ describe('graphKey parameter support', () => { it('does not include graphKey when not provided', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, + testRunId, configKey, variationKey, version, @@ -1022,3 +1060,255 @@ describe('graphKey parameter support', () => { ); }); }); + +describe('at-most-once semantics', () => { + it('drops duplicate trackDuration call with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackDuration(1000); + tracker.trackDuration(2000); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledTimes(1); + 
expect(mockWarn).toHaveBeenCalledWith(expect.stringContaining('Duration')); + }); + + it('drops duplicate trackSuccess call with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackSuccess(); + tracker.trackSuccess(); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledTimes(1); + }); + + it('drops trackError call after trackSuccess with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackSuccess(); + tracker.trackError(); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledTimes(1); + }); + + it('drops duplicate trackFeedback call with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackFeedback({ kind: LDFeedbackKind.Positive }); + tracker.trackFeedback({ kind: LDFeedbackKind.Negative }); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledTimes(1); + }); + + it('drops duplicate trackTokens call with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + tracker.trackTokens({ total: 100, input: 50, output: 50 }); + tracker.trackTokens({ total: 200, input: 100, output: 100 }); + + // First call tracks 3 events (total, input, output), second is dropped + expect(mockTrack).toHaveBeenCalledTimes(3); + expect(mockWarn).toHaveBeenCalledTimes(1); + }); + + it('drops duplicate trackTimeToFirstToken call with warning', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + 
version, + modelName, + providerName, + testContext, + ); + tracker.trackTimeToFirstToken(100); + tracker.trackTimeToFirstToken(200); + + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledTimes(1); + }); +}); + +describe('resumptionToken', () => { + it('encodes runId, configKey, variationKey, and version as URL-safe Base64 JSON', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const token = tracker.resumptionToken; + const decoded = JSON.parse(Buffer.from(token, 'base64url').toString('utf8')); + + expect(decoded).toEqual({ + runId: testRunId, + configKey, + variationKey, + version, + }); + }); + + it('includes empty variationKey explicitly when not set', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + '', + version, + modelName, + providerName, + testContext, + ); + + const token = tracker.resumptionToken; + const decoded = JSON.parse(Buffer.from(token, 'base64url').toString('utf8')); + + expect(decoded).toEqual({ + runId: testRunId, + configKey, + variationKey: '', + version, + }); + expect('variationKey' in decoded).toBe(true); + }); + + it('uses URL-safe Base64 encoding (no + / or = characters)', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const token = tracker.resumptionToken; + expect(token).not.toMatch(/[+/=]/); + }); +}); + +describe('fromResumptionToken', () => { + it('reconstructs tracker with original runId', () => { + const original = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const reconstructed = LDAIConfigTrackerImpl.fromResumptionToken( + original.resumptionToken, + mockLdClient, + testContext, + ); + + 
expect(reconstructed.getTrackData().runId).toBe(testRunId); + expect(reconstructed.getTrackData().configKey).toBe(configKey); + expect(reconstructed.getTrackData().variationKey).toBe(variationKey); + expect(reconstructed.getTrackData().version).toBe(version); + }); + + it('reconstructs tracker with empty variationKey when none was set', () => { + const original = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + '', + version, + modelName, + providerName, + testContext, + ); + + const reconstructed = LDAIConfigTrackerImpl.fromResumptionToken( + original.resumptionToken, + mockLdClient, + testContext, + ); + + expect(reconstructed.getTrackData().variationKey).toBe(''); + }); + + it('reconstructed tracker emits track events with original runId', () => { + const original = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const reconstructed = LDAIConfigTrackerImpl.fromResumptionToken( + original.resumptionToken, + mockLdClient, + testContext, + ); + + reconstructed.trackSuccess(); + + expect(mockTrack).toHaveBeenCalledWith( + '$ld:ai:generation:success', + testContext, + expect.objectContaining({ runId: testRunId }), + 1, + ); + }); +}); diff --git a/packages/sdk/server-ai/__tests__/TrackedChat.test.ts b/packages/sdk/server-ai/__tests__/TrackedChat.test.ts index d750a47e65..75681b0f83 100644 --- a/packages/sdk/server-ai/__tests__/TrackedChat.test.ts +++ b/packages/sdk/server-ai/__tests__/TrackedChat.test.ts @@ -38,13 +38,13 @@ describe('TrackedChat', () => { messages: [{ role: 'system', content: 'You are a helpful assistant.' 
}], model: { name: 'gpt-4' }, provider: { name: 'openai' }, - tracker: mockTracker, + createTracker: () => mockTracker, }; }); describe('appendMessages', () => { it('appends messages to the conversation history', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); const messagesToAppend: LDMessage[] = [ { role: 'user', content: 'Hello' }, @@ -60,7 +60,7 @@ describe('TrackedChat', () => { }); it('appends multiple message batches sequentially', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([{ role: 'user', content: 'First message' }]); chat.appendMessages([{ role: 'assistant', content: 'Second message' }]); @@ -74,7 +74,7 @@ describe('TrackedChat', () => { }); it('handles empty message array', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([]); @@ -85,7 +85,7 @@ describe('TrackedChat', () => { describe('getMessages', () => { it('returns only conversation history when includeConfigMessages is false', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([ { role: 'user', content: 'User message' }, @@ -100,7 +100,7 @@ describe('TrackedChat', () => { }); it('returns only conversation history when includeConfigMessages is omitted (defaults to false)', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([{ role: 'user', content: 'User message' }]); @@ -111,7 +111,7 @@ describe('TrackedChat', () => { }); it('returns config messages prepended when includeConfigMessages is true', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new 
TrackedChat(aiConfig, mockProvider); chat.appendMessages([ { role: 'user', content: 'User message' }, @@ -127,7 +127,7 @@ describe('TrackedChat', () => { }); it('returns only config messages when no conversation history exists and includeConfigMessages is true', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); const messages = chat.getMessages(true); @@ -140,7 +140,7 @@ describe('TrackedChat', () => { ...aiConfig, messages: [], }; - const chat = new TrackedChat(configWithoutMessages, mockTracker, mockProvider); + const chat = new TrackedChat(configWithoutMessages, mockProvider); const messages = chat.getMessages(false); @@ -148,7 +148,7 @@ describe('TrackedChat', () => { }); it('returns a copy of the messages array (not a reference)', () => { - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([{ role: 'user', content: 'Original message' }]); @@ -171,7 +171,7 @@ describe('TrackedChat', () => { ...aiConfig, messages: undefined, }; - const chat = new TrackedChat(configWithoutMessages, mockTracker, mockProvider); + const chat = new TrackedChat(configWithoutMessages, mockProvider); chat.appendMessages([{ role: 'user', content: 'User message' }]); @@ -196,7 +196,7 @@ describe('TrackedChat', () => { mockProvider.invokeModel.mockResolvedValue(mockResponse); - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); await chat.invoke('Hello'); @@ -216,7 +216,7 @@ describe('TrackedChat', () => { mockProvider.invokeModel.mockResolvedValue(mockResponse); - const chat = new TrackedChat(aiConfig, mockTracker, mockProvider); + const chat = new TrackedChat(aiConfig, mockProvider); chat.appendMessages([{ role: 'user', content: 'Pre-appended message' }]); await chat.invoke('New user input'); diff --git 
a/packages/sdk/server-ai/examples/bedrock/src/index.ts b/packages/sdk/server-ai/examples/bedrock/src/index.ts index ac331d23e8..e1cbf93e06 100644 --- a/packages/sdk/server-ai/examples/bedrock/src/index.ts +++ b/packages/sdk/server-ai/examples/bedrock/src/index.ts @@ -62,12 +62,13 @@ async function main() { }, ); - if (!aiConfig.enabled || !aiConfig.tracker) { + if (!aiConfig.enabled) { console.log('*** AI configuration is not enabled'); process.exit(0); } - const completion = aiConfig.tracker.trackBedrockConverseMetrics( + const tracker = aiConfig.createTracker!(); + const completion = tracker.trackBedrockConverseMetrics( await awsClient.send( new ConverseCommand({ modelId: aiConfig.model?.name ?? 'no-model', diff --git a/packages/sdk/server-ai/examples/openai/src/index.ts b/packages/sdk/server-ai/examples/openai/src/index.ts index 8b501e9cb7..e16643d6c5 100644 --- a/packages/sdk/server-ai/examples/openai/src/index.ts +++ b/packages/sdk/server-ai/examples/openai/src/index.ts @@ -60,20 +60,19 @@ async function main() { myVariable: 'My User Defined Variable', }); - if (!aiConfig.enabled || !aiConfig.tracker) { + if (!aiConfig.enabled) { console.log('*** AI configuration is not enabled'); process.exit(0); } - const completion = await aiConfig.tracker.trackMetricsOf( - OpenAIProvider.createAIMetrics, - async () => - client.chat.completions.create({ - messages: aiConfig.messages || [], - model: aiConfig.model?.name || 'gpt-4', - temperature: (aiConfig.model?.parameters?.temperature as number) ?? 0.5, - max_tokens: (aiConfig.model?.parameters?.maxTokens as number) ?? 4096, - }), + const tracker = aiConfig.createTracker!(); + const completion = await tracker.trackMetricsOf(OpenAIProvider.createAIMetrics, async () => + client.chat.completions.create({ + messages: aiConfig.messages || [], + model: aiConfig.model?.name || 'gpt-4', + temperature: (aiConfig.model?.parameters?.temperature as number) ?? 0.5, + max_tokens: (aiConfig.model?.parameters?.maxTokens as number) ?? 
4096, + }), ); console.log('AI Response:', completion.choices[0]?.message.content); diff --git a/packages/sdk/server-ai/examples/vercel-ai/src/index.ts b/packages/sdk/server-ai/examples/vercel-ai/src/index.ts index 5f138a5dd2..af1db3e918 100644 --- a/packages/sdk/server-ai/examples/vercel-ai/src/index.ts +++ b/packages/sdk/server-ai/examples/vercel-ai/src/index.ts @@ -52,7 +52,7 @@ async function main() { // const aiConfig = await aiClient.completionConfig(aiConfigKey, context, defaultValue); const aiConfig = await aiClient.completionConfig(aiConfigKey, context); - if (!aiConfig.enabled || !aiConfig.tracker) { + if (!aiConfig.enabled) { console.log('*** AI configuration is not enabled'); process.exit(0); } @@ -74,9 +74,9 @@ async function main() { }); // Call the model and track metrics for the ai config - const result = await aiConfig.tracker.trackMetricsOf( - VercelProvider.getAIMetricsFromResponse, - () => generateText({ ...vercelConfig, messages: vercelConfig.messages ?? [] }), + const tracker = aiConfig.createTracker!(); + const result = await tracker.trackMetricsOf(VercelProvider.getAIMetricsFromResponse, () => + generateText({ ...vercelConfig, messages: vercelConfig.messages ?? [] }), ); console.log('Response:', result.text); @@ -99,7 +99,8 @@ async function main() { }); // Stream is returned immediately (synchronously), metrics tracked in background - const streamResult = aiConfig.tracker.trackStreamMetricsOf( + const streamTracker = aiConfig.createTracker!(); + const streamResult = streamTracker.trackStreamMetricsOf( () => streamText({ ...vercelConfig, messages: vercelConfig.messages ?? 
[] }), VercelProvider.getAIMetricsFromStream, ); diff --git a/packages/sdk/server-ai/src/LDAIClientImpl.ts b/packages/sdk/server-ai/src/LDAIClientImpl.ts index 37ac4e8f10..209c0ce860 100644 --- a/packages/sdk/server-ai/src/LDAIClientImpl.ts +++ b/packages/sdk/server-ai/src/LDAIClientImpl.ts @@ -1,4 +1,5 @@ import Mustache from 'mustache'; +import { randomUUID } from 'node:crypto'; import { LDContext, LDLogger } from '@launchdarkly/js-server-sdk-common'; @@ -13,6 +14,7 @@ import { LDAIConfigDefaultKind, LDAIConfigKind, LDAIConfigMode, + LDAIConfigTracker, LDAIJudgeConfig, LDAIJudgeConfigDefault, LDJudge, @@ -87,19 +89,21 @@ export class LDAIClientImpl implements LDAIClient { return LDAIConfigUtils.createDisabledConfig(key, mode); } - const tracker = new LDAIConfigTrackerImpl( - this._ldClient, - key, - // eslint-disable-next-line no-underscore-dangle - value._ldMeta?.variationKey ?? '', - // eslint-disable-next-line no-underscore-dangle - value._ldMeta?.version ?? 1, - value.model?.name ?? '', - value.provider?.name ?? '', - context, - ); + const trackerFactory = () => + new LDAIConfigTrackerImpl( + this._ldClient, + randomUUID(), + key, + // eslint-disable-next-line no-underscore-dangle + value._ldMeta?.variationKey ?? '', + // eslint-disable-next-line no-underscore-dangle + value._ldMeta?.version ?? 1, + value.model?.name ?? '', + value.provider?.name ?? 
'', + context, + ); - const config = LDAIConfigUtils.fromFlagValue(key, value, tracker); + const config = LDAIConfigUtils.fromFlagValue(key, value, trackerFactory); // Apply variable interpolation (always needed for ldctx) return this._applyInterpolation(config, context, variables); @@ -296,7 +300,7 @@ export class LDAIClientImpl implements LDAIClient { variables, ); - if (!config.enabled || !config.tracker) { + if (!config.enabled) { this._logger?.info(`Chat configuration is disabled: ${key}`); return undefined; } @@ -313,7 +317,7 @@ export class LDAIClientImpl implements LDAIClient { defaultAiProvider, ); - return new TrackedChat(config, config.tracker, provider, judges, this._logger); + return new TrackedChat(config, provider, judges, this._logger); } async createJudge( @@ -351,7 +355,7 @@ export class LDAIClientImpl implements LDAIClient { extendedVariables, ); - if (!judgeConfig.enabled || !judgeConfig.tracker) { + if (!judgeConfig.enabled) { this._logger?.info(`Judge configuration is disabled: ${key}`); return undefined; } @@ -361,7 +365,7 @@ export class LDAIClientImpl implements LDAIClient { return undefined; } - return new Judge(judgeConfig, judgeConfig.tracker, provider, this._logger); + return new Judge(judgeConfig, provider, this._logger); } catch (error) { this._logger?.error(`Failed to initialize judge ${key}:`, error); return undefined; @@ -380,4 +384,8 @@ export class LDAIClientImpl implements LDAIClient { ): Promise { return this.createChat(key, context, defaultValue, variables, defaultAiProvider); } + + createTracker(token: string, context: LDContext): LDAIConfigTracker { + return LDAIConfigTrackerImpl.fromResumptionToken(token, this._ldClient, context); + } } diff --git a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts index adda7c97c8..151a3c1d97 100644 --- a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts @@ -18,6 +18,7 @@ 
export class LDAIConfigTrackerImpl implements LDAIConfigTracker { constructor( private _ldClient: LDClientMin, + private _runId: string, private _configKey: string, private _variationKey: string, private _version: number, @@ -27,16 +28,18 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { ) {} getTrackData(graphKey?: string): { - variationKey: string; + runId: string; configKey: string; + variationKey: string; version: number; modelName: string; providerName: string; graphKey?: string; } { return { - variationKey: this._variationKey, + runId: this._runId, configKey: this._configKey, + variationKey: this._variationKey, version: this._version, modelName: this._modelName, providerName: this._providerName, @@ -44,7 +47,42 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { }; } + get resumptionToken(): string { + const json = JSON.stringify({ + runId: this._runId, + configKey: this._configKey, + variationKey: this._variationKey, + version: this._version, + }); + return Buffer.from(json).toString('base64url'); + } + + static fromResumptionToken( + token: string, + ldClient: LDClientMin, + context: LDContext, + ): LDAIConfigTrackerImpl { + const json = Buffer.from(token, 'base64url').toString('utf8'); + const payload = JSON.parse(json); + return new LDAIConfigTrackerImpl( + ldClient, + payload.runId, + payload.configKey, + payload.variationKey ?? '', + payload.version, + '', + '', + context, + ); + } + trackDuration(duration: number, graphKey?: string): void { + if (this._trackedMetrics.durationMs !== undefined) { + this._ldClient.logger?.warn( + 'Duration has already been tracked for this execution. 
Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.durationMs = duration; this._ldClient.track( '$ld:ai:duration:total', @@ -68,6 +106,12 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } trackTimeToFirstToken(timeToFirstTokenMs: number, graphKey?: string) { + if (this._trackedMetrics.timeToFirstTokenMs !== undefined) { + this._ldClient.logger?.warn( + 'Time to first token has already been tracked for this execution. Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.timeToFirstTokenMs = timeToFirstTokenMs; this._ldClient.track( '$ld:ai:tokens:ttf', @@ -110,6 +154,12 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } trackFeedback(feedback: { kind: LDFeedbackKind }, graphKey?: string): void { + if (this._trackedMetrics.feedback !== undefined) { + this._ldClient.logger?.warn( + 'Feedback has already been tracked for this execution. Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.feedback = feedback; if (feedback.kind === LDFeedbackKind.Positive) { this._ldClient.track( @@ -129,6 +179,12 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } trackSuccess(graphKey?: string): void { + if (this._trackedMetrics.success !== undefined) { + this._ldClient.logger?.warn( + 'Generation result has already been tracked for this execution. Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.success = true; this._ldClient.track( '$ld:ai:generation:success', @@ -139,6 +195,12 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } trackError(graphKey?: string): void { + if (this._trackedMetrics.success !== undefined) { + this._ldClient.logger?.warn( + 'Generation result has already been tracked for this execution. 
Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.success = false; this._ldClient.track('$ld:ai:generation:error', this._context, this.getTrackData(graphKey), 1); } @@ -301,6 +363,12 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } trackTokens(tokens: LDTokenUsage, graphKey?: string): void { + if (this._trackedMetrics.tokens !== undefined) { + this._ldClient.logger?.warn( + 'Token usage has already been tracked for this execution. Use createTracker() for a new execution.', + ); + return; + } this._trackedMetrics.tokens = tokens; const trackData = this.getTrackData(graphKey); if (tokens.total > 0) { diff --git a/packages/sdk/server-ai/src/api/LDAIClient.ts b/packages/sdk/server-ai/src/api/LDAIClient.ts index 3e4ceca864..fd93ca92a5 100644 --- a/packages/sdk/server-ai/src/api/LDAIClient.ts +++ b/packages/sdk/server-ai/src/api/LDAIClient.ts @@ -7,6 +7,7 @@ import { LDAIAgentRequestConfig, LDAICompletionConfig, LDAICompletionConfigDefault, + LDAIConfigTracker, LDAIJudgeConfig, LDAIJudgeConfigDefault, } from './config'; @@ -325,4 +326,15 @@ export interface LDAIClient { variables?: Record, defaultAiProvider?: SupportedAIProvider, ): Promise; + + /** + * Reconstructs an AIConfigTracker from a resumption token string previously + * obtained from a tracker's `resumptionToken` property. Use this to associate + * deferred events (such as user feedback) with the original invocation's runId. + * + * @param token A URL-safe Base64-encoded resumption token string. + * @param context The evaluation context to use for subsequent track calls. + * @returns A reconstructed AIConfigTracker with the original runId preserved. 
+ */ + createTracker(token: string, context: LDContext): LDAIConfigTracker; } diff --git a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts b/packages/sdk/server-ai/src/api/chat/TrackedChat.ts index 542547bffc..054969dc3d 100644 --- a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts +++ b/packages/sdk/server-ai/src/api/chat/TrackedChat.ts @@ -1,6 +1,5 @@ import { LDLogger } from '@launchdarkly/js-server-sdk-common'; -import { LDAIConfigTracker } from '../config/LDAIConfigTracker'; import { LDAICompletionConfig, LDMessage } from '../config/types'; import { Judge } from '../judge/Judge'; import { JudgeResponse } from '../judge/types'; @@ -18,7 +17,6 @@ export class TrackedChat { constructor( protected readonly aiConfig: LDAICompletionConfig, - protected readonly tracker: LDAIConfigTracker, protected readonly provider: AIProvider, protected readonly judges: Record = {}, private readonly _logger?: LDLogger, @@ -31,6 +29,8 @@ export class TrackedChat { * This method handles conversation management and tracking, delegating to the provider's invokeModel method. 
*/ async invoke(prompt: string): Promise { + const tracker = this.aiConfig.createTracker!(); + // Convert prompt string to LDMessage with role 'user' and add to conversation history const userMessage: LDMessage = { role: 'user', @@ -43,7 +43,7 @@ export class TrackedChat { const allMessages = [...configMessages, ...this.messages]; // Delegate to provider-specific implementation with tracking - const response = await this.tracker.trackMetricsOf( + const response = await tracker.trackMetricsOf( (result: ChatResponse) => result.metrics, () => this.provider.invokeModel(allMessages), ); @@ -52,7 +52,16 @@ export class TrackedChat { this.aiConfig.judgeConfiguration?.judges && this.aiConfig.judgeConfiguration.judges.length > 0 ) { - response.evaluations = this._evaluateWithJudges(this.messages, response); + response.evaluations = this._evaluateWithJudges(this.messages, response).then( + (evaluations) => { + evaluations.forEach((judgeResponse) => { + if (judgeResponse?.success) { + tracker.trackJudgeResponse(judgeResponse); + } + }); + return evaluations; + }, + ); } this.messages.push(response.message); @@ -78,23 +87,12 @@ export class TrackedChat { const judge = this.judges[judgeConfig.key]; if (!judge) { this._logger?.warn( - `Judge configuration is not enabled: ${judgeConfig.key}`, - this.tracker.getTrackData(), + `Judge configuration is not enabled for ${judgeConfig.key} in ${this.aiConfig.key}`, ); return undefined; } - const judgeResponse = await judge.evaluateMessages( - messages, - response, - judgeConfig.samplingRate, - ); - - if (judgeResponse && judgeResponse.success) { - this.tracker.trackJudgeResponse(judgeResponse); - } - - return judgeResponse; + return judge.evaluateMessages(messages, response, judgeConfig.samplingRate); }); // ensure all evaluations complete even if some fail @@ -110,13 +108,6 @@ export class TrackedChat { return this.aiConfig; } - /** - * Get the underlying AI configuration tracker used to initialize this TrackedChat. 
- */ - getTracker(): LDAIConfigTracker { - return this.tracker; - } - /** * Get the underlying AI provider instance. * This provides direct access to the provider for advanced use cases. diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts index 3a40fd3c6d..18b243d94b 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts @@ -41,16 +41,30 @@ export interface LDAIConfigTracker { * @param graphKey When provided, associates this metric with the specified agent graph key. */ getTrackData(graphKey?: string): { - variationKey: string; + runId: string; configKey: string; + variationKey: string; version: number; modelName: string; providerName: string; graphKey?: string; }; + + /** + * A URL-safe Base64-encoded token that encodes the tracker's runId, configKey, + * variationKey, and version. Pass this to AIClient.createTracker() to reconstruct + * the tracker across process boundaries (e.g. for associating deferred feedback + * with the original invocation). + */ + readonly resumptionToken: string; + /** * Track the duration of generation. * + * At-most-once per execution: subsequent calls on the same tracker are dropped + * with a warning. Use createTracker() on the config result to obtain a fresh + * tracker for a new execution. + * * Ideally this would not include overhead time such as network communication. * * @param durationMs The duration in milliseconds. @@ -61,6 +75,9 @@ export interface LDAIConfigTracker { /** * Track information about token usage. * + * At-most-once per execution: subsequent calls on the same tracker are dropped + * with a warning. + * * @param tokens Token usage information. * @param graphKey When provided, associates this metric with the specified agent graph key. */ @@ -69,6 +86,9 @@ export interface LDAIConfigTracker { /** * Generation was successful. 
* + * At-most-once per execution: subsequent calls (including trackError) on the + * same tracker are dropped with a warning. + * * @param graphKey When provided, associates this metric with the specified agent graph key. */ trackSuccess(graphKey?: string): void; @@ -76,6 +96,9 @@ export interface LDAIConfigTracker { /** * An error was encountered during generation. * + * At-most-once per execution: subsequent calls (including trackSuccess) on the + * same tracker are dropped with a warning. + * * @param graphKey When provided, associates this metric with the specified agent graph key. */ trackError(graphKey?: string): void; @@ -83,6 +106,9 @@ export interface LDAIConfigTracker { /** * Track sentiment about the generation. * + * At-most-once per execution: subsequent calls on the same tracker are dropped + * with a warning. + * * @param feedback Feedback about the generation. * @param graphKey When provided, associates this metric with the specified agent graph key. */ @@ -91,6 +117,9 @@ export interface LDAIConfigTracker { /** * Track the time to first token for this generation. * + * At-most-once per execution: subsequent calls on the same tracker are dropped + * with a warning. + * * @param timeToFirstTokenMs The duration in milliseconds. * @param graphKey When provided, associates this metric with the specified agent graph key. */ diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts index 2a926f1c87..74ab8ee30a 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigUtils.ts @@ -82,14 +82,15 @@ export class LDAIConfigUtils { /** * Converts a LaunchDarkly flag value to the appropriate AI configuration type. 
* + * @param key The configuration key * @param flagValue The flag value from LaunchDarkly - * @param tracker The tracker to add to the config + * @param trackerFactory A factory function that creates a new tracker for each execution * @returns The appropriate AI configuration type */ static fromFlagValue( key: string, flagValue: LDAIConfigFlagValue, - tracker: LDAIConfigTracker, + trackerFactory: () => LDAIConfigTracker, ): LDAIConfigKind { // Determine the actual mode from flag value // eslint-disable-next-line no-underscore-dangle @@ -97,12 +98,12 @@ export class LDAIConfigUtils { switch (flagValueMode) { case 'agent': - return this.toAgentConfig(key, flagValue, tracker); + return this.toAgentConfig(key, flagValue, trackerFactory); case 'judge': - return this.toJudgeConfig(key, flagValue, tracker); + return this.toJudgeConfig(key, flagValue, trackerFactory); case 'completion': default: - return this.toCompletionConfig(key, flagValue, tracker); + return this.toCompletionConfig(key, flagValue, trackerFactory); } } @@ -118,13 +119,13 @@ export class LDAIConfigUtils { return { key, enabled: false, - tracker: undefined, + createTracker: undefined, } as LDAIAgentConfig; case 'judge': return { key, enabled: false, - tracker: undefined, + createTracker: undefined, } as LDAIJudgeConfig; case 'completion': default: @@ -132,7 +133,7 @@ export class LDAIConfigUtils { return { key, enabled: false, - tracker: undefined, + createTracker: undefined, } as LDAICompletionConfig; } } @@ -156,18 +157,19 @@ export class LDAIConfigUtils { /** * Creates a completion config from flag value data. 
* + * @param key The configuration key * @param flagValue The flag value from LaunchDarkly - * @param tracker The tracker to add to the config + * @param trackerFactory A factory function that creates a new tracker for each execution * @returns A completion configuration */ static toCompletionConfig( key: string, flagValue: LDAIConfigFlagValue, - tracker: LDAIConfigTracker, + trackerFactory: () => LDAIConfigTracker, ): LDAICompletionConfig { return { ...this._toBaseConfig(key, flagValue), - tracker, + createTracker: trackerFactory, messages: flagValue.messages, judgeConfiguration: flagValue.judgeConfiguration, }; @@ -176,18 +178,19 @@ export class LDAIConfigUtils { /** * Creates an agent config from flag value data. * + * @param key The configuration key * @param flagValue The flag value from LaunchDarkly - * @param tracker The tracker to add to the config + * @param trackerFactory A factory function that creates a new tracker for each execution * @returns An agent configuration */ static toAgentConfig( key: string, flagValue: LDAIConfigFlagValue, - tracker: LDAIConfigTracker, + trackerFactory: () => LDAIConfigTracker, ): LDAIAgentConfig { return { ...this._toBaseConfig(key, flagValue), - tracker, + createTracker: trackerFactory, instructions: flagValue.instructions, judgeConfiguration: flagValue.judgeConfiguration, }; @@ -196,14 +199,15 @@ export class LDAIConfigUtils { /** * Creates a judge config from flag value data. 
* + * @param key The configuration key * @param flagValue The flag value from LaunchDarkly - * @param tracker The tracker to add to the config + * @param trackerFactory A factory function that creates a new tracker for each execution * @returns A judge configuration */ static toJudgeConfig( key: string, flagValue: LDAIConfigFlagValue, - tracker: LDAIConfigTracker, + trackerFactory: () => LDAIConfigTracker, ): LDAIJudgeConfig { // Prioritize evaluationMetricKey, fallback to first valid (non-empty, non-whitespace) value in evaluationMetricKeys let evaluationMetricKey: string | undefined; @@ -218,7 +222,7 @@ export class LDAIConfigUtils { return { ...this._toBaseConfig(key, flagValue), - tracker, + createTracker: trackerFactory, messages: flagValue.messages, evaluationMetricKey, }; diff --git a/packages/sdk/server-ai/src/api/config/types.ts b/packages/sdk/server-ai/src/api/config/types.ts index 44b89160c8..56a54d1ca2 100644 --- a/packages/sdk/server-ai/src/api/config/types.ts +++ b/packages/sdk/server-ai/src/api/config/types.ts @@ -105,10 +105,11 @@ export interface LDAIConfig extends Omit { enabled: boolean; /** - * A tracker which can be used to generate analytics. - * Undefined for disabled configs. + * Creates a new tracker for this AI Config invocation. Each call returns a + * new tracker with a fresh runId. Use createTracker() at the start of each + * execution to obtain a tracker, then use it to record metrics for that run. 
*/ - tracker?: LDAIConfigTracker; + createTracker?: () => LDAIConfigTracker; } // ============================================================================ diff --git a/packages/sdk/server-ai/src/api/judge/Judge.ts b/packages/sdk/server-ai/src/api/judge/Judge.ts index 382addc632..1bab8d1a12 100644 --- a/packages/sdk/server-ai/src/api/judge/Judge.ts +++ b/packages/sdk/server-ai/src/api/judge/Judge.ts @@ -21,7 +21,6 @@ export class Judge { constructor( private readonly _aiConfig: LDAIJudgeConfig, - private readonly _aiConfigTracker: LDAIConfigTracker, private readonly _aiProvider: AIProvider, logger?: LDLogger, ) { @@ -65,21 +64,19 @@ export class Judge { output: string, samplingRate: number = 1, ): Promise { + const tracker = this._aiConfig.createTracker!(); try { const evaluationMetricKey = this._getEvaluationMetricKey(); if (!evaluationMetricKey) { this._logger?.warn( 'Judge configuration is missing required evaluation metric key', - this._aiConfigTracker.getTrackData(), + tracker.getTrackData(), ); return undefined; } if (!this._aiConfig.messages) { - this._logger?.warn( - 'Judge configuration must include messages', - this._aiConfigTracker.getTrackData(), - ); + this._logger?.warn('Judge configuration must include messages', tracker.getTrackData()); return undefined; } @@ -90,19 +87,19 @@ export class Judge { const messages = this._constructEvaluationMessages(input, output); - const response = await this._aiConfigTracker.trackMetricsOf( + const response = await tracker.trackMetricsOf( (result: StructuredResponse) => result.metrics, () => this._aiProvider.invokeStructuredModel(messages, this._evaluationResponseStructure), ); let { success } = response.metrics; - const evals = this._parseEvaluationResponse(response.data, evaluationMetricKey); + const evals = this._parseEvaluationResponse(response.data, evaluationMetricKey, tracker); if (!evals[evaluationMetricKey]) { this._logger?.warn( 'Judge evaluation did not return the expected evaluation', - 
this._aiConfigTracker.getTrackData(), + tracker.getTrackData(), ); success = false; } @@ -149,13 +146,6 @@ export class Judge { return this._aiConfig; } - /** - * Returns the tracker associated with this judge. - */ - getTracker(): LDAIConfigTracker { - return this._aiConfigTracker; - } - /** * Returns the AI provider used by this judge. */ @@ -191,6 +181,7 @@ export class Judge { private _parseEvaluationResponse( data: Record, evaluationMetricKey: string, + tracker: LDAIConfigTracker, ): Record { const evaluations = data.evaluations as Record; const results: Record = {}; @@ -205,7 +196,7 @@ export class Judge { if (!evaluation || typeof evaluation !== 'object') { this._logger?.warn( `Missing evaluation for metric key: ${evaluationMetricKey}`, - this._aiConfigTracker.getTrackData(), + tracker.getTrackData(), ); return results; } @@ -215,7 +206,7 @@ export class Judge { if (typeof evalData.score !== 'number' || evalData.score < 0 || evalData.score > 1) { this._logger?.warn( `Invalid score evaluated for ${evaluationMetricKey}: ${evalData.score}. Score must be a number between 0 and 1 inclusive`, - this._aiConfigTracker.getTrackData(), + tracker.getTrackData(), ); return results; } @@ -223,7 +214,7 @@ export class Judge { if (typeof evalData.reasoning !== 'string') { this._logger?.warn( `Invalid reasoning evaluated for ${evaluationMetricKey}: ${evalData.reasoning}. 
Reasoning must be a string`, - this._aiConfigTracker.getTrackData(), + tracker.getTrackData(), ); return results; } From 367d369d7b55e94488472f367e00d49c3c0003fc Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Thu, 16 Apr 2026 10:11:37 -0500 Subject: [PATCH 3/7] chore: Move graphKey to LDAIConfigTracker constructor (#1279) --- .../__tests__/LDAIClientImpl.test.ts | 18 ++- .../__tests__/LDAIConfigTrackerImpl.test.ts | 146 +++++++++++++++++- packages/sdk/server-ai/src/LDAIClientImpl.ts | 27 ++-- .../server-ai/src/LDAIConfigTrackerImpl.ts | 107 +++++-------- .../src/api/config/LDAIConfigTracker.ts | 43 ++---- 5 files changed, 223 insertions(+), 118 deletions(-) diff --git a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts index 9695c1f815..77af66a0b5 100644 --- a/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts @@ -139,7 +139,14 @@ describe('config evaluation', () => { const evaluateSpy = jest.spyOn(client as any, '_evaluate'); const result = await client.agentConfig(key, testContext, defaultValue, variables); - expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'agent', variables); + expect(evaluateSpy).toHaveBeenCalledWith( + key, + testContext, + defaultValue, + 'agent', + variables, + undefined, + ); expect(result.instructions).toBe( 'You are a helpful assistant. 
Your name is John and your score is 42', ); @@ -464,7 +471,14 @@ describe('agentConfig method', () => { key, 1, ); - expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'agent', variables); + expect(evaluateSpy).toHaveBeenCalledWith( + key, + testContext, + defaultValue, + 'agent', + variables, + undefined, + ); expect(result).toBe(mockConfig); evaluateSpy.mockRestore(); }); diff --git a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts index e644eff377..a4b40b62cb 100644 --- a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts @@ -933,7 +933,7 @@ describe('trackToolCall', () => { ); }); - it('includes graphKey when provided', () => { + it('includes graphKey when set on constructor', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, testRunId, @@ -943,9 +943,10 @@ describe('trackToolCall', () => { modelName, providerName, testContext, + 'my-graph', ); - tracker.trackToolCall('my-tool', 'my-graph'); + tracker.trackToolCall('my-tool'); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:tool_call', @@ -993,8 +994,8 @@ describe('trackToolCalls', () => { }); }); -describe('graphKey parameter support', () => { - it('includes graphKey in trackDuration event', () => { +describe('graphKey constructor support', () => { + it('includes graphKey in trackDuration event when set on constructor', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, testRunId, @@ -1004,9 +1005,10 @@ describe('graphKey parameter support', () => { modelName, providerName, testContext, + 'my-graph', ); - tracker.trackDuration(1000, 'my-graph'); + tracker.trackDuration(1000); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:duration:total', @@ -1016,7 +1018,7 @@ describe('graphKey parameter support', () => { ); }); - it('includes graphKey in trackSuccess event', () => { + it('includes graphKey in 
trackSuccess event when set on constructor', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, testRunId, @@ -1026,9 +1028,10 @@ describe('graphKey parameter support', () => { modelName, providerName, testContext, + 'my-graph', ); - tracker.trackSuccess('my-graph'); + tracker.trackSuccess(); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:generation:success', @@ -1038,7 +1041,7 @@ describe('graphKey parameter support', () => { ); }); - it('does not include graphKey when not provided', () => { + it('does not include graphKey when not set on constructor', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, testRunId, @@ -1059,6 +1062,41 @@ describe('graphKey parameter support', () => { 1, ); }); + + it('includes graphKey in getTrackData when set on constructor', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + 'my-graph', + ); + + expect(tracker.getTrackData()).toEqual({ + ...getExpectedTrackData(), + graphKey: 'my-graph', + }); + }); + + it('does not include graphKey in getTrackData when not set', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + expect(tracker.getTrackData()).toEqual(getExpectedTrackData()); + expect('graphKey' in tracker.getTrackData()).toBe(false); + }); }); describe('at-most-once semantics', () => { @@ -1311,4 +1349,96 @@ describe('fromResumptionToken', () => { 1, ); }); + + it('includes graphKey in resumption token when set on constructor', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + 'my-graph', + ); + + const token = tracker.resumptionToken; + const decoded = JSON.parse(Buffer.from(token, 'base64url').toString('utf8')); + + expect(decoded).toEqual({ + 
runId: testRunId, + configKey, + variationKey, + version, + graphKey: 'my-graph', + }); + }); + + it('does not include graphKey in resumption token when not set', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const token = tracker.resumptionToken; + const decoded = JSON.parse(Buffer.from(token, 'base64url').toString('utf8')); + + expect(decoded).toEqual({ + runId: testRunId, + configKey, + variationKey, + version, + }); + expect('graphKey' in decoded).toBe(false); + }); + + it('reconstructs tracker with graphKey from resumption token', () => { + const original = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + 'my-graph', + ); + + const reconstructed = LDAIConfigTrackerImpl.fromResumptionToken( + original.resumptionToken, + mockLdClient, + testContext, + ); + + expect(reconstructed.getTrackData().graphKey).toBe('my-graph'); + }); + + it('reconstructed tracker without graphKey does not include graphKey in track data', () => { + const original = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, + testContext, + ); + + const reconstructed = LDAIConfigTrackerImpl.fromResumptionToken( + original.resumptionToken, + mockLdClient, + testContext, + ); + + expect('graphKey' in reconstructed.getTrackData()).toBe(false); + }); }); diff --git a/packages/sdk/server-ai/src/LDAIClientImpl.ts b/packages/sdk/server-ai/src/LDAIClientImpl.ts index 209c0ce860..65eb87a1a9 100644 --- a/packages/sdk/server-ai/src/LDAIClientImpl.ts +++ b/packages/sdk/server-ai/src/LDAIClientImpl.ts @@ -74,6 +74,7 @@ export class LDAIClientImpl implements LDAIClient { defaultValue: LDAIConfigDefaultKind, mode: LDAIConfigMode, variables?: Record, + graphKey?: string, ): Promise { const ldFlagValue = 
LDAIConfigUtils.toFlagValue(defaultValue, mode); @@ -101,6 +102,7 @@ export class LDAIClientImpl implements LDAIClient { value.model?.name ?? '', value.provider?.name ?? '', context, + graphKey, ); const config = LDAIConfigUtils.fromFlagValue(key, value, trackerFactory); @@ -217,6 +219,17 @@ export class LDAIClientImpl implements LDAIClient { return this._judgeConfig(key, context, defaultValue ?? disabledAIConfig, variables); } + private async _agentConfig( + key: string, + context: LDContext, + defaultValue: LDAIAgentConfigDefault, + variables?: Record, + graphKey?: string, + ): Promise { + const config = await this._evaluate(key, context, defaultValue, 'agent', variables, graphKey); + return config as LDAIAgentConfig; + } + async agentConfig( key: string, context: LDContext, @@ -224,14 +237,7 @@ export class LDAIClientImpl implements LDAIClient { variables?: Record, ): Promise { this._ldClient.track(TRACK_USAGE_AGENT_CONFIG, context, key, 1); - const config = await this._evaluate( - key, - context, - defaultValue ?? disabledAIConfig, - 'agent', - variables, - ); - return config as LDAIAgentConfig; + return this._agentConfig(key, context, defaultValue ?? disabledAIConfig, variables); } /** @@ -261,14 +267,13 @@ export class LDAIClientImpl implements LDAIClient { await Promise.all( agentConfigs.map(async (config) => { - const agent = await this._evaluate( + const agent = await this._agentConfig( config.key, context, config.defaultValue ?? 
disabledAIConfig, - 'agent', config.variables, ); - agents[config.key as T[number]['key']] = agent as LDAIAgentConfig; + agents[config.key as T[number]['key']] = agent; }), ); diff --git a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts index 151a3c1d97..d87729c14f 100644 --- a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts @@ -25,9 +25,10 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { private _modelName: string, private _providerName: string, private _context: LDContext, + private _graphKey?: string, ) {} - getTrackData(graphKey?: string): { + getTrackData(): { runId: string; configKey: string; variationKey: string; @@ -43,7 +44,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { version: this._version, modelName: this._modelName, providerName: this._providerName, - ...(graphKey !== undefined ? { graphKey } : {}), + ...(this._graphKey !== undefined ? { graphKey: this._graphKey } : {}), }; } @@ -53,6 +54,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { configKey: this._configKey, variationKey: this._variationKey, version: this._version, + ...(this._graphKey !== undefined ? { graphKey: this._graphKey } : {}), }); return Buffer.from(json).toString('base64url'); } @@ -73,10 +75,11 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { '', '', context, + payload.graphKey, ); } - trackDuration(duration: number, graphKey?: string): void { + trackDuration(duration: number): void { if (this._trackedMetrics.durationMs !== undefined) { this._ldClient.logger?.warn( 'Duration has already been tracked for this execution. 
Use createTracker() for a new execution.', @@ -84,15 +87,10 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { return; } this._trackedMetrics.durationMs = duration; - this._ldClient.track( - '$ld:ai:duration:total', - this._context, - this.getTrackData(graphKey), - duration, - ); + this._ldClient.track('$ld:ai:duration:total', this._context, this.getTrackData(), duration); } - async trackDurationOf(func: () => Promise, graphKey?: string): Promise { + async trackDurationOf(func: () => Promise): Promise { const startTime = Date.now(); try { // Be sure to await here so that we can track the duration of the function and also handle errors. @@ -101,11 +99,11 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } finally { const endTime = Date.now(); const duration = endTime - startTime; // duration in milliseconds - this.trackDuration(duration, graphKey); + this.trackDuration(duration); } } - trackTimeToFirstToken(timeToFirstTokenMs: number, graphKey?: string) { + trackTimeToFirstToken(timeToFirstTokenMs: number) { if (this._trackedMetrics.timeToFirstTokenMs !== undefined) { this._ldClient.logger?.warn( 'Time to first token has already been tracked for this execution. 
Use createTracker() for a new execution.', @@ -116,44 +114,39 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { this._ldClient.track( '$ld:ai:tokens:ttf', this._context, - this.getTrackData(graphKey), + this.getTrackData(), timeToFirstTokenMs, ); } - trackEvalScores(scores: Record, graphKey?: string) { + trackEvalScores(scores: Record) { Object.entries(scores).forEach(([metricKey, evalScore]) => { - this._ldClient.track(metricKey, this._context, this.getTrackData(graphKey), evalScore.score); + this._ldClient.track(metricKey, this._context, this.getTrackData(), evalScore.score); }); } - trackJudgeResponse(response: JudgeResponse, graphKey?: string) { + trackJudgeResponse(response: JudgeResponse) { Object.entries(response.evals).forEach(([metricKey, evalScore]) => { this._ldClient.track( metricKey, this._context, - { ...this.getTrackData(graphKey), judgeConfigKey: response.judgeConfigKey }, + { ...this.getTrackData(), judgeConfigKey: response.judgeConfigKey }, evalScore.score, ); }); } - trackToolCall(toolKey: string, graphKey?: string): void { - this._ldClient.track( - '$ld:ai:tool_call', - this._context, - { ...this.getTrackData(graphKey), toolKey }, - 1, - ); + trackToolCall(toolKey: string): void { + this._ldClient.track('$ld:ai:tool_call', this._context, { ...this.getTrackData(), toolKey }, 1); } - trackToolCalls(toolKeys: string[], graphKey?: string): void { + trackToolCalls(toolKeys: string[]): void { toolKeys.forEach((toolKey) => { - this.trackToolCall(toolKey, graphKey); + this.trackToolCall(toolKey); }); } - trackFeedback(feedback: { kind: LDFeedbackKind }, graphKey?: string): void { + trackFeedback(feedback: { kind: LDFeedbackKind }): void { if (this._trackedMetrics.feedback !== undefined) { this._ldClient.logger?.warn( 'Feedback has already been tracked for this execution. 
Use createTracker() for a new execution.', @@ -162,23 +155,13 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } this._trackedMetrics.feedback = feedback; if (feedback.kind === LDFeedbackKind.Positive) { - this._ldClient.track( - '$ld:ai:feedback:user:positive', - this._context, - this.getTrackData(graphKey), - 1, - ); + this._ldClient.track('$ld:ai:feedback:user:positive', this._context, this.getTrackData(), 1); } else if (feedback.kind === LDFeedbackKind.Negative) { - this._ldClient.track( - '$ld:ai:feedback:user:negative', - this._context, - this.getTrackData(graphKey), - 1, - ); + this._ldClient.track('$ld:ai:feedback:user:negative', this._context, this.getTrackData(), 1); } } - trackSuccess(graphKey?: string): void { + trackSuccess(): void { if (this._trackedMetrics.success !== undefined) { this._ldClient.logger?.warn( 'Generation result has already been tracked for this execution. Use createTracker() for a new execution.', @@ -186,15 +169,10 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { return; } this._trackedMetrics.success = true; - this._ldClient.track( - '$ld:ai:generation:success', - this._context, - this.getTrackData(graphKey), - 1, - ); + this._ldClient.track('$ld:ai:generation:success', this._context, this.getTrackData(), 1); } - trackError(graphKey?: string): void { + trackError(): void { if (this._trackedMetrics.success !== undefined) { this._ldClient.logger?.warn( 'Generation result has already been tracked for this execution. 
Use createTracker() for a new execution.', @@ -202,20 +180,19 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { return; } this._trackedMetrics.success = false; - this._ldClient.track('$ld:ai:generation:error', this._context, this.getTrackData(graphKey), 1); + this._ldClient.track('$ld:ai:generation:error', this._context, this.getTrackData(), 1); } async trackMetricsOf( metricsExtractor: (result: TRes) => LDAIMetrics, func: () => Promise, - graphKey?: string, ): Promise { let result: TRes; try { - result = await this.trackDurationOf(func, graphKey); + result = await this.trackDurationOf(func); } catch (err) { - this.trackError(graphKey); + this.trackError(); throw err; } @@ -224,14 +201,14 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { // Track success/error based on metrics if (metrics.success) { - this.trackSuccess(graphKey); + this.trackSuccess(); } else { - this.trackError(graphKey); + this.trackError(); } // Track token usage if available if (metrics.usage) { - this.trackTokens(metrics.usage, graphKey); + this.trackTokens(metrics.usage); } return result; @@ -240,7 +217,6 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { trackStreamMetricsOf( streamCreator: () => TStream, metricsExtractor: (stream: TStream) => Promise, - graphKey?: string, ): TStream { const startTime = Date.now(); @@ -249,14 +225,14 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { const stream = streamCreator(); // Start background metrics tracking (fire and forget) - this._trackStreamMetricsInBackground(stream, metricsExtractor, startTime, graphKey); + this._trackStreamMetricsInBackground(stream, metricsExtractor, startTime); // Return stream immediately for consumption return stream; } catch (error) { // Track error if stream creation fails - this.trackDuration(Date.now() - startTime, graphKey); - this.trackError(graphKey); + this.trackDuration(Date.now() - startTime); + this.trackError(); throw error; } } @@ 
-265,7 +241,6 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { stream: TStream, metricsExtractor: (stream: TStream) => Promise, startTime: number, - graphKey?: string, ): Promise { try { // Wait for metrics to be available @@ -273,21 +248,21 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { // Track success/error based on metrics if (metrics.success) { - this.trackSuccess(graphKey); + this.trackSuccess(); } else { - this.trackError(graphKey); + this.trackError(); } // Track token usage if available if (metrics.usage) { - this.trackTokens(metrics.usage, graphKey); + this.trackTokens(metrics.usage); } } catch (error) { // If metrics extraction fails, track error - this.trackError(graphKey); + this.trackError(); } finally { // Track duration regardless of success/error - this.trackDuration(Date.now() - startTime, graphKey); + this.trackDuration(Date.now() - startTime); } } @@ -362,7 +337,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { } } - trackTokens(tokens: LDTokenUsage, graphKey?: string): void { + trackTokens(tokens: LDTokenUsage): void { if (this._trackedMetrics.tokens !== undefined) { this._ldClient.logger?.warn( 'Token usage has already been tracked for this execution. 
Use createTracker() for a new execution.', @@ -370,7 +345,7 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { return; } this._trackedMetrics.tokens = tokens; - const trackData = this.getTrackData(graphKey); + const trackData = this.getTrackData(); if (tokens.total > 0) { this._ldClient.track('$ld:ai:tokens:total', this._context, trackData, tokens.total); } diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts index 18b243d94b..883177becb 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts @@ -37,10 +37,8 @@ export interface LDAIMetricSummary { export interface LDAIConfigTracker { /** * Get the data for tracking. - * - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - getTrackData(graphKey?: string): { + getTrackData(): { runId: string; configKey: string; variationKey: string; @@ -68,9 +66,8 @@ export interface LDAIConfigTracker { * Ideally this would not include overhead time such as network communication. * * @param durationMs The duration in milliseconds. - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackDuration(durationMs: number, graphKey?: string): void; + trackDuration(durationMs: number): void; /** * Track information about token usage. @@ -79,29 +76,24 @@ export interface LDAIConfigTracker { * with a warning. * * @param tokens Token usage information. - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackTokens(tokens: LDTokenUsage, graphKey?: string): void; + trackTokens(tokens: LDTokenUsage): void; /** * Generation was successful. * * At-most-once per execution: subsequent calls (including trackError) on the * same tracker are dropped with a warning. 
- * - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackSuccess(graphKey?: string): void; + trackSuccess(): void; /** * An error was encountered during generation. * * At-most-once per execution: subsequent calls (including trackSuccess) on the * same tracker are dropped with a warning. - * - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackError(graphKey?: string): void; + trackError(): void; /** * Track sentiment about the generation. @@ -110,9 +102,8 @@ export interface LDAIConfigTracker { * with a warning. * * @param feedback Feedback about the generation. - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackFeedback(feedback: { kind: LDFeedbackKind }, graphKey?: string): void; + trackFeedback(feedback: { kind: LDFeedbackKind }): void; /** * Track the time to first token for this generation. @@ -121,41 +112,36 @@ export interface LDAIConfigTracker { * with a warning. * * @param timeToFirstTokenMs The duration in milliseconds. - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackTimeToFirstToken(timeToFirstTokenMs: number, graphKey?: string): void; + trackTimeToFirstToken(timeToFirstTokenMs: number): void; /** * Track evaluation scores for multiple metrics. * * @param scores Record mapping metric keys to their evaluation scores - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackEvalScores(scores: Record, graphKey?: string): void; + trackEvalScores(scores: Record): void; /** * Track a judge response containing evaluation scores and judge configuration key. * * @param response Judge response containing evaluation scores and judge configuration key - * @param graphKey When provided, associates this metric with the specified agent graph key. 
*/ - trackJudgeResponse(response: JudgeResponse, graphKey?: string): void; + trackJudgeResponse(response: JudgeResponse): void; /** * Track a single tool invocation. * * @param toolKey The identifier of the tool that was invoked. - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackToolCall(toolKey: string, graphKey?: string): void; + trackToolCall(toolKey: string): void; /** * Track multiple tool invocations. * * @param toolKeys The identifiers of the tools that were invoked. - * @param graphKey When provided, associates this metric with the specified agent graph key. */ - trackToolCalls(toolKeys: string[], graphKey?: string): void; + trackToolCalls(toolKeys: string[]): void; /** * Track the duration of execution of the provided function. @@ -166,10 +152,9 @@ export interface LDAIConfigTracker { * This function does not automatically record an error when the function throws. * * @param func The function to track the duration of. - * @param graphKey When provided, associates this metric with the specified agent graph key. * @returns The result of the function. */ - trackDurationOf(func: () => Promise, graphKey?: string): Promise; + trackDurationOf(func: () => Promise): Promise; /** * Track metrics for a generic AI operation. @@ -183,13 +168,11 @@ export interface LDAIConfigTracker { * * @param metricsExtractor Function that extracts LDAIMetrics from the operation result * @param func Function which executes the operation - * @param graphKey When provided, associates this metric with the specified agent graph key. 
* @returns The result of the operation */ trackMetricsOf( metricsExtractor: (result: TRes) => LDAIMetrics, func: () => Promise, - graphKey?: string, ): Promise; /** @@ -211,13 +194,11 @@ export interface LDAIConfigTracker { * * @param streamCreator Function that creates and returns the stream (synchronous) * @param metricsExtractor Function that asynchronously extracts metrics from the stream - * @param graphKey When provided, associates this metric with the specified agent graph key. * @returns The stream result (returned immediately, not a Promise) */ trackStreamMetricsOf( streamCreator: () => TStream, metricsExtractor: (stream: TStream) => Promise, - graphKey?: string, ): TStream; /** From dd49a79747301208ea89b6713b7a0513fc1c1520 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Thu, 16 Apr 2026 12:32:45 -0500 Subject: [PATCH 4/7] feat!: Flatten JudgeResponse and EvalScore into new LDJudgeResult (#1284) --- .../sdk/server-ai/__tests__/Judge.test.ts | 136 ++++++++++-------- .../__tests__/LDAIConfigTrackerImpl.test.ts | 64 +++++---- .../__tests__/LDGraphTrackerImpl.test.ts | 69 ++++++--- .../examples/direct-judge/src/index.ts | 8 +- .../server-ai/src/LDAIConfigTrackerImpl.ts | 23 ++- .../sdk/server-ai/src/LDGraphTrackerImpl.ts | 19 +-- .../sdk/server-ai/src/api/chat/TrackedChat.ts | 29 ++-- packages/sdk/server-ai/src/api/chat/types.ts | 4 +- .../src/api/config/LDAIConfigTracker.ts | 15 +- .../server-ai/src/api/graph/LDGraphTracker.ts | 10 +- packages/sdk/server-ai/src/api/judge/Judge.ts | 75 +++++----- packages/sdk/server-ai/src/api/judge/index.ts | 2 +- packages/sdk/server-ai/src/api/judge/types.ts | 28 ++-- 13 files changed, 277 insertions(+), 205 deletions(-) diff --git a/packages/sdk/server-ai/__tests__/Judge.test.ts b/packages/sdk/server-ai/__tests__/Judge.test.ts index c0def31740..ee9ff0351f 100644 --- a/packages/sdk/server-ai/__tests__/Judge.test.ts +++ b/packages/sdk/server-ai/__tests__/Judge.test.ts @@ -98,13 +98,11 @@ describe('Judge', () => { ); 
expect(result).toEqual({ - evals: { - relevance: { - score: 0.8, - reasoning: 'The response is relevant to the question', - }, - }, + score: 0.8, + reasoning: 'The response is relevant to the question', + metricKey: 'relevance', success: true, + sampled: true, judgeConfigKey: 'test-judge', }); @@ -148,12 +146,11 @@ describe('Judge', () => { const result = await judge.evaluate('test input', 'test output'); expect(result).toBeDefined(); - expect(result?.evals).toHaveProperty('relevance'); - expect(result?.evals.relevance.score).toBe(0.85); - expect(result?.judgeConfigKey).toBe('test-judge'); - expect(result?.success).toBe(true); - // Verify the evaluationMetricKey from config is used in the result - expect(Object.keys(result?.evals || {})).toContain(judgeConfig.evaluationMetricKey); + expect(result.score).toBe(0.85); + expect(result.metricKey).toBe('relevance'); + expect(result.judgeConfigKey).toBe('test-judge'); + expect(result.success).toBe(true); + expect(result.sampled).toBe(true); }); it('handles sampling rate correctly', async () => { @@ -183,18 +180,23 @@ describe('Judge', () => { const result = await judge.evaluate('test input', 'test output', 0.5); expect(result).toBeDefined(); + expect(result.sampled).toBe(true); expect(mockProvider.invokeStructuredModel).toHaveBeenCalled(); Math.random = originalRandom; }); - it('returns undefined when not sampled', async () => { + it('returns unsampled result when skipped by sampling', async () => { const originalRandom = Math.random; Math.random = jest.fn().mockReturnValue(0.8); const result = await judge.evaluate('test input', 'test output', 0.5); - expect(result).toBeUndefined(); + expect(result).toEqual({ + success: false, + sampled: false, + judgeConfigKey: 'test-judge', + }); expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); expect(mockLogger.debug).toHaveBeenCalledWith( 'Judge evaluation skipped due to sampling rate: 0.5', @@ -203,7 +205,7 @@ describe('Judge', () => { Math.random = originalRandom; 
}); - it('returns undefined when evaluationMetricKey and evaluationMetricKeys are both missing', async () => { + it('returns error result when evaluationMetricKey and evaluationMetricKeys are both missing', async () => { const configWithoutMetrics: LDAIJudgeConfig = { ...judgeConfig, evaluationMetricKey: undefined, @@ -213,7 +215,12 @@ describe('Judge', () => { const result = await judgeWithoutMetrics.evaluate('test input', 'test output'); - expect(result).toBeUndefined(); + expect(result).toEqual({ + success: false, + sampled: true, + errorMessage: 'Judge configuration is missing required evaluation metric key', + judgeConfigKey: 'test-judge', + }); expect(mockLogger.warn).toHaveBeenCalledWith( 'Judge configuration is missing required evaluation metric key', mockTrackData, @@ -251,10 +258,11 @@ describe('Judge', () => { const result = await judgeWithSingleKey.evaluate('test input', 'test output'); expect(result).toEqual({ - evals: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', + metricKey: 'relevance', success: true, + sampled: true, judgeConfigKey: 'test-judge', }); }); @@ -290,10 +298,11 @@ describe('Judge', () => { const result = await judgeWithLegacyKeys.evaluate('test input', 'test output'); expect(result).toEqual({ - evals: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', + metricKey: 'relevance', success: true, + sampled: true, judgeConfigKey: 'test-judge', }); }); @@ -330,10 +339,11 @@ describe('Judge', () => { // Should skip empty and whitespace strings, use first valid value expect(result).toEqual({ - evals: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', + metricKey: 'relevance', success: true, + sampled: true, judgeConfigKey: 'test-judge', }); }); @@ -369,15 +379,16 @@ describe('Judge', () => { const result = 
await judgeWithBoth.evaluate('test input', 'test output'); expect(result).toEqual({ - evals: { - helpfulness: { score: 0.7, reasoning: 'The response is helpful' }, - }, + score: 0.7, + reasoning: 'The response is helpful', + metricKey: 'helpfulness', success: true, + sampled: true, judgeConfigKey: 'test-judge', }); }); - it('returns undefined when messages are missing', async () => { + it('returns error result when messages are missing', async () => { const configWithoutMessages: LDAIJudgeConfig = { ...judgeConfig, messages: undefined, @@ -386,14 +397,19 @@ describe('Judge', () => { const result = await judgeWithoutMessages.evaluate('test input', 'test output'); - expect(result).toBeUndefined(); + expect(result).toEqual({ + success: false, + sampled: true, + errorMessage: 'Judge configuration must include messages', + judgeConfigKey: 'test-judge', + }); expect(mockLogger.warn).toHaveBeenCalledWith( 'Judge configuration must include messages', mockTrackData, ); }); - it('returns empty evaluations with success false when expected metric is missing', async () => { + it('returns result with success false when expected metric is missing', async () => { const mockStructuredResponse: StructuredResponse = { data: { evaluations: { @@ -417,13 +433,13 @@ describe('Judge', () => { const result = await judge.evaluate('test input', 'test output'); expect(result).toEqual({ - evals: {}, success: false, + sampled: true, judgeConfigKey: 'test-judge', }); }); - it('returns empty evaluations when response structure is malformed', async () => { + it('returns result with success false when response structure is malformed', async () => { const mockStructuredResponse: StructuredResponse = { data: { relevance: { score: 0.8, reasoning: 'Good' }, @@ -447,8 +463,8 @@ describe('Judge', () => { const result = await judge.evaluate('test input', 'test output'); expect(result).toEqual({ - evals: {}, success: false, + sampled: true, judgeConfigKey: 'test-judge', }); }); @@ -460,9 +476,9 @@ 
describe('Judge', () => { const result = await judge.evaluate('test input', 'test output'); expect(result).toEqual({ - evals: {}, success: false, - error: 'Provider error', + sampled: true, + errorMessage: 'Provider error', judgeConfigKey: 'test-judge', }); expect(mockLogger.error).toHaveBeenCalledWith('Judge evaluation failed:', error); @@ -474,9 +490,9 @@ describe('Judge', () => { const result = await judge.evaluate('test input', 'test output'); expect(result).toEqual({ - evals: {}, success: false, - error: 'Unknown error', + sampled: true, + errorMessage: 'Unknown error', judgeConfigKey: 'test-judge', }); }); @@ -522,13 +538,11 @@ describe('Judge', () => { const result = await judge.evaluateMessages(messages, response); expect(result).toEqual({ - evals: { - relevance: { - score: 0.8, - reasoning: 'The response is relevant to the question', - }, - }, + score: 0.8, + reasoning: 'The response is relevant to the question', + metricKey: 'relevance', success: true, + sampled: true, judgeConfigKey: 'test-judge', }); @@ -560,7 +574,11 @@ describe('Judge', () => { const result = await judge.evaluateMessages(messages, response, 0.5); - expect(result).toBeUndefined(); + expect(result).toEqual({ + success: false, + sampled: false, + judgeConfigKey: 'test-judge', + }); expect(mockProvider.invokeStructuredModel).not.toHaveBeenCalled(); Math.random = originalRandom; @@ -611,11 +629,12 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); expect(result).toEqual({ - relevance: { score: 0.8, reasoning: 'Good' }, + score: 0.8, + reasoning: 'Good', }); }); - it('returns empty object for invalid response data', () => { + it('returns undefined for invalid response data', () => { // eslint-disable-next-line no-underscore-dangle const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); const responseData = { @@ -624,7 +643,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - 
expect(result).toEqual({}); + expect(result).toBeUndefined(); }); it('handles missing score or reasoning fields', () => { @@ -638,7 +657,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - expect(result).toEqual({}); + expect(result).toBeUndefined(); }); it('handles invalid score values out of range', () => { @@ -652,7 +671,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - expect(result).toEqual({}); + expect(result).toBeUndefined(); expect(mockLogger.warn).toHaveBeenCalledWith( expect.stringContaining('Invalid score evaluated for relevance: 1.5'), mockTrackData, @@ -670,7 +689,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - expect(result).toEqual({}); + expect(result).toBeUndefined(); expect(mockLogger.warn).toHaveBeenCalledWith( expect.stringContaining('Invalid score evaluated for relevance: -0.1'), mockTrackData, @@ -688,7 +707,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - expect(result).toEqual({}); + expect(result).toBeUndefined(); expect(mockLogger.warn).toHaveBeenCalledWith( expect.stringContaining('Invalid reasoning evaluated for relevance: 123'), mockTrackData, @@ -706,7 +725,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - expect(result).toEqual({}); + expect(result).toBeUndefined(); expect(mockLogger.warn).toHaveBeenCalledWith( 'Missing evaluation for metric key: relevance', mockTrackData, @@ -723,7 +742,12 @@ describe('Judge', () => { const result = await judgeWithEmptyKeys.evaluate('test input', 'test output'); - expect(result).toBeUndefined(); + expect(result).toEqual({ + success: false, + sampled: true, + errorMessage: 'Judge configuration is missing required evaluation metric key', + judgeConfigKey: 'test-judge', + }); expect(mockLogger.warn).toHaveBeenCalledWith( 'Judge configuration 
is missing required evaluation metric key', mockTrackData, @@ -741,7 +765,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - expect(result).toEqual({}); + expect(result).toBeUndefined(); expect(mockLogger.warn).toHaveBeenCalledWith( 'Missing evaluation for metric key: relevance', mockTrackData, @@ -759,7 +783,7 @@ describe('Judge', () => { const result = parseResponse(responseData, 'relevance', mockTracker); - expect(result).toEqual({}); + expect(result).toBeUndefined(); expect(mockLogger.warn).toHaveBeenCalledWith( 'Missing evaluation for metric key: relevance', mockTrackData, diff --git a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts index a4b40b62cb..4263bc3048 100644 --- a/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDAIConfigTrackerImpl.test.ts @@ -841,8 +841,8 @@ describe('trackMetricsOf', () => { }); }); -describe('trackJudgeResponse', () => { - it('tracks evaluation metric key with score', () => { +describe('trackJudgeResult', () => { + it('tracks metric key with score', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, testRunId, @@ -854,15 +854,14 @@ describe('trackJudgeResponse', () => { testContext, ); - const judgeResponse = { + tracker.trackJudgeResult({ judgeConfigKey: 'test-judge', - evals: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, success: true, - }; - - tracker.trackJudgeResponse(judgeResponse); + sampled: true, + score: 0.8, + reasoning: 'The response is relevant', + metricKey: 'relevance', + }); expect(mockTrack).toHaveBeenCalledWith( 'relevance', @@ -872,7 +871,7 @@ describe('trackJudgeResponse', () => { ); }); - it('tracks multiple evaluation metrics when present', () => { + it('does not track when sampled is false', () => { const tracker = new LDAIConfigTrackerImpl( mockLdClient, testRunId, @@ -884,29 
+883,38 @@ describe('trackJudgeResponse', () => { testContext, ); - const judgeResponse = { + tracker.trackJudgeResult({ judgeConfigKey: 'test-judge', - evals: { - relevance: { score: 0.8, reasoning: 'Relevant' }, - accuracy: { score: 0.9, reasoning: 'Accurate' }, - }, - success: true, - }; + success: false, + sampled: false, + score: 0.8, + metricKey: 'relevance', + }); - tracker.trackJudgeResponse(judgeResponse); + expect(mockTrack).not.toHaveBeenCalled(); + }); - expect(mockTrack).toHaveBeenCalledWith( - 'relevance', - testContext, - { ...getExpectedTrackData(), judgeConfigKey: 'test-judge' }, - 0.8, - ); - expect(mockTrack).toHaveBeenCalledWith( - 'accuracy', + it('does not track when success is false', () => { + const tracker = new LDAIConfigTrackerImpl( + mockLdClient, + testRunId, + configKey, + variationKey, + version, + modelName, + providerName, testContext, - { ...getExpectedTrackData(), judgeConfigKey: 'test-judge' }, - 0.9, ); + + tracker.trackJudgeResult({ + judgeConfigKey: 'test-judge', + success: false, + sampled: true, + score: 0.8, + metricKey: 'relevance', + }); + + expect(mockTrack).not.toHaveBeenCalled(); }); }); diff --git a/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts index fe42bf4e4d..77af551302 100644 --- a/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts @@ -140,7 +140,7 @@ it('tracks path', () => { ); }); -it('tracks judge response', () => { +it('tracks judge result', () => { const tracker = new LDGraphTrackerImpl( mockLdClient, graphKey, @@ -148,15 +148,14 @@ it('tracks judge response', () => { version, testContext, ); - const response = { + tracker.trackJudgeResult({ judgeConfigKey: 'my-judge', - evals: { - relevance: { score: 0.9, reasoning: 'Relevant' }, - accuracy: { score: 0.85, reasoning: 'Accurate' }, - }, success: true, - }; - tracker.trackJudgeResponse(response); + sampled: 
true, + score: 0.9, + reasoning: 'Relevant', + metricKey: 'relevance', + }); expect(mockTrack).toHaveBeenCalledWith( 'relevance', @@ -164,15 +163,9 @@ it('tracks judge response', () => { { ...getExpectedTrackData(), judgeConfigKey: 'my-judge' }, 0.9, ); - expect(mockTrack).toHaveBeenCalledWith( - 'accuracy', - testContext, - { ...getExpectedTrackData(), judgeConfigKey: 'my-judge' }, - 0.85, - ); }); -it('tracks judge response without judgeConfigKey', () => { +it('tracks judge result without judgeConfigKey', () => { const tracker = new LDGraphTrackerImpl( mockLdClient, graphKey, @@ -180,15 +173,53 @@ it('tracks judge response without judgeConfigKey', () => { version, testContext, ); - const response = { - evals: { relevance: { score: 0.7, reasoning: 'Somewhat relevant' } }, + tracker.trackJudgeResult({ success: true, - }; - tracker.trackJudgeResponse(response); + sampled: true, + score: 0.7, + reasoning: 'Somewhat relevant', + metricKey: 'relevance', + }); expect(mockTrack).toHaveBeenCalledWith('relevance', testContext, getExpectedTrackData(), 0.7); }); +it('does not track judge result when not sampled', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackJudgeResult({ + judgeConfigKey: 'my-judge', + success: false, + sampled: false, + }); + + expect(mockTrack).not.toHaveBeenCalled(); +}); + +it('does not track judge result when success is false', () => { + const tracker = new LDGraphTrackerImpl( + mockLdClient, + graphKey, + variationKey, + version, + testContext, + ); + tracker.trackJudgeResult({ + judgeConfigKey: 'my-judge', + success: false, + sampled: true, + score: 0.9, + metricKey: 'relevance', + }); + + expect(mockTrack).not.toHaveBeenCalled(); +}); + it('tracks redirect', () => { const tracker = new LDGraphTrackerImpl( mockLdClient, diff --git a/packages/sdk/server-ai/examples/direct-judge/src/index.ts b/packages/sdk/server-ai/examples/direct-judge/src/index.ts index 
349b72f1a9..0be897e32c 100644 --- a/packages/sdk/server-ai/examples/direct-judge/src/index.ts +++ b/packages/sdk/server-ai/examples/direct-judge/src/index.ts @@ -65,13 +65,13 @@ async function main() { console.log('Input:', input); console.log('Output:', output); - const judgeResponse = await judge.evaluate(input, output); + const judgeResult = await judge.evaluate(input, output); - // Track the judge evaluation scores on the tracker for the aiConfig you are evaluating. + // Track the judge result on the tracker for the aiConfig you are evaluating. // Example: - // aiConfig.tracker.trackEvalScores(judgeResponse?.evals); + // aiConfig.tracker.trackJudgeResult(judgeResult); - console.log('Judge Response:', judgeResponse); + console.log('Judge Result:', judgeResult); console.log('Success.'); } catch (err) { diff --git a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts index d87729c14f..b3ed3ae9f1 100644 --- a/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDAIConfigTrackerImpl.ts @@ -2,7 +2,7 @@ import { LDContext } from '@launchdarkly/js-server-sdk-common'; import { LDAIConfigTracker } from './api/config'; import { LDAIMetricSummary } from './api/config/LDAIConfigTracker'; -import { EvalScore, JudgeResponse } from './api/judge/types'; +import { LDJudgeResult } from './api/judge/types'; import { createBedrockTokenUsage, createOpenAiUsage, @@ -119,21 +119,18 @@ export class LDAIConfigTrackerImpl implements LDAIConfigTracker { ); } - trackEvalScores(scores: Record) { - Object.entries(scores).forEach(([metricKey, evalScore]) => { - this._ldClient.track(metricKey, this._context, this.getTrackData(), evalScore.score); - }); - } - - trackJudgeResponse(response: JudgeResponse) { - Object.entries(response.evals).forEach(([metricKey, evalScore]) => { + trackJudgeResult(result: LDJudgeResult) { + if (!result.sampled || !result.success) { + return; + } + if (result.metricKey !== 
undefined && result.score !== undefined) { this._ldClient.track( - metricKey, + result.metricKey, this._context, - { ...this.getTrackData(), judgeConfigKey: response.judgeConfigKey }, - evalScore.score, + { ...this.getTrackData(), judgeConfigKey: result.judgeConfigKey }, + result.score, ); - }); + } } trackToolCall(toolKey: string): void { diff --git a/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts index 4c08e26a58..d1f0602f50 100644 --- a/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts @@ -1,7 +1,7 @@ import { LDContext } from '@launchdarkly/js-server-sdk-common'; import { LDGraphMetricSummary, LDGraphTracker } from './api/graph/LDGraphTracker'; -import { JudgeResponse } from './api/judge/types'; +import { LDJudgeResult } from './api/judge/types'; import { LDTokenUsage } from './api/metrics'; import { LDClientMin } from './LDClientMin'; @@ -76,14 +76,17 @@ export class LDGraphTrackerImpl implements LDGraphTracker { this._ldClient.track('$ld:ai:graph:path', this._context, { ...this.getTrackData(), path }, 1); } - trackJudgeResponse(response: JudgeResponse): void { - const trackData = response.judgeConfigKey - ? { ...this.getTrackData(), judgeConfigKey: response.judgeConfigKey } - : this.getTrackData(); + trackJudgeResult(result: LDJudgeResult): void { + if (!result.sampled || !result.success) { + return; + } + if (result.metricKey !== undefined && result.score !== undefined) { + const trackData = result.judgeConfigKey + ? 
{ ...this.getTrackData(), judgeConfigKey: result.judgeConfigKey } + : this.getTrackData(); - Object.entries(response.evals).forEach(([metricKey, evalScore]) => { - this._ldClient.track(metricKey, this._context, trackData, evalScore.score); - }); + this._ldClient.track(result.metricKey, this._context, trackData, result.score); + } } trackRedirect(sourceKey: string, redirectedTarget: string): void { diff --git a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts b/packages/sdk/server-ai/src/api/chat/TrackedChat.ts index 054969dc3d..2d5b21a85f 100644 --- a/packages/sdk/server-ai/src/api/chat/TrackedChat.ts +++ b/packages/sdk/server-ai/src/api/chat/TrackedChat.ts @@ -2,7 +2,7 @@ import { LDLogger } from '@launchdarkly/js-server-sdk-common'; import { LDAICompletionConfig, LDMessage } from '../config/types'; import { Judge } from '../judge/Judge'; -import { JudgeResponse } from '../judge/types'; +import { LDJudgeResult } from '../judge/types'; import { AIProvider } from '../providers/AIProvider'; import { ChatResponse } from './types'; @@ -54,10 +54,8 @@ export class TrackedChat { ) { response.evaluations = this._evaluateWithJudges(this.messages, response).then( (evaluations) => { - evaluations.forEach((judgeResponse) => { - if (judgeResponse?.success) { - tracker.trackJudgeResponse(judgeResponse); - } + evaluations.forEach((judgeResult) => { + tracker.trackJudgeResult(judgeResult); }); return evaluations; }, @@ -79,7 +77,7 @@ export class TrackedChat { private async _evaluateWithJudges( messages: LDMessage[], response: ChatResponse, - ): Promise> { + ): Promise { const judgeConfigs = this.aiConfig.judgeConfiguration!.judges; // Start all judge evaluations in parallel @@ -89,7 +87,12 @@ export class TrackedChat { this._logger?.warn( `Judge configuration is not enabled for ${judgeConfig.key} in ${this.aiConfig.key}`, ); - return undefined; + const result: LDJudgeResult = { + success: false, + sampled: true, + errorMessage: `Judge configuration is not enabled for 
${judgeConfig.key}`, + }; + return result; } return judge.evaluateMessages(messages, response, judgeConfig.samplingRate); @@ -98,7 +101,17 @@ export class TrackedChat { // ensure all evaluations complete even if some fail const results = await Promise.allSettled(evaluationPromises); - return results.map((result) => (result.status === 'fulfilled' ? result.value : undefined)); + return results.map((settled) => { + if (settled.status === 'fulfilled') { + return settled.value; + } + const result: LDJudgeResult = { + success: false, + sampled: true, + errorMessage: 'Judge evaluation failed', + }; + return result; + }); } /** diff --git a/packages/sdk/server-ai/src/api/chat/types.ts b/packages/sdk/server-ai/src/api/chat/types.ts index 5b32109fcf..19173e30f8 100644 --- a/packages/sdk/server-ai/src/api/chat/types.ts +++ b/packages/sdk/server-ai/src/api/chat/types.ts @@ -1,5 +1,5 @@ import { LDMessage } from '../config/types'; -import { JudgeResponse } from '../judge/types'; +import { LDJudgeResult } from '../judge/types'; import { LDAIMetrics } from '../metrics/LDAIMetrics'; /** @@ -20,5 +20,5 @@ export interface ChatResponse { * Promise that resolves to judge evaluation results. * Only present when judges are configured for evaluation. */ - evaluations?: Promise>; + evaluations?: Promise; } diff --git a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts index 883177becb..e0aff2c6b5 100644 --- a/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts +++ b/packages/sdk/server-ai/src/api/config/LDAIConfigTracker.ts @@ -1,4 +1,4 @@ -import { EvalScore, JudgeResponse } from '../judge/types'; +import { LDJudgeResult } from '../judge/types'; import { LDAIMetrics, LDFeedbackKind, LDTokenUsage } from '../metrics'; /** @@ -116,18 +116,13 @@ export interface LDAIConfigTracker { trackTimeToFirstToken(timeToFirstTokenMs: number): void; /** - * Track evaluation scores for multiple metrics. 
+ * Track a judge evaluation result. * - * @param scores Record mapping metric keys to their evaluation scores - */ - trackEvalScores(scores: Record): void; - - /** - * Track a judge response containing evaluation scores and judge configuration key. + * No event is emitted when the result was not sampled (result.sampled is false). * - * @param response Judge response containing evaluation scores and judge configuration key + * @param result Judge result containing score, reasoning, and metadata */ - trackJudgeResponse(response: JudgeResponse): void; + trackJudgeResult(result: LDJudgeResult): void; /** * Track a single tool invocation. diff --git a/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts index 94cf30658f..9ce432d1db 100644 --- a/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts +++ b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts @@ -1,4 +1,4 @@ -import { JudgeResponse } from '../judge/types'; +import { LDJudgeResult } from '../judge/types'; import { LDTokenUsage } from '../metrics'; /** @@ -83,11 +83,13 @@ export interface LDGraphTracker { trackPath(path: string[]): void; /** - * Track judge responses for the final graph output. + * Track a judge evaluation result for the final graph output. * - * @param response Judge response containing evaluation scores. + * No event is emitted when the result was not sampled (result.sampled is false). + * + * @param result Judge result containing score, reasoning, and metadata. */ - trackJudgeResponse(response: JudgeResponse): void; + trackJudgeResult(result: LDJudgeResult): void; /** * Track when a node redirects to a different target than originally specified. 
diff --git a/packages/sdk/server-ai/src/api/judge/Judge.ts b/packages/sdk/server-ai/src/api/judge/Judge.ts index 1bab8d1a12..e36ab138cd 100644 --- a/packages/sdk/server-ai/src/api/judge/Judge.ts +++ b/packages/sdk/server-ai/src/api/judge/Judge.ts @@ -7,7 +7,7 @@ import { LDAIConfigTracker } from '../config/LDAIConfigTracker'; import { LDAIJudgeConfig, LDMessage } from '../config/types'; import { AIProvider } from '../providers/AIProvider'; import { EvaluationSchemaBuilder } from './EvaluationSchemaBuilder'; -import { EvalScore, JudgeResponse, StructuredResponse } from './types'; +import { LDJudgeResult, StructuredResponse } from './types'; /** * Judge implementation that handles evaluation functionality and conversation management. @@ -57,13 +57,15 @@ export class Judge { * @param input The input prompt or question that was provided to the AI * @param output The AI-generated response to be evaluated * @param samplingRate Sampling rate (0-1) to determine if evaluation should be processed (defaults to 1) - * @returns Promise that resolves to evaluation results or undefined if not sampled + * @returns Promise that resolves to evaluation results */ - async evaluate( - input: string, - output: string, - samplingRate: number = 1, - ): Promise { + async evaluate(input: string, output: string, samplingRate: number = 1): Promise { + const result: LDJudgeResult = { + success: false, + sampled: false, + judgeConfigKey: this._aiConfig.key, + }; + const tracker = this._aiConfig.createTracker!(); try { const evaluationMetricKey = this._getEvaluationMetricKey(); @@ -72,51 +74,54 @@ export class Judge { 'Judge configuration is missing required evaluation metric key', tracker.getTrackData(), ); - return undefined; + result.sampled = true; + result.errorMessage = 'Judge configuration is missing required evaluation metric key'; + return result; } if (!this._aiConfig.messages) { this._logger?.warn('Judge configuration must include messages', tracker.getTrackData()); - return 
undefined; + result.sampled = true; + result.errorMessage = 'Judge configuration must include messages'; + return result; } if (Math.random() > samplingRate) { this._logger?.debug(`Judge evaluation skipped due to sampling rate: ${samplingRate}`); - return undefined; + return result; } + result.sampled = true; + const messages = this._constructEvaluationMessages(input, output); const response = await tracker.trackMetricsOf( - (result: StructuredResponse) => result.metrics, + (r: StructuredResponse) => r.metrics, () => this._aiProvider.invokeStructuredModel(messages, this._evaluationResponseStructure), ); - let { success } = response.metrics; - - const evals = this._parseEvaluationResponse(response.data, evaluationMetricKey, tracker); + const evalResult = this._parseEvaluationResponse(response.data, evaluationMetricKey, tracker); - if (!evals[evaluationMetricKey]) { + if (!evalResult) { this._logger?.warn( 'Judge evaluation did not return the expected evaluation', tracker.getTrackData(), ); - success = false; + return result; } return { - evals, - success, - judgeConfigKey: this._aiConfig.key, + ...result, + success: response.metrics.success, + score: evalResult.score, + reasoning: evalResult.reasoning, + metricKey: evaluationMetricKey, }; } catch (error) { this._logger?.error('Judge evaluation failed:', error); - return { - evals: {}, - success: false, - error: error instanceof Error ? error.message : 'Unknown error', - judgeConfigKey: this._aiConfig.key, - }; + result.sampled = true; + result.errorMessage = error instanceof Error ? 
error.message : 'Unknown error'; + return result; } } @@ -126,13 +131,13 @@ export class Judge { * @param messages Array of messages representing the conversation history * @param response The AI response to be evaluated * @param samplingRatio Sampling ratio (0-1) to determine if evaluation should be processed (defaults to 1) - * @returns Promise that resolves to evaluation results or undefined if not sampled + * @returns Promise that resolves to evaluation results */ async evaluateMessages( messages: LDMessage[], response: ChatResponse, samplingRatio: number = 1, - ): Promise { + ): Promise { const input = messages.length === 0 ? '' : messages.map((msg) => msg.content).join('\r\n'); const output = response.message.content; @@ -177,18 +182,18 @@ export class Judge { /** * Parses the structured evaluation response from the AI provider. + * Returns score and reasoning, or undefined if parsing fails. */ private _parseEvaluationResponse( data: Record, evaluationMetricKey: string, tracker: LDAIConfigTracker, - ): Record { + ): { score: number; reasoning: string } | undefined { const evaluations = data.evaluations as Record; - const results: Record = {}; if (!data.evaluations || typeof data.evaluations !== 'object') { this._logger?.warn('Invalid response: missing or invalid evaluations object'); - return results; + return undefined; } const evaluation = evaluations[evaluationMetricKey]; @@ -198,7 +203,7 @@ export class Judge { `Missing evaluation for metric key: ${evaluationMetricKey}`, tracker.getTrackData(), ); - return results; + return undefined; } const evalData = evaluation as Record; @@ -208,7 +213,7 @@ export class Judge { `Invalid score evaluated for ${evaluationMetricKey}: ${evalData.score}. 
Score must be a number between 0 and 1 inclusive`, tracker.getTrackData(), ); - return results; + return undefined; } if (typeof evalData.reasoning !== 'string') { @@ -216,14 +221,12 @@ export class Judge { `Invalid reasoning evaluated for ${evaluationMetricKey}: ${evalData.reasoning}. Reasoning must be a string`, tracker.getTrackData(), ); - return results; + return undefined; } - results[evaluationMetricKey] = { + return { score: evalData.score, reasoning: evalData.reasoning, }; - - return results; } } diff --git a/packages/sdk/server-ai/src/api/judge/index.ts b/packages/sdk/server-ai/src/api/judge/index.ts index 912ec47fb0..ca86630278 100644 --- a/packages/sdk/server-ai/src/api/judge/index.ts +++ b/packages/sdk/server-ai/src/api/judge/index.ts @@ -1,2 +1,2 @@ export { Judge } from './Judge'; -export type { EvalScore, JudgeResponse, StructuredResponse } from './types'; +export type { LDJudgeResult, StructuredResponse } from './types'; diff --git a/packages/sdk/server-ai/src/api/judge/types.ts b/packages/sdk/server-ai/src/api/judge/types.ts index 68ad141c89..b9d8a05a46 100644 --- a/packages/sdk/server-ai/src/api/judge/types.ts +++ b/packages/sdk/server-ai/src/api/judge/types.ts @@ -17,25 +17,21 @@ export interface StructuredResponse { } /** - * Score and reasoning for a single evaluation metric. + * Result from a judge evaluation containing score, reasoning, and metadata. */ -export interface EvalScore { - /** Score between 0.0 and 1.0 indicating the evaluation result for this metric */ - score: number; - /** Reasoning behind the provided score for this metric */ - reasoning: string; -} - -/** - * Response from a judge evaluation containing scores and reasoning for multiple metrics. 
- */ -export interface JudgeResponse { - /** The key of the judge configuration that was used to generate this response */ +export interface LDJudgeResult { + /** The key of the judge configuration that was used to generate this result */ judgeConfigKey?: string; - /** Dictionary where keys are metric names and values contain score and reasoning */ - evals: Record; /** Whether the evaluation completed successfully */ success: boolean; /** Error message if evaluation failed */ - error?: string; + errorMessage?: string; + /** Whether this evaluation was sampled (i.e. actually run). False when skipped by sampling. */ + sampled: boolean; + /** The metric key for this evaluation */ + metricKey?: string; + /** Score between 0.0 and 1.0 indicating the evaluation result */ + score?: number; + /** Reasoning behind the provided score */ + reasoning?: string; } From 524c99e60b1bd1621bf49452e00ab9a240819c8e Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Fri, 17 Apr 2026 11:42:59 -0500 Subject: [PATCH 5/7] feat: simplify evaluation schema to flat score/reasoning shape (#1286) --- .../sdk/server-ai/__tests__/Judge.test.ts | 229 +++++------------- .../src/api/judge/EvaluationSchemaBuilder.ts | 49 ---- packages/sdk/server-ai/src/api/judge/Judge.ts | 66 +++-- 3 files changed, 89 insertions(+), 255 deletions(-) delete mode 100644 packages/sdk/server-ai/src/api/judge/EvaluationSchemaBuilder.ts diff --git a/packages/sdk/server-ai/__tests__/Judge.test.ts b/packages/sdk/server-ai/__tests__/Judge.test.ts index ee9ff0351f..43ea75e0ab 100644 --- a/packages/sdk/server-ai/__tests__/Judge.test.ts +++ b/packages/sdk/server-ai/__tests__/Judge.test.ts @@ -70,14 +70,12 @@ describe('Judge', () => { it('evaluates AI response successfully', async () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, - }, + score: 0.8, + reasoning: 'The response is relevant to the question', }, 
rawResponse: JSON.stringify({ - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, - }, + score: 0.8, + reasoning: 'The response is relevant to the question', }), metrics: { success: true, @@ -125,14 +123,12 @@ describe('Judge', () => { it('returns evaluation result with correct evaluationMetricKey for tracker integration', async () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - relevance: { score: 0.85, reasoning: 'Highly relevant response' }, - }, + score: 0.85, + reasoning: 'Highly relevant response', }, rawResponse: JSON.stringify({ - evaluations: { - relevance: { score: 0.85, reasoning: 'Highly relevant response' }, - }, + score: 0.85, + reasoning: 'Highly relevant response', }), metrics: { success: true, @@ -159,14 +155,12 @@ describe('Judge', () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - relevance: { score: 0.8, reasoning: 'Good' }, - }, + score: 0.8, + reasoning: 'Good', }, rawResponse: JSON.stringify({ - evaluations: { - relevance: { score: 0.8, reasoning: 'Good' }, - }, + score: 0.8, + reasoning: 'Good', }), metrics: { success: true, @@ -237,14 +231,12 @@ describe('Judge', () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', }, rawResponse: JSON.stringify({ - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', }), metrics: { success: true, @@ -277,14 +269,12 @@ describe('Judge', () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', }, rawResponse: JSON.stringify({ - evaluations: { - relevance: { score: 0.8, reasoning: 'The 
response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', }), metrics: { success: true, @@ -317,14 +307,12 @@ describe('Judge', () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', }, rawResponse: JSON.stringify({ - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant' }, - }, + score: 0.8, + reasoning: 'The response is relevant', }), metrics: { success: true, @@ -358,14 +346,12 @@ describe('Judge', () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - helpfulness: { score: 0.7, reasoning: 'The response is helpful' }, - }, + score: 0.7, + reasoning: 'The response is helpful', }, rawResponse: JSON.stringify({ - evaluations: { - helpfulness: { score: 0.7, reasoning: 'The response is helpful' }, - }, + score: 0.7, + reasoning: 'The response is helpful', }), metrics: { success: true, @@ -409,18 +395,10 @@ describe('Judge', () => { ); }); - it('returns result with success false when expected metric is missing', async () => { + it('returns result with success false when response has no score or reasoning', async () => { const mockStructuredResponse: StructuredResponse = { - data: { - evaluations: { - accuracy: { score: 0.9, reasoning: 'Accurate' }, - }, - }, - rawResponse: JSON.stringify({ - evaluations: { - accuracy: { score: 0.9, reasoning: 'Accurate' }, - }, - }), + data: {}, + rawResponse: '{}', metrics: { success: true, usage: { total: 100, input: 50, output: 50 }, @@ -437,19 +415,23 @@ describe('Judge', () => { sampled: true, judgeConfigKey: 'test-judge', }); + expect(mockLogger.warn).toHaveBeenCalledWith( + 'Could not parse evaluation response: {}', + mockTrackData, + ); }); it('returns result with success false when response structure is malformed', async () => { const mockStructuredResponse: StructuredResponse = { 
data: { - relevance: { score: 0.8, reasoning: 'Good' }, - accuracy: { score: 0.9, reasoning: 'Accurate' }, - helpfulness: { score: 0.7, reasoning: 'Helpful' }, + evaluations: { + relevance: { score: 0.8, reasoning: 'Good' }, + }, }, rawResponse: JSON.stringify({ - relevance: { score: 0.8, reasoning: 'Good' }, - accuracy: { score: 0.9, reasoning: 'Accurate' }, - helpfulness: { score: 0.7, reasoning: 'Helpful' }, + evaluations: { + relevance: { score: 0.8, reasoning: 'Good' }, + }, }), metrics: { success: true, @@ -467,6 +449,10 @@ describe('Judge', () => { sampled: true, judgeConfigKey: 'test-judge', }); + expect(mockLogger.warn).toHaveBeenCalledWith( + expect.stringContaining('Could not parse evaluation response:'), + mockTrackData, + ); }); it('handles provider errors gracefully', async () => { @@ -517,14 +503,12 @@ describe('Judge', () => { const mockStructuredResponse: StructuredResponse = { data: { - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, - }, + score: 0.8, + reasoning: 'The response is relevant to the question', }, rawResponse: JSON.stringify({ - evaluations: { - relevance: { score: 0.8, reasoning: 'The response is relevant to the question' }, - }, + score: 0.8, + reasoning: 'The response is relevant to the question', }), metrics: { success: true, @@ -620,13 +604,9 @@ describe('Judge', () => { it('parses valid evaluation response correctly', () => { // eslint-disable-next-line no-underscore-dangle const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - relevance: { score: 0.8, reasoning: 'Good' }, - }, - }; + const responseData = { score: 0.8, reasoning: 'Good' }; - const result = parseResponse(responseData, 'relevance', mockTracker); + const result = parseResponse(responseData); expect(result).toEqual({ score: 0.8, @@ -634,28 +614,21 @@ describe('Judge', () => { }); }); - it('returns undefined for invalid response data', () => { + 
it('returns undefined for empty response data', () => { // eslint-disable-next-line no-underscore-dangle const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - relevance: { score: 0.8, reasoning: 'Good' }, - }; - const result = parseResponse(responseData, 'relevance', mockTracker); + const result = parseResponse({}); expect(result).toBeUndefined(); }); - it('handles missing score or reasoning fields', () => { + it('handles missing reasoning field', () => { // eslint-disable-next-line no-underscore-dangle const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - relevance: { score: 0.8 }, - }, - }; + const responseData = { score: 0.8 }; - const result = parseResponse(responseData, 'relevance', mockTracker); + const result = parseResponse(responseData); expect(result).toBeUndefined(); }); @@ -663,73 +636,31 @@ describe('Judge', () => { it('handles invalid score values out of range', () => { // eslint-disable-next-line no-underscore-dangle const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - relevance: { score: 1.5, reasoning: 'Good' }, - }, - }; + const responseData = { score: 1.5, reasoning: 'Good' }; - const result = parseResponse(responseData, 'relevance', mockTracker); + const result = parseResponse(responseData); expect(result).toBeUndefined(); - expect(mockLogger.warn).toHaveBeenCalledWith( - expect.stringContaining('Invalid score evaluated for relevance: 1.5'), - mockTrackData, - ); }); it('handles negative score values', () => { // eslint-disable-next-line no-underscore-dangle const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - relevance: { score: -0.1, reasoning: 'Good' }, - }, - }; + const responseData = { score: -0.1, reasoning: 'Good' }; - const result = parseResponse(responseData, 'relevance', mockTracker); + const 
result = parseResponse(responseData); expect(result).toBeUndefined(); - expect(mockLogger.warn).toHaveBeenCalledWith( - expect.stringContaining('Invalid score evaluated for relevance: -0.1'), - mockTrackData, - ); }); it('handles invalid reasoning type', () => { // eslint-disable-next-line no-underscore-dangle const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - relevance: { score: 0.8, reasoning: 123 }, - }, - }; + const responseData = { score: 0.8, reasoning: 123 }; - const result = parseResponse(responseData, 'relevance', mockTracker); + const result = parseResponse(responseData); expect(result).toBeUndefined(); - expect(mockLogger.warn).toHaveBeenCalledWith( - expect.stringContaining('Invalid reasoning evaluated for relevance: 123'), - mockTrackData, - ); - }); - - it('handles missing evaluation when key does not exist in response', () => { - // eslint-disable-next-line no-underscore-dangle - const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - accuracy: { score: 0.9, reasoning: 'Accurate' }, - }, - }; - - const result = parseResponse(responseData, 'relevance', mockTracker); - - expect(result).toBeUndefined(); - expect(mockLogger.warn).toHaveBeenCalledWith( - 'Missing evaluation for metric key: relevance', - mockTrackData, - ); }); it('handles empty evaluationMetricKeys array fallback', async () => { @@ -753,41 +684,5 @@ describe('Judge', () => { mockTrackData, ); }); - - it('handles evaluation value that is not an object', () => { - // eslint-disable-next-line no-underscore-dangle - const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - relevance: 'not an object', - }, - }; - - const result = parseResponse(responseData, 'relevance', mockTracker); - - expect(result).toBeUndefined(); - expect(mockLogger.warn).toHaveBeenCalledWith( - 'Missing evaluation for metric key: 
relevance', - mockTrackData, - ); - }); - - it('handles null evaluation value', () => { - // eslint-disable-next-line no-underscore-dangle - const parseResponse = (judge as any)._parseEvaluationResponse.bind(judge); - const responseData = { - evaluations: { - relevance: null, - }, - }; - - const result = parseResponse(responseData, 'relevance', mockTracker); - - expect(result).toBeUndefined(); - expect(mockLogger.warn).toHaveBeenCalledWith( - 'Missing evaluation for metric key: relevance', - mockTrackData, - ); - }); }); }); diff --git a/packages/sdk/server-ai/src/api/judge/EvaluationSchemaBuilder.ts b/packages/sdk/server-ai/src/api/judge/EvaluationSchemaBuilder.ts deleted file mode 100644 index 06f745a418..0000000000 --- a/packages/sdk/server-ai/src/api/judge/EvaluationSchemaBuilder.ts +++ /dev/null @@ -1,49 +0,0 @@ -/** - * Internal class for building dynamic evaluation response schemas. - * Not exported - only used internally by TrackedJudge. - */ -class EvaluationSchemaBuilder { - static build(evaluationMetricKey?: string): Record { - if (!evaluationMetricKey) { - return {}; - } - return { - type: 'object', - properties: { - evaluations: { - type: 'object', - description: `Object containing evaluation results for ${evaluationMetricKey} metric`, - properties: { - [evaluationMetricKey]: this._buildKeySchema(evaluationMetricKey), - }, - required: [evaluationMetricKey], - additionalProperties: false, - }, - }, - required: ['evaluations'], - additionalProperties: false, - } as const; - } - - private static _buildKeySchema(key: string) { - return { - type: 'object', - properties: { - score: { - type: 'number', - minimum: 0, - maximum: 1, - description: `Score between 0.0 and 1.0 for ${key}`, - }, - reasoning: { - type: 'string', - description: `Reasoning behind the score for ${key}`, - }, - }, - required: ['score', 'reasoning'], - additionalProperties: false, - }; - } -} - -export { EvaluationSchemaBuilder }; diff --git a/packages/sdk/server-ai/src/api/judge/Judge.ts 
b/packages/sdk/server-ai/src/api/judge/Judge.ts index e36ab138cd..ef49e3b723 100644 --- a/packages/sdk/server-ai/src/api/judge/Judge.ts +++ b/packages/sdk/server-ai/src/api/judge/Judge.ts @@ -3,12 +3,28 @@ import Mustache from 'mustache'; import { LDLogger } from '@launchdarkly/js-server-sdk-common'; import { ChatResponse } from '../chat/types'; -import { LDAIConfigTracker } from '../config/LDAIConfigTracker'; import { LDAIJudgeConfig, LDMessage } from '../config/types'; import { AIProvider } from '../providers/AIProvider'; -import { EvaluationSchemaBuilder } from './EvaluationSchemaBuilder'; import { LDJudgeResult, StructuredResponse } from './types'; +const EVALUATION_SCHEMA = { + type: 'object', + properties: { + score: { + type: 'number', + minimum: 0, + maximum: 1, + description: 'Score between 0.0 and 1.0.', + }, + reasoning: { + type: 'string', + description: 'Reasoning behind the score.', + }, + }, + required: ['score', 'reasoning'], + additionalProperties: false, +} as const; + /** * Judge implementation that handles evaluation functionality and conversation management. 
* @@ -17,7 +33,6 @@ import { LDJudgeResult, StructuredResponse } from './types'; */ export class Judge { private readonly _logger?: LDLogger; - private readonly _evaluationResponseStructure: Record; constructor( private readonly _aiConfig: LDAIJudgeConfig, @@ -25,8 +40,6 @@ export class Judge { logger?: LDLogger, ) { this._logger = logger; - const evaluationMetricKey = this._getEvaluationMetricKey(); - this._evaluationResponseStructure = EvaluationSchemaBuilder.build(evaluationMetricKey); } /** @@ -97,14 +110,14 @@ export class Judge { const response = await tracker.trackMetricsOf( (r: StructuredResponse) => r.metrics, - () => this._aiProvider.invokeStructuredModel(messages, this._evaluationResponseStructure), + () => this._aiProvider.invokeStructuredModel(messages, EVALUATION_SCHEMA), ); - const evalResult = this._parseEvaluationResponse(response.data, evaluationMetricKey, tracker); + const evalResult = this._parseEvaluationResponse(response.data); if (!evalResult) { this._logger?.warn( - 'Judge evaluation did not return the expected evaluation', + `Could not parse evaluation response: ${JSON.stringify(response.data)}`, tracker.getTrackData(), ); return result; @@ -181,52 +194,27 @@ export class Judge { } /** - * Parses the structured evaluation response from the AI provider. + * Parses the structured evaluation response. Expects top-level {score, reasoning}. * Returns score and reasoning, or undefined if parsing fails. 
*/ private _parseEvaluationResponse( data: Record, - evaluationMetricKey: string, - tracker: LDAIConfigTracker, ): { score: number; reasoning: string } | undefined { - const evaluations = data.evaluations as Record; - - if (!data.evaluations || typeof data.evaluations !== 'object') { - this._logger?.warn('Invalid response: missing or invalid evaluations object'); - return undefined; - } - - const evaluation = evaluations[evaluationMetricKey]; - - if (!evaluation || typeof evaluation !== 'object') { - this._logger?.warn( - `Missing evaluation for metric key: ${evaluationMetricKey}`, - tracker.getTrackData(), - ); + if (!data || typeof data !== 'object' || Array.isArray(data)) { return undefined; } - const evalData = evaluation as Record; - - if (typeof evalData.score !== 'number' || evalData.score < 0 || evalData.score > 1) { - this._logger?.warn( - `Invalid score evaluated for ${evaluationMetricKey}: ${evalData.score}. Score must be a number between 0 and 1 inclusive`, - tracker.getTrackData(), - ); + if (typeof data.score !== 'number' || data.score < 0 || data.score > 1) { return undefined; } - if (typeof evalData.reasoning !== 'string') { - this._logger?.warn( - `Invalid reasoning evaluated for ${evaluationMetricKey}: ${evalData.reasoning}. 
Reasoning must be a string`, - tracker.getTrackData(), - ); + if (typeof data.reasoning !== 'string') { return undefined; } return { - score: evalData.score, - reasoning: evalData.reasoning, + score: data.score, + reasoning: data.reasoning, }; } } From 092e38a4c34bdbcdee8dfecb89925fb1c9606350 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Mon, 20 Apr 2026 13:05:56 -0500 Subject: [PATCH 6/7] feat: Implement agent graph definitions (#1282) --- package.json | 1 + .../__tests__/AgentGraphDefinition.test.ts | 418 ++++++++++++++ .../__tests__/LDGraphTrackerImpl.test.ts | 545 ++++++++---------- .../server-ai/__tests__/agentGraph.test.ts | 200 +++++++ .../examples/agent-graph-traversal/README.md | 106 ++++ .../agent-graph-traversal/package.json | 19 + .../agent-graph-traversal/src/index.ts | 134 +++++ .../agent-graph-traversal/tsconfig.json | 18 + packages/sdk/server-ai/src/LDAIClientImpl.ts | 118 ++++ .../sdk/server-ai/src/LDGraphTrackerImpl.ts | 133 +++-- packages/sdk/server-ai/src/api/LDAIClient.ts | 52 ++ .../src/api/graph/AgentGraphDefinition.ts | 253 ++++++++ .../server-ai/src/api/graph/AgentGraphNode.ts | 46 ++ .../server-ai/src/api/graph/LDGraphTracker.ts | 132 +++-- packages/sdk/server-ai/src/api/graph/index.ts | 3 + packages/sdk/server-ai/src/api/graph/types.ts | 88 +++ release-please-config.json | 10 + 17 files changed, 1857 insertions(+), 419 deletions(-) create mode 100644 packages/sdk/server-ai/__tests__/AgentGraphDefinition.test.ts create mode 100644 packages/sdk/server-ai/__tests__/agentGraph.test.ts create mode 100644 packages/sdk/server-ai/examples/agent-graph-traversal/README.md create mode 100644 packages/sdk/server-ai/examples/agent-graph-traversal/package.json create mode 100644 packages/sdk/server-ai/examples/agent-graph-traversal/src/index.ts create mode 100644 packages/sdk/server-ai/examples/agent-graph-traversal/tsconfig.json create mode 100644 packages/sdk/server-ai/src/api/graph/AgentGraphDefinition.ts create mode 100644 
packages/sdk/server-ai/src/api/graph/AgentGraphNode.ts create mode 100644 packages/sdk/server-ai/src/api/graph/types.ts diff --git a/package.json b/package.json index 0de39840af..ffbee84446 100644 --- a/package.json +++ b/package.json @@ -50,6 +50,7 @@ "packages/sdk/server-ai/examples/chat-observability", "packages/sdk/server-ai/examples/openai-observability", "packages/sdk/server-ai/examples/vercel-ai", + "packages/sdk/server-ai/examples/agent-graph-traversal", "packages/telemetry/browser-telemetry", "packages/sdk/combined-browser", "packages/sdk/shopify-oxygen", diff --git a/packages/sdk/server-ai/__tests__/AgentGraphDefinition.test.ts b/packages/sdk/server-ai/__tests__/AgentGraphDefinition.test.ts new file mode 100644 index 0000000000..8839a3474a --- /dev/null +++ b/packages/sdk/server-ai/__tests__/AgentGraphDefinition.test.ts @@ -0,0 +1,418 @@ +import { randomUUID } from 'crypto'; + +import { LDContext } from '@launchdarkly/js-server-sdk-common'; + +import { LDAIAgentConfig } from '../src/api/config'; +import { AgentGraphDefinition } from '../src/api/graph/AgentGraphDefinition'; +import { LDAgentGraphFlagValue, LDGraphEdge } from '../src/api/graph/types'; +import { LDClientMin } from '../src/LDClientMin'; +import { LDGraphTrackerImpl } from '../src/LDGraphTrackerImpl'; + +const mockLdClient: LDClientMin = { + track: jest.fn(), + variation: jest.fn(), +}; + +const testContext: LDContext = { kind: 'user', key: 'test-user' }; + +// --------------------------------------------------------------------------- +// Helper builders +// --------------------------------------------------------------------------- + +function makeAgentConfig(key: string, enabled = true): LDAIAgentConfig { + return { key, enabled, instructions: `You are ${key}.` } as LDAIAgentConfig; +} + +function makeGraph( + root: string, + edges: Record = {}, + variationKey?: string, + version = 1, +): LDAgentGraphFlagValue { + return { + _ldMeta: { variationKey, version }, + root, + edges, + }; +} + 
+function makeDefinition( + graph: LDAgentGraphFlagValue, + agentConfigs: Record, + enabled = true, +): AgentGraphDefinition { + const nodes = AgentGraphDefinition.buildNodes(graph, agentConfigs); + return new AgentGraphDefinition( + graph, + nodes, + enabled, + () => + new LDGraphTrackerImpl( + mockLdClient, + randomUUID(), + graph.root, + // eslint-disable-next-line no-underscore-dangle + graph._ldMeta?.variationKey, + // eslint-disable-next-line no-underscore-dangle + graph._ldMeta?.version ?? 1, + testContext, + ), + ); +} + +// --------------------------------------------------------------------------- +// buildNodes +// --------------------------------------------------------------------------- + +it('buildNodes creates a node for every unique key in the graph', () => { + const graph = makeGraph('root', { + root: [{ key: 'child-a' }, { key: 'child-b' }], + 'child-a': [{ key: 'leaf' }], + }); + const configs: Record = { + root: makeAgentConfig('root'), + 'child-a': makeAgentConfig('child-a'), + 'child-b': makeAgentConfig('child-b'), + leaf: makeAgentConfig('leaf'), + }; + + const nodes = AgentGraphDefinition.buildNodes(graph, configs); + expect(Object.keys(nodes).sort()).toEqual(['child-a', 'child-b', 'leaf', 'root']); +}); + +it('buildNodes skips keys whose agent config is missing', () => { + const graph = makeGraph('root', { root: [{ key: 'orphan' }] }); + const nodes = AgentGraphDefinition.buildNodes(graph, { root: makeAgentConfig('root') }); + expect(nodes.root).toBeDefined(); + expect(nodes.orphan).toBeUndefined(); +}); + +it('buildNodes assigns correct edges to each node', () => { + const graph = makeGraph('root', { + root: [{ key: 'child', handoff: { someOption: true } }], + }); + const configs = { + root: makeAgentConfig('root'), + child: makeAgentConfig('child'), + }; + const nodes = AgentGraphDefinition.buildNodes(graph, configs); + expect(nodes.root.getEdges()).toEqual([{ key: 'child', handoff: { someOption: true } }]); + 
expect(nodes.child.getEdges()).toEqual([]); +}); + +// --------------------------------------------------------------------------- +// collectAllKeys +// --------------------------------------------------------------------------- + +it('collectAllKeys includes root, edge sources, and edge targets', () => { + const graph = makeGraph('root', { + root: [{ key: 'a' }, { key: 'b' }], + a: [{ key: 'c' }], + }); + const keys = AgentGraphDefinition.collectAllKeys(graph); + expect([...keys].sort()).toEqual(['a', 'b', 'c', 'root']); +}); + +it('collectAllKeys works for a graph with no edges', () => { + const graph = makeGraph('solo'); + const keys = AgentGraphDefinition.collectAllKeys(graph); + expect([...keys]).toEqual(['solo']); +}); + +// --------------------------------------------------------------------------- +// enabled +// --------------------------------------------------------------------------- + +it('enabled reflects the value passed at construction', () => { + const graph = makeGraph('r'); + const enabled = makeDefinition(graph, { r: makeAgentConfig('r') }, true); + expect(enabled.enabled).toBe(true); + + const disabled = makeDefinition(graph, { r: makeAgentConfig('r') }, false); + expect(disabled.enabled).toBe(false); +}); + +// --------------------------------------------------------------------------- +// rootNode / getNode / terminalNodes +// --------------------------------------------------------------------------- + +it('rootNode returns the root node', () => { + const graph = makeGraph('root', { root: [{ key: 'leaf' }] }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + leaf: makeAgentConfig('leaf'), + }); + expect(def.rootNode().getKey()).toBe('root'); +}); + +it('getNode returns null for unknown key', () => { + const graph = makeGraph('root'); + const def = makeDefinition(graph, { root: makeAgentConfig('root') }); + expect(def.getNode('nonexistent')).toBeNull(); +}); + +it('terminalNodes returns nodes with no outgoing edges', 
() => { + const graph = makeGraph('root', { + root: [{ key: 'mid' }], + mid: [{ key: 'leaf-a' }, { key: 'leaf-b' }], + }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + mid: makeAgentConfig('mid'), + 'leaf-a': makeAgentConfig('leaf-a'), + 'leaf-b': makeAgentConfig('leaf-b'), + }); + const terminalKeys = def + .terminalNodes() + .map((n) => n.getKey()) + .sort(); + expect(terminalKeys).toEqual(['leaf-a', 'leaf-b']); +}); + +// --------------------------------------------------------------------------- +// getChildNodes / getParentNodes +// --------------------------------------------------------------------------- + +it('getChildNodes returns direct children', () => { + const graph = makeGraph('root', { + root: [{ key: 'a' }, { key: 'b' }], + }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + a: makeAgentConfig('a'), + b: makeAgentConfig('b'), + }); + const childKeys = def + .getChildNodes('root') + .map((n) => n.getKey()) + .sort(); + expect(childKeys).toEqual(['a', 'b']); +}); + +it('getChildNodes returns empty array for terminal node', () => { + const graph = makeGraph('root'); + const def = makeDefinition(graph, { root: makeAgentConfig('root') }); + expect(def.getChildNodes('root')).toEqual([]); +}); + +it('getChildNodes returns empty array for unknown key', () => { + const graph = makeGraph('root'); + const def = makeDefinition(graph, { root: makeAgentConfig('root') }); + expect(def.getChildNodes('unknown')).toEqual([]); +}); + +it('getParentNodes returns nodes that have direct edges to the given key', () => { + const graph = makeGraph('root', { + root: [{ key: 'child' }], + sibling: [{ key: 'child' }], + }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + sibling: makeAgentConfig('sibling'), + child: makeAgentConfig('child'), + }); + const parentKeys = def + .getParentNodes('child') + .map((n) => n.getKey()) + .sort(); + expect(parentKeys).toEqual(['root', 'sibling']); +}); + 
+it('getParentNodes returns empty array for root node', () => { + const graph = makeGraph('root', { root: [{ key: 'child' }] }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + child: makeAgentConfig('child'), + }); + expect(def.getParentNodes('root')).toEqual([]); +}); + +// --------------------------------------------------------------------------- +// traverse +// --------------------------------------------------------------------------- + +it('traverse calls fn for every node in BFS order (root first)', () => { + // root + // / \ + // a b + // | + // c + const graph = makeGraph('root', { + root: [{ key: 'a' }, { key: 'b' }], + a: [{ key: 'c' }], + }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + a: makeAgentConfig('a'), + b: makeAgentConfig('b'), + c: makeAgentConfig('c'), + }); + + const order: string[] = []; + def.traverse((node) => { + order.push(node.getKey()); + }); + + expect(order[0]).toBe('root'); + // a and b must both appear before c + const aIdx = order.indexOf('a'); + const bIdx = order.indexOf('b'); + const cIdx = order.indexOf('c'); + expect(aIdx).toBeLessThan(cIdx); + expect(bIdx).toBeLessThan(cIdx); + expect(order).toHaveLength(4); +}); + +it('traverse stores fn return values in execution context', () => { + const graph = makeGraph('root', { root: [{ key: 'child' }] }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + child: makeAgentConfig('child'), + }); + + const contextCaptures: Record[] = []; + def.traverse((node, ctx) => { + contextCaptures.push({ ...ctx }); + return `result-of-${node.getKey()}`; + }); + + // After root is processed, the child's context should contain root's result + expect(contextCaptures[1]).toHaveProperty('root', 'result-of-root'); +}); + +it('traverse accepts and uses initial execution context', () => { + const graph = makeGraph('root'); + const def = makeDefinition(graph, { root: makeAgentConfig('root') }); + + const captured: Record[] = []; 
+ def.traverse( + (node, ctx) => { + captured.push({ ...ctx }); + }, + { initialKey: 'initialValue' }, + ); + + expect(captured[0]).toHaveProperty('initialKey', 'initialValue'); +}); + +it('traverse handles a single-node graph', () => { + const graph = makeGraph('solo'); + const def = makeDefinition(graph, { solo: makeAgentConfig('solo') }); + const visited: string[] = []; + def.traverse((node) => { + visited.push(node.getKey()); + }); + expect(visited).toEqual(['solo']); +}); + +// --------------------------------------------------------------------------- +// reverseTraverse +// --------------------------------------------------------------------------- + +it('reverseTraverse processes terminal nodes before their parents, root last', () => { + // root + // / \ + // a b ← mid-level + // | + // c ← terminal (deepest) + const graph = makeGraph('root', { + root: [{ key: 'a' }, { key: 'b' }], + a: [{ key: 'c' }], + }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + a: makeAgentConfig('a'), + b: makeAgentConfig('b'), + c: makeAgentConfig('c'), + }); + + const order: string[] = []; + def.reverseTraverse((node) => { + order.push(node.getKey()); + }); + + expect(order[order.length - 1]).toBe('root'); // root always last + // c must appear before a (c is a descendant of a) + expect(order.indexOf('c')).toBeLessThan(order.indexOf('a')); + // all four nodes visited + expect(order.sort()).toEqual(['a', 'b', 'c', 'root']); +}); + +it('reverseTraverse stores fn return values in execution context', () => { + const graph = makeGraph('root', { root: [{ key: 'child' }] }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + child: makeAgentConfig('child'), + }); + + const contextWhenRootRuns: Record[] = []; + def.reverseTraverse((node, ctx) => { + if (node.getKey() === 'root') { + contextWhenRootRuns.push({ ...ctx }); + } + return `result-of-${node.getKey()}`; + }); + + // root runs last; at that point, child's result should be in 
context + expect(contextWhenRootRuns[0]).toHaveProperty('child', 'result-of-child'); +}); + +it('reverseTraverse visits a node with multiple parents only once', () => { + // root → a → d → c + // root → b → c ← c has two parents + const graph = makeGraph('root', { + root: [{ key: 'a' }, { key: 'b' }], + a: [{ key: 'd' }], + b: [{ key: 'c' }], + d: [{ key: 'c' }], + }); + const def = makeDefinition(graph, { + root: makeAgentConfig('root'), + a: makeAgentConfig('a'), + b: makeAgentConfig('b'), + c: makeAgentConfig('c'), + d: makeAgentConfig('d'), + }); + + const order: string[] = []; + def.reverseTraverse((node) => { + order.push(node.getKey()); + }); + + // c is the only terminal — it goes first + expect(order[0]).toBe('c'); + // root is always last + expect(order[order.length - 1]).toBe('root'); + // every node visited exactly once + expect(order.sort()).toEqual(['a', 'b', 'c', 'd', 'root']); +}); + +it('reverseTraverse visits each node once on a cyclic graph', () => { + // A → B → A (no terminals) + const graph = makeGraph('a', { + a: [{ key: 'b' }], + b: [{ key: 'a' }], + }); + const def = makeDefinition(graph, { + a: makeAgentConfig('a'), + b: makeAgentConfig('b'), + }); + + const visited: string[] = []; + def.reverseTraverse((node) => { + visited.push(node.getKey()); + }); + + // No terminals → returns without visiting anything (same as Python) + expect(visited).toEqual([]); +}); + +// --------------------------------------------------------------------------- +// getConfig +// --------------------------------------------------------------------------- + +it('getConfig returns the raw flag value', () => { + const graph = makeGraph('root', {}, 'var-key', 5); + const def = makeDefinition(graph, { root: makeAgentConfig('root') }); + expect(def.getConfig()).toBe(graph); +}); diff --git a/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts index 77af551302..9f734eb5d0 100644 --- 
a/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts +++ b/packages/sdk/server-ai/__tests__/LDGraphTrackerImpl.test.ts @@ -4,446 +4,351 @@ import { LDClientMin } from '../src/LDClientMin'; import { LDGraphTrackerImpl } from '../src/LDGraphTrackerImpl'; const mockTrack = jest.fn(); +const mockWarn = jest.fn(); const mockLdClient: LDClientMin = { track: mockTrack, variation: jest.fn(), + logger: { warn: mockWarn, error: jest.fn(), info: jest.fn(), debug: jest.fn() }, }; const testContext: LDContext = { kind: 'user', key: 'test-user' }; -const graphKey = 'test-graph'; +const graphKey = 'my-agent-graph'; const variationKey = 'v1'; const version = 2; -const getExpectedTrackData = () => ({ - graphKey, - variationKey, - version, -}); +const makeTracker = (runId = 'test-run-id') => + new LDGraphTrackerImpl(mockLdClient, runId, graphKey, variationKey, version, testContext); beforeEach(() => { jest.clearAllMocks(); }); -it('returns track data', () => { +// --------------------------------------------------------------------------- +// getTrackData +// --------------------------------------------------------------------------- + +it('returns correct track data with variationKey', () => { + const tracker = makeTracker('fixed-run-id'); + expect(tracker.getTrackData()).toEqual({ + runId: 'fixed-run-id', + graphKey, + version, + variationKey, + }); +}); + +it('omits variationKey when not provided', () => { const tracker = new LDGraphTrackerImpl( mockLdClient, + 'some-run-id', graphKey, - variationKey, + undefined, version, testContext, ); + const data = tracker.getTrackData(); + expect(data.variationKey).toBeUndefined(); + expect(data.graphKey).toBe(graphKey); + expect(data.version).toBe(version); + expect(data.runId).toBe('some-run-id'); +}); - expect(tracker.getTrackData()).toEqual(getExpectedTrackData()); +it('uses provided runId', () => { + const tracker = makeTracker('my-custom-run-id'); + expect(tracker.getTrackData().runId).toBe('my-custom-run-id'); }); 
-it('tracks invocation success', () => { +// --------------------------------------------------------------------------- +// resumptionToken round-trip +// --------------------------------------------------------------------------- + +it('encodes a resumption token with correct field order', () => { + const tracker = makeTracker('550e8400-e29b-41d4-a716-446655440000'); + const token = tracker.resumptionToken; + const decoded = Buffer.from(token, 'base64url').toString('utf8'); + expect(decoded).toBe( + '{"runId":"550e8400-e29b-41d4-a716-446655440000","graphKey":"my-agent-graph","variationKey":"v1","version":2}', + ); +}); + +it('omits variationKey from token when not set', () => { const tracker = new LDGraphTrackerImpl( mockLdClient, + 'run-abc', graphKey, - variationKey, + undefined, version, testContext, ); + const token = tracker.resumptionToken; + const decoded = Buffer.from(token, 'base64url').toString('utf8'); + expect(decoded).toBe('{"runId":"run-abc","graphKey":"my-agent-graph","version":2}'); +}); + +it('fromResumptionToken reconstructs the tracker with original runId', () => { + const original = makeTracker('orig-run-id'); + const token = original.resumptionToken; + + const reconstructed = LDGraphTrackerImpl.fromResumptionToken(token, mockLdClient, testContext); + expect(reconstructed.getTrackData()).toEqual({ + runId: 'orig-run-id', + graphKey, + version, + variationKey, + }); +}); + +// --------------------------------------------------------------------------- +// getSummary +// --------------------------------------------------------------------------- + +it('returns an empty summary initially', () => { + const tracker = makeTracker('r'); + expect(tracker.getSummary()).toEqual({}); +}); + +it('returns a copy of the summary (not a reference)', () => { + const tracker = makeTracker('r'); tracker.trackInvocationSuccess(); + const summary1 = tracker.getSummary(); + const summary2 = tracker.getSummary(); + expect(summary1).not.toBe(summary2); + 
expect(summary1).toEqual(summary2); +}); + +// --------------------------------------------------------------------------- +// trackInvocationSuccess / trackInvocationFailure – at-most-once +// --------------------------------------------------------------------------- +it('trackInvocationSuccess sets success=true and emits event', () => { + const tracker = makeTracker('r'); + tracker.trackInvocationSuccess(); + expect(tracker.getSummary().success).toBe(true); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:invocation_success', testContext, - getExpectedTrackData(), + tracker.getTrackData(), 1, ); }); -it('tracks invocation failure', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); +it('trackInvocationFailure sets success=false and emits event', () => { + const tracker = makeTracker('r'); tracker.trackInvocationFailure(); - + expect(tracker.getSummary().success).toBe(false); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:invocation_failure', testContext, - getExpectedTrackData(), + tracker.getTrackData(), 1, ); }); -it('tracks latency', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, +it('drops second trackInvocationSuccess call and warns', () => { + const tracker = makeTracker('r'); + tracker.trackInvocationSuccess(); + tracker.trackInvocationSuccess(); + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledWith( + expect.stringContaining('invocation success/failure already recorded for this run'), + ); +}); + +it('drops trackInvocationFailure after trackInvocationSuccess and warns', () => { + const tracker = makeTracker('r'); + tracker.trackInvocationSuccess(); + tracker.trackInvocationFailure(); + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(mockWarn).toHaveBeenCalledWith( + expect.stringContaining('invocation success/failure already recorded for this run'), ); - 
tracker.trackLatency(1500); +}); +// --------------------------------------------------------------------------- +// trackLatency – at-most-once +// --------------------------------------------------------------------------- + +it('trackLatency sets durationMs and emits event', () => { + const tracker = makeTracker('r'); + tracker.trackLatency(1234); + expect(tracker.getSummary().durationMs).toBe(1234); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:latency', testContext, - getExpectedTrackData(), - 1500, + tracker.getTrackData(), + 1234, ); }); -it('tracks total tokens', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackTotalTokens({ total: 200, input: 80, output: 120 }); +it('drops second trackLatency call and warns', () => { + const tracker = makeTracker('r'); + tracker.trackLatency(100); + tracker.trackLatency(200); + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(tracker.getSummary().durationMs).toBe(100); + expect(mockWarn).toHaveBeenCalled(); +}); +// --------------------------------------------------------------------------- +// trackTotalTokens – at-most-once +// --------------------------------------------------------------------------- + +it('trackTotalTokens sets tokens and emits event with total as metric value', () => { + const tracker = makeTracker('r'); + const tokens = { total: 500, input: 200, output: 300 }; + tracker.trackTotalTokens(tokens); + expect(tracker.getSummary().tokens).toEqual(tokens); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:total_tokens', testContext, - getExpectedTrackData(), - 200, + tracker.getTrackData(), + 500, ); }); -it('does not track total tokens when total is zero', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackTotalTokens({ total: 0, input: 0, output: 0 }); - - expect(mockTrack).not.toHaveBeenCalled(); +it('drops 
second trackTotalTokens call and warns', () => { + const tracker = makeTracker('r'); + tracker.trackTotalTokens({ total: 100, input: 50, output: 50 }); + tracker.trackTotalTokens({ total: 200, input: 100, output: 100 }); + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(tracker.getSummary().tokens?.total).toBe(100); + expect(mockWarn).toHaveBeenCalled(); }); -it('tracks path', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - const path = ['node-a', 'node-b', 'node-c']; - tracker.trackPath(path); +// --------------------------------------------------------------------------- +// trackPath – at-most-once +// --------------------------------------------------------------------------- +it('trackPath sets path and emits event with path in data payload', () => { + const tracker = makeTracker('r'); + const path = ['root-agent', 'research-agent', 'write-agent']; + tracker.trackPath(path); + expect(tracker.getSummary().path).toEqual(path); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:path', testContext, - { ...getExpectedTrackData(), path }, + { ...tracker.getTrackData(), path }, 1, ); }); -it('tracks judge result', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); +it('drops second trackPath call and warns', () => { + const tracker = makeTracker('r'); + tracker.trackPath(['a', 'b']); + tracker.trackPath(['c', 'd']); + expect(mockTrack).toHaveBeenCalledTimes(1); + expect(tracker.getSummary().path).toEqual(['a', 'b']); + expect(mockWarn).toHaveBeenCalled(); +}); + +// --------------------------------------------------------------------------- +// trackJudgeResult – NOT at-most-once +// --------------------------------------------------------------------------- + +it('trackJudgeResult emits an event for a sampled, successful result', () => { + const tracker = makeTracker('r'); tracker.trackJudgeResult({ - 
judgeConfigKey: 'my-judge', + judgeConfigKey: 'judge-1', + metricKey: 'relevance-score', + score: 0.9, + reasoning: 'good', success: true, sampled: true, - score: 0.9, - reasoning: 'Relevant', - metricKey: 'relevance', }); - + expect(mockTrack).toHaveBeenCalledTimes(1); expect(mockTrack).toHaveBeenCalledWith( - 'relevance', + 'relevance-score', testContext, - { ...getExpectedTrackData(), judgeConfigKey: 'my-judge' }, + { ...tracker.getTrackData(), judgeConfigKey: 'judge-1' }, 0.9, ); }); -it('tracks judge result without judgeConfigKey', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); +it('trackJudgeResult emits event without judgeConfigKey', () => { + const tracker = makeTracker('r'); tracker.trackJudgeResult({ + metricKey: 'relevance-score', + score: 0.7, success: true, sampled: true, - score: 0.7, - reasoning: 'Somewhat relevant', - metricKey: 'relevance', }); - - expect(mockTrack).toHaveBeenCalledWith('relevance', testContext, getExpectedTrackData(), 0.7); -}); - -it('does not track judge result when not sampled', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, + expect(mockTrack).toHaveBeenCalledWith( + 'relevance-score', testContext, + tracker.getTrackData(), + 0.7, ); - tracker.trackJudgeResult({ - judgeConfigKey: 'my-judge', - success: false, - sampled: false, - }); +}); + +it('trackJudgeResult can fire multiple times', () => { + const tracker = makeTracker('r'); + tracker.trackJudgeResult({ metricKey: 'relevance', score: 0.5, success: true, sampled: true }); + tracker.trackJudgeResult({ metricKey: 'relevance', score: 0.7, success: true, sampled: true }); + expect(mockTrack).toHaveBeenCalledTimes(2); + expect(mockWarn).not.toHaveBeenCalled(); +}); +it('trackJudgeResult does not emit when not sampled', () => { + const tracker = makeTracker('r'); + tracker.trackJudgeResult({ judgeConfigKey: 'j', success: false, sampled: false 
}); expect(mockTrack).not.toHaveBeenCalled(); }); -it('does not track judge result when success is false', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); +it('trackJudgeResult does not emit when success is false', () => { + const tracker = makeTracker('r'); tracker.trackJudgeResult({ - judgeConfigKey: 'my-judge', + judgeConfigKey: 'j', + metricKey: 'relevance', + score: 0.9, success: false, sampled: true, - score: 0.9, - metricKey: 'relevance', }); - expect(mockTrack).not.toHaveBeenCalled(); }); -it('tracks redirect', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackRedirect('agent-a', 'agent-b'); +// --------------------------------------------------------------------------- +// Edge-level methods – multi-fire, NOT at-most-once +// --------------------------------------------------------------------------- +it('trackRedirect emits event with sourceKey and redirectedTarget', () => { + const tracker = makeTracker('r'); + tracker.trackRedirect('source-agent', 'redirected-agent'); expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:redirect', testContext, - { ...getExpectedTrackData(), sourceKey: 'agent-a', redirectedTarget: 'agent-b' }, + { ...tracker.getTrackData(), sourceKey: 'source-agent', redirectedTarget: 'redirected-agent' }, 1, ); }); -it('tracks handoff success', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); +it('trackHandoffSuccess emits event with sourceKey and targetKey', () => { + const tracker = makeTracker('r'); tracker.trackHandoffSuccess('agent-a', 'agent-b'); - expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:handoff_success', testContext, - { ...getExpectedTrackData(), sourceKey: 'agent-a', targetKey: 'agent-b' }, + { ...tracker.getTrackData(), sourceKey: 'agent-a', targetKey: 'agent-b' }, 1, ); 
}); -it('tracks handoff failure', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); +it('trackHandoffFailure emits event with sourceKey and targetKey', () => { + const tracker = makeTracker('r'); tracker.trackHandoffFailure('agent-a', 'agent-b'); - expect(mockTrack).toHaveBeenCalledWith( '$ld:ai:graph:handoff_failure', testContext, - { ...getExpectedTrackData(), sourceKey: 'agent-a', targetKey: 'agent-b' }, + { ...tracker.getTrackData(), sourceKey: 'agent-a', targetKey: 'agent-b' }, 1, ); }); -it('returns empty summary when no metrics tracked', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - - expect(tracker.getSummary()).toEqual({}); -}); - -it('summarizes tracked graph metrics', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - - tracker.trackInvocationSuccess(); - tracker.trackLatency(2000); - tracker.trackTotalTokens({ total: 300, input: 100, output: 200 }); - tracker.trackPath(['node-a', 'node-b']); - - expect(tracker.getSummary()).toEqual({ - success: true, - durationMs: 2000, - tokens: { total: 300, input: 100, output: 200 }, - path: ['node-a', 'node-b'], - }); -}); - -describe('at-most-once semantics for graph-level metrics', () => { - it('drops duplicate trackInvocationSuccess calls', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackInvocationSuccess(); - tracker.trackInvocationSuccess(); - - expect(mockTrack).toHaveBeenCalledTimes(1); - }); - - it('drops trackInvocationFailure after trackInvocationSuccess', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackInvocationSuccess(); - tracker.trackInvocationFailure(); - - 
expect(mockTrack).toHaveBeenCalledTimes(1); - expect(mockTrack).toHaveBeenCalledWith( - '$ld:ai:graph:invocation_success', - expect.anything(), - expect.anything(), - expect.anything(), - ); - }); - - it('drops duplicate trackLatency calls', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackLatency(1000); - tracker.trackLatency(2000); - - expect(mockTrack).toHaveBeenCalledTimes(1); - expect(mockTrack).toHaveBeenCalledWith( - '$ld:ai:graph:latency', - testContext, - getExpectedTrackData(), - 1000, - ); - }); - - it('drops duplicate trackTotalTokens calls', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackTotalTokens({ total: 100, input: 40, output: 60 }); - tracker.trackTotalTokens({ total: 200, input: 80, output: 120 }); - - expect(mockTrack).toHaveBeenCalledTimes(1); - expect(mockTrack).toHaveBeenCalledWith( - '$ld:ai:graph:total_tokens', - testContext, - getExpectedTrackData(), - 100, - ); - }); - - it('drops duplicate trackPath calls', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackPath(['node-a']); - tracker.trackPath(['node-b', 'node-c']); - - expect(mockTrack).toHaveBeenCalledTimes(1); - expect(mockTrack).toHaveBeenCalledWith( - '$ld:ai:graph:path', - testContext, - { ...getExpectedTrackData(), path: ['node-a'] }, - 1, - ); - }); -}); - -describe('edge-level methods can be called multiple times', () => { - it('allows multiple trackRedirect calls', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackRedirect('a', 'b'); - tracker.trackRedirect('b', 'c'); - - expect(mockTrack).toHaveBeenCalledTimes(2); - }); - - it('allows multiple trackHandoffSuccess calls', () => { - const tracker = new 
LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackHandoffSuccess('a', 'b'); - tracker.trackHandoffSuccess('b', 'c'); - - expect(mockTrack).toHaveBeenCalledTimes(2); - }); - - it('allows multiple trackHandoffFailure calls', () => { - const tracker = new LDGraphTrackerImpl( - mockLdClient, - graphKey, - variationKey, - version, - testContext, - ); - tracker.trackHandoffFailure('a', 'b'); - tracker.trackHandoffFailure('b', 'c'); - - expect(mockTrack).toHaveBeenCalledTimes(2); - }); +it('edge-level methods can fire multiple times without warning', () => { + const tracker = makeTracker('r'); + tracker.trackHandoffSuccess('a', 'b'); + tracker.trackHandoffSuccess('a', 'b'); + tracker.trackRedirect('a', 'c'); + tracker.trackHandoffFailure('x', 'y'); + expect(mockTrack).toHaveBeenCalledTimes(4); + expect(mockWarn).not.toHaveBeenCalled(); }); diff --git a/packages/sdk/server-ai/__tests__/agentGraph.test.ts b/packages/sdk/server-ai/__tests__/agentGraph.test.ts new file mode 100644 index 0000000000..e5a52d836f --- /dev/null +++ b/packages/sdk/server-ai/__tests__/agentGraph.test.ts @@ -0,0 +1,200 @@ +import { LDContext } from '@launchdarkly/js-server-sdk-common'; + +import { AgentGraphDefinition } from '../src/api/graph/AgentGraphDefinition'; +import { LDAIClientImpl } from '../src/LDAIClientImpl'; +import { LDClientMin } from '../src/LDClientMin'; + +// --------------------------------------------------------------------------- +// Mocks +// --------------------------------------------------------------------------- + +const mockTrack = jest.fn(); +const mockVariation = jest.fn(); +const mockDebug = jest.fn(); + +const mockLdClient: LDClientMin = { + track: mockTrack, + variation: mockVariation, + logger: { + debug: mockDebug, + info: jest.fn(), + warn: jest.fn(), + error: jest.fn(), + }, +}; + +const testContext: LDContext = { kind: 'user', key: 'test-user' }; + +const makeClient = () => new 
LDAIClientImpl(mockLdClient); +
+beforeEach(() => {
+  jest.clearAllMocks();
+});
+
+// ---------------------------------------------------------------------------
+// Helpers
+// ---------------------------------------------------------------------------
+
+function makeGraphFlagValue(
+  root: string,
+  edges: Record<string, Array<{ key: string }>> = {},
+  variationKey = 'v1',
+  version = 1,
+) {
+  return { _ldMeta: { variationKey, version }, root, edges };
+}
+
+function makeAgentFlagValue(key: string, enabled = true) {
+  return {
+    _ldMeta: { variationKey: `${key}-v1`, enabled, version: 1, mode: 'agent' },
+    instructions: `Instructions for ${key}`,
+  };
+}
+
+// ---------------------------------------------------------------------------
+// agentGraph – disabled / validation failures
+// ---------------------------------------------------------------------------
+
+it('returns a disabled graph when _ldMeta.enabled is false', async () => {
+  const client = makeClient();
+  mockVariation.mockResolvedValueOnce({ _ldMeta: { enabled: false }, root: 'root' });
+  const graph = await client.agentGraph('my-graph', testContext);
+  expect(graph).toBeInstanceOf(AgentGraphDefinition);
+  expect(graph.enabled).toBe(false);
+});
+
+it('logs debug when graph is disabled via _ldMeta.enabled', async () => {
+  const client = makeClient();
+  mockVariation.mockResolvedValueOnce({ _ldMeta: { enabled: false }, root: 'root' });
+  await client.agentGraph('my-graph', testContext);
+  expect(mockDebug).toHaveBeenCalledWith(expect.stringContaining('disabled'));
+});
+
+it('returns a disabled graph when graph flag has no root', async () => {
+  const client = makeClient();
+  mockVariation.mockResolvedValueOnce({ root: '' });
+  const graph = await client.agentGraph('my-graph', testContext);
+  expect(graph).toBeInstanceOf(AgentGraphDefinition);
+  expect(graph.enabled).toBe(false);
+});
+
+it('logs debug when graph has no root', async () => {
+  const client = makeClient();
+  mockVariation.mockResolvedValueOnce({ root: '' });
+  
await client.agentGraph('my-graph', testContext); + expect(mockDebug).toHaveBeenCalledWith(expect.stringContaining('not fetchable')); +}); + +it('returns a disabled graph when a node is unconnected (not reachable from root)', async () => { + const client = makeClient(); + const graphValue = makeGraphFlagValue('root', { + root: [{ key: 'child' }], + orphan: [{ key: 'other' }], + }); + mockVariation.mockResolvedValueOnce(graphValue); + const graph = await client.agentGraph('my-graph', testContext); + expect(graph).toBeInstanceOf(AgentGraphDefinition); + expect(graph.enabled).toBe(false); + expect(mockDebug).toHaveBeenCalledWith(expect.stringContaining('unconnected node')); +}); + +it('returns an enabled graph and traverses a cyclic graph (each node visited once)', async () => { + const client = makeClient(); + const graphValue = makeGraphFlagValue('a', { + a: [{ key: 'b' }], + b: [{ key: 'a' }], + }); + mockVariation + .mockResolvedValueOnce(graphValue) + .mockResolvedValue(makeAgentFlagValue('agent', true)); + + const graph = await client.agentGraph('my-graph', testContext); + expect(graph.enabled).toBe(true); + + const visited: string[] = []; + graph.traverse((node) => { + visited.push(node.getKey()); + }); + expect(visited.sort()).toEqual(['a', 'b']); +}); + +it('returns a disabled graph when a child agent config is disabled', async () => { + const client = makeClient(); + const graphValue = makeGraphFlagValue('root', { root: [{ key: 'child' }] }); + mockVariation + .mockResolvedValueOnce(graphValue) + .mockResolvedValueOnce(makeAgentFlagValue('root', true)) + .mockResolvedValueOnce(makeAgentFlagValue('child', false)); + const graph = await client.agentGraph('my-graph', testContext); + expect(graph).toBeInstanceOf(AgentGraphDefinition); + expect(graph.enabled).toBe(false); + expect(mockDebug).toHaveBeenCalledWith(expect.stringContaining('not enabled')); +}); + +// --------------------------------------------------------------------------- +// agentGraph – success 
path +// --------------------------------------------------------------------------- + +it('returns an enabled graph for a valid graph with a single node', async () => { + const client = makeClient(); + const graphValue = makeGraphFlagValue('solo-agent'); + mockVariation + .mockResolvedValueOnce(graphValue) + .mockResolvedValueOnce(makeAgentFlagValue('solo-agent')); + const graph = await client.agentGraph('my-graph', testContext); + expect(graph).toBeInstanceOf(AgentGraphDefinition); + expect(graph.enabled).toBe(true); + expect(graph.rootNode().getKey()).toBe('solo-agent'); +}); + +it('returns an enabled graph with correct nodes for multi-node graph', async () => { + const client = makeClient(); + const graphValue = makeGraphFlagValue('root', { + root: [{ key: 'child-a' }, { key: 'child-b' }], + 'child-a': [{ key: 'leaf' }], + }); + mockVariation + .mockResolvedValueOnce(graphValue) + .mockResolvedValue(makeAgentFlagValue('agent', true)); + + const graph = await client.agentGraph('my-graph', testContext); + expect(graph.enabled).toBe(true); + expect(graph.rootNode().getKey()).toBe('root'); + expect( + graph + .getChildNodes('root') + .map((n) => n.getKey()) + .sort(), + ).toEqual(['child-a', 'child-b']); + expect( + graph + .terminalNodes() + .map((n) => n.getKey()) + .sort(), + ).toEqual(['child-b', 'leaf']); +}); + +it('tracks usage event when agentGraph is called', async () => { + const client = makeClient(); + mockVariation.mockResolvedValue({ root: '' }); + await client.agentGraph('my-graph', testContext); + expect(mockTrack).toHaveBeenCalledWith('$ld:ai:usage:agent-graph', testContext, 'my-graph', 1); +}); + +// --------------------------------------------------------------------------- +// createGraphTracker +// --------------------------------------------------------------------------- + +it('createGraphTracker reconstructs a tracker from a resumption token', () => { + const client = makeClient(); + const token = Buffer.from( + 
'{"runId":"run-1","graphKey":"g-key","variationKey":"v99","version":7}', + ).toString('base64url'); + + const tracker = client.createGraphTracker(token, testContext); + + expect(tracker.getTrackData().graphKey).toBe('g-key'); + expect(tracker.getTrackData().version).toBe(7); + expect(tracker.getTrackData().variationKey).toBe('v99'); + expect(tracker.getTrackData().runId).toBe('run-1'); +}); diff --git a/packages/sdk/server-ai/examples/agent-graph-traversal/README.md b/packages/sdk/server-ai/examples/agent-graph-traversal/README.md new file mode 100644 index 0000000000..2281901f1b --- /dev/null +++ b/packages/sdk/server-ai/examples/agent-graph-traversal/README.md @@ -0,0 +1,106 @@ +# Agent Graph Traversal Example + +Demonstrates how to fetch an agent graph from LaunchDarkly and wire it into +an AI framework using forward or reverse traversal. + +## Setup + +```bash +export LAUNCHDARKLY_SDK_KEY= +export LAUNCHDARKLY_GRAPH_KEY=sample-graph # optional, this is the default +yarn start +``` + +## What it does + +1. Fetches the graph flag and validates that it is enabled. +2. Runs a **forward traversal** (root → terminals), simulating how you would + build agents in a framework that constructs parents before children. +3. Runs a **reverse traversal** (terminals → root), simulating how you would + build agents in a framework that constructs children before parents. +4. Creates a tracker and records a successful invocation. + +## Choosing a traversal direction + +Both methods visit every node exactly once and pass an `executionContext` map +to each callback. The return value of your callback is stored under the node's +key, making it available to all subsequent nodes in that traversal. + +### Forward traversal (`graph.traverse`) + +Processes nodes from the root down to the terminals (BFS order). Use this when +your framework requires a **parent to be defined first** so that child agents +can be registered as handoff targets on it afterward. 
+ +``` +orchestrator-agent → specialist-agent-a → summarizer-agent + ↘ specialist-agent-b ↗ +``` + +When `specialist-agent-a` runs, `orchestrator-agent` is already in +`executionContext`. When `summarizer-agent` runs, both specialists are there. + +Typical frameworks: **OpenAI Agents SDK** — you create the orchestrator agent +first and then attach child agents as handoff targets. + +### Reverse traversal (`graph.reverseTraverse`) + +Processes nodes from the terminals up to the root (upward BFS). Use this when +your framework requires **children to be defined first** so they can be +attached to their parent as tools or sub-graphs. + +``` +summarizer-agent → specialist-agent-a → orchestrator-agent + ↗ specialist-agent-b +``` + +When `specialist-agent-a` runs, `summarizer-agent` is already in +`executionContext`. When `orchestrator-agent` runs, both specialists are there. + +Typical frameworks: **LangGraph** — you define leaf nodes first, then compose +them into parent nodes by attaching them as edges in the graph. + +### Cyclic graphs + +Both traversal methods are cycle-safe via a visited set. For `reverseTraverse`, +a graph with no terminal nodes (every node has at least one outgoing edge) +produces no iterations — there is no starting point for upward BFS. +`traverse` handles cycles normally; the cycle back-edge is simply skipped once +the target node has already been visited. + +## Tracking + +### Graph-level tracker + +Call `graph.createTracker()` once per invocation. The tracker groups all +telemetry events (latency, tokens, success/failure) under a shared `runId` +that appears in LaunchDarkly's AI metrics. + +```typescript +const tracker = graph.createTracker(); +try { + // ... execute graph ... + tracker.trackInvocationSuccess(); +} catch { + tracker.trackInvocationFailure(); +} +``` + +If you need to record tracking events across multiple requests (e.g. 
streaming), +use `tracker.resumptionToken` to serialize the tracker and reconstruct it later +via `aiClient.createGraphTracker(token, context)`. + +### Node-level tracker + +Each node also carries its own `LDAIConfigTracker` for recording metrics +against the underlying agent config (tokens, latency, model usage). Access it +inside your traversal callback via `node.getConfig().createTracker?.()`. + +```typescript +graph.traverse((node, executionContext) => { + const nodeTracker = node.getConfig().createTracker?.(); + // ... invoke the node's agent ... + nodeTracker?.trackSuccess({ totalTokens: 120, inputTokens: 80, outputTokens: 40 }); + return result; +}); +``` diff --git a/packages/sdk/server-ai/examples/agent-graph-traversal/package.json b/packages/sdk/server-ai/examples/agent-graph-traversal/package.json new file mode 100644 index 0000000000..7a3fddc707 --- /dev/null +++ b/packages/sdk/server-ai/examples/agent-graph-traversal/package.json @@ -0,0 +1,19 @@ +{ + "name": "@launchdarkly/server-sdk-ai-agent-graph-traversal", + "private": true, + "version": "1.0.0", + "description": "Example demonstrating LaunchDarkly AI SDK agent graph traversal", + "type": "module", + "scripts": { + "build": "tsc", + "start": "yarn build && node ./dist/index.js" + }, + "dependencies": { + "@launchdarkly/node-server-sdk": "9.10.11", + "@launchdarkly/server-sdk-ai": "0.16.8" + }, + "devDependencies": { + "@tsconfig/node20": "20.1.4", + "typescript": "^5.5.3" + } +} diff --git a/packages/sdk/server-ai/examples/agent-graph-traversal/src/index.ts b/packages/sdk/server-ai/examples/agent-graph-traversal/src/index.ts new file mode 100644 index 0000000000..6bf1d1cf42 --- /dev/null +++ b/packages/sdk/server-ai/examples/agent-graph-traversal/src/index.ts @@ -0,0 +1,134 @@ +/* eslint-disable no-console */ +import { init, type LDContext } from '@launchdarkly/node-server-sdk'; +import { initAi } from '@launchdarkly/server-sdk-ai'; +import type { AgentGraphNode } from 
'@launchdarkly/server-sdk-ai'; + +const GRAPH_KEY = process.env.LAUNCHDARKLY_GRAPH_KEY || 'sample-graph'; + +const sdkKey = process.env.LAUNCHDARKLY_SDK_KEY; +if (!sdkKey) { + console.error('*** Please set the LAUNCHDARKLY_SDK_KEY env first'); + process.exit(1); +} + +const ldClient = init(sdkKey); + +const context: LDContext = { + kind: 'user', + key: 'example-user-key', + name: 'Sandy', +}; + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +// Build a provider-specific agent for this node. +// In a real implementation you would use node.getConfig() to read the agent's +// instructions/model and wire them into your framework (e.g. OpenAI Agents SDK, +// LangGraph, CrewAI). +function buildAgent(node: AgentGraphNode): string { + return ``; +} + +// --------------------------------------------------------------------------- +// Forward traversal — use when your framework builds parents before children. +// +// Each node receives the agents built by its ancestors via executionContext, +// so a parent can be passed to its children as a handoff target. +// +// Example frameworks: OpenAI Agents SDK (register tools/handoffs on the +// parent, then attach child agents). +// --------------------------------------------------------------------------- +function forwardTraversalExample(graph: ReturnType): void { + console.log('\n--- Forward traversal (root → terminals) ---'); + + graph.traverse((node: AgentGraphNode, executionContext: Record) => { + const agent = buildAgent(node); + + // Edges leaving this node tell you which agents this one can hand off to. + // Those child agents will be built in subsequent iterations and available + // in executionContext by the time they run. 
+ const childKeys = node.getEdges().map((e) => e.key); + const ready = childKeys.filter((k) => executionContext[k]); + console.log( + ` built ${agent} children: [${childKeys.join(', ') || 'none'}] pre-built: [${ready.join(', ') || 'none'}]`, + ); + + // Store the built agent so descendants can reference it. + return agent; + }); +} + +// --------------------------------------------------------------------------- +// Reverse traversal — use when your framework builds children before parents. +// +// Each node receives already-built descendant agents via executionContext, +// so a child can be attached to its parent as a tool or sub-agent. +// +// Example frameworks: LangGraph (define leaf nodes first, then compose them +// into parent nodes as edges in the graph). +// --------------------------------------------------------------------------- +function reverseTraversalExample(graph: ReturnType): void { + console.log('\n--- Reverse traversal (terminals → root) ---'); + + graph.reverseTraverse((node: AgentGraphNode, executionContext: Record) => { + const agent = buildAgent(node); + + // Children of this node are guaranteed to already be in executionContext. 
+ const childKeys = node.getEdges().map((e) => e.key); + const builtChildren = childKeys.map((k) => executionContext[k]).filter(Boolean); + console.log(` built ${agent} attaching children: [${builtChildren.join(', ') || 'none'}]`); + + return agent; + }); +} + +// --------------------------------------------------------------------------- +// Main +// --------------------------------------------------------------------------- + +async function main() { + try { + await ldClient.waitForInitialization({ timeout: 10 }); + console.log('*** SDK successfully initialized'); + } catch (error) { + console.log(`*** SDK failed to initialize: ${error}`); + process.exit(1); + } + + const aiClient = initAi(ldClient); + + const graph = await aiClient.agentGraph(GRAPH_KEY, context); + + if (!graph.enabled) { + console.log(`\n*** Graph "${GRAPH_KEY}" is not enabled or could not be fetched.`); + process.exit(0); + } + + console.log(`\n=== Graph: ${GRAPH_KEY} ===`); + console.log(`Root : ${graph.rootNode().getKey()}`); + console.log( + `Terminals: ${ + graph + .terminalNodes() + .map((n) => n.getKey()) + .join(', ') || '(none — cyclic graph)' + }`, + ); + + forwardTraversalExample(graph); + reverseTraversalExample(graph); + + // Create a tracker to record this graph invocation in LaunchDarkly. + // Call trackInvocationSuccess() or trackInvocationFailure() when done. 
+ const tracker = graph.createTracker(); + tracker.trackInvocationSuccess(); + + await ldClient.close(); +} + +main().catch((err) => { + console.error(err); + process.exit(1); +}); diff --git a/packages/sdk/server-ai/examples/agent-graph-traversal/tsconfig.json b/packages/sdk/server-ai/examples/agent-graph-traversal/tsconfig.json new file mode 100644 index 0000000000..6916599c7d --- /dev/null +++ b/packages/sdk/server-ai/examples/agent-graph-traversal/tsconfig.json @@ -0,0 +1,18 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "ESNext", + "moduleResolution": "node", + "esModuleInterop": true, + "allowSyntheticDefaultImports": true, + "strict": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "outDir": "./dist", + "rootDir": "./src", + "declaration": true, + "sourceMap": true + }, + "include": ["src/**/*"], + "exclude": ["node_modules", "dist"] +} diff --git a/packages/sdk/server-ai/src/LDAIClientImpl.ts b/packages/sdk/server-ai/src/LDAIClientImpl.ts index 65eb87a1a9..9bf9c2ffc3 100644 --- a/packages/sdk/server-ai/src/LDAIClientImpl.ts +++ b/packages/sdk/server-ai/src/LDAIClientImpl.ts @@ -21,11 +21,13 @@ import { LDMessage, } from './api/config'; import { LDAIConfigFlagValue, LDAIConfigUtils } from './api/config/LDAIConfigUtils'; +import { AgentGraphDefinition, LDAgentGraphFlagValue, LDGraphTracker } from './api/graph'; import { Judge } from './api/judge/Judge'; import { LDAIClient } from './api/LDAIClient'; import { AIProviderFactory, SupportedAIProvider } from './api/providers'; import { LDAIConfigTrackerImpl } from './LDAIConfigTrackerImpl'; import { LDClientMin } from './LDClientMin'; +import { LDGraphTrackerImpl } from './LDGraphTrackerImpl'; import { aiSdkLanguage, aiSdkName, aiSdkVersion } from './sdkInfo'; /** @@ -38,6 +40,7 @@ const TRACK_USAGE_JUDGE_CONFIG = '$ld:ai:usage:judge-config'; const TRACK_USAGE_CREATE_JUDGE = '$ld:ai:usage:create-judge'; const TRACK_USAGE_AGENT_CONFIG = '$ld:ai:usage:agent-config'; const 
TRACK_USAGE_AGENT_CONFIGS = '$ld:ai:usage:agent-configs';
+const TRACK_USAGE_AGENT_GRAPH = '$ld:ai:usage:agent-graph';
 
 const INIT_TRACK_CONTEXT: LDContext = {
   kind: 'ld_ai',
@@ -393,4 +396,119 @@ export class LDAIClientImpl implements LDAIClient {
   createTracker(token: string, context: LDContext): LDAIConfigTracker {
     return LDAIConfigTrackerImpl.fromResumptionToken(token, this._ldClient, context);
   }
+
+  async agentGraph(
+    graphKey: string,
+    context: LDContext,
+    variables?: Record<string, unknown>,
+  ): Promise<AgentGraphDefinition> {
+    this._ldClient.track(TRACK_USAGE_AGENT_GRAPH, context, graphKey, 1);
+
+    const defaultGraphValue: LDAgentGraphFlagValue = { root: '' };
+    const graphFlagValue = (await this._ldClient.variation(
+      graphKey,
+      context,
+      defaultGraphValue,
+    )) as LDAgentGraphFlagValue;
+
+    // eslint-disable-next-line no-underscore-dangle
+    const variationKey = graphFlagValue._ldMeta?.variationKey;
+    // eslint-disable-next-line no-underscore-dangle
+    const version = graphFlagValue._ldMeta?.version ?? 1;
+    const ldClient = this._ldClient;
+    const trackerFactory = () =>
+      new LDGraphTrackerImpl(ldClient, randomUUID(), graphKey, variationKey, version, context);
+
+    const disabled = new AgentGraphDefinition(graphFlagValue, {}, false, trackerFactory);
+
+    // eslint-disable-next-line no-underscore-dangle
+    if (graphFlagValue._ldMeta?.enabled === false) {
+      this._logger?.debug(`agentGraph: graph "${graphKey}" is disabled.`);
+      return disabled;
+    }
+
+    if (!graphFlagValue.root) {
+      this._logger?.debug(`agentGraph: graph "${graphKey}" is not fetchable or has no root node.`);
+      return disabled;
+    }
+
+    const allKeys = AgentGraphDefinition.collectAllKeys(graphFlagValue);
+    const reachableKeys = this._collectReachableKeys(graphFlagValue);
+
+    const unreachableKey = [...allKeys].find((key) => !reachableKeys.has(key));
+    if (unreachableKey) {
+      this._logger?.debug(
+        `agentGraph: graph "${graphKey}" has unconnected node "${unreachableKey}" that is not reachable from the root.`,
+      );
+      return disabled; 
+ } + + const agentConfigs: Record = {}; + const fetchResults = await Promise.all( + [...allKeys].map(async (key) => { + const config = await this._agentConfigInternal(key, context, graphKey, variables); + return { key, config }; + }), + ); + + const disabledResult = fetchResults.find(({ config }) => !config.enabled); + if (disabledResult) { + this._logger?.debug( + `agentGraph: agent config "${disabledResult.key}" in graph "${graphKey}" is not enabled or could not be fetched.`, + ); + return disabled; + } + fetchResults.forEach(({ key, config }) => { + agentConfigs[key] = config; + }); + + const nodes = AgentGraphDefinition.buildNodes(graphFlagValue, agentConfigs); + return new AgentGraphDefinition(graphFlagValue, nodes, true, trackerFactory); + } + + createGraphTracker(token: string, context: LDContext): LDGraphTracker { + return LDGraphTrackerImpl.fromResumptionToken(token, this._ldClient, context); + } + + /** + * Fetches a single agent config without tracking usage (used internally by agentGraph). + */ + private async _agentConfigInternal( + key: string, + context: LDContext, + graphKey?: string, + variables?: Record, + ): Promise { + const config = await this._evaluate( + key, + context, + disabledAIConfig, + 'agent', + variables, + graphKey, + ); + return config as LDAIAgentConfig; + } + + /** + * Returns the set of all node keys reachable from the root via BFS. + */ + private _collectReachableKeys(graph: LDAgentGraphFlagValue): Set { + const visited = new Set(); + const queue: string[] = [graph.root]; + visited.add(graph.root); + + while (queue.length > 0) { + const key = queue.shift()!; + const edges = graph.edges?.[key] ?? 
[]; + edges.forEach((edge) => { + if (!visited.has(edge.key)) { + visited.add(edge.key); + queue.push(edge.key); + } + }); + } + + return visited; + } } diff --git a/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts index d1f0602f50..6accab1959 100644 --- a/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts +++ b/packages/sdk/server-ai/src/LDGraphTrackerImpl.ts @@ -1,65 +1,129 @@ -import { LDContext } from '@launchdarkly/js-server-sdk-common'; +import type { LDContext } from '@launchdarkly/js-server-sdk-common'; -import { LDGraphMetricSummary, LDGraphTracker } from './api/graph/LDGraphTracker'; -import { LDJudgeResult } from './api/judge/types'; -import { LDTokenUsage } from './api/metrics'; -import { LDClientMin } from './LDClientMin'; +import type { LDGraphTracker } from './api/graph/LDGraphTracker'; +import type { LDGraphMetricSummary, LDGraphTrackData } from './api/graph/types'; +import type { LDJudgeResult } from './api/judge/types'; +import type { LDTokenUsage } from './api/metrics'; +import type { LDClientMin } from './LDClientMin'; +/** + * Concrete implementation of {@link LDGraphTracker}. + * + * Construct directly or reconstruct from a resumption token via + * {@link LDGraphTrackerImpl.fromResumptionToken}. 
+ */ export class LDGraphTrackerImpl implements LDGraphTracker { - private _trackedMetrics: LDGraphMetricSummary = {}; + private _summary: LDGraphMetricSummary = {}; constructor( - private _ldClient: LDClientMin, - private _graphKey: string, - private _variationKey: string, - private _version: number, - private _context: LDContext, + private readonly _ldClient: LDClientMin, + private readonly _runId: string, + private readonly _graphKey: string, + private readonly _variationKey: string | undefined, + private readonly _version: number, + private readonly _context: LDContext, ) {} - getTrackData(): { - variationKey: string; - graphKey: string; - version: number; - } { - return { - variationKey: this._variationKey, + /** + * Reconstructs an {@link LDGraphTrackerImpl} from a resumption token, preserving + * the original `runId` so all events continue to be correlated under the same run. + * + * **Security note:** The token contains the flag variation key and version. + * Do not pass the raw token to untrusted clients. + * + * @param token URL-safe Base64-encoded token produced by {@link LDGraphTrackerImpl.resumptionToken}. + * @param ldClient LaunchDarkly client instance. + * @param context LDContext for the new tracker. 
+ */ + static fromResumptionToken( + token: string, + ldClient: LDClientMin, + context: LDContext, + ): LDGraphTrackerImpl { + const json = Buffer.from(token, 'base64url').toString('utf8'); + const data = JSON.parse(json) as LDGraphTrackData; + return new LDGraphTrackerImpl( + ldClient, + data.runId, + data.graphKey, + data.variationKey, + data.version, + context, + ); + } + + getTrackData(): LDGraphTrackData { + const data: LDGraphTrackData = { + runId: this._runId, graphKey: this._graphKey, version: this._version, }; + if (this._variationKey !== undefined) { + data.variationKey = this._variationKey; + } + return data; + } + + getSummary(): LDGraphMetricSummary { + return { ...this._summary }; + } + + get resumptionToken(): string { + // Keys must appear in exact spec-defined order: + // runId, graphKey, variationKey (omitted if absent), version + const parts: string[] = [ + `"runId":${JSON.stringify(this._runId)}`, + `"graphKey":${JSON.stringify(this._graphKey)}`, + ]; + if (this._variationKey !== undefined) { + parts.push(`"variationKey":${JSON.stringify(this._variationKey)}`); + } + parts.push(`"version":${this._version}`); + const json = `{${parts.join(',')}}`; + return Buffer.from(json).toString('base64url'); } trackInvocationSuccess(): void { - if (this._trackedMetrics.success !== undefined) { + if (this._summary.success !== undefined) { + this._ldClient.logger?.warn( + 'LDGraphTracker: invocation success/failure already recorded for this run — dropping duplicate call.', + ); return; } - this._trackedMetrics.success = true; + this._summary.success = true; this._ldClient.track('$ld:ai:graph:invocation_success', this._context, this.getTrackData(), 1); } trackInvocationFailure(): void { - if (this._trackedMetrics.success !== undefined) { + if (this._summary.success !== undefined) { + this._ldClient.logger?.warn( + 'LDGraphTracker: invocation success/failure already recorded for this run — dropping duplicate call.', + ); return; } - this._trackedMetrics.success 
= false; + this._summary.success = false; this._ldClient.track('$ld:ai:graph:invocation_failure', this._context, this.getTrackData(), 1); } trackLatency(durationMs: number): void { - if (this._trackedMetrics.durationMs !== undefined) { + if (this._summary.durationMs !== undefined) { + this._ldClient.logger?.warn( + 'LDGraphTracker: trackLatency already called for this run — dropping duplicate call.', + ); return; } - this._trackedMetrics.durationMs = durationMs; + this._summary.durationMs = durationMs; this._ldClient.track('$ld:ai:graph:latency', this._context, this.getTrackData(), durationMs); } trackTotalTokens(tokens: LDTokenUsage): void { - if (this._trackedMetrics.tokens !== undefined) { - return; - } - if (tokens.total <= 0) { + if (this._summary.tokens !== undefined) { + this._ldClient.logger?.warn( + 'LDGraphTracker: trackTotalTokens already called for this run — dropping duplicate call.', + ); return; } - this._trackedMetrics.tokens = tokens; + this._summary.tokens = { ...tokens }; this._ldClient.track( '$ld:ai:graph:total_tokens', this._context, @@ -69,10 +133,13 @@ export class LDGraphTrackerImpl implements LDGraphTracker { } trackPath(path: string[]): void { - if (this._trackedMetrics.path !== undefined) { + if (this._summary.path !== undefined) { + this._ldClient.logger?.warn( + 'LDGraphTracker: trackPath already called for this run — dropping duplicate call.', + ); return; } - this._trackedMetrics.path = path; + this._summary.path = [...path]; this._ldClient.track('$ld:ai:graph:path', this._context, { ...this.getTrackData(), path }, 1); } @@ -115,8 +182,4 @@ export class LDGraphTrackerImpl implements LDGraphTracker { 1, ); } - - getSummary(): LDGraphMetricSummary { - return { ...this._trackedMetrics }; - } } diff --git a/packages/sdk/server-ai/src/api/LDAIClient.ts b/packages/sdk/server-ai/src/api/LDAIClient.ts index fd93ca92a5..5dfec98072 100644 --- a/packages/sdk/server-ai/src/api/LDAIClient.ts +++ b/packages/sdk/server-ai/src/api/LDAIClient.ts @@ 
-11,6 +11,7 @@ import { LDAIJudgeConfig, LDAIJudgeConfigDefault, } from './config'; +import { AgentGraphDefinition, LDGraphTracker } from './graph'; import { Judge } from './judge/Judge'; import { SupportedAIProvider } from './providers'; @@ -337,4 +338,55 @@ export interface LDAIClient { * @returns A reconstructed AIConfigTracker with the original runId preserved. */ createTracker(token: string, context: LDContext): LDAIConfigTracker; + + /** + * Fetches an agent graph configuration from LaunchDarkly and returns an + * {@link AgentGraphDefinition}. + * + * When the graph is enabled the method validates that: + * - The graph flag can be evaluated. + * - A single root node is present. + * - All nodes in the graph are reachable from the root (no disconnected nodes). + * - Every referenced agent config can be fetched and is enabled. + * + * If any validation check fails, the returned definition has + * {@link AgentGraphDefinition.enabled | enabled} set to `false` with an empty + * node collection. When the logger level is DEBUG, a message describing the + * failure is emitted. + * + * @param graphKey The LaunchDarkly flag key for the agent graph configuration. + * @param context The LaunchDarkly context used for flag evaluation and tracking. + * @param variables Optional key-value pairs used for Mustache template interpolation + * in each node's agent config instructions. Applied uniformly to all nodes. + * + * @returns A promise that resolves to an {@link AgentGraphDefinition}. Check + * {@link AgentGraphDefinition.enabled | enabled} before traversing. 
+   *
+   * @example
+   * ```typescript
+   * const graph = await aiClient.agentGraph('my-agent-graph', context, { userName: 'Sandy' });
+   * if (graph.enabled) {
+   *   graph.traverse((node, ctx) => {
+   *     // build your provider-specific node here
+   *   });
+   * }
+   * ```
+   */
+  agentGraph(
+    graphKey: string,
+    context: LDContext,
+    variables?: Record<string, unknown>,
+  ): Promise<AgentGraphDefinition>;
+
+  /**
+   * Reconstructs an {@link LDGraphTracker} from a resumption token, preserving
+   * the original `runId` so events from a resumed session are correlated correctly.
+   *
+   * **Security note:** The token encodes the flag variation key and version.
+   * Keep it server-side; do not expose it to untrusted clients.
+   *
+   * @param token URL-safe Base64-encoded token from {@link LDGraphTracker.resumptionToken}.
+   * @param context LDContext to associate with the reconstructed tracker.
+   */
+  createGraphTracker(token: string, context: LDContext): LDGraphTracker;
 }
diff --git a/packages/sdk/server-ai/src/api/graph/AgentGraphDefinition.ts b/packages/sdk/server-ai/src/api/graph/AgentGraphDefinition.ts
new file mode 100644
index 0000000000..c5113b53a5
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/graph/AgentGraphDefinition.ts
@@ -0,0 +1,253 @@
+import type { LDAIAgentConfig } from '../config';
+import { AgentGraphNode } from './AgentGraphNode';
+import type { LDGraphTracker } from './LDGraphTracker';
+import type { LDAgentGraphFlagValue, LDGraphEdge } from './types';
+
+/**
+ * Callback function signature for graph traversal methods.
+ */
+export type TraversalFn = (
+  node: AgentGraphNode,
+  executionContext: Record<string, unknown>,
+) => unknown;
+
+/**
+ * Encapsulates an agent graph configuration and its pre-built node collection.
+ *
+ * Provides graph-level orchestration including relationship queries (parent/child),
+ * breadth-first traversal in both forward and reverse directions, and graph tracker creation.
+ *
+ * Obtain an instance via {@link LDAIClient.agentGraph}. When the graph is disabled
+ * or invalid, the returned instance has {@link enabled} set to `false` and an
+ * empty node collection.
+ */
+export class AgentGraphDefinition {
+  constructor(
+    private readonly _agentGraph: LDAgentGraphFlagValue,
+    private readonly _nodes: Record<string, AgentGraphNode>,
+    readonly enabled: boolean,
+    private readonly _createTracker: () => LDGraphTracker,
+  ) {}
+
+  /**
+   * Builds a node map from a raw agent graph flag value and a map of pre-fetched agent configs.
+   *
+   * @param graph Raw graph flag value from LaunchDarkly.
+   * @param agentConfigs Map of agent config key to resolved LDAIAgentConfig.
+   * @returns Record mapping agent config keys to AgentGraphNode instances.
+   */
+  static buildNodes(
+    graph: LDAgentGraphFlagValue,
+    agentConfigs: Record<string, LDAIAgentConfig>,
+  ): Record<string, AgentGraphNode> {
+    const nodes: Record<string, AgentGraphNode> = {};
+    const allKeys = AgentGraphDefinition.collectAllKeys(graph);
+
+    allKeys.forEach((key) => {
+      const config = agentConfigs[key];
+      if (!config) {
+        return;
+      }
+      const outgoingEdges: LDGraphEdge[] = graph.edges?.[key] ?? [];
+      nodes[key] = new AgentGraphNode(key, config, outgoingEdges);
+    });
+
+    return nodes;
+  }
+
+  /**
+   * Returns the children of the node identified by `nodeKey`.
+   *
+   * @param nodeKey The agent config key of the parent node.
+   */
+  getChildNodes(nodeKey: string): AgentGraphNode[] {
+    const node = this._nodes[nodeKey];
+    if (!node) {
+      return [];
+    }
+    return node
+      .getEdges()
+      .map((edge) => this._nodes[edge.key])
+      .filter((n): n is AgentGraphNode => n !== undefined);
+  }
+
+  /**
+   * Returns all nodes that have a direct edge to the node identified by `nodeKey`.
+   *
+   * @param nodeKey The agent config key of the child node.
+   */
+  getParentNodes(nodeKey: string): AgentGraphNode[] {
+    return Object.values(this._nodes).filter((node) =>
+      node.getEdges().some((edge) => edge.key === nodeKey),
+    );
+  }
+
+  /**
+   * Returns all terminal nodes (nodes with no outgoing edges).
+ */ + terminalNodes(): AgentGraphNode[] { + return Object.values(this._nodes).filter((node) => node.isTerminal()); + } + + /** + * Returns the root node of the graph. + */ + rootNode(): AgentGraphNode { + return this._nodes[this._agentGraph.root]; + } + + /** + * Returns the node with the given key, or `null` if not found. + * + * @param nodeKey The agent config key to look up. + */ + getNode(nodeKey: string): AgentGraphNode | null { + return this._nodes[nodeKey] ?? null; + } + + /** + * Returns the underlying raw graph configuration from LaunchDarkly. + */ + getConfig(): LDAgentGraphFlagValue { + return this._agentGraph; + } + + /** + * Returns a new {@link LDGraphTracker} for this graph invocation. + * + * Call this once per invocation. Each call produces a tracker with a fresh `runId` + * that groups all events for that invocation. + */ + createTracker(): LDGraphTracker { + return this._createTracker(); + } + + /** + * Traverses the graph breadth-first from the root to all terminal nodes. + * + * Nodes at the same depth are processed before advancing to the next depth. + * The value returned by `fn` is stored in the mutable `executionContext` under + * the node's key, making upstream results available to downstream nodes. + * + * Cyclic graphs are handled safely — each node is visited at most once. + * + * @param fn Callback invoked for each node. Its return value is added to + * `executionContext` keyed by the node's config key. + * @param initialExecutionContext Optional initial context to seed the traversal. 
+   */
+  traverse(fn: TraversalFn, initialExecutionContext: Record<string, unknown> = {}): void {
+    const root = this.rootNode();
+    if (!root) {
+      return;
+    }
+
+    const executionContext = { ...initialExecutionContext };
+    const visited = new Set<string>();
+    const queue: AgentGraphNode[] = [root];
+    visited.add(root.getKey());
+
+    while (queue.length > 0) {
+      const node = queue.shift()!;
+      const result = fn(node, executionContext);
+      executionContext[node.getKey()] = result;
+
+      node.getEdges().forEach((edge) => {
+        if (!visited.has(edge.key)) {
+          const child = this._nodes[edge.key];
+          if (child) {
+            visited.add(edge.key);
+            queue.push(child);
+          }
+        }
+      });
+    }
+  }
+
+  /**
+   * Traverses the graph from terminal nodes up to the root.
+   *
+   * Uses BFS upward via parent edges so that each node is processed only after
+   * all of its reachable descendants have been processed. The root is always
+   * visited last. Cyclic graphs are handled safely — each node is visited at
+   * most once; if the graph has no terminal nodes, this method returns without
+   * invoking `fn`.
+   *
+   * **Ordering note:** Within a single BFS level (nodes at the same depth from a
+   * terminal) the visit order is not strictly guaranteed. The guarantee is only
+   * that a node is visited before any of its ancestors — not that siblings at the
+   * same depth are visited in a specific order relative to each other.
+   *
+   * The value returned by `fn` is stored in the mutable `executionContext` under
+   * the node's key.
+   *
+   * @param fn Callback invoked for each node. Its return value is added to
+   * `executionContext` keyed by the node's config key.
+   * @param initialExecutionContext Optional initial context to seed the traversal.
+   */
+  reverseTraverse(fn: TraversalFn, initialExecutionContext: Record<string, unknown> = {}): void {
+    const terminals = this.terminalNodes();
+    if (terminals.length === 0) {
+      return;
+    }
+
+    const executionContext = { ...initialExecutionContext };
+    const rootKey = this._agentGraph.root;
+    const visited = new Set<string>();
+    let queue: AgentGraphNode[] = terminals;
+
+    while (queue.length > 0) {
+      const nextQueue: AgentGraphNode[] = [];
+
+      queue.forEach((node) => {
+        const key = node.getKey();
+        if (visited.has(key)) {
+          return;
+        }
+        visited.add(key);
+
+        // Defer the root so it is always processed last
+        if (key === rootKey) {
+          return;
+        }
+
+        const result = fn(node, executionContext);
+        executionContext[key] = result;
+
+        this.getParentNodes(key).forEach((parent) => {
+          if (!visited.has(parent.getKey())) {
+            nextQueue.push(parent);
+          }
+        });
+      });
+
+      queue = nextQueue;
+    }
+
+    // Root is always last — only invoke if it was reached during traversal
+    const root = this._nodes[rootKey];
+    if (root && visited.has(rootKey)) {
+      const result = fn(root, executionContext);
+      executionContext[rootKey] = result;
+    }
+  }
+
+  /**
+   * Collects every unique node key referenced in the graph (root + all edge sources
+   * and targets).
+   */
+  static collectAllKeys(graph: LDAgentGraphFlagValue): Set<string> {
+    const keys = new Set<string>();
+    keys.add(graph.root);
+
+    if (graph.edges) {
+      Object.entries(graph.edges).forEach(([sourceKey, edges]) => {
+        keys.add(sourceKey);
+        edges.forEach((edge) => {
+          keys.add(edge.key);
+        });
+      });
+    }
+
+    return keys;
+  }
+}
diff --git a/packages/sdk/server-ai/src/api/graph/AgentGraphNode.ts b/packages/sdk/server-ai/src/api/graph/AgentGraphNode.ts
new file mode 100644
index 0000000000..598bfbf0c1
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/graph/AgentGraphNode.ts
@@ -0,0 +1,46 @@
+import type { LDAIAgentConfig } from '../config';
+import type { LDGraphEdge } from './types';
+
+/**
+ * Represents a single node within an agent graph.
+ * + * Each node wraps an {@link LDAIAgentConfig} and carries the outgoing edges + * to its children. Use the node's tracker (via `getConfig().tracker`) to record + * node-level metrics against the underlying agent config. + */ +export class AgentGraphNode { + constructor( + private readonly _key: string, + private readonly _config: LDAIAgentConfig, + private readonly _edges: LDGraphEdge[], + ) {} + + /** + * Returns the agent config key that identifies this node in the graph. + */ + getKey(): string { + return this._key; + } + + /** + * Returns the underlying AIAgentConfig for this node. + * Use `getConfig().tracker` to record node-level metrics. + */ + getConfig(): LDAIAgentConfig { + return this._config; + } + + /** + * Returns the outgoing edges from this node to its children. + */ + getEdges(): LDGraphEdge[] { + return this._edges; + } + + /** + * Returns `true` if this node has no outgoing edges (i.e., it is a terminal/leaf node). + */ + isTerminal(): boolean { + return this._edges.length === 0; + } +} diff --git a/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts index 9ce432d1db..25afc9b2ce 100644 --- a/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts +++ b/packages/sdk/server-ai/src/api/graph/LDGraphTracker.ts @@ -1,110 +1,120 @@ -import { LDJudgeResult } from '../judge/types'; -import { LDTokenUsage } from '../metrics'; +import type { LDJudgeResult } from '../judge/types'; +import type { LDTokenUsage } from '../metrics'; +import type { LDGraphMetricSummary, LDGraphTrackData } from './types'; /** - * Metrics tracked at the graph level. + * Tracks graph-level and edge-level metrics for an agent graph invocation. + * + * Graph-level methods enforce at-most-once semantics: calling the same method + * twice on a tracker instance drops the second call and emits a warning. 
+ * Edge-level methods (trackRedirect, trackHandoffSuccess, trackHandoffFailure) + * are multi-fire and are not subject to this constraint. + * + * @example + * ```typescript + * const tracker = graphDefinition.createTracker(); + * try { + * // ... execute graph ... + * tracker.trackInvocationSuccess(); + * tracker.trackLatency(durationMs); + * } catch { + * tracker.trackInvocationFailure(); + * } + * ``` */ -export interface LDGraphMetricSummary { +export interface LDGraphTracker { /** - * True if the graph invocation succeeded, false if it failed, absent if not tracked. + * Returns tracking metadata to be included in every LDClient.track call. */ - success?: boolean; + getTrackData(): LDGraphTrackData; /** - * Total graph execution duration in milliseconds, if tracked. + * Returns a snapshot of all graph-level metrics tracked so far. */ - durationMs?: number; + getSummary(): LDGraphMetricSummary; /** - * Aggregated token usage across the entire graph invocation, if tracked. + * A URL-safe Base64-encoded (RFC 4648, no padding) token encoding the tracker's + * identity. Pass this token to {@link LDGraphTrackerImpl.fromResumptionToken} to + * reconstruct the tracker across process boundaries, preserving the original runId. + * + * **Security note:** The token contains the flag variation key and version. If passed + * to an untrusted client (e.g., a browser) this could expose feature-flag targeting + * details. Keep the token server-side and use an opaque reference in client-facing APIs. */ - tokens?: LDTokenUsage; + readonly resumptionToken: string; - /** - * Execution path through the graph as an array of config keys, if tracked. - */ - path?: string[]; -} + // ------------------------------------------------------------------------- + // Graph-level tracking methods (at-most-once per tracker instance) + // ------------------------------------------------------------------------- -/** - * Tracker for graph-level and edge-level metrics in AI agent graph operations. 
- * - * Node-level metrics are tracked via each node's {@link LDAIConfigTracker}. - */ -export interface LDGraphTracker { /** - * Get the data for tracking. - */ - getTrackData(): { - variationKey: string; - graphKey: string; - version: number; - }; - - /** - * Track a successful graph invocation. - * - * At-most-once per tracker instance. Subsequent calls are dropped. + * Tracks a successful graph invocation. + * Emits event `$ld:ai:graph:invocation_success` with metric value `1`. + * At-most-once: subsequent calls are dropped with a warning. */ trackInvocationSuccess(): void; /** - * Track an unsuccessful graph invocation. - * - * At-most-once per tracker instance. Subsequent calls are dropped. + * Tracks an unsuccessful graph invocation. + * Emits event `$ld:ai:graph:invocation_failure` with metric value `1`. + * At-most-once: subsequent calls are dropped with a warning. */ trackInvocationFailure(): void; /** - * Track the total latency of graph execution. - * - * At-most-once per tracker instance. Subsequent calls are dropped. + * Tracks the total latency of the graph execution in milliseconds. + * Emits event `$ld:ai:graph:latency` with the duration as the metric value. + * At-most-once: subsequent calls are dropped with a warning. * * @param durationMs Duration in milliseconds. */ trackLatency(durationMs: number): void; /** - * Track aggregated token usage across the entire graph invocation. - * - * At-most-once per tracker instance. Subsequent calls are dropped. + * Tracks aggregate token usage across the entire graph invocation. + * Emits event `$ld:ai:graph:total_tokens` with the total token count as the metric value. + * At-most-once: subsequent calls are dropped with a warning. * * @param tokens Token usage information. */ trackTotalTokens(tokens: LDTokenUsage): void; /** - * Track the execution path through the graph. + * Tracks the execution path through the graph. + * Emits event `$ld:ai:graph:path` with metric value `1`. 
+ * The data payload includes the path array in addition to standard track data. + * At-most-once: subsequent calls are dropped with a warning. * - * At-most-once per tracker instance. Subsequent calls are dropped. - * - * @param path Array of config keys representing the sequence of nodes executed. + * @param path An ordered array of agent config keys representing the execution path. */ trackPath(path: string[]): void; /** - * Track a judge evaluation result for the final graph output. - * - * No event is emitted when the result was not sampled (result.sampled is false). + * Tracks a judge evaluation result for the final graph output. + * Emits one LDClient.track call when the result was sampled and successful. + * Not subject to at-most-once constraints. * * @param result Judge result containing score, reasoning, and metadata. */ trackJudgeResult(result: LDJudgeResult): void; + // ------------------------------------------------------------------------- + // Edge-level tracking methods (multi-fire, not at-most-once) + // ------------------------------------------------------------------------- + /** - * Track when a node redirects to a different target than originally specified. - * - * May be called multiple times. + * Tracks when a node redirects to a different target than originally specified. + * Emits event `$ld:ai:graph:redirect` with metric value `1`. * * @param sourceKey Config key of the source node. - * @param redirectedTarget Config key of the target node that was redirected to. + * @param redirectedTarget Config key of the actual target node. */ trackRedirect(sourceKey: string, redirectedTarget: string): void; /** - * Track a successful handoff between nodes. - * - * May be called multiple times. + * Tracks a successful handoff between two nodes. + * Emits event `$ld:ai:graph:handoff_success` with metric value `1`. * * @param sourceKey Config key of the source node. * @param targetKey Config key of the target node. 
@@ -112,17 +122,11 @@ export interface LDGraphTracker {
   trackHandoffSuccess(sourceKey: string, targetKey: string): void;
 
   /**
-   * Track a failed handoff between nodes.
-   *
-   * May be called multiple times.
+   * Tracks a failed handoff between two nodes.
+   * Emits event `$ld:ai:graph:handoff_failure` with metric value `1`.
    *
    * @param sourceKey Config key of the source node.
    * @param targetKey Config key of the target node.
    */
   trackHandoffFailure(sourceKey: string, targetKey: string): void;
-
-  /**
-   * Get a summary of the tracked graph-level metrics.
-   */
-  getSummary(): LDGraphMetricSummary;
 }
diff --git a/packages/sdk/server-ai/src/api/graph/index.ts b/packages/sdk/server-ai/src/api/graph/index.ts
index 536e630115..9d899029d5 100644
--- a/packages/sdk/server-ai/src/api/graph/index.ts
+++ b/packages/sdk/server-ai/src/api/graph/index.ts
@@ -1 +1,4 @@
+export * from './types';
 export * from './LDGraphTracker';
+export * from './AgentGraphNode';
+export * from './AgentGraphDefinition';
diff --git a/packages/sdk/server-ai/src/api/graph/types.ts b/packages/sdk/server-ai/src/api/graph/types.ts
new file mode 100644
index 0000000000..1b578fecba
--- /dev/null
+++ b/packages/sdk/server-ai/src/api/graph/types.ts
@@ -0,0 +1,88 @@
+import { LDTokenUsage } from '../metrics';
+
+/**
+ * Represents a directed edge in an agent graph, connecting a source node to a target node.
+ */
+export interface LDGraphEdge {
+  /**
+   * The key of the target AIAgentConfig node.
+   */
+  key: string;
+
+  /**
+   * Optional handoff options that customize how data flows between nodes.
+   */
+  handoff?: Record<string, unknown>;
+}
+
+/**
+ * Raw flag value for an agent graph configuration as returned by LaunchDarkly.
+ * This represents the data structure delivered by LaunchDarkly for graph configurations.
+ */
+export interface LDAgentGraphFlagValue {
+  _ldMeta?: {
+    variationKey?: string;
+    version?: number;
+    enabled?: boolean;
+  };
+
+  /**
+   * The key of the root AIAgentConfig in the graph.
+   */
+  root: string;
+
+  /**
+   * Object mapping source agent config keys to arrays of target edges.
+   */
+  edges?: Record<string, LDGraphEdge[]>;
+}
+
+/**
+ * Accumulated graph-level metrics collected by an LDGraphTracker.
+ */
+export interface LDGraphMetricSummary {
+  /**
+   * Whether the graph invocation succeeded. Absent if not yet tracked.
+   */
+  success?: boolean;
+
+  /**
+   * Total graph execution duration in milliseconds. Absent if not yet tracked.
+   */
+  durationMs?: number;
+
+  /**
+   * Aggregate token usage across the entire graph invocation. Absent if not yet tracked.
+   */
+  tokens?: LDTokenUsage;
+
+  /**
+   * Execution path through the graph as an array of config keys. Absent if not yet tracked.
+   */
+  path?: string[];
+}
+
+/**
+ * Tracking metadata returned by {@link LDGraphTracker.getTrackData}.
+ */
+export interface LDGraphTrackData {
+  /**
+   * UUID v4 uniquely identifying this tracker and all events it emits.
+   */
+  runId: string;
+
+  /**
+   * The graph configuration key.
+   */
+  graphKey: string;
+
+  /**
+   * The variation key. Absent when a default config was used rather than a real flag evaluation.
+   */
+  variationKey?: string;
+
+  /**
+   * The version of the flag variation.
+ */ + version: number; +} diff --git a/release-please-config.json b/release-please-config.json index a667b4b38c..8aada61d70 100644 --- a/release-please-config.json +++ b/release-please-config.json @@ -167,6 +167,11 @@ "type": "json", "path": "/packages/sdk/server-ai/examples/chat-observability/package.json", "jsonpath": "$.dependencies['@launchdarkly/node-server-sdk']" + }, + { + "type": "json", + "path": "/packages/sdk/server-ai/examples/agent-graph-traversal/package.json", + "jsonpath": "$.dependencies['@launchdarkly/node-server-sdk']" } ] }, @@ -256,6 +261,11 @@ "type": "json", "path": "examples/chat-observability/package.json", "jsonpath": "$.dependencies['@launchdarkly/server-sdk-ai']" + }, + { + "type": "json", + "path": "examples/agent-graph-traversal/package.json", + "jsonpath": "$.dependencies['@launchdarkly/server-sdk-ai']" } ] }, From 400d97162e6e6ddc651bc4a73fd340f3e47110e0 Mon Sep 17 00:00:00 2001 From: Jason Bailey Date: Mon, 20 Apr 2026 13:06:34 -0500 Subject: [PATCH 7/7] chore: update deprecated API usage in server-ai examples (#1294) --- .../server-ai/examples/bedrock/src/index.ts | 2 +- .../openai-observability/src/index.ts | 19 +++++++++---------- .../server-ai/examples/openai/src/index.ts | 16 +++++++++------- 3 files changed, 19 insertions(+), 18 deletions(-) diff --git a/packages/sdk/server-ai/examples/bedrock/src/index.ts b/packages/sdk/server-ai/examples/bedrock/src/index.ts index e1cbf93e06..3efe003a31 100644 --- a/packages/sdk/server-ai/examples/bedrock/src/index.ts +++ b/packages/sdk/server-ai/examples/bedrock/src/index.ts @@ -48,7 +48,7 @@ async function main() { const aiClient = initAi(ldClient); - const aiConfig = await aiClient.config( + const aiConfig = await aiClient.completionConfig( aiConfigKey!, context, { diff --git a/packages/sdk/server-ai/examples/openai-observability/src/index.ts b/packages/sdk/server-ai/examples/openai-observability/src/index.ts index eb8d1f2470..d13c01349f 100644 --- 
a/packages/sdk/server-ai/examples/openai-observability/src/index.ts +++ b/packages/sdk/server-ai/examples/openai-observability/src/index.ts @@ -68,7 +68,7 @@ async function main() { { example_type: 'provider_observability_demo' }, ); - if (!aiConfig.enabled || !aiConfig.tracker) { + if (!aiConfig.enabled) { console.log('*** AI configuration is not enabled'); ldClient.close(); process.exit(0); @@ -76,15 +76,14 @@ async function main() { try { // ── 4. Call OpenAI and track metrics with the provider's extractor ── - const completion = await aiConfig.tracker.trackMetricsOf( - OpenAIProvider.getAIMetricsFromResponse, - () => - openai.chat.completions.create({ - messages: aiConfig.messages || [], - model: aiConfig.model?.name || 'gpt-4', - temperature: (aiConfig.model?.parameters?.temperature as number) ?? 0.5, - max_tokens: (aiConfig.model?.parameters?.maxTokens as number) ?? 4096, - }), + const tracker = aiConfig.createTracker!(); + const completion = await tracker.trackMetricsOf(OpenAIProvider.getAIMetricsFromResponse, () => + openai.chat.completions.create({ + messages: aiConfig.messages || [], + model: aiConfig.model?.name || 'gpt-4', + temperature: (aiConfig.model?.parameters?.temperature as number) ?? 0.5, + max_tokens: (aiConfig.model?.parameters?.maxTokens as number) ?? 
4096, + }), ); console.log('AI Response:', completion.choices[0]?.message.content); diff --git a/packages/sdk/server-ai/examples/openai/src/index.ts b/packages/sdk/server-ai/examples/openai/src/index.ts index e16643d6c5..c5d348ad4b 100644 --- a/packages/sdk/server-ai/examples/openai/src/index.ts +++ b/packages/sdk/server-ai/examples/openai/src/index.ts @@ -66,13 +66,15 @@ async function main() { } const tracker = aiConfig.createTracker!(); - const completion = await tracker.trackMetricsOf(OpenAIProvider.createAIMetrics, async () => - client.chat.completions.create({ - messages: aiConfig.messages || [], - model: aiConfig.model?.name || 'gpt-4', - temperature: (aiConfig.model?.parameters?.temperature as number) ?? 0.5, - max_tokens: (aiConfig.model?.parameters?.maxTokens as number) ?? 4096, - }), + const completion = await tracker.trackMetricsOf( + OpenAIProvider.getAIMetricsFromResponse, + async () => + client.chat.completions.create({ + messages: aiConfig.messages || [], + model: aiConfig.model?.name || 'gpt-4', + temperature: (aiConfig.model?.parameters?.temperature as number) ?? 0.5, + max_tokens: (aiConfig.model?.parameters?.maxTokens as number) ?? 4096, + }), ); console.log('AI Response:', completion.choices[0]?.message.content);