Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
411 changes: 150 additions & 261 deletions packages/sdk/server-ai/__tests__/Judge.test.ts

Large diffs are not rendered by default.

164 changes: 139 additions & 25 deletions packages/sdk/server-ai/__tests__/LDAIClientImpl.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@
{ role: 'system', content: 'Hello John' },
{ role: 'user', content: 'Score: 42' },
]);
expect(result.tracker).toBeDefined();
expect(result.createTracker).toBeDefined();
expect(result.enabled).toBe(true);
evaluateSpy.mockRestore();
});
Expand Down Expand Up @@ -139,11 +139,18 @@
const evaluateSpy = jest.spyOn(client as any, '_evaluate');
const result = await client.agentConfig(key, testContext, defaultValue, variables);

expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'agent', variables);
expect(evaluateSpy).toHaveBeenCalledWith(
key,
testContext,
defaultValue,
'agent',
variables,
undefined,
);
expect(result.instructions).toBe(
'You are a helpful assistant. Your name is John and your score is 42',
);
expect(result.tracker).toBeDefined();
expect(result.createTracker).toBeDefined();
expect(result.enabled).toBe(true);
evaluateSpy.mockRestore();
});
Expand Down Expand Up @@ -176,7 +183,7 @@
expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined);
// Should use first value from evaluationMetricKeys
expect(result.evaluationMetricKey).toBe('relevance');
expect(result.tracker).toBeDefined();
expect(result.createTracker).toBeDefined();
expect(result.enabled).toBe(true);
evaluateSpy.mockRestore();
});
Expand Down Expand Up @@ -208,7 +215,7 @@

expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined);
expect(result.evaluationMetricKey).toBe('relevance');
expect(result.tracker).toBeDefined();
expect(result.createTracker).toBeDefined();
expect(result.enabled).toBe(true);
evaluateSpy.mockRestore();
});
Expand Down Expand Up @@ -241,7 +248,7 @@

expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined);
expect(result.evaluationMetricKey).toBe('helpfulness');
expect(result.tracker).toBeDefined();
expect(result.createTracker).toBeDefined();
expect(result.enabled).toBe(true);
evaluateSpy.mockRestore();
});
Expand Down Expand Up @@ -275,7 +282,7 @@
expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined);
// Empty string should be treated as invalid, so should fall back to first value in evaluationMetricKeys
expect(result.evaluationMetricKey).toBe('relevance');
expect(result.tracker).toBeDefined();
expect(result.createTracker).toBeDefined();
expect(result.enabled).toBe(true);
evaluateSpy.mockRestore();
});
Expand Down Expand Up @@ -308,7 +315,7 @@
expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'judge', undefined);
// Should skip empty and whitespace strings, use first valid value
expect(result.evaluationMetricKey).toBe('relevance');
expect(result.tracker).toBeDefined();
expect(result.createTracker).toBeDefined();
expect(result.enabled).toBe(true);
evaluateSpy.mockRestore();
});
Expand All @@ -331,7 +338,7 @@
const result = await client.completionConfig(key, testContext, defaultValue);

expect(result.enabled).toBe(false);
expect(result.tracker).toBeUndefined();
expect(result.createTracker).toBeUndefined();
});

it('handles missing metadata mode by defaulting to completion mode', async () => {
Expand All @@ -352,7 +359,7 @@
const result = await client.completionConfig(key, testContext, defaultValue);

expect(result.enabled).toBe(false);
expect(result.tracker).toBeDefined();
expect(result.createTracker).toBeDefined();
expect(result.messages).toEqual([{ role: 'system', content: 'Hello' }]);
expect(result.model).toEqual({ name: 'example-provider', parameters: { name: 'imagination' } });
});
Expand Down Expand Up @@ -381,7 +388,7 @@
expect(result.model).toEqual(defaultValue.model);
expect(result.messages).toEqual(defaultValue.messages);
expect(result.provider).toEqual(defaultValue.provider);
expect(result.tracker).toBeDefined();
expect(result.createTracker).toBeDefined();
expect(result.enabled).toBe(defaultValue.enabled);
expect(mockLdClient.variation).toHaveBeenCalledWith(
key,
Expand All @@ -408,7 +415,7 @@
const mockConfig = {
model: { name: 'test-model' },
messages: [],
tracker: {} as any,
createTracker: () => ({}) as any,
enabled: true,
};

Expand Down Expand Up @@ -449,7 +456,7 @@
const mockConfig = {
model: { name: 'test-model' },
instructions: 'You are a helpful assistant.',
tracker: {} as any,
createTracker: () => ({}) as any,
enabled: true,
};

Expand All @@ -464,7 +471,14 @@
key,
1,
);
expect(evaluateSpy).toHaveBeenCalledWith(key, testContext, defaultValue, 'agent', variables);
expect(evaluateSpy).toHaveBeenCalledWith(
key,
testContext,
defaultValue,
'agent',
variables,
undefined,
);
expect(result).toBe(mockConfig);
evaluateSpy.mockRestore();
});
Expand Down Expand Up @@ -527,7 +541,7 @@
},
provider: { name: 'openai' },
instructions: 'You are a research assistant specializing in climate change.',
tracker: expect.any(Object),
createTracker: expect.any(Function),
enabled: true,
},
'writing-agent': {
Expand All @@ -538,7 +552,7 @@
},
provider: { name: 'anthropic' },
instructions: 'You are a writing assistant with academic style.',
tracker: expect.any(Object),
createTracker: expect.any(Function),
enabled: true,
},
});
Expand Down Expand Up @@ -582,7 +596,7 @@
provider: { name: 'openai' },
evaluationMetricKeys: ['relevance'],
messages: [{ role: 'system' as const, content: 'You are a judge for {{metric}}.' }],
tracker: {} as any,
createTracker: () => ({}) as any,
toVercelAISDK: jest.fn(),
};

Expand Down Expand Up @@ -631,14 +645,15 @@
enabled: false,
};

const mockTrackerInstance = {} as any;
const mockJudgeConfig = {
key: 'test-judge',
enabled: true,
model: { name: 'gpt-4' },
provider: { name: 'openai' },
evaluationMetricKeys: ['relevance', 'accuracy'],
messages: [{ role: 'system' as const, content: 'You are a judge.' }],
tracker: {} as any,
createTracker: () => mockTrackerInstance,
toVercelAISDK: jest.fn(),
};

Expand All @@ -658,12 +673,7 @@
response_to_evaluate: '{{response_to_evaluate}}',
});
expect(AIProviderFactory.create).toHaveBeenCalledWith(mockJudgeConfig, undefined, undefined);
expect(Judge).toHaveBeenCalledWith(
mockJudgeConfig,
mockJudgeConfig.tracker,
mockProvider,
undefined,
);
expect(Judge).toHaveBeenCalledWith(mockJudgeConfig, mockProvider, undefined);
expect(result).toBe(mockJudge);
judgeConfigSpy.mockRestore();
});
Expand Down Expand Up @@ -706,7 +716,7 @@
provider: { name: 'openai' },
evaluationMetricKeys: ['relevance'],
messages: [{ role: 'system' as const, content: 'You are a judge.' }],
tracker: {} as any,
createTracker: () => ({}) as any,
toVercelAISDK: jest.fn(),
};

Expand Down Expand Up @@ -741,6 +751,30 @@
});
});

// Covers LDAIClientImpl.createTracker: rebuilding a tracker from an encoded resumption token.
describe('createTracker method', () => {
  it('reconstructs a tracker from a resumption token', () => {
    const aiClient = new LDAIClientImpl(mockLdClient);

    // The token is hand-assembled here: the JSON form of
    // { runId, configKey, variationKey, version }, encoded as base64url.
    const tokenFields = {
      runId: 'a0eebc99-9c0b-4ef8-bb6d-6bb9bd380a11',
      configKey: 'my-config',
      variationKey: 'v1',
      version: 3,
    };
    const resumptionToken = Buffer.from(JSON.stringify(tokenFields)).toString('base64url');

    const rebuiltTracker = aiClient.createTracker(resumptionToken, testContext);

    // Every field that went into the token must come back out of the tracker's track data.
    expect(rebuiltTracker.getTrackData()).toMatchObject(tokenFields);
  });
});

describe('optional default values', () => {
it('uses a disabled completion config when no default is provided', async () => {
const client = new LDAIClientImpl(mockLdClient);
Expand Down Expand Up @@ -799,3 +833,83 @@
expect(result.enabled).toBe(false);
});
});

describe('tools map support', () => {
// A `tools` map carried by the evaluated variation must be exposed on the completion config.
it('includes tools map in completion config from flag variation', async () => {
  const aiClient = new LDAIClientImpl(mockLdClient);
  const flagKey = 'test-flag';
  const fallback: LDAICompletionConfigDefault = { enabled: false };

  // Tools map as it is returned by the mocked flag evaluation.
  const expectedTools = {
    'web-search-tool': {
      name: 'web-search-tool',
      type: 'function',
      parameters: { type: 'object', properties: {}, required: [] },
      customParameters: { 'some-custom-parameter': 'some-custom-value' },
    },
  };
  const variationPayload = {
    model: { name: 'example-model' },
    tools: expectedTools,
    _ldMeta: { variationKey: 'v1', enabled: true, mode: 'completion' },
  };
  mockLdClient.variation.mockResolvedValue(variationPayload);

  const config = await aiClient.completionConfig(flagKey, testContext, fallback);

  expect(config.tools).toEqual(variationPayload.tools);
});

// A `tools` map carried by the evaluated variation must be exposed on the agent config.
it('includes tools map in agent config from flag variation', async () => {
  const aiClient = new LDAIClientImpl(mockLdClient);
  const flagKey = 'test-agent';
  const fallback: LDAIAgentConfigDefault = { enabled: false };

  // Tools map as it is returned by the mocked flag evaluation.
  const expectedTools = {
    'search-tool': {
      name: 'search-tool',
      type: 'function',
      customParameters: { maxResults: 10 },
    },
  };
  const variationPayload = {
    model: { name: 'example-model' },
    instructions: 'You are a helpful agent.',
    tools: expectedTools,
    _ldMeta: { variationKey: 'v1', enabled: true, mode: 'agent' },
  };
  mockLdClient.variation.mockResolvedValue(variationPayload);

  const config = await aiClient.agentConfig(flagKey, testContext, fallback);

  expect(config.tools).toEqual(variationPayload.tools);
});

// The completion config must expose the default's `tools` map when the evaluation
// result is the default value itself.
it('uses tools from defaults when completion config flag has no tools', async () => {
  const client = new LDAIClientImpl(mockLdClient);
  const key = 'test-flag';
  const defaultTools = {
    'default-tool': {
      name: 'default-tool',
      type: 'function',
      customParameters: { priority: 'high' },
    },
  };
  const defaultValue: LDAICompletionConfigDefault = { enabled: true, tools: defaultTools };

  // Previously this value was chosen via `defaultValue.constructor ? {...} : defaultValue`.
  // `constructor` is always truthy on an object literal, so the fallback branch was dead
  // code and this object was always the one resolved; it is now passed directly.
  // NOTE(review): this presumably mirrors the flag-value form of `defaultValue` that the
  // LaunchDarkly client hands back when no variation applies - confirm against the
  // implementation's default-to-flag-value conversion.
  const defaultFlagValue = {
    _ldMeta: { enabled: true, mode: 'completion', variationKey: '' },
    tools: defaultTools,
  };
  mockLdClient.variation.mockResolvedValue(defaultFlagValue);

  const result = await client.completionConfig(key, testContext, defaultValue);

  expect(result.tools).toEqual(defaultTools);
});

// When neither the variation nor the default carries a `tools` map, the config has none.
it('returns undefined tools when no tools are configured', async () => {
  const aiClient = new LDAIClientImpl(mockLdClient);
  const flagKey = 'test-flag';
  const fallback: LDAICompletionConfigDefault = { enabled: false };

  // Variation deliberately omits `tools`.
  const variationWithoutTools = {
    model: { name: 'example-model' },
    _ldMeta: { variationKey: 'v1', enabled: true, mode: 'completion' },
  };
  mockLdClient.variation.mockResolvedValue(variationWithoutTools);

  const config = await aiClient.completionConfig(flagKey, testContext, fallback);

  expect(config.tools).toBeUndefined();
});
});
Loading
Loading