diff --git a/ldai/client.go b/ldai/client.go index b5c797cd..38b0fba4 100644 --- a/ldai/client.go +++ b/ldai/client.go @@ -111,6 +111,21 @@ func (c *Client) Config( return c.CompletionConfig(key, context, defaultValue, variables) } +// CreateTracker reconstructs a Tracker from a resumption token and the given context. +// This delegates to TrackerFromResumptionToken. See that function for details. +func (c *Client) CreateTracker(token string, context ldcontext.Context) (*Tracker, error) { + return TrackerFromResumptionToken(token, c.sdk, context) +} + +// returnDefault sets a tracker factory on a copy of def (so CreateTracker always works) and +// returns it along with an initial tracker. Used for all error-path returns in evaluateConfig. +func (c *Client) returnDefault(key string, context ldcontext.Context, def Config) (Config, *Tracker) { + def.trackerFactory = func() *Tracker { + return newTracker(c.sdk, newRunID(), key, "", 1, context, &def, c.logger) + } + return def, newTracker(c.sdk, newRunID(), key, "", 1, context, &def, c.logger) +} + // evaluateConfig fetches and interpolates an AI Config without emitting any metric. // Callers (Config, JudgeConfig) are meant to emit their own metric before calling this. func (c *Client) evaluateConfig( @@ -125,13 +140,13 @@ func (c *Client) evaluateConfig( // empty object.) 
if result.Type() != ldvalue.ObjectType { c.logConfigWarning(key, "unmarshalling failed, expected JSON object but got %s", result.Type().String()) - return defaultValue, newTracker(key, "", 1, c.sdk, &defaultValue, context, c.logger) + return c.returnDefault(key, context, defaultValue) } var parsed datamodel.Config if err := json.Unmarshal([]byte(result.JSONString()), &parsed); err != nil { c.logConfigWarning(key, "unmarshalling failed: %v", err) - return defaultValue, newTracker(key, "", 1, c.sdk, &defaultValue, context, c.logger) + return c.returnDefault(key, context, defaultValue) } mergedVariables := map[string]interface{}{ @@ -169,7 +184,7 @@ func (c *Client) evaluateConfig( c.logConfigWarning(key, "malformed message at index %d: %v", i, err, ) - return defaultValue, &Tracker{} + return c.returnDefault(key, context, defaultValue) } builder.WithMessage(content, msg.Role) } @@ -181,7 +196,12 @@ func (c *Client) evaluateConfig( version = *parsed.Meta.Version } - return cfg, newTracker(key, parsed.Meta.VariationKey, version, c.sdk, &cfg, context, c.logger) + variationKey := parsed.Meta.VariationKey + cfg.trackerFactory = func() *Tracker { + return newTracker(c.sdk, newRunID(), key, variationKey, version, context, &cfg, c.logger) + } + + return cfg, newTracker(c.sdk, newRunID(), key, parsed.Meta.VariationKey, version, context, &cfg, c.logger) } func getAllAttributes(context ldcontext.Context) map[string]interface{} { diff --git a/ldai/client_test.go b/ldai/client_test.go index a74a6678..c1885861 100644 --- a/ldai/client_test.go +++ b/ldai/client_test.go @@ -1,6 +1,8 @@ package ldai import ( + "encoding/base64" + "encoding/json" "errors" "testing" @@ -96,7 +98,10 @@ func TestEvalErrorReturnsDefault(t *testing.T) { cfg, tracker := client.Config("key", ldcontext.New("user"), defaultVal, nil) assert.NotNil(t, tracker) - assert.Equal(t, defaultVal, cfg) + assert.Equal(t, defaultVal.Enabled(), cfg.Enabled()) + assert.Equal(t, defaultVal.Messages(), cfg.Messages()) + 
assert.Equal(t, defaultVal.ModelName(), cfg.ModelName()) + assert.Equal(t, defaultVal.ProviderName(), cfg.ProviderName()) } func TestParseMultipleMessages(t *testing.T) { @@ -191,7 +196,10 @@ func TestParseInvalidConfigReturnsDefault(t *testing.T) { defaultVal := NewConfig().Enable().WithMessage("hello", datamodel.User).Build() cfg, _ := client.Config("key", ldcontext.New("user"), defaultVal, nil) - assert.Equal(t, defaultVal, cfg) + // Verify config data matches the default + assert.Equal(t, defaultVal.AsLdValue(), cfg.AsLdValue()) + // Verify CreateTracker() now works (returnDefault always injects a factory) + assert.NotNil(t, cfg.CreateTracker()) sdk.log.AssertMessageMatch(t, true, ldlog.Warn, "AI Config 'key':") }) @@ -835,3 +843,236 @@ func TestConfig_WithoutReservedVarsWipesJudgePlaceholders(t *testing.T) { require.Len(t, msgs, 1) assert.Equal(t, "Input: \nOutput: ", msgs[0].Content, "Config without reserved vars renders placeholders as empty") } + +func TestCreateTracker_ManuallyBuiltConfig_ReturnsNil(t *testing.T) { + cfg := NewConfig().Enable().WithMessage("hello", datamodel.User).Build() + assert.Nil(t, cfg.CreateTracker(), "manually built config should not have a tracker factory") +} + +func TestCreateTracker_DisabledConfig_ReturnsTracker(t *testing.T) { + json := []byte(`{ + "_ldMeta": {"variationKey": "1", "enabled": false}, + "messages": [{"content": "hello", "role": "user"}] + }`) + + client, err := NewClient(newMockSDK(json, nil)) + require.NoError(t, err) + + cfg, _ := client.CompletionConfig("key", ldcontext.New("user"), Disabled(), nil) + assert.False(t, cfg.Enabled()) + assert.NotNil(t, cfg.CreateTracker(), "disabled config should still have a tracker factory") +} + +func TestCreateTracker_EnabledConfig_ReturnsTracker(t *testing.T) { + json := []byte(`{ + "_ldMeta": {"variationKey": "1", "enabled": true}, + "model": {"name": "gpt-4"}, + "provider": {"name": "openai"}, + "messages": [{"content": "hello", "role": "user"}] + }`) + + client, err := 
NewClient(newMockSDK(json, nil)) + require.NoError(t, err) + + cfg, _ := client.CompletionConfig("key", ldcontext.New("user"), Disabled(), nil) + assert.True(t, cfg.Enabled()) + + tracker := cfg.CreateTracker() + require.NotNil(t, tracker, "enabled config should have a tracker factory") +} + +func TestCreateTracker_FreshRunIdPerCall(t *testing.T) { + json := []byte(`{ + "_ldMeta": {"variationKey": "1", "enabled": true}, + "messages": [{"content": "hello", "role": "user"}] + }`) + + mockSDK := newMockSDK(json, nil) + client, err := NewClient(mockSDK) + require.NoError(t, err) + + // Clear SDK info event + mockSDK.events = nil + + cfg, _ := client.CompletionConfig("key", ldcontext.New("user"), Disabled(), nil) + + tracker1 := cfg.CreateTracker() + tracker2 := cfg.CreateTracker() + require.NotNil(t, tracker1) + require.NotNil(t, tracker2) + + // Each tracker should be able to track independently. Track success on both to emit events. + _ = tracker1.TrackSuccess() + _ = tracker2.TrackSuccess() + + // Filter out the usage event; we only want the generation events. 
+ var genEvents []mockEvent + for _, e := range mockSDK.events { + if e.eventName == "$ld:ai:generation:success" { + genEvents = append(genEvents, e) + } + } + + require.Len(t, genEvents, 2, "each tracker should emit its own event") + + runId1 := genEvents[0].data.GetByKey("runId").StringValue() + runId2 := genEvents[1].data.GetByKey("runId").StringValue() + assert.NotEmpty(t, runId1) + assert.NotEmpty(t, runId2) + assert.NotEqual(t, runId1, runId2, "each tracker must have a unique runId") +} + +func TestCreateTracker_TrackerHasCorrectMetadata(t *testing.T) { + json := []byte(`{ + "_ldMeta": {"variationKey": "var-1", "enabled": true, "version": 5}, + "model": {"name": "gpt-4"}, + "provider": {"name": "openai"}, + "messages": [{"content": "hello", "role": "user"}] + }`) + + mockSDK := newMockSDK(json, nil) + client, err := NewClient(mockSDK) + require.NoError(t, err) + + // Clear SDK info event + mockSDK.events = nil + + cfg, _ := client.CompletionConfig("my-config", ldcontext.New("user"), Disabled(), nil) + + tracker := cfg.CreateTracker() + require.NotNil(t, tracker) + + _ = tracker.TrackSuccess() + + // Filter for the generation event (skip usage event) + var genEvent *mockEvent + for i, e := range mockSDK.events { + if e.eventName == "$ld:ai:generation:success" { + genEvent = &mockSDK.events[i] + break + } + } + require.NotNil(t, genEvent) + + data := genEvent.data + assert.Equal(t, "my-config", data.GetByKey("configKey").StringValue()) + assert.Equal(t, "var-1", data.GetByKey("variationKey").StringValue()) + assert.Equal(t, 5, data.GetByKey("version").IntValue()) + assert.Equal(t, "openai", data.GetByKey("providerName").StringValue()) + assert.Equal(t, "gpt-4", data.GetByKey("modelName").StringValue()) + assert.NotEmpty(t, data.GetByKey("runId").StringValue()) +} + +func TestCreateTracker_JudgeConfigHasFactory(t *testing.T) { + json := []byte(`{ + "_ldMeta": {"variationKey": "1", "enabled": true}, + "mode": "judge", + "evaluationMetricKey": "toxicity", + 
"messages": [{"content": "test", "role": "system"}] + }`) + + client, err := NewClient(newMockSDK(json, nil)) + require.NoError(t, err) + + cfg, _ := client.JudgeConfig("judge-key", ldcontext.New("user"), Disabled(), nil) + assert.True(t, cfg.Enabled()) + + tracker := cfg.CreateTracker() + require.NotNil(t, tracker, "enabled judge config should have a tracker factory") +} + +func TestClient_CreateTracker_RoundTrip(t *testing.T) { + configJSON := []byte(`{ + "_ldMeta": {"variationKey": "var-1", "enabled": true, "version": 5}, + "model": {"name": "gpt-4"}, + "provider": {"name": "openai"}, + "messages": [{"content": "hello", "role": "user"}] + }`) + + mockSDK := newMockSDK(configJSON, nil) + client, err := NewClient(mockSDK) + require.NoError(t, err) + + // Clear SDK info event + mockSDK.events = nil + + cfg, _ := client.CompletionConfig("my-config", ldcontext.New("user"), Disabled(), nil) + originalTracker := cfg.CreateTracker() + require.NotNil(t, originalTracker) + + token := originalTracker.ResumptionToken() + require.NotEmpty(t, token) + + // Reconstruct from token with a different context + newContext := ldcontext.New("other-user") + reconstructed, err := client.CreateTracker(token, newContext) + require.NoError(t, err) + require.NotNil(t, reconstructed) + + // The reconstructed tracker should produce the same resumption token + assert.Equal(t, token, reconstructed.ResumptionToken()) + + // Track feedback on the reconstructed tracker and verify it uses the original runId + _ = originalTracker.TrackSuccess() + _ = reconstructed.TrackFeedback(FeedbackPositive) + + var successEvent, feedbackEvent *mockEvent + for i, e := range mockSDK.events { + switch e.eventName { + case "$ld:ai:generation:success": + successEvent = &mockSDK.events[i] + case "$ld:ai:feedback:user:positive": + feedbackEvent = &mockSDK.events[i] + } + } + require.NotNil(t, successEvent) + require.NotNil(t, feedbackEvent) + + // Both events should share the same runId + originalRunId := 
successEvent.data.GetByKey("runId").StringValue() + reconstructedRunId := feedbackEvent.data.GetByKey("runId").StringValue() + assert.Equal(t, originalRunId, reconstructedRunId, "reconstructed tracker must reuse the original runId") + + // Reconstructed tracker should use the new context + assert.Equal(t, newContext, feedbackEvent.context) + + // Verify metadata preserved + assert.Equal(t, "my-config", feedbackEvent.data.GetByKey("configKey").StringValue()) + assert.Equal(t, "var-1", feedbackEvent.data.GetByKey("variationKey").StringValue()) + assert.Equal(t, 5, feedbackEvent.data.GetByKey("version").IntValue()) + + // modelName and providerName should be empty on reconstructed tracker + assert.Equal(t, "", feedbackEvent.data.GetByKey("modelName").StringValue()) + assert.Equal(t, "", feedbackEvent.data.GetByKey("providerName").StringValue()) +} + +func TestClient_CreateTracker_InvalidToken(t *testing.T) { + mockSDK := newMockSDK(nil, nil) + client, err := NewClient(mockSDK) + require.NoError(t, err) + + t.Run("invalid base64", func(t *testing.T) { + _, err := client.CreateTracker("not-valid-base64!!!", ldcontext.New("user")) + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid resumption token") + }) + + t.Run("valid base64 but invalid JSON", func(t *testing.T) { + token := base64.RawURLEncoding.EncodeToString([]byte("not json")) + _, err := client.CreateTracker(token, ldcontext.New("user")) + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid resumption token") + }) + + t.Run("valid token with missing fields uses zero values", func(t *testing.T) { + payload, _ := json.Marshal(map[string]interface{}{"runId": "test-run"}) + token := base64.RawURLEncoding.EncodeToString(payload) + tracker, err := client.CreateTracker(token, ldcontext.New("user")) + require.NoError(t, err) + require.NotNil(t, tracker) + + // Should work with partial data + resumeToken := tracker.ResumptionToken() + assert.NotEmpty(t, resumeToken) + }) +} diff --git 
a/ldai/config.go b/ldai/config.go index 5fba4e7e..990326f8 100644 --- a/ldai/config.go +++ b/ldai/config.go @@ -11,7 +11,8 @@ import ( // Config represents an AI Config. type Config struct { - c datamodel.Config + c datamodel.Config + trackerFactory func() *Tracker } // VariationKey is used internally by LaunchDarkly. @@ -87,6 +88,16 @@ func (c *Config) JudgeConfiguration() *datamodel.JudgeConfiguration { } } +// CreateTracker creates a new Tracker with a fresh runId for tracking metrics related to this +// AI Config evaluation. Each call returns a new, independent Tracker instance. +// Returns nil if the config was not obtained via the Client. +func (c *Config) CreateTracker() *Tracker { + if c.trackerFactory == nil { + return nil + } + return c.trackerFactory() +} + // AsLdValue is used internally. func (c *Config) AsLdValue() ldvalue.Value { return ldvalue.FromJSONMarshal(c.c) diff --git a/ldai/tracker.go b/ldai/tracker.go index 90655242..81c23b4a 100644 --- a/ldai/tracker.go +++ b/ldai/tracker.go @@ -1,6 +1,9 @@ package ldai import ( + "crypto/rand" + "encoding/base64" + "encoding/json" "fmt" "time" @@ -27,6 +30,14 @@ const ( tokenOutput = "$ld:ai:tokens:output" ) +func newRunID() string { + b := make([]byte, 16) + _, _ = rand.Read(b) + b[6] = (b[6] & 0x0f) | 0x40 + b[8] = (b[8] & 0x3f) | 0x80 + return fmt.Sprintf("%08x-%04x-%04x-%04x-%012x", b[0:4], b[4:6], b[6:8], b[8:10], b[10:]) +} + // TokenUsage represents the token usage returned by a model provider for a specific request. type TokenUsage struct { // Total is the total number of tokens used. @@ -106,16 +117,27 @@ type Stopwatch interface { Stop() time.Duration } +// resumptionPayload is the JSON structure encoded into a resumption token. +type resumptionPayload struct { + RunID string `json:"runId"` + ConfigKey string `json:"configKey"` + VariationKey string `json:"variationKey,omitempty"` + Version int `json:"version"` +} + // Tracker is used to track metrics for AI Config evaluation. 
// Unless otherwise noted, the Tracker's methods are not safe for concurrent use. type Tracker struct { - key string - config *Config - context ldcontext.Context - events EventSink - trackData ldvalue.Value - logger interfaces.LDLoggers - stopwatch Stopwatch + key string + runID string + variationKey string + version int + config *Config + context ldcontext.Context + events EventSink + trackData ldvalue.Value + logger interfaces.LDLoggers + stopwatch Stopwatch duration ldcommon.Option[time.Duration] feedback ldcommon.Option[Feedback] @@ -139,28 +161,30 @@ func (d *defaultStopwatch) Stop() time.Duration { return time.Since(d.start) } -// newTracker creates a new Tracker with the specified key, event sink, config, context, and loggers. +// newTracker creates a new Tracker with the specified runID, key, event sink, config, context, and loggers. func newTracker( + events EventSink, + runID string, key string, variationKey string, version int, - events EventSink, - config *Config, ctx ldcontext.Context, + config *Config, loggers interfaces.LDLoggers, ) *Tracker { - return newTrackerWithStopwatch(key, variationKey, version, events, config, ctx, loggers, &defaultStopwatch{}) + return newTrackerWithStopwatch(events, runID, key, variationKey, version, ctx, config, loggers, &defaultStopwatch{}) } -// newTrackerWithStopwatch creates a new Tracker with the specified key, event sink, config, context, loggers, and -// stopwatch. This method is used for testing purposes. +// newTrackerWithStopwatch creates a new Tracker with the specified runID, key, event sink, config, context, loggers, +// and stopwatch. This method is used for testing purposes. 
func newTrackerWithStopwatch( + events EventSink, + runID string, key string, variationKey string, version int, - events EventSink, - config *Config, ctx ldcontext.Context, + config *Config, loggers interfaces.LDLoggers, stopwatch Stopwatch, ) *Tracker { @@ -168,22 +192,28 @@ func newTrackerWithStopwatch( panic("LaunchDarkly SDK programmer error: config must never be nil") } - trackData := ldvalue.ObjectBuild(). - Set("variationKey", ldvalue.String(variationKey)). + builder := ldvalue.ObjectBuild(). + Set("runId", ldvalue.String(runID)). Set("configKey", ldvalue.String(key)). Set("version", ldvalue.Int(version)). Set("providerName", ldvalue.String(config.ProviderName())). - Set("modelName", ldvalue.String(config.ModelName())). - Build() + Set("modelName", ldvalue.String(config.ModelName())) + if variationKey != "" { + builder.Set("variationKey", ldvalue.String(variationKey)) + } + trackData := builder.Build() return &Tracker{ - key: key, - config: config, - trackData: trackData, - events: events, - context: ctx, - logger: loggers, - stopwatch: stopwatch, + key: key, + runID: runID, + variationKey: variationKey, + version: version, + config: config, + trackData: trackData, + events: events, + context: ctx, + logger: loggers, + stopwatch: stopwatch, } } @@ -192,10 +222,70 @@ func (t *Tracker) logWarning(format string, args ...interface{}) { t.logger.Warnf(prefix+format, args...) } +// ResumptionToken returns a URL-safe Base64-encoded token that can be used to reconstruct a tracker +// in a different process (e.g., for deferred feedback). The token contains the runId, configKey, +// variationKey, and version. It does not contain modelName or providerName. 
+func (t *Tracker) ResumptionToken() string { + payload := resumptionPayload{ + RunID: t.runID, + ConfigKey: t.key, + VariationKey: t.variationKey, + Version: t.version, + } + jsonBytes, _ := json.Marshal(payload) + return base64.RawURLEncoding.EncodeToString(jsonBytes) +} + +// TrackerFromResumptionToken reconstructs a Tracker from a resumption token and the given context. +// This is used for cross-process scenarios (e.g., deferred feedback) where the original tracker +// is no longer available but its runId must be reused. The token is obtained from Tracker.ResumptionToken(). +// The reconstructed tracker will have empty modelName and providerName since these are not included +// in the token. +func TrackerFromResumptionToken(token string, sdk ServerSDK, context ldcontext.Context) (*Tracker, error) { + decoded, err := base64.RawURLEncoding.DecodeString(token) + if err != nil { + return nil, fmt.Errorf("invalid resumption token: %w", err) + } + var payload resumptionPayload + if err := json.Unmarshal(decoded, &payload); err != nil { + return nil, fmt.Errorf("invalid resumption token: %w", err) + } + + builder := ldvalue.ObjectBuild(). + Set("runId", ldvalue.String(payload.RunID)). + Set("configKey", ldvalue.String(payload.ConfigKey)). + Set("version", ldvalue.Int(payload.Version)). + Set("providerName", ldvalue.String("")). + Set("modelName", ldvalue.String("")) + if payload.VariationKey != "" { + builder.Set("variationKey", ldvalue.String(payload.VariationKey)) + } + trackData := builder.Build() + + emptyConfig := Disabled() + + return &Tracker{ + key: payload.ConfigKey, + runID: payload.RunID, + variationKey: payload.VariationKey, + version: payload.Version, + config: &emptyConfig, + trackData: trackData, + events: sdk, + context: context, + logger: sdk.Loggers(), + stopwatch: &defaultStopwatch{}, + }, nil +} + // TrackDuration tracks the duration of a task. For example, the duration of a model evaluation request may be // tracked here. See also TrackRequest. 
// The duration in milliseconds must fit within a float64. func (t *Tracker) TrackDuration(dur time.Duration) error { + if t.duration.IsSome() { + t.logWarning("Duration has already been tracked for this execution. %s", t.trackData.JSONString()) + return nil + } t.duration = ldcommon.Some(dur) return t.events.TrackMetric(duration, t.context, float64(dur.Milliseconds()), t.trackData) } @@ -203,6 +293,10 @@ func (t *Tracker) TrackDuration(dur time.Duration) error { // TrackFeedback tracks the feedback provided by a user for a model evaluation. If the feedback is not // FeedbackPositive or FeedbackNegative, returns an error and does not track anything. func (t *Tracker) TrackFeedback(feedback Feedback) error { + if t.feedback.IsSome() { + t.logWarning("Feedback has already been tracked for this execution. %s", t.trackData.JSONString()) + return nil + } switch feedback { case FeedbackPositive: t.feedback = ldcommon.Some(feedback) @@ -217,6 +311,10 @@ func (t *Tracker) TrackFeedback(feedback Feedback) error { // TrackSuccess tracks a successful model evaluation. func (t *Tracker) TrackSuccess() error { + if t.success.IsSome() { + t.logWarning("Success/error has already been tracked for this execution. %s", t.trackData.JSONString()) + return nil + } t.success = ldcommon.Some(true) return t.events.TrackMetric(generationSuccess, t.context, 1, t.trackData) @@ -224,6 +322,10 @@ func (t *Tracker) TrackSuccess() error { // TrackError tracks an unsuccessful model evaluation. func (t *Tracker) TrackError() error { + if t.success.IsSome() { + t.logWarning("Success/error has already been tracked for this execution. %s", t.trackData.JSONString()) + return nil + } t.success = ldcommon.Some(false) return t.events.TrackMetric(generationError, t.context, 1, t.trackData) @@ -231,12 +333,21 @@ func (t *Tracker) TrackError() error { // TrackTimeToFirstToken tracks the time to the first token of the streamed response. 
func (t *Tracker) TrackTimeToFirstToken(dur time.Duration) error { + if t.timeToFirstToken.IsSome() { + t.logWarning("Time to first token has already been tracked for this execution. %s", t.trackData.JSONString()) + return nil + } t.timeToFirstToken = ldcommon.Some(dur) return t.events.TrackMetric(timeToFirstToken, t.context, float64(dur.Milliseconds()), t.trackData) } // TrackUsage tracks the token usage for a model evaluation. func (t *Tracker) TrackUsage(usage TokenUsage) error { + if t.tokens.IsSome() { + t.logWarning("Usage has already been tracked for this execution. %s", t.trackData.JSONString()) + return nil + } + if usage.Set() { t.tokens = ldcommon.Some(usage) } diff --git a/ldai/tracker_test.go b/ldai/tracker_test.go index 29bb71e4..a95d7725 100644 --- a/ldai/tracker_test.go +++ b/ldai/tracker_test.go @@ -1,6 +1,8 @@ package ldai import ( + "encoding/base64" + "encoding/json" "testing" "time" @@ -37,38 +39,50 @@ func (m *mockEvents) TrackMetric(eventName string, context ldcontext.Context, me func TestTracker_NewPanicsWithNilConfig(t *testing.T) { assert.Panics(t, func() { - newTracker("key", "variationKey", 1, newMockEvents(), nil, ldcontext.New("key"), nil) + newTracker(newMockEvents(), newRunID(), "key", "variationKey", 1, ldcontext.New("key"), nil, nil) }) } func TestTracker_NewDoesNotPanicWithConfig(t *testing.T) { assert.NotPanics(t, func() { - newTracker("key", "variationKey", 1, newMockEvents(), &Config{}, ldcontext.New("key"), nil) + newTracker(newMockEvents(), newRunID(), "key", "variationKey", 1, ldcontext.New("key"), &Config{}, nil) }) } -func makeTrackData(configKey, variationKey string, version int, config *Config) ldvalue.Value { - return ldvalue.ObjectBuild(). - Set("variationKey", ldvalue.String(variationKey)). +func makeTrackData(configKey, variationKey string, version int, config *Config, runId string) ldvalue.Value { + builder := ldvalue.ObjectBuild(). + Set("runId", ldvalue.String(runId)). Set("configKey", ldvalue.String(configKey)). 
Set("version", ldvalue.Int(version)). Set("providerName", ldvalue.String(config.ProviderName())). - Set("modelName", ldvalue.String(config.ModelName())). - Build() + Set("modelName", ldvalue.String(config.ModelName())) + if variationKey != "" { + builder.Set("variationKey", ldvalue.String(variationKey)) + } + return builder.Build() +} + +func extractRunId(t *testing.T, events *mockEvents) string { + t.Helper() + require.NotEmpty(t, events.events, "expected at least one event to extract runId") + runId := events.events[0].data.GetByKey("runId").StringValue() + require.NotEmpty(t, runId, "expected runId to be non-empty") + return runId } func TestTracker_TrackSuccess(t *testing.T) { events := newMockEvents() config := &Config{} - tracker := newTracker("key", "variationKey", 1, events, config, ldcontext.New("key"), nil) + tracker := newTracker(events, newRunID(), "key", "variationKey", 1, ldcontext.New("key"), config, nil) assert.NoError(t, tracker.TrackSuccess()) + runId := extractRunId(t, events) expectedEvents := []trackEvent{ { name: "$ld:ai:generation:success", context: ldcontext.New("key"), metricValue: 1.0, - data: makeTrackData("key", "variationKey", 1, config), + data: makeTrackData("key", "variationKey", 1, config, runId), }, } @@ -78,15 +92,16 @@ func TestTracker_TrackSuccess(t *testing.T) { func TestTracker_TrackError(t *testing.T) { events := newMockEvents() config := &Config{} - tracker := newTracker("key", "variationKey", 2, events, config, ldcontext.New("key"), nil) + tracker := newTracker(events, newRunID(), "key", "variationKey", 2, ldcontext.New("key"), config, nil) assert.NoError(t, tracker.TrackError()) + runId := extractRunId(t, events) expectedEvents := []trackEvent{ { name: "$ld:ai:generation:error", context: ldcontext.New("key"), metricValue: 1.0, - data: makeTrackData("key", "variationKey", 2, config), + data: makeTrackData("key", "variationKey", 2, config, runId), }, } @@ -96,7 +111,7 @@ func TestTracker_TrackError(t *testing.T) { func 
TestTracker_TrackRequest(t *testing.T) { events := newMockEvents() config := &Config{} - tracker := newTracker("key", "variationKey", 3, events, config, ldcontext.New("key"), nil) + tracker := newTracker(events, newRunID(), "key", "variationKey", 3, ldcontext.New("key"), config, nil) expectedResponse := ProviderResponse{ Usage: TokenUsage{ @@ -115,30 +130,31 @@ func TestTracker_TrackRequest(t *testing.T) { assert.NoError(t, err) assert.Equal(t, expectedResponse, r) + runId := extractRunId(t, events) expectedEvents := []trackEvent{ { name: "$ld:ai:generation:success", context: ldcontext.New("key"), metricValue: 1, - data: makeTrackData("key", "variationKey", 3, config), + data: makeTrackData("key", "variationKey", 3, config, runId), }, { name: "$ld:ai:duration:total", context: ldcontext.New("key"), metricValue: 10.0, - data: makeTrackData("key", "variationKey", 3, config), + data: makeTrackData("key", "variationKey", 3, config, runId), }, { name: "$ld:ai:tokens:total", context: ldcontext.New("key"), metricValue: 1, - data: makeTrackData("key", "variationKey", 3, config), + data: makeTrackData("key", "variationKey", 3, config, runId), }, { name: "$ld:ai:tokens:ttf", context: ldcontext.New("key"), metricValue: 42.0, - data: makeTrackData("key", "variationKey", 3, config), + data: makeTrackData("key", "variationKey", 3, config, runId), }, } @@ -157,7 +173,7 @@ func TestTracker_TrackRequestReceivesConfig(t *testing.T) { Enable(). 
Build() - tracker := newTracker("key", "variationKey", 4, events, &expectedConfig, ldcontext.New("key"), nil) + tracker := newTracker(events, newRunID(), "key", "variationKey", 4, ldcontext.New("key"), &expectedConfig, nil) var gotConfig *Config _, _ = tracker.TrackRequest(func(c *Config) (ProviderResponse, error) { @@ -181,7 +197,7 @@ func TestTracker_LatencyMeasuredIfNotProvided(t *testing.T) { config := &Config{} tracker := newTrackerWithStopwatch( - "key", "variationKey", 5, events, config, ldcontext.New("key"), nil, mockStopwatch(42*time.Millisecond)) + events, newRunID(), "key", "variationKey", 5, ldcontext.New("key"), config, nil, mockStopwatch(42*time.Millisecond)) expectedResponse := ProviderResponse{ Usage: TokenUsage{ @@ -205,61 +221,84 @@ func TestTracker_LatencyMeasuredIfNotProvided(t *testing.T) { func TestTracker_TrackDuration(t *testing.T) { events := newMockEvents() config := &Config{} - tracker := newTracker("key", "variationKey", 6, events, config, ldcontext.New("key"), nil) + tracker := newTracker(events, newRunID(), "key", "variationKey", 6, ldcontext.New("key"), config, nil) assert.NoError(t, tracker.TrackDuration(time.Millisecond*10)) + runId := extractRunId(t, events) expectedEvent := trackEvent{ name: "$ld:ai:duration:total", context: ldcontext.New("key"), metricValue: 10.0, - data: makeTrackData("key", "variationKey", 6, config), + data: makeTrackData("key", "variationKey", 6, config, runId), } assert.ElementsMatch(t, []trackEvent{expectedEvent}, events.events) } func TestTracker_TrackFeedback(t *testing.T) { - events := newMockEvents() - config := &Config{} - tracker := newTracker("key", "variationKey", 7, events, config, ldcontext.New("key"), nil) + t.Run("positive feedback", func(t *testing.T) { + events := newMockEvents() + config := &Config{} + tracker := newTracker(events, newRunID(), "key", "variationKey", 7, ldcontext.New("key"), config, nil) - assert.NoError(t, tracker.TrackFeedback(FeedbackPositive)) - assert.NoError(t, 
tracker.TrackFeedback(FeedbackNegative)) - assert.Error(t, tracker.TrackFeedback("not a valid feedback value")) + assert.NoError(t, tracker.TrackFeedback(FeedbackPositive)) - expectedPositiveEvent := trackEvent{ - name: "$ld:ai:feedback:user:positive", - context: ldcontext.New("key"), - metricValue: 1.0, - data: makeTrackData("key", "variationKey", 7, config), - } + runId := extractRunId(t, events) + expectedEvent := trackEvent{ + name: "$ld:ai:feedback:user:positive", + context: ldcontext.New("key"), + metricValue: 1.0, + data: makeTrackData("key", "variationKey", 7, config, runId), + } - expectedNegativeEvent := trackEvent{ - name: "$ld:ai:feedback:user:negative", - context: ldcontext.New("key"), - metricValue: 1.0, - data: makeTrackData("key", "variationKey", 7, config), - } + assert.ElementsMatch(t, []trackEvent{expectedEvent}, events.events) + }) + + t.Run("negative feedback", func(t *testing.T) { + events := newMockEvents() + config := &Config{} + tracker := newTracker(events, newRunID(), "key", "variationKey", 7, ldcontext.New("key"), config, nil) + + assert.NoError(t, tracker.TrackFeedback(FeedbackNegative)) + + runId := extractRunId(t, events) + expectedEvent := trackEvent{ + name: "$ld:ai:feedback:user:negative", + context: ldcontext.New("key"), + metricValue: 1.0, + data: makeTrackData("key", "variationKey", 7, config, runId), + } - assert.ElementsMatch(t, []trackEvent{expectedPositiveEvent, expectedNegativeEvent}, events.events) + assert.ElementsMatch(t, []trackEvent{expectedEvent}, events.events) + }) + + t.Run("invalid feedback returns error", func(t *testing.T) { + events := newMockEvents() + config := &Config{} + tracker := newTracker(events, newRunID(), "key", "variationKey", 7, ldcontext.New("key"), config, nil) + + assert.Error(t, tracker.TrackFeedback("not a valid feedback value")) + assert.Empty(t, events.events) + }) } func TestTracker_TrackUsage(t *testing.T) { t.Run("only one field set, only one event", func(t *testing.T) { events := 
newMockEvents() config := &Config{} - tracker := newTracker("key", "variationKey", 8, events, config, ldcontext.New("key"), nil) + tracker := newTracker(events, newRunID(), "key", "variationKey", 8, ldcontext.New("key"), config, nil) assert.NoError(t, tracker.TrackUsage(TokenUsage{ Total: 42, })) + runId := extractRunId(t, events) expectedEvent := trackEvent{ name: "$ld:ai:tokens:total", context: ldcontext.New("key"), metricValue: 42.0, - data: makeTrackData("key", "variationKey", 8, config), + data: makeTrackData("key", "variationKey", 8, config, runId), } assert.ElementsMatch(t, []trackEvent{expectedEvent}, events.events) @@ -268,7 +307,7 @@ func TestTracker_TrackUsage(t *testing.T) { t.Run("all fields set, all events", func(t *testing.T) { events := newMockEvents() config := &Config{} - tracker := newTracker("key", "variationKey", 9, events, config, ldcontext.New("key"), nil) + tracker := newTracker(events, newRunID(), "key", "variationKey", 9, ldcontext.New("key"), config, nil) assert.NoError(t, tracker.TrackUsage(TokenUsage{ Total: 42, @@ -276,25 +315,26 @@ func TestTracker_TrackUsage(t *testing.T) { Output: 22, })) + runId := extractRunId(t, events) expectedTotal := trackEvent{ name: "$ld:ai:tokens:total", context: ldcontext.New("key"), metricValue: 42.0, - data: makeTrackData("key", "variationKey", 9, config), + data: makeTrackData("key", "variationKey", 9, config, runId), } expectedInput := trackEvent{ name: "$ld:ai:tokens:input", context: ldcontext.New("key"), metricValue: 20.0, - data: makeTrackData("key", "variationKey", 9, config), + data: makeTrackData("key", "variationKey", 9, config, runId), } expectedOutput := trackEvent{ name: "$ld:ai:tokens:output", context: ldcontext.New("key"), metricValue: 22.0, - data: makeTrackData("key", "variationKey", 9, config), + data: makeTrackData("key", "variationKey", 9, config, runId), } assert.ElementsMatch(t, []trackEvent{expectedTotal, expectedInput, expectedOutput}, events.events) @@ -304,7 +344,7 @@ func 
TestTracker_TrackUsage(t *testing.T) { func TestTracker_GetSummary(t *testing.T) { t.Run("empty summary when nothing tracked", func(t *testing.T) { events := newMockEvents() - tracker := newTracker("key", "variationKey", 10, events, &Config{}, ldcontext.New("key"), nil) + tracker := newTracker(events, newRunID(), "key", "variationKey", 10, ldcontext.New("key"), &Config{}, nil) summary := tracker.GetSummary() @@ -315,9 +355,9 @@ func TestTracker_GetSummary(t *testing.T) { assert.True(t, summary.TimeToFirstToken.IsNone()) }) - t.Run("latest duration is returned", func(t *testing.T) { + t.Run("first duration is returned", func(t *testing.T) { events := newMockEvents() - tracker := newTracker("key", "variationKey", 11, events, &Config{}, ldcontext.New("key"), nil) + tracker := newTracker(events, newRunID(), "key", "variationKey", 11, ldcontext.New("key"), &Config{}, events.log.Loggers) _ = tracker.TrackDuration(time.Millisecond * 10) _ = tracker.TrackDuration(time.Millisecond * 20) @@ -325,12 +365,12 @@ func TestTracker_GetSummary(t *testing.T) { summary := tracker.GetSummary() assert.True(t, summary.Duration.IsSome()) - assert.Equal(t, time.Millisecond*20, summary.Duration.Unwrap()) + assert.Equal(t, time.Millisecond*10, summary.Duration.Unwrap()) }) - t.Run("latest feedback is returned", func(t *testing.T) { + t.Run("first feedback is returned", func(t *testing.T) { events := newMockEvents() - tracker := newTracker("key", "variationKey", 12, events, &Config{}, ldcontext.New("key"), nil) + tracker := newTracker(events, newRunID(), "key", "variationKey", 12, ldcontext.New("key"), &Config{}, events.log.Loggers) _ = tracker.TrackFeedback(FeedbackPositive) _ = tracker.TrackFeedback(FeedbackNegative) @@ -338,12 +378,12 @@ func TestTracker_GetSummary(t *testing.T) { summary := tracker.GetSummary() assert.True(t, summary.Feedback.IsSome()) - assert.Equal(t, FeedbackNegative, summary.Feedback.Unwrap()) + assert.Equal(t, FeedbackPositive, summary.Feedback.Unwrap()) }) 
t.Run("success status tracked correctly", func(t *testing.T) { events := newMockEvents() - tracker := newTracker("key", "variationKey", 13, events, &Config{}, ldcontext.New("key"), nil) + tracker := newTracker(events, newRunID(), "key", "variationKey", 13, ldcontext.New("key"), &Config{}, nil) _ = tracker.TrackSuccess() @@ -351,18 +391,11 @@ func TestTracker_GetSummary(t *testing.T) { assert.True(t, summary.Success.IsSome()) assert.True(t, summary.Success.Unwrap()) - - _ = tracker.TrackError() - - summary = tracker.GetSummary() - - assert.True(t, summary.Success.IsSome()) - assert.False(t, summary.Success.Unwrap()) }) t.Run("time to first token is returned", func(t *testing.T) { events := newMockEvents() - tracker := newTracker("key", "variationKey", 14, events, &Config{}, ldcontext.New("key"), nil) + tracker := newTracker(events, newRunID(), "key", "variationKey", 14, ldcontext.New("key"), &Config{}, nil) duration := time.Millisecond * 30 _ = tracker.TrackTimeToFirstToken(duration) @@ -375,7 +408,7 @@ func TestTracker_GetSummary(t *testing.T) { t.Run("token usage is returned", func(t *testing.T) { events := newMockEvents() - tracker := newTracker("key", "variationKey", 15, events, &Config{}, ldcontext.New("key"), nil) + tracker := newTracker(events, newRunID(), "key", "variationKey", 15, ldcontext.New("key"), &Config{}, nil) usage := TokenUsage{ Total: 100, @@ -390,3 +423,177 @@ func TestTracker_GetSummary(t *testing.T) { assert.Equal(t, usage, summary.Tokens.Unwrap()) }) } + +func TestTracker_RunIdPresentInTrackData(t *testing.T) { + events := newMockEvents() + config := &Config{} + tracker := newTracker(events, newRunID(), "key", "variationKey", 1, ldcontext.New("key"), config, nil) + _ = tracker.TrackSuccess() + + require.NotEmpty(t, events.events) + data := events.events[0].data + runId := data.GetByKey("runId").StringValue() + assert.NotEmpty(t, runId, "runId should be present and non-empty in track data") +} + +func TestTracker_AtMostOnce(t *testing.T) { + 
t.Run("TrackDuration only tracks once", func(t *testing.T) { + events := newMockEvents() + config := &Config{} + tracker := newTracker(events, newRunID(), "key", "variationKey", 1, ldcontext.New("key"), config, events.log.Loggers) + + assert.NoError(t, tracker.TrackDuration(10*time.Millisecond)) + assert.NoError(t, tracker.TrackDuration(20*time.Millisecond)) + + count := 0 + for _, e := range events.events { + if e.name == "$ld:ai:duration:total" { + count++ + } + } + assert.Equal(t, 1, count, "TrackDuration should only emit one event") + }) + + t.Run("TrackTimeToFirstToken only tracks once", func(t *testing.T) { + events := newMockEvents() + config := &Config{} + tracker := newTracker(events, newRunID(), "key", "variationKey", 1, ldcontext.New("key"), config, events.log.Loggers) + + assert.NoError(t, tracker.TrackTimeToFirstToken(10*time.Millisecond)) + assert.NoError(t, tracker.TrackTimeToFirstToken(20*time.Millisecond)) + + count := 0 + for _, e := range events.events { + if e.name == "$ld:ai:tokens:ttf" { + count++ + } + } + assert.Equal(t, 1, count, "TrackTimeToFirstToken should only emit one event") + }) + + t.Run("TrackUsage only tracks once", func(t *testing.T) { + events := newMockEvents() + config := &Config{} + tracker := newTracker(events, newRunID(), "key", "variationKey", 1, ldcontext.New("key"), config, events.log.Loggers) + + assert.NoError(t, tracker.TrackUsage(TokenUsage{Total: 10})) + assert.NoError(t, tracker.TrackUsage(TokenUsage{Total: 20})) + + count := 0 + for _, e := range events.events { + if e.name == "$ld:ai:tokens:total" { + count++ + } + } + assert.Equal(t, 1, count, "TrackUsage should only emit one event") + }) + + t.Run("TrackFeedback only tracks once", func(t *testing.T) { + events := newMockEvents() + config := &Config{} + tracker := newTracker(events, newRunID(), "key", "variationKey", 1, ldcontext.New("key"), config, events.log.Loggers) + + assert.NoError(t, tracker.TrackFeedback(FeedbackPositive)) + assert.NoError(t, 
tracker.TrackFeedback(FeedbackNegative)) + + count := 0 + for _, e := range events.events { + if e.name == "$ld:ai:feedback:user:positive" || e.name == "$ld:ai:feedback:user:negative" { + count++ + } + } + assert.Equal(t, 1, count, "TrackFeedback should only emit one event") + }) + + t.Run("TrackSuccess only tracks once", func(t *testing.T) { + events := newMockEvents() + config := &Config{} + tracker := newTracker(events, newRunID(), "key", "variationKey", 1, ldcontext.New("key"), config, events.log.Loggers) + + assert.NoError(t, tracker.TrackSuccess()) + assert.NoError(t, tracker.TrackSuccess()) + + count := 0 + for _, e := range events.events { + if e.name == "$ld:ai:generation:success" { + count++ + } + } + assert.Equal(t, 1, count, "TrackSuccess should only emit one event") + }) + + t.Run("TrackError only tracks once", func(t *testing.T) { + events := newMockEvents() + config := &Config{} + tracker := newTracker(events, newRunID(), "key", "variationKey", 1, ldcontext.New("key"), config, events.log.Loggers) + + assert.NoError(t, tracker.TrackError()) + assert.NoError(t, tracker.TrackError()) + + count := 0 + for _, e := range events.events { + if e.name == "$ld:ai:generation:error" { + count++ + } + } + assert.Equal(t, 1, count, "TrackError should only emit one event") + }) + + t.Run("TrackSuccess then TrackError only tracks success", func(t *testing.T) { + events := newMockEvents() + config := &Config{} + tracker := newTracker(events, newRunID(), "key", "variationKey", 1, ldcontext.New("key"), config, events.log.Loggers) + + assert.NoError(t, tracker.TrackSuccess()) + assert.NoError(t, tracker.TrackError()) + + assert.Equal(t, 1, len(events.events)) + assert.Equal(t, "$ld:ai:generation:success", events.events[0].name) + }) +} + +func TestTracker_ResumptionToken(t *testing.T) { + t.Run("produces valid base64url-encoded token", func(t *testing.T) { + events := newMockEvents() + config := &Config{} + tracker := newTracker(events, newRunID(), "my-config", "var-1", 
3, ldcontext.New("key"), config, nil) + + token := tracker.ResumptionToken() + assert.NotEmpty(t, token) + + // Decode and verify + decoded, err := base64.RawURLEncoding.DecodeString(token) + require.NoError(t, err) + + var payload struct { + RunID string `json:"runId"` + ConfigKey string `json:"configKey"` + VariationKey string `json:"variationKey"` + Version int `json:"version"` + } + require.NoError(t, json.Unmarshal(decoded, &payload)) + + assert.NotEmpty(t, payload.RunID) + assert.Equal(t, "my-config", payload.ConfigKey) + assert.Equal(t, "var-1", payload.VariationKey) + assert.Equal(t, 3, payload.Version) + }) + + t.Run("does not include modelName or providerName", func(t *testing.T) { + events := newMockEvents() + config := NewConfig().WithModelName("gpt-4").WithProviderName("openai").Build() + tracker := newTracker(events, newRunID(), "key", "var", 1, ldcontext.New("key"), &config, nil) + + token := tracker.ResumptionToken() + decoded, err := base64.RawURLEncoding.DecodeString(token) + require.NoError(t, err) + + var raw map[string]interface{} + require.NoError(t, json.Unmarshal(decoded, &raw)) + + _, hasModel := raw["modelName"] + _, hasProvider := raw["providerName"] + assert.False(t, hasModel, "token should not contain modelName") + assert.False(t, hasProvider, "token should not contain providerName") + }) +}