Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions examples/kitchen-sink/axiom.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export default defineConfig({

dataset: process.env.NEXT_PUBLIC_AXIOM_DATASET,
url: process.env.NEXT_PUBLIC_AXIOM_URL,
edgeUrl: process.env.NEXT_PUBLIC_AXIOM_EDGE_URL,
token: process.env.AXIOM_TOKEN,

flagSchema,
Expand Down
13 changes: 12 additions & 1 deletion packages/ai/src/cli/commands/eval.command.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import { Command, Argument, Option } from 'commander';
import { customAlphabet } from 'nanoid';
import { lstatSync } from 'node:fs';
import c from 'tinyrainbow';

import { runEvalWithContext } from '../utils/eval-context-runner';
import { validateFlagOverrides, type FlagOverrides } from '../utils/parse-flag-overrides';
import { isGlob } from '../utils/glob-utils';
import { loadConfig } from '../../config/loader';
import { AxiomCLIError } from '../../util/errors';
import { getAuthContext } from '../auth/global-auth';
import c from 'tinyrainbow';
import { validateTokenPermissions } from '../../config/validate-eval-token-permissions';

// Generator for run IDs: each call returns a 10-character string drawn from digits and uppercase ASCII letters.
const createRunId = customAlphabet('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ', 10);

Expand Down Expand Up @@ -132,6 +134,15 @@ export const loadEvalCommand = (program: Command, flagOverrides: FlagOverrides =
console.log('');
}

// Validate token permissions before running evals (skip in debug mode)
if (!options.debug) {
const result = await validateTokenPermissions(config);
if (!result.valid) {
console.error(`\n❌ ${result.errors.join('\n')} \n`);
process.exit(1);
}
}

const runId = createRunId();

consoleUrl = options.consoleUrl;
Expand Down
29 changes: 28 additions & 1 deletion packages/ai/src/config/resolver.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,26 @@
import type { AxiomEvalInstrumentationOptions, ResolvedAxiomConfig } from './index';

/** Region returned when the edge URL does not carry one (unparseable URL, non-edge API host, or local host). */
const DEFAULT_EDGE_REGION = 'us-east-1';
/** API hosts whose first hostname label is not a region. */
const NON_EDGE_HOSTS = new Set(['api.axiom.co', 'api.dev.axiomtestlabs.co']);
/** Loopback / wildcard hosts; these never carry a region either. */
const LOCALHOST_HOSTS = new Set(['localhost', '127.0.0.1', '::1', '0.0.0.0']);

/**
 * Derives the edge region from an edge URL by taking the first label of its hostname.
 *
 * @param edgeUrl - URL used for ingest/query operations
 * @returns The lower-cased first hostname label, or `DEFAULT_EDGE_REGION` when the URL cannot be
 *   parsed, points at a known non-edge or local host, or is an IPv6 literal
 */
export function resolveEdgeRegion(edgeUrl: string): string {
  let hostname: string;

  try {
    // `URL.hostname` is already normalised, lower-casing again is a cheap safety net.
    hostname = new URL(edgeUrl).hostname.toLowerCase();
  } catch {
    return DEFAULT_EDGE_REGION;
  }

  // `URL.hostname` keeps the square brackets around IPv6 literals (e.g. "[::1]"), so such hosts
  // never match the plain '::1' entry in LOCALHOST_HOSTS. An IP literal has no region label anyway.
  const isIpv6Literal = hostname.startsWith('[') && hostname.endsWith(']');

  if (isIpv6Literal || NON_EDGE_HOSTS.has(hostname) || LOCALHOST_HOSTS.has(hostname)) {
    return DEFAULT_EDGE_REGION;
  }

  const [region] = hostname.split('.');
  return region || DEFAULT_EDGE_REGION;
}

/**
* Builds a resources URL under the assumption that the API URL is in the format of https://api.axiom.co by replacing the subdomain with app.
* @param urlString - The API URL
Expand All @@ -8,6 +29,10 @@ import type { AxiomEvalInstrumentationOptions, ResolvedAxiomConfig } from './ind
// Derives the console URL from an API URL by swapping the leading `api.` host label for `app.`.
// URLs whose hostname is `localhost` are returned unchanged.
const buildConsoleUrl = (urlString: string) => {
  const url = new URL(urlString);

  // Compare `hostname` (no port) rather than `host`, so that `http://localhost` without an
  // explicit port is also left untouched instead of being rewritten to `app.localhost`.
  if (url.hostname === 'localhost') {
    return urlString;
  }

  // Keep whatever follows the last `api.` occurrence; split() always yields at least one element.
  const hostParts = url.host.split('api.');
  return `${url.protocol}//app.${hostParts[hostParts.length - 1]}`;
};

Expand All @@ -23,14 +48,16 @@ const buildConsoleUrl = (urlString: string) => {
export function resolveAxiomConnection(
config: ResolvedAxiomConfig,
consoleUrlOverride?: string,
): AxiomEvalInstrumentationOptions & { consoleEndpointUrl: string } {
): AxiomEvalInstrumentationOptions & { edgeRegion: string; consoleEndpointUrl: string } {
const consoleEndpointUrl = consoleUrlOverride ?? buildConsoleUrl(config.eval.url);
// Use edgeUrl for ingest/query operations, falling back to url if not specified
const edgeUrl = config.eval.edgeUrl || config.eval.url;
const edgeRegion = resolveEdgeRegion(edgeUrl);

return {
url: config.eval.url,
edgeUrl,
edgeRegion,
consoleEndpointUrl: consoleEndpointUrl,
token: config.eval.token,
dataset: config.eval.dataset,
Expand Down
144 changes: 144 additions & 0 deletions packages/ai/src/config/validate-eval-token-permissions.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import type { ResolvedAxiomConfig } from './index';
import { resolveAxiomConnection } from './resolver';
import { AxiomCLIError } from '../util/errors';

/**
 * Result of token permission validation, as returned by `validateTokenPermissions`.
 */
export interface TokenPermissionValidationResult {
/** False when validation did not pass; callers check this flag before running evals. */
valid: boolean;
/** Dataset name — presumably the dataset the token was validated against (TODO confirm against the API response). */
dataset: string;
/** Read/write capability flags. NOTE(review): exact semantics come from the validate endpoint — confirm. */
permissions: {
canRead: boolean;
canWrite: boolean;
};
/** Error messages; `validateTokenPermissions` replaces them with user-facing guidance when `valid` is false. */
errors: string[];
}

/**
 * Validates that the configured token has the required permissions to run evaluations.
 *
 * This comment documents `validateTokenPermissions`, defined further down in this file. That function:
 * 1. Sends one request to the `/api/v3/evaluations/validate` endpoint for the configured dataset and edge region
 * 2. Returns the server's validation result, rewriting `errors` into user-facing guidance when `valid` is false
 *
 * @param config - Resolved Axiom configuration
 * @returns Validation result with permission details
 * @throws {AxiomCLIError} If the request fails or the server responds with a non-2xx status
 */
/**
 * Produces the help lines appended to permission-related error messages.
 *
 * @param consoleEndpointUrl - Base console URL used to build the token settings link
 * @returns Four lines: a heading, the two required permissions, and the token management link
 */
const buildPermissionHelp = (consoleEndpointUrl: string) => {
  const tokenSettingsLine = `Manage tokens at: ${consoleEndpointUrl}/settings/api-tokens`;

  return [
    'To run evaluations, your token needs:',
    ' - Write permission to ingest traces',
    ' - Read permission to query results',
    tokenSettingsLine,
  ];
};

/**
 * Prefixes every line except the first with a single space, so detail lines sit under the headline.
 *
 * @param lines - Headline followed by detail lines
 * @returns A new array; the input is not modified
 */
const indentErrorDetails = (lines: string[]) => {
  const formatted: string[] = [];

  lines.forEach((text, position) => {
    formatted.push(position > 0 ? ` ${text}` : text);
  });

  return formatted;
};

/**
 * Turns the raw error strings of a failed validation into user-facing message lines.
 *
 * Errors mentioning "ingest"/"write" or "read"/"query" (case-insensitive) are mapped to fixed
 * missing-permission sentences. If none match, the raw errors are passed through unchanged.
 *
 * @param errors - Raw error messages, possibly undefined
 * @param dataset - Dataset name shown in the headline
 * @param consoleEndpointUrl - Base console URL used for the token management link
 * @returns Headline, detail lines and permission help, with all but the first line indented
 */
const formatPermissionErrors = (
  errors: string[] | undefined,
  dataset: string,
  consoleEndpointUrl: string,
) => {
  const lowered = errors ? errors.map((message) => message.toLowerCase()) : [];
  const mentionsAny = (...keywords: string[]) =>
    lowered.some((message) => keywords.some((keyword) => message.includes(keyword)));

  const findings: string[] = [];
  if (mentionsAny('ingest', 'write')) {
    findings.push('Missing write permission to ingest traces.');
  }
  if (mentionsAny('read', 'query')) {
    findings.push('Missing read permission to query results.');
  }

  // Nothing recognised: fall back to the raw messages.
  if (findings.length === 0 && errors && errors.length > 0) {
    findings.push(...errors);
  }

  const headline = `Token does not have required permissions for dataset "${dataset}".`;
  return indentErrorDetails([headline, ...findings, ...buildPermissionHelp(consoleEndpointUrl)]);
};

/**
 * Asks the Axiom API whether the configured token may run evaluations against the configured dataset.
 *
 * Sends one request to `/api/v3/evaluations/validate`, passing the dataset and the resolved edge
 * region as query parameters. Nothing is written to the console; all failures surface as errors.
 *
 * @param config - Resolved Axiom configuration
 * @returns The validation result; when `valid` is false, `errors` is replaced with user-facing guidance
 * @throws {AxiomCLIError} On network failure or any non-2xx response. 404, 401 and 403 get tailored
 *   messages; other statuses get a generic message with permission help.
 */
export async function validateTokenPermissions(
  config: ResolvedAxiomConfig,
): Promise<TokenPermissionValidationResult> {
  const connection = resolveAxiomConnection(config);

  try {
    const headers: Record<string, string> = {
      'X-Axiom-Org-Id': connection.orgId ?? '',
      'X-Axiom-Dataset': connection.dataset,
    };

    if (connection.token) {
      headers.Authorization = `Bearer ${connection.token}`;
    }

    // Encode the query values so reserved characters in a dataset name cannot corrupt the URL.
    const query = `dataset=${encodeURIComponent(connection.dataset)}&region=${encodeURIComponent(connection.edgeRegion)}`;

    const response = await fetch(`${connection.url}/api/v3/evaluations/validate?${query}`, {
      headers,
    });

    if (!response.ok) {
      let serverMessage: string | undefined;
      try {
        const data = await response.json();
        serverMessage = data?.error || data?.message;
      } catch {
        // Body is missing or not JSON; fall back to the HTTP status text below.
        serverMessage = undefined;
      }

      if (response.status === 404) {
        throw new AxiomCLIError(
          indentErrorDetails([
            `Dataset not found: "${connection.dataset}".`,
            'Check eval.dataset in axiom.config.ts or AXIOM_DATASET in your environment.',
            `Manage datasets at: ${connection.consoleEndpointUrl}/datasets`,
          ]).join('\n'),
        );
      }

      const statusLabel = serverMessage || response.statusText || 'Unknown error';
      const baseMessage = `Failed to validate token: ${statusLabel}`;

      if (response.status === 401 || response.status === 403) {
        throw new AxiomCLIError(
          indentErrorDetails([
            baseMessage,
            response.status === 401
              ? 'The token is missing or invalid.'
              : `The token does not have access to dataset "${connection.dataset}".`,
            `Check AXIOM_TOKEN or eval.token in axiom.config.ts.`,
            ...buildPermissionHelp(connection.consoleEndpointUrl),
          ]).join('\n'),
        );
      }

      throw new AxiomCLIError(
        indentErrorDetails([
          baseMessage,
          ...buildPermissionHelp(connection.consoleEndpointUrl),
        ]).join('\n'),
      );
    }

    const result = (await response.json()) as TokenPermissionValidationResult;
    if (!result.valid) {
      result.errors = formatPermissionErrors(
        result.errors,
        connection.dataset,
        connection.consoleEndpointUrl,
      );
    }
    return result;
  } catch (error) {
    // Errors raised above are already AxiomCLIError; rethrow them as-is instead of re-wrapping.
    if (error instanceof AxiomCLIError) {
      throw error;
    }
    // Anything else (network failure, malformed success body) is normalised to AxiomCLIError.
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    throw new AxiomCLIError(errorMessage);
  }
}
4 changes: 3 additions & 1 deletion packages/ai/src/evals/eval.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { createFetcher, type Fetcher } from '../utils/fetcher';
import type { ResolvedAxiomConfig } from '../config/index';
import { resolveAxiomConnection } from '../config/resolver';
import { Attr } from '../otel';
import { AxiomCLIError } from '../util/errors';
import { AxiomCLIError, errorToString } from '../util/errors';
import {
getCustomOrRegularAttribute,
getCustomOrRegularNumber,
Expand Down Expand Up @@ -52,6 +52,8 @@ export class EvaluationApiClient {
const resp = await this.fetcher(`/api/v3/evaluations`, {
method: 'POST',
body: JSON.stringify(evaluation),
}).catch((error) => {
throw new AxiomCLIError(`Failed to create evaluation: ${errorToString(error)}`);
});

if (!resp.ok) {
Expand Down
66 changes: 41 additions & 25 deletions packages/ai/src/evals/eval.ts
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,7 @@ async function registerEval<
const [, instrumentationInitError] = await tryCatchAsync(instrumentationReady);
if (instrumentationInitError) {
instrumentationError = instrumentationInitError;
suite.meta.evaluation.instrumentationError = errorToString(instrumentationInitError);
}

suiteSpan = startSpan(`eval ${evalName}-${evalVersion}`, {
Expand Down Expand Up @@ -293,22 +294,28 @@ async function registerEval<
suiteSpan.setAttribute(Attr.Eval.Config.Flags, flagConfigJson);

let createEvalResponse;
let registrationError: Error | undefined = undefined;
if (!isDebug && !isList) {
createEvalResponse = await evaluationApiClient.createEvaluation({
id: evalId,
name: evalName,
capability: opts.capability,
step: opts.step,
dataset: axiomConfig.eval.dataset,
version: evalVersion,
baselineId: baselineId ?? undefined,
runId: runId,
totalCases: collection.length,
config: { overrides: injectedOverrides },
configTimeoutMs: timeoutMs,
metadata: opts.metadata,
status: 'running',
});
try {
createEvalResponse = await evaluationApiClient.createEvaluation({
id: evalId,
name: evalName,
capability: opts.capability,
step: opts.step,
dataset: axiomConfig.eval.dataset,
version: evalVersion,
baselineId: baselineId ?? undefined,
runId: runId,
totalCases: collection.length,
config: { overrides: injectedOverrides },
configTimeoutMs: timeoutMs,
metadata: opts.metadata,
status: 'running',
});
registrationError = createEvalResponse?.error;
} catch (error) {
registrationError = error as Error;
}
}

const orgId = createEvalResponse?.data?.orgId;
Expand Down Expand Up @@ -342,10 +349,10 @@ async function registerEval<
orgId: orgId ?? undefined,
baseline: baseline ?? undefined,
configFlags: opts.configFlags,
registrationStatus: instrumentationError
registrationStatus: registrationError
? {
status: 'failed',
error: errorToString(instrumentationError),
error: errorToString(registrationError),

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

instrumentationError overwritten before reporter reads it

High Severity

The instrumentationError property is set on suite.meta.evaluation at line 261, but a few lines later the entire suite.meta.evaluation object is reassigned at line 344 without including instrumentationError. This means the value is always lost, and the reporter (which reads meta.evaluation.instrumentationError to display warnings) will never see it. The new instrumentation error reporting feature in the reporter and console-utils is effectively dead code.

Additional Locations (1)
Fix in Cursor Fix in Web

}
: { status: 'success' },
trials: opts.trials,
Expand Down Expand Up @@ -424,14 +431,23 @@ async function registerEval<

// signal Axiom that evaluation finished to kick off summary calculations
if (!isDebug && !isList) {
await evaluationApiClient.updateEvaluation({
id: evalId,
status: 'completed',
totalCases: collection.length,
successCases,
erroredCases,
durationMs,
});
try {
await evaluationApiClient.updateEvaluation({
id: evalId,
status: 'completed',
totalCases: collection.length,
successCases,
erroredCases,
durationMs,
});
} catch (error) {
if (suite.meta.evaluation?.registrationStatus?.status !== 'failed') {
suite.meta.evaluation.registrationStatus = {
status: 'failed',
error: errorToString(error),
};
}
}
}
});

Expand Down
2 changes: 2 additions & 0 deletions packages/ai/src/evals/eval.types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,8 @@ export type EvaluationReport = {
overrides?: Record<string, any>;
};
registrationStatus?: RegistrationStatus;
/** Captures instrumentation/baseline loading failures for reporting */
instrumentationError?: string;
/** Number of trials per case (only shown if > 1) */
trials?: number;
};
Expand Down
Loading
Loading