diff --git a/.claude-adr.log b/.claude-adr.log deleted file mode 100644 index 9b6ce8e5..00000000 --- a/.claude-adr.log +++ /dev/null @@ -1,2 +0,0 @@ -nohup: .githooks/adr-generator.sh: No such file or directory -nohup: .githooks/adr-generator.sh: No such file or directory diff --git a/.env.example b/.env.example index 372c0fbd..5bddcfe8 100644 --- a/.env.example +++ b/.env.example @@ -1,11 +1,12 @@ # Environment variables override values from .eroderc.json. # Use .eroderc.json for project settings and .env for secrets. -# AI provider: "gemini" (default) or "anthropic" +# AI provider: "gemini" (default), "openai", or "anthropic" ERODE_AI_PROVIDER=gemini # API keys (required for the selected provider) ERODE_GEMINI_API_KEY= +ERODE_OPENAI_API_KEY= ERODE_ANTHROPIC_API_KEY= # GitHub / GitLab tokens @@ -16,8 +17,10 @@ ERODE_GITHUB_TOKEN= # Model overrides — FAST for extraction stages (1, 2), ADVANCED for analysis stages (3, 4) # ERODE_GEMINI_FAST_MODEL=gemini-2.5-flash # Default # ERODE_GEMINI_ADVANCED_MODEL=gemini-2.5-pro # Default -# ERODE_ANTHROPIC_FAST_MODEL=claude-haiku-4-5-20251001 -# ERODE_ANTHROPIC_ADVANCED_MODEL=claude-sonnet-4-5-20250929 +# ERODE_OPENAI_FAST_MODEL=gpt-5-mini # Default +# ERODE_OPENAI_ADVANCED_MODEL=gpt-5 # Default +# ERODE_ANTHROPIC_FAST_MODEL=claude-haiku-4-5 +# ERODE_ANTHROPIC_ADVANCED_MODEL=claude-sonnet-4-6 # Architecture model format # ERODE_MODEL_FORMAT=likec4 @@ -29,6 +32,7 @@ ERODE_GITHUB_TOKEN= # Timeouts (ms) # ERODE_GEMINI_TIMEOUT=60000 +# ERODE_OPENAI_TIMEOUT=60000 # ERODE_ANTHROPIC_TIMEOUT=60000 # ERODE_GITHUB_TIMEOUT=30000 diff --git a/.githooks/post-commit b/.githooks/post-commit index 72d5128b..9816d41b 100755 --- a/.githooks/post-commit +++ b/.githooks/post-commit @@ -1,29 +1,3 @@ #!/bin/sh - -# Post-commit hook: Auto-generate ADR for architectural changes -# Runs Claude Code headlessly in background to analyze commits - -COMMIT_MSG=$(git log -1 --pretty=%B) - -# Opt-out checks -if echo "$COMMIT_MSG" | grep -qiE "\[(skip-adr|no-adr)\]"; then - exit 0 -fi -[ "$SKIP_ADR" = "1" ] && exit 0 - -# Check for Claude CLI -command -v claude >/dev/null 2>&1 || exit 0 - -# Check for architectural indicators -if echo "$COMMIT_MSG" | grep -qiE "(refactor|architecture|migrate|introduce|domain layer|api version)"; then - echo "" - echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - echo " Architectural change detected" - echo " Running ADR generation in background..." - echo " Check .claude-adr.log for output" - echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" - - nohup "$(dirname "$0")/adr-generator.sh" >> .claude-adr.log 2>&1 & -fi - +echo "Post-commit checks passed!" 
exit 0 diff --git a/packages/core/schemas/eroderc.schema.json b/packages/core/schemas/eroderc.schema.json index a6944300..863f4a05 100644 --- a/packages/core/schemas/eroderc.schema.json +++ b/packages/core/schemas/eroderc.schema.json @@ -164,11 +164,11 @@ "maximum": 300000 }, "fastModel": { - "default": "claude-haiku-4-5-20251001", + "default": "claude-haiku-4-5", "type": "string" }, "advancedModel": { - "default": "claude-sonnet-4-5-20250929", + "default": "claude-sonnet-4-6", "type": "string" } }, @@ -210,11 +210,11 @@ "maximum": 300000 }, "fastModel": { - "default": "gpt-4.1-mini", + "default": "gpt-5-mini", "type": "string" }, "advancedModel": { - "default": "gpt-4.1", + "default": "gpt-5", "type": "string" } }, diff --git a/packages/core/src/analysis/__tests__/prompt-builder.test.ts b/packages/core/src/analysis/__tests__/prompt-builder.test.ts index fcb8fe1b..7b2c60ec 100644 --- a/packages/core/src/analysis/__tests__/prompt-builder.test.ts +++ b/packages/core/src/analysis/__tests__/prompt-builder.test.ts @@ -301,6 +301,66 @@ describe('PromptBuilder', () => { expect(result).toContain('REMOVED'); expect(result).toContain('memcached'); }); + + it('should preserve dependency evidence for drift analysis', () => { + const result = PromptBuilder.buildDriftAnalysisPrompt({ + changeRequest: { + number: 1, + title: 'Add service dependency', + description: null, + repository: 'org/repo', + author: { login: 'dev' }, + base: { ref: 'main', sha: 'base' }, + head: { ref: 'feature', sha: 'head' }, + stats: { commits: 1, additions: 10, deletions: 0, files_changed: 2 }, + commits: [{ sha: 'head', message: 'Test', author: 'dev' }], + }, + component: { id: 'api_gateway', name: 'API Gateway', type: 'service', tags: [] }, + dependencies: { + dependencies: [ + { + type: 'added', + file: 'packages/api-gateway/src/index.ts', + dependency: 'Order Service', + description: 'Existing component calls newly introduced service', + code: 'const ORDER_SERVICE = "http://order-service:3005";', + }, + { + type: 'added', + file: 'packages/order-service/src/index.ts', + dependency: 'Product Service', + description: 'Newly introduced service calls existing component', + code: 'const PRODUCT_SERVICE = "http://product-service:3002";', + }, + { + type: 'added', + file: 'packages/product-service/src/index.ts', + dependency: 'User Service', + description: 'Existing component calls existing component', + code: 'const USER_SERVICE = "http://user-service:3001";', + }, + ], + summary: 'Added service relationships', + }, + architectural: { dependencies: [], dependents: [], relationships: [] }, + allRelationships: [], + }); + + const vars = JSON.parse(result) as DriftAnalysisPromptVars; + expect(vars.dependencyChangesSection).toContain('Evidence:\n const ORDER_SERVICE'); + expect(vars.dependencyChangesSection).toContain('Evidence:\n const PRODUCT_SERVICE'); + expect(vars.dependencyChangesSection).toContain('Evidence:\n const USER_SERVICE'); + }); + + it('should instruct drift analysis to account for every added dependency', () => { + const templateDir = join(dirname(fileURLToPath(import.meta.url)), '..', 'prompts'); + const template = readFileSync(join(templateDir, 'drift-analysis.md'), 'utf-8'); + + expect(template).toContain('For every ADDED dependency'); + expect(template).toContain('Classify each dependency'); + expect(template).toContain('New component plus relationship to add'); + expect(template).toContain('Do not let one dependency that created a new component'); + }); }); describe('buildModelPatchPrompt', () => { diff --git 
a/packages/core/src/analysis/__tests__/section-formatters.test.ts b/packages/core/src/analysis/__tests__/section-formatters.test.ts index 1d623660..50f15484 100644 --- a/packages/core/src/analysis/__tests__/section-formatters.test.ts +++ b/packages/core/src/analysis/__tests__/section-formatters.test.ts @@ -93,6 +93,49 @@ describe('section-formatters', () => { expect(result).toContain('Added Redis'); }); + it('should include code evidence when present', () => { + const result = formatDependencyChanges({ + dependencies: [ + { + type: 'added', + file: 'src/gateway.ts', + dependency: 'Order Service', + description: 'External order service via HTTP', + code: 'const ORDER_SERVICE = "http://order-service:3005";', + }, + ], + summary: '', + }); + + expect(result).toContain('Evidence:\n const ORDER_SERVICE'); + expect(result).toContain('http://order-service:3005'); + }); + + it('should keep multiline code evidence indented under the dependency bullet', () => { + const result = formatDependencyChanges({ + dependencies: [ + { + type: 'added', + file: 'src/gateway.ts', + dependency: 'Order Service', + description: 'External order service via HTTP', + code: 'const ORDER_SERVICE = "http://order-service:3005";\nawait fetch(ORDER_SERVICE);', + }, + ], + summary: '', + }); + + expect(result).toContain( + [ + '- Order Service (src/gateway.ts)', + ' External order service via HTTP', + ' Evidence:', + ' const ORDER_SERVICE = "http://order-service:3005";', + ' await fetch(ORDER_SERVICE);', + ].join('\n') + ); + }); + it('should format modified dependencies', () => { const result = formatDependencyChanges({ dependencies: [ diff --git a/packages/core/src/analysis/prompts/drift-analysis.md b/packages/core/src/analysis/prompts/drift-analysis.md index 5ac93fe7..0c31554b 100644 --- a/packages/core/src/analysis/prompts/drift-analysis.md +++ b/packages/core/src/analysis/prompts/drift-analysis.md @@ -64,6 +64,23 @@ These are ALL relationships currently declared in the architecture model: Cross-reference the dependency changes above against the architecture model and assess: +### Dependency Coverage + +For every ADDED dependency in the DEPENDENCY CHANGES DETECTED section, account for it +explicitly. Classify each dependency as one of: + +- Already declared in the model +- New relationship to add +- New component plus relationship to add +- External package or third-party dependency that should not be modeled +- Ignored with a brief reason + +If an existing modeled component gains a dependency on a newly introduced component, +include both the new component and that relationship. If a newly introduced component +depends on an existing modeled component, include both the new component and that +relationship. Do not let one dependency that created a new component hide other +relationships to or from that component. + ### 1. New Dependencies NOT in Model (Potential Drift) - Are there new dependencies that aren't in the allowed dependencies list? 
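As context for the formatter tests above: the evidence block is simply the dependency's `code` field trimmed and re-indented under its bullet. The sketch below is a hypothetical standalone helper, not the code in `section-formatters.ts`, and the exact indentation widths (two spaces for the description and the `Evidence:` label, four spaces for evidence lines) are assumptions inferred from the collapsed whitespace in the tests.

```ts
// Hypothetical sketch of the bullet-plus-evidence layout the tests above assert.
// Indentation widths are assumptions; the real formatter lives in section-formatters.ts.
interface DependencyChange {
  dependency: string;
  file: string;
  description: string;
  code: string;
}

function formatDependencyBullet(dep: DependencyChange): string {
  const trimmedCode = dep.code.trim();
  // Nest each evidence line one level deeper than the description so multiline
  // snippets stay attached to their bullet when the prompt is rendered.
  const evidence = trimmedCode
    ? `\n  Evidence:\n${trimmedCode
        .split('\n')
        .map((line) => `    ${line}`)
        .join('\n')}`
    : '';
  return `- ${dep.dependency} (${dep.file})\n  ${dep.description}${evidence}`;
}

// Example (hypothetical input), producing:
// - Order Service (src/gateway.ts)
//   External order service via HTTP
//   Evidence:
//     const ORDER_SERVICE = "http://order-service:3005";
//     await fetch(ORDER_SERVICE);
formatDependencyBullet({
  dependency: 'Order Service',
  file: 'src/gateway.ts',
  description: 'External order service via HTTP',
  code: 'const ORDER_SERVICE = "http://order-service:3005";\nawait fetch(ORDER_SERVICE);',
});
```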
diff --git a/packages/core/src/analysis/section-formatters.ts b/packages/core/src/analysis/section-formatters.ts index 5416bd99..750c5d9b 100644 --- a/packages/core/src/analysis/section-formatters.ts +++ b/packages/core/src/analysis/section-formatters.ts @@ -57,7 +57,16 @@ export function formatDependencyChanges(dependencies: DependencyExtractionResult if (items && items.length > 0) { section += `**${label} Dependencies:**\n`; section += items - .map((dep) => `- ${dep.dependency} (${dep.file})\n ${dep.description}`) + .map((dep) => { + const trimmedCode = dep.code.trim(); + const evidence = trimmedCode + ? `\n Evidence:\n${trimmedCode + .split('\n') + .map((line) => ` ${line}`) + .join('\n')}` + : ''; + return `- ${dep.dependency} (${dep.file})\n ${dep.description}${evidence}`; + }) .join('\n'); section += '\n\n'; } diff --git a/packages/core/src/providers/__tests__/generation-profile.test.ts b/packages/core/src/providers/__tests__/generation-profile.test.ts new file mode 100644 index 00000000..d9700df3 --- /dev/null +++ b/packages/core/src/providers/__tests__/generation-profile.test.ts @@ -0,0 +1,57 @@ +import { describe, expect, it } from 'vitest'; +import { AnalysisPhase } from '../analysis-phase.js'; +import { + getGenerationProfileForModelPatch, + getGenerationProfileForPhase, + resolveOutputTokenLimit, +} from '../generation-profile.js'; + +describe('getGenerationProfileForPhase', () => { + it('uses small low-effort generation for simple phases', () => { + expect(getGenerationProfileForPhase(AnalysisPhase.COMPONENT_RESOLUTION)).toEqual({ + outputSize: 'small', + reasoningEffort: 'low', + }); + expect(getGenerationProfileForPhase(AnalysisPhase.DEPENDENCY_SCAN)).toEqual({ + outputSize: 'small', + reasoningEffort: 'low', + }); + }); + + it('uses large low-effort generation for drift analysis', () => { + expect(getGenerationProfileForPhase(AnalysisPhase.CHANGE_ANALYSIS)).toEqual({ + outputSize: 'large', + reasoningEffort: 'low', + }); + }); + + it('uses medium medium-effort generation for model updates', () => { + expect(getGenerationProfileForPhase(AnalysisPhase.MODEL_UPDATE)).toEqual({ + outputSize: 'medium', + reasoningEffort: 'medium', + }); + }); + + it('adds a dynamic output content hint for model patches', () => { + const profile = getGenerationProfileForModelPatch('x'.repeat(40_000), [' comp.a -> comp.b']); + + expect(profile).toMatchObject({ + outputSize: 'medium', + reasoningEffort: 'medium', + }); + expect(profile.outputContentHint?.characters).toBeGreaterThan(16_384); + }); + + it('resolves output token limits from profile size and content hints', () => { + expect( + resolveOutputTokenLimit( + { outputSize: 'medium', outputContentHint: { characters: 40_000 } }, + { small: 600, medium: 1500, large: 3000 } + ) + ).toBe(10_000); + + expect( + resolveOutputTokenLimit({ outputSize: 'medium' }, { small: 600, medium: 1500, large: 3000 }) + ).toBe(1500); + }); +}); diff --git a/packages/core/src/providers/__tests__/provider-factory.test.ts b/packages/core/src/providers/__tests__/provider-factory.test.ts index d1e980ae..f62aba03 100644 --- a/packages/core/src/providers/__tests__/provider-factory.test.ts +++ b/packages/core/src/providers/__tests__/provider-factory.test.ts @@ -35,8 +35,8 @@ const { }, openai: { apiKey: 'test-openai-key', - fastModel: 'gpt-4.1-mini', - advancedModel: 'gpt-4.1', + fastModel: 'gpt-5-mini', + advancedModel: 'gpt-5', }, anthropic: { apiKey: 'test-anthropic-key', @@ -94,8 +94,8 @@ describe('createAIProvider', () => { mockConfig.gemini.fastModel = 
'gemini-flash'; mockConfig.gemini.advancedModel = 'gemini-pro'; mockConfig.openai.apiKey = 'test-openai-key'; - mockConfig.openai.fastModel = 'gpt-4.1-mini'; - mockConfig.openai.advancedModel = 'gpt-4.1'; + mockConfig.openai.fastModel = 'gpt-5-mini'; + mockConfig.openai.advancedModel = 'gpt-5'; mockConfig.anthropic.apiKey = 'test-anthropic-key'; mockConfig.anthropic.fastModel = 'claude-haiku'; mockConfig.anthropic.advancedModel = 'claude-sonnet'; @@ -122,8 +122,8 @@ describe('createAIProvider', () => { expect(result).toBe(mockOpenAIInstance); expect(OpenAIProvider).toHaveBeenCalledWith({ apiKey: 'test-openai-key', - fastModel: 'gpt-4.1-mini', - advancedModel: 'gpt-4.1', + fastModel: 'gpt-5-mini', + advancedModel: 'gpt-5', }); }); diff --git a/packages/core/src/providers/anthropic/__tests__/provider.test.ts b/packages/core/src/providers/anthropic/__tests__/provider.test.ts index 62c91d70..e471d7cf 100644 --- a/packages/core/src/providers/anthropic/__tests__/provider.test.ts +++ b/packages/core/src/providers/anthropic/__tests__/provider.test.ts @@ -117,6 +117,12 @@ describe('AnthropicProvider', () => { makeStage1Data(['comp.frontend', 'comp.backend']) ); expect(result).toBe('comp.backend'); + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'claude-haiku-4-5', + max_tokens: 600, + }) + ); }); it('should return null when no component matches', async () => { @@ -167,6 +173,12 @@ describe('AnthropicProvider', () => { expect(result.dependencies).toHaveLength(1); expect(result.dependencies[0]?.dependency).toBe('redis'); expect(result.summary).toBe('Added Redis dependency'); + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'claude-haiku-4-5', + max_tokens: 600, + }) + ); }); it('should throw on non-JSON response', async () => { @@ -208,6 +220,12 @@ describe('AnthropicProvider', () => { expect(result.metadata).toBe(data.changeRequest); expect(result.component).toBe(data.component); expect(result.dependencyChanges).toBe(data.dependencies); + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'claude-sonnet-4-6', + max_tokens: 8192, + }) + ); }); }); @@ -230,6 +248,23 @@ describe('AnthropicProvider', () => { }); }); + describe('truncation handling', () => { + it('should explain output budget exhaustion on max_tokens', async () => { + mockCreate.mockResolvedValueOnce(makeAnthropicResponse('partial response', 'max_tokens')); + + const provider = createProvider(); + try { + await provider.selectComponent(makeStage1Data(['comp.api'])); + expect.fail('Expected error to be thrown'); + } catch (error) { + expect(error).toBeInstanceOf(ErodeError); + const erodeError = error as ErodeError; + expect(erodeError.code).toBe(ErrorCode.PROVIDER_INVALID_RESPONSE); + expect(erodeError.userMessage).toContain('output budget'); + } + }); + }); + describe('retry on rate limit', () => { it('should retry on 429 and eventually succeed', async () => { const rateLimitError = new ApiError('Rate limited', 429); @@ -311,8 +346,22 @@ describe('AnthropicProvider', () => { await provider.patchModel('model {\n}\n', [' comp.a -> comp.b'], 'likec4'); expect(mockCreate).toHaveBeenCalled(); - const callArg = mockCreate.mock.calls[0]?.[0] as { model?: string } | undefined; - expect(callArg?.model).toBe('claude-haiku-4-5-20251001'); + const callArg = mockCreate.mock.calls[0]?.[0] as + | { max_tokens?: number; model?: string } + | undefined; + expect(callArg?.model).toBe('claude-haiku-4-5'); + expect(callArg?.max_tokens).toBe(4096); + }); + + it('should 
increase the output budget for large model files', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; + mockCreate.mockResolvedValueOnce(makeAnthropicResponse(patchedContent)); + + const provider = createProvider(); + await provider.patchModel('x'.repeat(40_000), [' comp.a -> comp.b'], 'likec4'); + + const callArg = mockCreate.mock.calls[0]?.[0] as { max_tokens?: number } | undefined; + expect(callArg?.max_tokens).toBeGreaterThan(4096); }); it('should return patched content', async () => { diff --git a/packages/core/src/providers/anthropic/models.ts b/packages/core/src/providers/anthropic/models.ts index 227ae62e..d09bbbc5 100644 --- a/packages/core/src/providers/anthropic/models.ts +++ b/packages/core/src/providers/anthropic/models.ts @@ -1,4 +1,4 @@ export const ANTHROPIC_MODELS = { - FAST: 'claude-haiku-4-5-20251001', - ADVANCED: 'claude-sonnet-4-5-20250929', + FAST: 'claude-haiku-4-5', + ADVANCED: 'claude-sonnet-4-6', } as const; diff --git a/packages/core/src/providers/anthropic/provider.ts b/packages/core/src/providers/anthropic/provider.ts index d958a7e2..81b636a2 100644 --- a/packages/core/src/providers/anthropic/provider.ts +++ b/packages/core/src/providers/anthropic/provider.ts @@ -3,8 +3,19 @@ import { BaseProvider, type ProviderConfig } from '../base-provider.js'; import { ApiError, ErodeError, ErrorCode } from '../../errors.js'; import { ENV_VAR_NAMES, RC_FILENAME } from '../../utils/config.js'; import type { AnalysisPhase } from '../analysis-phase.js'; +import { + resolveOutputTokenLimit, + type GenerationProfile, + type OutputSize, +} from '../generation-profile.js'; import { ANTHROPIC_MODELS } from './models.js'; +const MAX_TOKENS_BY_OUTPUT_SIZE = { + small: 600, + medium: 1500, + large: 8192, +} satisfies Record; + export class AnthropicProvider extends BaseProvider { private readonly client: Anthropic; @@ -27,12 +38,14 @@ export class AnthropicProvider extends BaseProvider { model: string, prompt: string, phase: AnalysisPhase, - maxTokens: number + generationProfile: GenerationProfile ): Promise { + const outputTokenLimit = resolveOutputTokenLimit(generationProfile, MAX_TOKENS_BY_OUTPUT_SIZE); + try { const response = await this.client.messages.create({ model, - max_tokens: maxTokens, + max_tokens: outputTokenLimit, messages: [{ role: 'user', content: prompt }], }); @@ -61,8 +74,8 @@ export class AnthropicProvider extends BaseProvider { throw new ErodeError( 'Anthropic response was cut short (max_tokens reached)', ErrorCode.PROVIDER_INVALID_RESPONSE, - 'The AI response was truncated. The output may be partial.', - { model, phase, maxTokens } + 'The Anthropic response used the available output budget before completion. 
Try a smaller change or tune the provider output budget.', + { model, phase, outputTokenLimit } ); } diff --git a/packages/core/src/providers/base-provider.ts b/packages/core/src/providers/base-provider.ts index 58f38fe1..35ba2e43 100644 --- a/packages/core/src/providers/base-provider.ts +++ b/packages/core/src/providers/base-provider.ts @@ -15,6 +15,11 @@ import { ErodeError, ErrorCode, ApiError } from '../errors.js'; import { withRetry } from '../utils/retry.js'; import { AnalysisPhase } from './analysis-phase.js'; import { CONFIG } from '../utils/config.js'; +import { + getGenerationProfileForModelPatch, + getGenerationProfileForPhase, + type GenerationProfile, +} from './generation-profile.js'; function debugLog(msg: string, data?: unknown): void { if (CONFIG.debug.verbose) { @@ -22,6 +27,14 @@ function debugLog(msg: string, data?: unknown): void { } } +function formatDuration(startedAt: bigint): string { + const elapsedMs = Number(process.hrtime.bigint() - startedAt) / 1_000_000; + if (elapsedMs < 1000) { + return `${String(Math.round(elapsedMs))}ms`; + } + return `${(elapsedMs / 1000).toFixed(2)}s`; +} + export interface ProviderConfig { apiKey: string; fastModel?: string; @@ -48,26 +61,35 @@ export abstract class BaseProvider implements AIProvider { * @param model - The model identifier to use * @param prompt - The prompt text to send * @param phase - The analysis phase (for error context) - * @param maxTokens - Maximum tokens for the response (some providers may ignore this) + * @param generationProfile - Provider-agnostic output intent * @returns The text content of the model response */ protected abstract callModel( model: string, prompt: string, phase: AnalysisPhase, - maxTokens: number + generationProfile: GenerationProfile ): Promise; async selectComponent(data: ComponentSelectionPromptData): Promise { const prompt = PromptBuilder.buildComponentSelectionPrompt(data); + debugLog('selectComponent using model', this.fastModel); + const startedAt = process.hrtime.bigint(); const responseText = await withRetry( - () => this.callModel(this.fastModel, prompt, AnalysisPhase.COMPONENT_RESOLUTION, 256), + () => + this.callModel( + this.fastModel, + prompt, + AnalysisPhase.COMPONENT_RESOLUTION, + getGenerationProfileForPhase(AnalysisPhase.COMPONENT_RESOLUTION) + ), { retries: 2, shouldRetry: (error) => this.isRetryableError(error), } ); + debugLog('selectComponent completed in', formatDuration(startedAt)); if (!responseText) { return null; @@ -89,7 +111,6 @@ export abstract class BaseProvider implements AIProvider { prompt: PromptBuilder.buildDependencyExtractionPrompt(data), schema: DependencyExtractionResultSchema, schemaName: 'DependencyExtractionResult', - maxTokens: 4096, }); } @@ -100,7 +121,6 @@ export abstract class BaseProvider implements AIProvider { prompt: PromptBuilder.buildDriftAnalysisPrompt(data), schema: DriftAnalysisResponseSchema, schemaName: 'DriftAnalysisResponse', - maxTokens: 8192, }); return { @@ -121,17 +141,25 @@ export abstract class BaseProvider implements AIProvider { linesToInsert, modelFormat, }); - // Estimate tokens: ~4 chars per token, add 20% headroom for inserted lines - const estimatedTokens = Math.ceil(fileContent.length / 4) + linesToInsert.length * 50; - const maxTokens = Math.max(4096, Math.ceil(estimatedTokens * 1.2)); debugLog('patchModel using model', this.fastModel); + const startedAt = process.hrtime.bigint(); return withRetry( - () => this.callModel(this.fastModel, prompt, AnalysisPhase.MODEL_UPDATE, maxTokens), + () => + this.callModel( + 
this.fastModel, + prompt, + AnalysisPhase.MODEL_UPDATE, + getGenerationProfileForModelPatch(fileContent, linesToInsert) + ), { retries: 2, shouldRetry: (error) => this.isRetryableError(error), } - ); + ) + .then((response) => unwrapModelPatchResponse(response)) + .finally(() => { + debugLog('patchModel completed in', formatDuration(startedAt)); + }); } private async executeStage(config: { @@ -140,16 +168,20 @@ export abstract class BaseProvider implements AIProvider { prompt: string; schema: z.ZodType; schemaName: string; - maxTokens: number; + generationProfile?: GenerationProfile; }): Promise { debugLog(`executeStage ${config.phase} using model`, config.model); + const generationProfile = + config.generationProfile ?? getGenerationProfileForPhase(config.phase); + const startedAt = process.hrtime.bigint(); const responseText = await withRetry( - () => this.callModel(config.model, config.prompt, config.phase, config.maxTokens), + () => this.callModel(config.model, config.prompt, config.phase, generationProfile), { retries: 2, shouldRetry: (error) => this.isRetryableError(error), } ); + debugLog(`executeStage ${config.phase} completed in`, formatDuration(startedAt)); const jsonStr = PromptBuilder.extractJson(responseText); if (!jsonStr) { @@ -173,3 +205,16 @@ export abstract class BaseProvider implements AIProvider { return error instanceof ApiError && (error.isRateLimited || error.isTimeout); } } + +function unwrapModelPatchResponse(response: string): string { + const trimmed = response.trim(); + const lines = trimmed.split(/\r?\n/); + const firstLine = lines[0]; + const lastLine = lines.at(-1); + + if (firstLine?.startsWith('```') && lastLine === '```') { + return lines.slice(1, -1).join('\n'); + } + + return response; +} diff --git a/packages/core/src/providers/gemini/__tests__/provider.test.ts b/packages/core/src/providers/gemini/__tests__/provider.test.ts index 53b76e16..14099571 100644 --- a/packages/core/src/providers/gemini/__tests__/provider.test.ts +++ b/packages/core/src/providers/gemini/__tests__/provider.test.ts @@ -23,6 +23,11 @@ vi.mock('@google/genai', () => { SPII: 'SPII', MALFORMED_FUNCTION_CALL: 'MALFORMED_FUNCTION_CALL', }, + ThinkingLevel: { + LOW: 'LOW', + MEDIUM: 'MEDIUM', + HIGH: 'HIGH', + }, }; }); @@ -121,6 +126,12 @@ describe('GeminiProvider', () => { makeStage1Data(['comp.frontend', 'comp.backend']) ); expect(result).toBe('comp.backend'); + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gemini-2.5-flash', + config: { maxOutputTokens: 1500, thinkingConfig: { thinkingBudget: 0 } }, + }) + ); }); it('should return null when no component matches', async () => { @@ -177,6 +188,12 @@ describe('GeminiProvider', () => { expect(result.dependencies).toHaveLength(1); expect(result.dependencies[0]?.dependency).toBe('redis'); expect(result.summary).toBe('Added Redis dependency'); + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gemini-2.5-flash', + config: { maxOutputTokens: 1500, thinkingConfig: { thinkingBudget: 0 } }, + }) + ); }); it('should throw on non-JSON response', async () => { @@ -224,6 +241,119 @@ describe('GeminiProvider', () => { expect(result.metadata).toBe(data.changeRequest); expect(result.component).toBe(data.component); expect(result.dependencyChanges).toBe(data.dependencies); + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gemini-2.5-pro', + config: { maxOutputTokens: 6000, thinkingConfig: { thinkingBudget: -1 } }, + }) + ); + }); + 
}); + + describe('patchModel', () => { + it('should use 2.5 Flash thinking budgets and dynamic output headroom for patching', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; + mockGenerateContent.mockResolvedValueOnce({ + text: patchedContent, + candidates: [{ finishReason: 'STOP' }], + usageMetadata: { promptTokenCount: 100, candidatesTokenCount: 20 }, + }); + + const provider = createProvider(); + await provider.patchModel('model {\n}\n', [' comp.a -> comp.b'], 'likec4'); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gemini-2.5-flash', + config: { maxOutputTokens: 4096, thinkingConfig: { thinkingBudget: -1 } }, + }) + ); + }); + + it('should increase the output budget for large model files', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; + mockGenerateContent.mockResolvedValueOnce({ + text: patchedContent, + candidates: [{ finishReason: 'STOP' }], + usageMetadata: { promptTokenCount: 100, candidatesTokenCount: 20 }, + }); + + const provider = createProvider(); + await provider.patchModel('x'.repeat(40_000), [' comp.a -> comp.b'], 'likec4'); + + const callArg = mockGenerateContent.mock.calls[0]?.[0] as + | { config?: { maxOutputTokens?: number } } + | undefined; + expect(callArg?.config?.maxOutputTokens).toBeGreaterThan(4096); + }); + + it('should use thinkingLevel for Gemini 3 style models', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; + mockGenerateContent.mockResolvedValueOnce({ + text: patchedContent, + candidates: [{ finishReason: 'STOP' }], + usageMetadata: { promptTokenCount: 100, candidatesTokenCount: 20 }, + }); + + const provider = new GeminiProvider({ + apiKey: 'test-api-key', + fastModel: 'gemini-3-flash-preview', + advancedModel: 'gemini-3-pro-preview', + }); + await provider.patchModel('model {\n}\n', [' comp.a -> comp.b'], 'likec4'); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gemini-3-flash-preview', + config: { maxOutputTokens: 4096, thinkingConfig: { thinkingLevel: 'MEDIUM' } }, + }) + ); + }); + + it('should use thinkingLevel for Gemini 3 point-release style models', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; + mockGenerateContent.mockResolvedValueOnce({ + text: patchedContent, + candidates: [{ finishReason: 'STOP' }], + usageMetadata: { promptTokenCount: 100, candidatesTokenCount: 20 }, + }); + + const provider = new GeminiProvider({ + apiKey: 'test-api-key', + fastModel: 'gemini-3.5-flash', + advancedModel: 'gemini-3.5-pro', + }); + await provider.patchModel('model {\n}\n', [' comp.a -> comp.b'], 'likec4'); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gemini-3.5-flash', + config: { maxOutputTokens: 4096, thinkingConfig: { thinkingLevel: 'MEDIUM' } }, + }) + ); + }); + + it('should preserve medium thinkingLevel for Gemini 3 point-release Pro models', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; + mockGenerateContent.mockResolvedValueOnce({ + text: patchedContent, + candidates: [{ finishReason: 'STOP' }], + usageMetadata: { promptTokenCount: 100, candidatesTokenCount: 20 }, + }); + + const provider = new GeminiProvider({ + apiKey: 'test-api-key', + fastModel: 'gemini-3.1-pro-preview', + advancedModel: 'gemini-3.1-pro-preview', + }); + await provider.patchModel('model {\n}\n', [' comp.a -> comp.b'], 'likec4'); + + expect(mockGenerateContent).toHaveBeenCalledWith( + expect.objectContaining({ + 
model: 'gemini-3.1-pro-preview', + config: { maxOutputTokens: 4096, thinkingConfig: { thinkingLevel: 'MEDIUM' } }, + }) + ); }); }); @@ -246,6 +376,27 @@ describe('GeminiProvider', () => { }); }); + describe('truncation handling', () => { + it('should throw PROVIDER_INVALID_RESPONSE on max tokens', async () => { + mockGenerateContent.mockResolvedValueOnce({ + text: '```json\n{"dependencies": [', + candidates: [{ finishReason: 'MAX_TOKENS' }], + usageMetadata: { promptTokenCount: 500, candidatesTokenCount: 1500 }, + }); + + const provider = createProvider(); + try { + await provider.extractDependencies(makePreprocessingData()); + expect.fail('Expected error to be thrown'); + } catch (error) { + expect(error).toBeInstanceOf(ErodeError); + const erodeError = error as ErodeError; + expect(erodeError.code).toBe(ErrorCode.PROVIDER_INVALID_RESPONSE); + expect(erodeError.userMessage).toContain('output budget'); + } + }); + }); + describe('retry on rate limit', () => { it('should retry on 429 and eventually succeed', async () => { const rateLimitError = new ApiError('Rate limited', 429); diff --git a/packages/core/src/providers/gemini/provider.ts b/packages/core/src/providers/gemini/provider.ts index ef3d9e07..83294439 100644 --- a/packages/core/src/providers/gemini/provider.ts +++ b/packages/core/src/providers/gemini/provider.ts @@ -1,10 +1,22 @@ -import { FinishReason, GoogleGenAI } from '@google/genai'; +import { FinishReason, GoogleGenAI, ThinkingLevel, type ThinkingConfig } from '@google/genai'; import { BaseProvider, type ProviderConfig } from '../base-provider.js'; import { ApiError, ErodeError, ErrorCode } from '../../errors.js'; import { ENV_VAR_NAMES, RC_FILENAME } from '../../utils/config.js'; import type { AnalysisPhase } from '../analysis-phase.js'; +import { + resolveOutputTokenLimit, + type GenerationProfile, + type OutputSize, + type ReasoningEffort, +} from '../generation-profile.js'; import { GEMINI_MODELS } from './models.js'; +const MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE = { + small: 1500, + medium: 3000, + large: 6000, +} satisfies Record; + export class GeminiProvider extends BaseProvider { private readonly client: GoogleGenAI; @@ -27,12 +39,19 @@ export class GeminiProvider extends BaseProvider { model: string, prompt: string, phase: AnalysisPhase, - _maxTokens: number + generationProfile: GenerationProfile ): Promise { + const maxOutputTokens = resolveOutputTokenLimit( + generationProfile, + MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE + ); + const thinkingConfig = getThinkingConfig(model, generationProfile.reasoningEffort); + try { const response = await this.client.models.generateContent({ model, contents: prompt, + config: { maxOutputTokens, thinkingConfig }, }); const candidate = response.candidates?.[0]; @@ -45,6 +64,15 @@ export class GeminiProvider extends BaseProvider { ); } + if (candidate?.finishReason === FinishReason.MAX_TOKENS) { + throw new ErodeError( + 'Gemini response was cut short by the output token limit', + ErrorCode.PROVIDER_INVALID_RESPONSE, + 'The Gemini response used the available output budget before completion. 
Try a smaller change or tune the provider output budget.', + { model, phase, maxOutputTokens, outputSize: generationProfile.outputSize } + ); + } + const text = response.text; if (!text) { throw new ErodeError( @@ -62,5 +90,76 @@ export class GeminiProvider extends BaseProvider { } throw ApiError.fromGeminiError(error); } + + function getThinkingConfig( + thinkingModel: string, + reasoningIntent: ReasoningEffort | undefined + ): ThinkingConfig { + if (isGemini25Model(thinkingModel)) { + return getGemini25ThinkingConfig(thinkingModel, reasoningIntent); + } + + if (isGemini3Model(thinkingModel)) { + return getGemini3ThinkingConfig(thinkingModel, reasoningIntent); + } + + return {}; + } + + function getGemini25ThinkingConfig( + thinkingModel: string, + reasoningIntent: ReasoningEffort | undefined + ): ThinkingConfig { + if (isGemini25ProModel(thinkingModel)) { + return { thinkingBudget: -1 }; + } + + switch (reasoningIntent) { + case 'high': + case 'medium': + return { thinkingBudget: -1 }; + case 'low': + case undefined: + return { thinkingBudget: 0 }; + default: + return { thinkingBudget: 0 }; + } + } + + function getGemini3ThinkingConfig( + thinkingModel: string, + reasoningIntent: ReasoningEffort | undefined + ): ThinkingConfig { + switch (reasoningIntent) { + case 'high': + return { thinkingLevel: ThinkingLevel.HIGH }; + case 'medium': + if (isLegacyGemini3ProModel(thinkingModel)) { + return { thinkingLevel: ThinkingLevel.HIGH }; + } + return { thinkingLevel: ThinkingLevel.MEDIUM }; + case 'low': + case undefined: + return { thinkingLevel: ThinkingLevel.LOW }; + default: + return { thinkingLevel: ThinkingLevel.LOW }; + } + } + + function isGemini25Model(thinkingModel: string): boolean { + return thinkingModel.startsWith('gemini-2.5-'); + } + + function isGemini25ProModel(thinkingModel: string): boolean { + return thinkingModel.startsWith('gemini-2.5-pro'); + } + + function isGemini3Model(thinkingModel: string): boolean { + return thinkingModel.startsWith('gemini-3-') || thinkingModel.startsWith('gemini-3.'); + } + + function isLegacyGemini3ProModel(thinkingModel: string): boolean { + return thinkingModel.startsWith('gemini-3-pro'); + } } } diff --git a/packages/core/src/providers/generation-profile.ts b/packages/core/src/providers/generation-profile.ts new file mode 100644 index 00000000..8c521034 --- /dev/null +++ b/packages/core/src/providers/generation-profile.ts @@ -0,0 +1,55 @@ +import { AnalysisPhase } from './analysis-phase.js'; + +export type OutputSize = 'small' | 'medium' | 'large'; +export type ReasoningEffort = 'low' | 'medium' | 'high'; + +export interface GenerationProfile { + outputSize: OutputSize; + reasoningEffort?: ReasoningEffort; + outputContentHint?: { + characters: number; + }; +} + +export function getGenerationProfileForPhase(phase: AnalysisPhase): GenerationProfile { + switch (phase) { + case AnalysisPhase.COMPONENT_RESOLUTION: + case AnalysisPhase.DEPENDENCY_SCAN: + return { outputSize: 'small', reasoningEffort: 'low' }; + case AnalysisPhase.MODEL_UPDATE: + return { outputSize: 'medium', reasoningEffort: 'medium' }; + case AnalysisPhase.CHANGE_ANALYSIS: + return { outputSize: 'large', reasoningEffort: 'low' }; + default: + return { outputSize: 'small', reasoningEffort: 'low' }; + } +} + +export function getGenerationProfileForModelPatch( + fileContent: string, + linesToInsert: string[] +): GenerationProfile { + const insertedLineCharacters = linesToInsert.length * 200; + const estimatedCharacters = Math.ceil((fileContent.length + insertedLineCharacters) * 1.2); 
+ const minimumCharacters = 4096 * 4; + + return { + outputSize: 'medium', + reasoningEffort: 'medium', + outputContentHint: { + characters: Math.max(minimumCharacters, estimatedCharacters), + }, + }; +} + +export function resolveOutputTokenLimit( + profile: GenerationProfile, + sizeTable: Record +): number { + const profileLimit = sizeTable[profile.outputSize]; + const hintedLimit = profile.outputContentHint + ? Math.ceil(profile.outputContentHint.characters / 4) + : 0; + + return Math.max(profileLimit, hintedLimit); +} diff --git a/packages/core/src/providers/openai/__tests__/provider.test.ts b/packages/core/src/providers/openai/__tests__/provider.test.ts index 863f2425..754f2d97 100644 --- a/packages/core/src/providers/openai/__tests__/provider.test.ts +++ b/packages/core/src/providers/openai/__tests__/provider.test.ts @@ -6,7 +6,7 @@ const mockCreate = vi.fn(); vi.mock('openai', () => { return { default: class MockOpenAI { - chat = { completions: { create: mockCreate } }; + responses = { create: mockCreate }; }, }; }); @@ -78,9 +78,39 @@ function makePrAnalysisData(): DriftAnalysisPromptData { }; } -function makeOpenAIResponse(content: string | null, finishReason = 'stop') { +function makeOpenAIResponse(content: string) { return { - choices: [{ message: { content }, finish_reason: finishReason }], + status: 'completed', + incomplete_details: null, + output_text: content, + output: [], + }; +} + +function makeOpenAIMessageResponse(content: string) { + return { + status: 'completed', + incomplete_details: null, + output_text: '', + output: [ + { + type: 'reasoning', + content: [{ type: 'output_text', text: 'ignore me' }], + }, + { + type: 'message', + content: [{ type: 'output_text', text: content }], + }, + ], + }; +} + +function makeIncompleteOpenAIResponse(reason: string) { + return { + status: 'incomplete', + incomplete_details: { reason }, + output_text: '', + output: [], }; } @@ -110,6 +140,44 @@ describe('OpenAIProvider', () => { makeStage1Data(['comp.frontend', 'comp.backend']) ); expect(result).toBe('comp.backend'); + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gpt-5-mini', + max_output_tokens: 1500, + reasoning: { effort: 'low' }, + }) + ); + }); + + it('should omit reasoning for chat-tuned GPT-5 models', async () => { + mockCreate.mockResolvedValueOnce(makeOpenAIResponse('comp.backend')); + + const provider = new OpenAIProvider({ + apiKey: 'test-api-key', + fastModel: 'gpt-5-chat-latest', + advancedModel: 'gpt-5', + }); + await provider.selectComponent(makeStage1Data(['comp.backend'])); + + const callArg = mockCreate.mock.calls[0]?.[0] as { reasoning?: unknown } | undefined; + expect(callArg).not.toHaveProperty('reasoning'); + }); + + it('should send reasoning for GPT-5 family models', async () => { + mockCreate.mockResolvedValueOnce(makeOpenAIResponse('comp.backend')); + + const provider = new OpenAIProvider({ + apiKey: 'test-api-key', + fastModel: 'gpt-5o-mini', + advancedModel: 'gpt-5', + }); + await provider.selectComponent(makeStage1Data(['comp.backend'])); + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + reasoning: { effort: 'low' }, + }) + ); }); it('should return null when no component matches', async () => { @@ -123,7 +191,7 @@ describe('OpenAIProvider', () => { }); it('should throw on empty response', async () => { - mockCreate.mockResolvedValueOnce(makeOpenAIResponse(null)); + mockCreate.mockResolvedValueOnce(makeOpenAIResponse('')); const provider = createProvider(); await 
expect(provider.selectComponent(makeStage1Data(['comp.frontend']))).rejects.toThrow( @@ -156,6 +224,13 @@ describe('OpenAIProvider', () => { expect(result.dependencies).toHaveLength(1); expect(result.dependencies[0]?.dependency).toBe('redis'); expect(result.summary).toBe('Added Redis dependency'); + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gpt-5-mini', + max_output_tokens: 1500, + reasoning: { effort: 'low' }, + }) + ); }); it('should throw on non-JSON response', async () => { @@ -195,12 +270,19 @@ describe('OpenAIProvider', () => { expect(result.metadata).toBe(data.changeRequest); expect(result.component).toBe(data.component); expect(result.dependencyChanges).toBe(data.dependencies); + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: 'gpt-5', + max_output_tokens: 10000, + reasoning: { effort: 'low' }, + }) + ); }); }); describe('safety filter handling', () => { it('should throw PROVIDER_SAFETY_BLOCK on content_filter', async () => { - mockCreate.mockResolvedValueOnce(makeOpenAIResponse('blocked', 'content_filter')); + mockCreate.mockResolvedValueOnce(makeIncompleteOpenAIResponse('content_filter')); const provider = createProvider(); try { @@ -214,8 +296,23 @@ describe('OpenAIProvider', () => { }); describe('truncation handling', () => { - it('should throw PROVIDER_INVALID_RESPONSE on length', async () => { - mockCreate.mockResolvedValueOnce(makeOpenAIResponse('partial...', 'length')); + it('should throw PROVIDER_INVALID_RESPONSE on max_output_tokens', async () => { + mockCreate.mockResolvedValueOnce(makeIncompleteOpenAIResponse('max_output_tokens')); + + const provider = createProvider(); + try { + await provider.selectComponent(makeStage1Data(['comp.api'])); + expect.fail('Expected error to be thrown'); + } catch (error) { + expect(error).toBeInstanceOf(ErodeError); + const erodeError = error as ErodeError; + expect(erodeError.code).toBe(ErrorCode.PROVIDER_INVALID_RESPONSE); + expect(erodeError.userMessage).toContain('output budget'); + } + }); + + it('should throw PROVIDER_INVALID_RESPONSE on unknown incomplete reasons', async () => { + mockCreate.mockResolvedValueOnce(makeIncompleteOpenAIResponse('system_error')); const provider = createProvider(); try { @@ -223,11 +320,33 @@ describe('OpenAIProvider', () => { expect.fail('Expected error to be thrown'); } catch (error) { expect(error).toBeInstanceOf(ErodeError); - expect((error as ErodeError).code).toBe(ErrorCode.PROVIDER_INVALID_RESPONSE); + const erodeError = error as ErodeError; + expect(erodeError.code).toBe(ErrorCode.PROVIDER_INVALID_RESPONSE); + expect(erodeError.userMessage).toContain('unknown provider reason'); } }); }); + describe('response text extraction', () => { + it('should read fallback output text from message items only', async () => { + mockCreate.mockResolvedValueOnce(makeOpenAIMessageResponse('comp.api')); + + const provider = createProvider(); + const result = await provider.selectComponent(makeStage1Data(['comp.api'])); + + expect(result).toBe('comp.api'); + }); + + it('should throw on empty output after fallback', async () => { + mockCreate.mockResolvedValueOnce(makeOpenAIMessageResponse('')); + + const provider = createProvider(); + await expect(provider.selectComponent(makeStage1Data(['comp.api']))).rejects.toThrow( + ErodeError + ); + }); + }); + describe('retry on rate limit', () => { it('should retry on 429 and eventually succeed', async () => { const rateLimitError = new ApiError('Rate limited', 429); @@ -309,8 +428,23 @@ 
describe('OpenAIProvider', () => { await provider.patchModel('model {\n}\n', [' comp.a -> comp.b'], 'likec4'); expect(mockCreate).toHaveBeenCalled(); - const callArg = mockCreate.mock.calls[0]?.[0] as { model?: string } | undefined; - expect(callArg?.model).toBe('gpt-4.1-mini'); + const callArg = mockCreate.mock.calls[0]?.[0] as + | { max_output_tokens?: number; model?: string; reasoning?: { effort?: string } } + | undefined; + expect(callArg?.model).toBe('gpt-5-mini'); + expect(callArg?.max_output_tokens).toBe(6000); + expect(callArg?.reasoning?.effort).toBe('medium'); + }); + + it('should increase the output budget for large model files', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; + mockCreate.mockResolvedValueOnce(makeOpenAIResponse(patchedContent)); + + const provider = createProvider(); + await provider.patchModel('x'.repeat(40_000), [' comp.a -> comp.b'], 'likec4'); + + const callArg = mockCreate.mock.calls[0]?.[0] as { max_output_tokens?: number } | undefined; + expect(callArg?.max_output_tokens).toBeGreaterThan(6000); }); it('should return patched content', async () => { @@ -323,6 +457,18 @@ describe('OpenAIProvider', () => { expect(result).toBe(patchedContent); }); + it('should unwrap markdown fences from patched content', async () => { + const patchedContent = 'model {\n comp.a -> comp.b\n}'; + mockCreate.mockResolvedValueOnce( + makeOpenAIResponse(`\`\`\`likec4\n${patchedContent}\n\`\`\``) + ); + + const provider = createProvider(); + const result = await provider.patchModel('model {\n}\n', [' comp.a -> comp.b'], 'likec4'); + + expect(result).toBe(patchedContent); + }); + it('should retry on rate limit', async () => { const patchedContent = 'model {\n comp.a -> comp.b\n}\n'; const rateLimitError = new ApiError('Rate limited', 429); diff --git a/packages/core/src/providers/openai/models.ts b/packages/core/src/providers/openai/models.ts index acf08b56..edcaa503 100644 --- a/packages/core/src/providers/openai/models.ts +++ b/packages/core/src/providers/openai/models.ts @@ -1,4 +1,4 @@ export const OPENAI_MODELS = { - FAST: 'gpt-4.1-mini', - ADVANCED: 'gpt-4.1', + FAST: 'gpt-5-mini', + ADVANCED: 'gpt-5', } as const; diff --git a/packages/core/src/providers/openai/provider.ts b/packages/core/src/providers/openai/provider.ts index 386f779f..b2984a64 100644 --- a/packages/core/src/providers/openai/provider.ts +++ b/packages/core/src/providers/openai/provider.ts @@ -3,8 +3,22 @@ import { BaseProvider, type ProviderConfig } from '../base-provider.js'; import { ErodeError, ErrorCode, ApiError } from '../../errors.js'; import { ENV_VAR_NAMES, RC_FILENAME } from '../../utils/config.js'; import type { AnalysisPhase } from '../analysis-phase.js'; +import { + resolveOutputTokenLimit, + type GenerationProfile, + type OutputSize, + type ReasoningEffort, +} from '../generation-profile.js'; import { OPENAI_MODELS } from './models.js'; +type OpenAIReasoningEffort = 'minimal' | 'low' | 'medium' | 'high'; + +const MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE = { + small: 1500, + medium: 6000, + large: 10000, +} satisfies Record; + export class OpenAIProvider extends BaseProvider { private readonly client: OpenAI; @@ -27,17 +41,36 @@ export class OpenAIProvider extends BaseProvider { model: string, prompt: string, phase: AnalysisPhase, - maxTokens: number + generationProfile: GenerationProfile ): Promise { + const maxOutputTokens = resolveOutputTokenLimit( + generationProfile, + MAX_OUTPUT_TOKENS_BY_OUTPUT_SIZE + ); + const reasoningEffort = 
getOpenAIReasoningEffort(generationProfile.reasoningEffort); + try { - const response = await this.client.chat.completions.create({ + const response = await this.client.responses.create({ model, - max_tokens: maxTokens, - messages: [{ role: 'user', content: prompt }], + input: prompt, + max_output_tokens: maxOutputTokens, + ...(supportsReasoningEffort(model) ? { reasoning: { effort: reasoningEffort } } : {}), }); - const choice = response.choices[0]; - if (!choice) { + if (response.status === 'incomplete') { + handleIncompleteResponse( + response, + model, + phase, + maxOutputTokens, + generationProfile, + reasoningEffort + ); + } + + const text = extractText(response); + + if (!text) { throw new ErodeError( 'OpenAI returned an empty response', ErrorCode.PROVIDER_INVALID_RESPONSE, @@ -46,40 +79,103 @@ export class OpenAIProvider extends BaseProvider { ); } - if (choice.finish_reason === 'content_filter') { - throw new ErodeError( - 'OpenAI safety filters blocked the response', - ErrorCode.PROVIDER_SAFETY_BLOCK, - 'Content was blocked by the AI provider safety filters. Try simplifying the input.', - { model, phase } - ); + return text; + } catch (error) { + if (error instanceof ErodeError) { + throw error; } + throw ApiError.fromOpenAIError(error); + } - const text = choice.message.content; - if (!text) { + function handleIncompleteResponse( + response: OpenAI.Responses.Response, + incompleteModel: string, + incompletePhase: AnalysisPhase, + incompleteMaxOutputTokens: number, + incompleteGenerationProfile: GenerationProfile, + incompleteReasoningEffort: OpenAIReasoningEffort + ): void { + if (response.incomplete_details?.reason === 'max_output_tokens') { throw new ErodeError( - 'OpenAI returned an empty response', + 'Model ran out of output budget before producing a complete response', ErrorCode.PROVIDER_INVALID_RESPONSE, - 'The OpenAI API returned no content', - { model, phase } + 'The AI response used the available output budget before completion. Try a smaller change or tune the provider output budget or reasoning effort.', + { + model: incompleteModel, + phase: incompletePhase, + maxOutputTokens: incompleteMaxOutputTokens, + outputSize: incompleteGenerationProfile.outputSize, + reasoningEffort: incompleteGenerationProfile.reasoningEffort, + providerReasoningEffort: incompleteReasoningEffort, + } ); } - if (choice.finish_reason === 'length') { + if (response.incomplete_details?.reason === 'content_filter') { throw new ErodeError( - 'OpenAI response was cut short (max_tokens reached)', - ErrorCode.PROVIDER_INVALID_RESPONSE, - 'The AI response was truncated. The output may be partial.', - { model, phase, maxTokens } + 'OpenAI safety filters blocked the response', + ErrorCode.PROVIDER_SAFETY_BLOCK, + 'Content was blocked by the AI provider safety filters. Try simplifying the input.', + { model: incompleteModel, phase: incompletePhase } ); } - return text; - } catch (error) { - if (error instanceof ErodeError) { - throw error; + throw new ErodeError( + 'OpenAI returned an incomplete response', + ErrorCode.PROVIDER_INVALID_RESPONSE, + 'The OpenAI response was incomplete for an unknown provider reason. 
Try again or tune the provider output budget.', + { + model: incompleteModel, + phase: incompletePhase, + reason: response.incomplete_details?.reason, + maxOutputTokens: incompleteMaxOutputTokens, + outputSize: incompleteGenerationProfile.outputSize, + } + ); + } + + function extractText(response: OpenAI.Responses.Response): string { + if (response.output_text.length > 0) { + return response.output_text; + } + + return response.output + .filter((item) => item.type === 'message') + .flatMap((item) => item.content) + .filter((content) => content.type === 'output_text') + .map((content) => content.text) + .join(''); + } + + function supportsReasoningEffort(reasoningModel: string): boolean { + if (reasoningModel.includes('chat')) { + return false; + } + + if (reasoningModel.startsWith('gpt-5')) { + return true; + } + + return ['o1', 'o3', 'o4'].some((prefix) => { + return reasoningModel === prefix || reasoningModel.startsWith(`${prefix}-`); + }); + } + + function getOpenAIReasoningEffort( + reasoningIntent: ReasoningEffort | undefined + ): OpenAIReasoningEffort { + switch (reasoningIntent) { + case 'high': + return 'high'; + case 'medium': + return 'medium'; + case 'low': + return 'low'; + case undefined: + return 'minimal'; + default: + return 'minimal'; } - throw ApiError.fromOpenAIError(error); } } } diff --git a/packages/eslint-config/base.js b/packages/eslint-config/base.js index ae243828..fdede397 100644 --- a/packages/eslint-config/base.js +++ b/packages/eslint-config/base.js @@ -43,3 +43,5 @@ export function createBaseConfig(tsconfigRootDir) { export const ignores = { ignores: ['dist/', 'node_modules/'], }; + +export const disableTypeChecked = tseslint.configs.disableTypeChecked; diff --git a/packages/eslint-config/index.d.ts b/packages/eslint-config/index.d.ts index df465f7f..20b029a2 100644 --- a/packages/eslint-config/index.d.ts +++ b/packages/eslint-config/index.d.ts @@ -1,4 +1,5 @@ import type { Linter } from 'eslint'; export function createBaseConfig(tsconfigRootDir: string): Linter.Config[]; +export const disableTypeChecked: Linter.Config; export const ignores: Linter.Config; diff --git a/packages/eslint-config/index.js b/packages/eslint-config/index.js index 3143cf0e..93fa68eb 100644 --- a/packages/eslint-config/index.js +++ b/packages/eslint-config/index.js @@ -1 +1 @@ -export { createBaseConfig, ignores } from './base.js'; +export { createBaseConfig, disableTypeChecked, ignores } from './base.js'; diff --git a/packages/web/eslint.config.mjs b/packages/web/eslint.config.mjs index ea1bb0b0..c57d5569 100644 --- a/packages/web/eslint.config.mjs +++ b/packages/web/eslint.config.mjs @@ -1,10 +1,14 @@ -import { createBaseConfig, ignores } from '@erode-app/eslint-config'; +import { createBaseConfig, disableTypeChecked, ignores } from '@erode-app/eslint-config'; import eslintPluginAstro from 'eslint-plugin-astro'; export default [ ignores, ...createBaseConfig(import.meta.dirname), ...eslintPluginAstro.configs.recommended, + { + files: ['**/*.astro'], + ...disableTypeChecked, + }, { ignores: ['astro.config.mjs', '.astro/', 'src/content/', 'public/architecture/'], }, diff --git a/packages/web/package.json b/packages/web/package.json index d034bcdc..1e592fbe 100644 --- a/packages/web/package.json +++ b/packages/web/package.json @@ -11,8 +11,8 @@ "build": "npm run prebuild:diagrams && npm run generate:skills && astro check && astro build", "preview": "astro preview", "typecheck": "npx tsx scripts/generate-agent-skills.ts --check && astro check --minimumFailingSeverity hint", - "check:ci": 
"npm run typecheck && astro build", - "lint": "eslint . --report-unused-disable-directives --max-warnings 0" + "check:ci": "npm run lint && npm run typecheck && astro build", + "lint": "astro sync && eslint . --report-unused-disable-directives --max-warnings 0" }, "dependencies": { "@astrojs/starlight": "^0.38.4", diff --git a/packages/web/public/schemas/v0/eroderc.schema.json b/packages/web/public/schemas/v0/eroderc.schema.json index a6944300..863f4a05 100644 --- a/packages/web/public/schemas/v0/eroderc.schema.json +++ b/packages/web/public/schemas/v0/eroderc.schema.json @@ -164,11 +164,11 @@ "maximum": 300000 }, "fastModel": { - "default": "claude-haiku-4-5-20251001", + "default": "claude-haiku-4-5", "type": "string" }, "advancedModel": { - "default": "claude-sonnet-4-5-20250929", + "default": "claude-sonnet-4-6", "type": "string" } }, @@ -210,11 +210,11 @@ "maximum": 300000 }, "fastModel": { - "default": "gpt-4.1-mini", + "default": "gpt-5-mini", "type": "string" }, "advancedModel": { - "default": "gpt-4.1", + "default": "gpt-5", "type": "string" } }, diff --git a/packages/web/src/content/docs/docs/contributing/adrs/011-intent-based-provider-generation-profiles.md b/packages/web/src/content/docs/docs/contributing/adrs/011-intent-based-provider-generation-profiles.md new file mode 100644 index 00000000..b0cbfb19 --- /dev/null +++ b/packages/web/src/content/docs/docs/contributing/adrs/011-intent-based-provider-generation-profiles.md @@ -0,0 +1,59 @@ +--- +title: 'ADR-011: Intent-based provider generation profiles' +description: Shared analysis stages describe generation intent, and providers translate that intent into request budgets. +--- + +**Status:** Accepted\ +**Date:** 2026-05-05\ +**Authors:** Anders Hassis + +## Context + +Erode runs the same analysis stages across Gemini, Anthropic, and OpenAI. The shared +provider flow used raw token counts when it called each model. That made stage code +carry provider-specific budget details. + +OpenAI's Responses API names the limit `max_output_tokens`, and reasoning models can +spend part of that budget before producing visible output. Anthropic and Gemini expose +different request parameters and have different response behavior. The shared pipeline +needs to describe what it wants, not how each provider should size the request. + +## Decision + +Introduce a provider-agnostic generation profile for model calls. A profile describes +the expected output size and reasoning effort. `BaseProvider` maps each analysis phase +to a default profile, and concrete providers translate the profile into SDK-specific +request parameters. + +Keep model tier selection unchanged. Component resolution, dependency scanning, and +model patching use the fast model. Drift analysis uses the advanced model. + +## Rationale + +The analysis stages know the shape of the work. Providers know their API parameters and +model behavior. Keeping those concerns separate makes it easier to tune OpenAI, +Anthropic, and Gemini independently without changing the shared pipeline. + +The profile also makes cost intent visible. Simple extraction work stays on cheaper +models with small outputs and low reasoning effort. Drift analysis keeps the stronger +model, but still defaults to low reasoning effort to keep feedback fast. Model updates +use medium reasoning because they generate a concrete patch. + +## Consequences + +### Positive + +- Provider-specific token parameters stay inside provider implementations. 
+- OpenAI can use `max_output_tokens` and reasoning effort without leaking those names + into shared analysis code. +- Cost-aware model tier selection remains explicit in `BaseProvider`. + +### Negative + +- Output profile names become a shared contract that providers must translate. +- Dynamic output hints can still affect provider cost, latency, quota usage, or model + output caps. + +## Related commits + +- `9289f90` - refactor openai provider with new api diff --git a/packages/web/src/content/docs/docs/contributing/adrs/index.md b/packages/web/src/content/docs/docs/contributing/adrs/index.md index 5502dd44..3b083c63 100644 --- a/packages/web/src/content/docs/docs/contributing/adrs/index.md +++ b/packages/web/src/content/docs/docs/contributing/adrs/index.md @@ -7,18 +7,19 @@ This project tracks significant architectural decisions as ADRs. Each record cap ## Records -| ADR | Decision | Date | Status | -| ----------------------------------------------------------------------- | ----------------------------------- | ---------- | -------- | -| [001](/docs/contributing/adrs/001-multi-stage-analysis-pipeline/) | Multi-stage AI analysis pipeline | 2026-02-24 | Accepted | -| [002](/docs/contributing/adrs/002-provider-agnostic-ai-interface/) | Provider-agnostic AI interface | 2026-02-26 | Accepted | -| [003](/docs/contributing/adrs/003-architecture-model-adapter-system/) | Architecture model adapter system | 2026-02-27 | Accepted | -| [004](/docs/contributing/adrs/004-template-based-prompt-system/) | Template-based prompt system | 2026-02-24 | Accepted | -| [005](/docs/contributing/adrs/005-layered-configuration-with-zod/) | Layered configuration with Zod | 2026-02-25 | Accepted | -| [006](/docs/contributing/adrs/006-structured-error-hierarchy/) | Structured error hierarchy | 2026-02-27 | Accepted | -| [007](/docs/contributing/adrs/007-multi-platform-vcs-abstraction/) | Multi-platform VCS abstraction | 2026-02-26 | Accepted | -| [008](/docs/contributing/adrs/008-monorepo-workspace-structure/) | Monorepo workspace structure | 2026-02-25 | Accepted | -| [009](/docs/contributing/adrs/009-docker-and-github-action-deployment/) | Docker and GitHub Action deployment | 2026-02-24 | Accepted | -| [010](/docs/contributing/adrs/010-local-diff-check-command/) | Local diff check command | 2026-03-07 | Accepted | +| ADR | Decision | Date | Status | +| ----------------------------------------------------------------------------- | ----------------------------------------- | ---------- | -------- | +| [001](/docs/contributing/adrs/001-multi-stage-analysis-pipeline/) | Multi-stage AI analysis pipeline | 2026-02-24 | Accepted | +| [002](/docs/contributing/adrs/002-provider-agnostic-ai-interface/) | Provider-agnostic AI interface | 2026-02-26 | Accepted | +| [003](/docs/contributing/adrs/003-architecture-model-adapter-system/) | Architecture model adapter system | 2026-02-27 | Accepted | +| [004](/docs/contributing/adrs/004-template-based-prompt-system/) | Template-based prompt system | 2026-02-24 | Accepted | +| [005](/docs/contributing/adrs/005-layered-configuration-with-zod/) | Layered configuration with Zod | 2026-02-25 | Accepted | +| [006](/docs/contributing/adrs/006-structured-error-hierarchy/) | Structured error hierarchy | 2026-02-27 | Accepted | +| [007](/docs/contributing/adrs/007-multi-platform-vcs-abstraction/) | Multi-platform VCS abstraction | 2026-02-26 | Accepted | +| [008](/docs/contributing/adrs/008-monorepo-workspace-structure/) | Monorepo workspace structure | 2026-02-25 | Accepted | +| 
[009](/docs/contributing/adrs/009-docker-and-github-action-deployment/) | Docker and GitHub Action deployment | 2026-02-24 | Accepted | +| [010](/docs/contributing/adrs/010-local-diff-check-command/) | Local diff check command | 2026-03-07 | Accepted | +| [011](/docs/contributing/adrs/011-intent-based-provider-generation-profiles/) | Intent-based provider generation profiles | 2026-05-05 | Accepted | ## Adding a new ADR diff --git a/packages/web/src/content/docs/docs/reference/ai-providers.md b/packages/web/src/content/docs/docs/reference/ai-providers.md index dc3259c5..1a6c70c6 100644 --- a/packages/web/src/content/docs/docs/reference/ai-providers.md +++ b/packages/web/src/content/docs/docs/reference/ai-providers.md @@ -33,17 +33,17 @@ Each provider uses two model tiers to balance cost and quality: ### OpenAI -| Tier | Default model | -| -------- | -------------- | -| Fast | `gpt-4.1-mini` | -| Advanced | `gpt-4.1` | +| Tier | Default model | +| -------- | ------------- | +| Fast | `gpt-5-mini` | +| Advanced | `gpt-5` | ### Anthropic (experimental) -| Tier | Default model | -| -------- | ---------------------------- | -| Fast | `claude-haiku-4-5-20251001` | -| Advanced | `claude-sonnet-4-5-20250929` | +| Tier | Default model | +| -------- | ------------------- | +| Fast | `claude-haiku-4-5` | +| Advanced | `claude-sonnet-4-6` | :::caution Anthropic support is experimental and may not produce consistent results across all codebases. Use Gemini or OpenAI for production workflows.