diff --git a/.gitignore b/.gitignore index d164e1ab..ce8959b2 100644 --- a/.gitignore +++ b/.gitignore @@ -47,3 +47,4 @@ temp/ docs/maestro/ docs/superpowers/ hooks/permissions.json +.worktrees diff --git a/claude/src/config/agent-modes.json b/claude/src/config/agent-modes.json new file mode 100644 index 00000000..4c3f63ef --- /dev/null +++ b/claude/src/config/agent-modes.json @@ -0,0 +1,77 @@ +{ + "quality": { + "accessibility_specialist": "gemini-3.1-pro-preview", + "analytics_engineer": "gemini-3.1-pro-preview", + "api_designer": "gemini-3.1-pro-preview", + "architect": "gemini-3.1-pro-preview", + "code_reviewer": "gemini-3.1-pro-preview", + "coder": "gemini-3.1-pro-preview", + "compliance_reviewer": "gemini-3.1-pro-preview", + "content_strategist": "gemini-3.1-pro-preview", + "copywriter": "gemini-3-flash-preview", + "data_engineer": "gemini-3.1-pro-preview", + "debugger": "gemini-3.1-pro-preview", + "design_system_engineer": "gemini-3.1-pro-preview", + "devops_engineer": "gemini-3.1-pro-preview", + "i18n_specialist": "gemini-3.1-pro-preview", + "performance_engineer": "gemini-3.1-pro-preview", + "product_manager": "gemini-3.1-pro-preview", + "refactor": "gemini-3.1-pro-preview", + "security_engineer": "gemini-3.1-pro-preview", + "seo_specialist": "gemini-3.1-pro-preview", + "technical_writer": "gemini-3-flash-preview", + "tester": "gemini-3.1-pro-preview", + "ux_designer": "gemini-3.1-pro-preview", + "codebase_investigator": "gemini-3.1-pro-preview" + }, + "balanced": { + "accessibility_specialist": "gemini-3-flash-preview", + "analytics_engineer": "gemini-3-flash-preview", + "api_designer": "gemini-3.1-pro-preview", + "architect": "gemini-3.1-pro-preview", + "code_reviewer": "gemini-3-flash-preview", + "coder": "gemini-3.1-flash-lite-preview", + "compliance_reviewer": "gemini-3-flash-preview", + "content_strategist": "gemini-3-flash-preview", + "copywriter": "gemini-3.1-flash-lite-preview", + "data_engineer": "gemini-3-flash-preview", + "debugger": 
"gemini-3.1-pro-preview", + "design_system_engineer": "gemini-3-flash-preview", + "devops_engineer": "gemini-3-flash-preview", + "i18n_specialist": "gemini-3-flash-preview", + "performance_engineer": "gemini-3-flash-preview", + "product_manager": "gemini-3.1-pro-preview", + "refactor": "gemini-3.1-flash-lite-preview", + "security_engineer": "gemini-3-flash-preview", + "seo_specialist": "gemini-3-flash-preview", + "technical_writer": "gemini-3-flash-preview", + "tester": "gemini-3-flash-preview", + "ux_designer": "gemini-3-flash-preview", + "codebase_investigator": "gemini-3-flash-preview" + }, + "economic": { + "accessibility_specialist": "gemini-3.1-flash-lite-preview", + "analytics_engineer": "gemini-3.1-flash-lite-preview", + "api_designer": "gemini-3-flash-preview", + "architect": "gemini-3-flash-preview", + "code_reviewer": "gemini-3-flash-preview", + "coder": "gemini-3.1-flash-lite-preview", + "compliance_reviewer": "gemini-3-flash-preview", + "content_strategist": "gemini-3.1-flash-lite-preview", + "copywriter": "gemini-3.1-flash-lite-preview", + "data_engineer": "gemini-3-flash-preview", + "debugger": "gemini-3-flash-preview", + "design_system_engineer": "gemini-3-flash-preview", + "devops_engineer": "gemini-3.1-flash-lite-preview", + "i18n_specialist": "gemini-3.1-flash-lite-preview", + "performance_engineer": "gemini-3.1-flash-lite-preview", + "product_manager": "gemini-3-flash-preview", + "refactor": "gemini-3.1-flash-lite-preview", + "security_engineer": "gemini-3-flash-preview", + "seo_specialist": "gemini-3.1-flash-lite-preview", + "technical_writer": "gemini-3.1-flash-lite-preview", + "tester": "gemini-3-flash-preview", + "ux_designer": "gemini-3-flash-preview", + "codebase_investigator": "gemini-3.1-flash-lite-preview" + } +} \ No newline at end of file diff --git a/claude/src/core/agent-registry.js b/claude/src/core/agent-registry.js index 428aa021..c6bca8bc 100644 --- a/claude/src/core/agent-registry.js +++ b/claude/src/core/agent-registry.js @@ 
/**
 * Report whether a value is a non-null, non-array object, i.e. something that
 * can safely be treated as a JSON "object" node.
 *
 * @param {*} value - Any value.
 * @returns {boolean} True for plain-object-like values.
 */
function isPlainObject(value) {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Load the existing settings file, if any.
 *
 * A missing file or an empty/whitespace-only file yields an empty settings
 * object. Content that is not valid JSON, or whose top-level value is not an
 * object (null, array, scalar), is rejected as corrupted so that it is never
 * silently overwritten.
 *
 * @param {string} settingsPath - Absolute path to .gemini/settings.json.
 * @returns {Object} The parsed settings object.
 * @throws {Error} If the file cannot be read or does not hold a JSON object.
 */
function readSettings(settingsPath) {
  let content;
  try {
    content = fs.readFileSync(settingsPath, 'utf8');
  } catch (err) {
    // Reading directly (instead of existsSync + read) avoids a check-then-use race.
    if (err.code === 'ENOENT') {
      return {};
    }
    throw new Error(`Failed to read .gemini/settings.json: ${err.message}`, { cause: err });
  }

  if (content.trim() === '') {
    return {};
  }

  let parsed;
  try {
    parsed = JSON.parse(content);
  } catch (err) {
    throw new Error('Existing .gemini/settings.json is corrupted', { cause: err });
  }

  if (!isPlainObject(parsed)) {
    throw new Error('Existing .gemini/settings.json is corrupted');
  }
  return parsed;
}

/**
 * Build a new settings object with agents enabled and one model override per
 * agent in `mapping`. Nothing in `settings` is mutated.
 *
 * Existing keys are preserved at every level, including sibling keys inside an
 * agent's existing `modelConfig`; only `modelConfig.model` is replaced.
 *
 * @param {Object} settings - Current settings object.
 * @param {Object<string, string>} mapping - Agent name to model name.
 * @returns {Object} The updated settings object.
 */
function applyModelOverrides(settings, mapping) {
  const experimental = isPlainObject(settings.experimental) ? settings.experimental : {};
  const agents = isPlainObject(settings.agents) ? settings.agents : {};
  const overrides = { ...(isPlainObject(agents.overrides) ? agents.overrides : {}) };

  for (const [agent, model] of Object.entries(mapping)) {
    const existing = isPlainObject(overrides[agent]) ? overrides[agent] : {};
    const existingModelConfig = isPlainObject(existing.modelConfig) ? existing.modelConfig : {};
    overrides[agent] = {
      ...existing,
      // Merge rather than replace so other modelConfig keys survive.
      modelConfig: { ...existingModelConfig, model },
    };
  }

  return {
    ...settings,
    experimental: { ...experimental, enableAgents: true },
    agents: { ...agents, overrides },
  };
}

/**
 * Handle setup_models tool call.
 *
 * For `skip` nothing is read or written. For any other valid mode, the
 * agent-to-model mapping for that mode is loaded from
 * `<extensionRoot>/src/config/agent-modes.json` and merged into
 * `<projectRoot>/.gemini/settings.json`: `experimental.enableAgents` is set to
 * true and `agents.overrides[agent].modelConfig.model` is set for every agent
 * in the mapping.
 *
 * @param {Object} params - Tool arguments.
 * @param {string} params.mode - The operating mode (quality, balanced, economic, skip).
 * @param {string} projectRoot - The current project root.
 * @returns {Promise<Object>} `{ status: 'skipped_model_setup' }` for `skip`,
 *   otherwise `{ status: 'success', mode }`.
 * @throws {Error} If the mode is invalid, agent-modes.json cannot be read or
 *   lacks the mode, or the existing settings file is unreadable or corrupted.
 */
async function handleSetupModels(params, projectRoot) {
  const mode = (params || {}).mode;

  if (mode === 'skip') {
    return { status: 'skipped_model_setup' };
  }

  if (!['quality', 'balanced', 'economic'].includes(mode)) {
    throw new Error(`Invalid mode: ${mode}`);
  }

  const modesPath = path.join(resolveExtensionRoot(), 'src', 'config', 'agent-modes.json');

  let modes;
  try {
    modes = JSON.parse(fs.readFileSync(modesPath, 'utf8'));
  } catch (err) {
    throw new Error(`Failed to read agent-modes.json: ${err.message}`, { cause: err });
  }

  const mapping = isPlainObject(modes) ? modes[mode] : undefined;
  if (!isPlainObject(mapping)) {
    throw new Error(`Unknown mode: ${mode}`);
  }

  const settingsPath = path.join(projectRoot, '.gemini', 'settings.json');
  const settings = applyModelOverrides(readSettings(settingsPath), mapping);

  // NOTE(review): atomicWriteSync is defined elsewhere; whether it creates
  // missing parent directories is not visible here, so make sure .gemini/ exists.
  fs.mkdirSync(path.dirname(settingsPath), { recursive: true });
  atomicWriteSync(settingsPath, JSON.stringify(settings, null, 2));

  return { status: 'success', mode };
}
24e80006..ae4aa3a6 100644 --- a/claude/src/platforms/gemini/commands/maestro/orchestrate.toml +++ b/claude/src/platforms/gemini/commands/maestro/orchestrate.toml @@ -18,10 +18,12 @@ This preamble maps generic step references to Gemini CLI tool syntax. | Load template/reference/protocol | `get_skill_content(resources: [""])` | | Delegate to agent | Call agent tool by name: `coder(query: "...")`, `tester(query: "...")`, `design_system_engineer(query: "...")` | | MCP tools | `mcp_maestro_` (Gemini CLI also accepts bare names like `resolve_settings`) | -| Enter Plan Mode | `enter_plan_mode` — if unavailable, tell user: "Run `gemini --settings` and set `experimental.plan` to `true`, then restart." Offer to continue without Plan Mode using `ask_user` for approvals. | +| Enter Plan Mode | `enter_plan_mode` — if unavailable, tell user: "Run `gemini --settings` and set `experimental.plan` to `true`, then restart." Offer to continue without Plan Mode using `ask_user` for approvals; if the user prefers to restart instead, Request Restart. | | Exit Plan Mode | `exit_plan_mode` with `plan_filename` | | User prompt (choose) | `ask_user` with `type: 'choice'` | | User prompt (approve) | `ask_user` with `type: 'yesno'` | +| Define Agent Models | `ask_user` with `type: 'choice'` to select a mode (`quality`, `balanced`, `economic`, `skip`), then call `setup_models` with the selected mode. Unless the selected mode is `skip`, Request Restart. | +| Request Restart | STOP here and wait for the user to restart Gemini CLI and signal to continue. Do NOT proceed until they do. | ## Execute diff --git a/claude/src/references/orchestration-steps.md b/claude/src/references/orchestration-steps.md index e6f79e3b..959ed087 100644 --- a/claude/src/references/orchestration-steps.md +++ b/claude/src/references/orchestration-steps.md @@ -8,13 +8,14 @@ STARTUP (Turn 1 — tool calls only, no text output) 6. STOP. Turn 1 is ONLY steps 1-5. No text, no design questions, no file reads. CLASSIFICATION (Turn 2) - 7. Load the architecture reference: ["architecture"].
Do NOT load templates yet — they are loaded at their consumption points (steps 13, 15, 20). + 7. Load the architecture reference: ["architecture"]. Do NOT load templates yet — they are loaded at their consumption points (steps 14, 16, 21). 8. Classify task as simple/medium/complex. Present classification with rationale. - 9. Route: simple → Express (step 31). Medium/complex → continue to step 10. + 9. Route: simple → Express (step 32). Medium/complex → continue to step 10. DESIGN (Phase 1) 10. Enter Plan Mode. If unavailable, follow the runtime preamble's Plan Mode fallback instructions. -11. Call `get_skill_content` with resources: ["design-dialogue"]. Follow the loaded protocol for: +11. Define Agent Models. +12. Call `get_skill_content` with resources: ["design-dialogue"]. Follow the loaded protocol for: - Design depth selector (first design question) - Repository grounding (for existing codebases, skip for greenfield) - One question at a time via user prompt @@ -31,51 +32,51 @@ DESIGN (Phase 1) WRONG: user requests "fan site" → options include React, Next.js, Astro CORRECT: user requests "fan site" → recommended option is vanilla HTML/CSS/JS -12. Present design sections one at a time, per the design-dialogue skill's convergence protocol. +13. Present design sections one at a time, per the design-dialogue skill's convergence protocol. Each section must be presented individually and approved via user prompt before proceeding to the next. Do NOT present the full design as a single block. Quick depth may combine sections. Standard/Deep MUST validate individually. -13. Call `get_skill_content` with resources: ["design-document"]. Write approved design document to /plans/ (or Plan Mode tmp path). -14. If Plan Mode is active, exit Plan Mode with the plan path. Copy approved document to /plans/. +14. Call `get_skill_content` with resources: ["design-document"]. Write approved design document to /plans/ (or Plan Mode tmp path). +15. 
If Plan Mode is active, exit Plan Mode with the plan path. Copy approved document to /plans/. PLANNING (Phase 2) -15. Call `get_skill_content` with resources: ["implementation-planning", "implementation-plan"]. Follow the loaded skill protocol. -16. Call validate_plan with the generated plan and task_complexity. +16. Call `get_skill_content` with resources: ["implementation-planning", "implementation-plan"]. Follow the loaded skill protocol. +17. Call validate_plan with the generated plan and task_complexity. You MUST call validate_plan BEFORE presenting the plan for approval. Do NOT - present the plan, write it to state_dir, or proceed to step 17 without first + present the plan, write it to state_dir, or proceed to step 18 without first calling validate_plan and resolving any error-severity violations. validate_plan enforces server-side: phase count limits, dependency cycles, unknown agents, file ownership conflicts, and agent-deliverable compatibility (read-only agents cannot be assigned to file-creating phases). If it returns violations with severity "error", fix them in the plan and re-validate. -17. Present plan for user approval (Approve / Revise / Abort via user prompt). -18. Write approved implementation plan to /plans/. +18. Present plan for user approval (Approve / Revise / Abort via user prompt). +19. Write approved implementation plan to /plans/. EXECUTION SETUP (Phase 3 — pre-delegation) -19. Call `get_skill_content` with resources: ["execution"]. Follow its Execution Mode Gate. +20. Call `get_skill_content` with resources: ["execution"]. Follow its Execution Mode Gate. Present ONLY "Parallel" and "Sequential" as execution mode options. Do NOT present "Ask" as a user-facing choice — "ask" is a setting value that means "prompt the user", not an execution mode the user selects. -20. Call `get_skill_content` with resources: ["session-management", "session-state"]. -21. Create session via create_session with resolved execution_mode. 
Do NOT create before mode is resolved. -22. Call `get_skill_content` with resources: ["delegation", "validation", "agent-base-protocol", "filesystem-safety-protocol"]. +21. Call `get_skill_content` with resources: ["session-management", "session-state"]. +22. Create session via create_session with resolved execution_mode. Do NOT create before mode is resolved. +23. Call `get_skill_content` with resources: ["delegation", "validation", "agent-base-protocol", "filesystem-safety-protocol"]. EXECUTION (Phase 3 — delegation loop) -23. For each phase (or parallel batch): call `get_agent` for the assigned agent, then delegate using the returned methodology and tool restrictions. +24. For each phase (or parallel batch): call `get_agent` for the assigned agent, then delegate using the returned methodology and tool restrictions. Dispatch by calling the agent's registered tool directly. Do NOT use the built-in generalist tool or invoke agents by bare name. Each Maestro agent carries specialized methodology, tool restrictions, temperature, and turn limits from its frontmatter that the generalist ignores. -24. After each agent returns, parse Task Report + Downstream Context from response. -25. Call transition_phase to persist results. +25. After each agent returns, parse Task Report + Downstream Context from response. +26. Call transition_phase to persist results. For parallel batches: call transition_phase INDIVIDUALLY for EVERY completed phase in the batch. The MCP tool writes files_created, files_modified, @@ -85,24 +86,24 @@ EXECUTION (Phase 3 — delegation loop) merge all agents' files into one call — the archive attributes files per phase, so empty payloads mean lost traceability. -26. Repeat steps 23-25 until all phases complete. +27. Repeat steps 24-26 until all phases complete. COMPLETION (Phase 4) -27. Call `get_skill_content` with resources: ["code-review"]. -28. If execution changed non-documentation files, delegate to the code reviewer agent. 
Block on Critical/Major findings. +28. Call `get_skill_content` with resources: ["code-review"]. +29. If execution changed non-documentation files, delegate to the code reviewer agent. Block on Critical/Major findings. If Critical/Major findings: re-delegate to the implementing agent to fix. The orchestrator MUST NOT write code directly. -29. If MAESTRO_AUTO_ARCHIVE is true (or unset), call archive_session. If false, inform user session is complete but not archived. -30. Present final summary with files changed, phase outcomes, and next steps. +30. If MAESTRO_AUTO_ARCHIVE is true (or unset), call archive_session. If false, inform user session is complete but not archived. +31. Present final summary with files changed, phase outcomes, and next steps. RECOVERY (referenced from any step on user request) If the user says the flow moved too fast: return to the most recent unanswered approval gate. If the user asks for implementation before approval: remind them Maestro requires approval first. If the user asks to skip execution-mode: remind them parallel/sequential is required unless MAESTRO_EXECUTION_MODE pins it. If an answer invalidates a prior choice: restate the updated assumption and re-run the relevant gate. -If delegation collapses to parent session without fallback approval: return to step 19 or re-scope the child-agent work packages. +If delegation collapses to parent session without fallback approval: return to step 20 or re-scope the child-agent work packages. EXPRESS WORKFLOW (simple tasks only — jumped to from step 9) @@ -110,17 +111,18 @@ EXPRESS MODE GATE BYPASS: Express bypasses the execution-mode gate entirely. Exp EXPRESS MCP FALLBACK: If MCP state tools (create_session, transition_phase, archive_session) are unavailable, fall back to direct file writes on /state/active-session.md. -31. Verify classification is simple. If task requires multiple phases or agents, override to medium → step 10. +32. Verify classification is simple. 
If task requires multiple phases or agents, override to medium → step 10. Express sessions MUST have exactly one implementation phase with exactly one agent. -32. Ask 1-2 clarifying questions from Area 1 only. +33. Define Agent Models. +34. Ask 1-2 clarifying questions from Area 1 only. Each question MUST use the user prompt tool (not plain text). Use the choose variant with 2-4 options where possible. Do NOT ask questions as plain text in the model response — the user prompt tool is the only input mechanism. -33. Present structured Express brief as plain text, then ask for approval. +35. Present structured Express brief as plain text, then ask for approval. The brief MUST be plain text output in the model response. The approval MUST be a SEPARATE user prompt tool call — not embedded in the @@ -128,14 +130,14 @@ EXPRESS MCP FALLBACK: If MCP state tools (create_session, transition_phase, arch These are two distinct actions: first emit the brief as text, then call the user prompt tool for approval. Do NOT combine them into one text block. -34. On approval, create session with workflow_mode: "express", exactly 1 phase. +36. On approval, create session with workflow_mode: "express", exactly 1 phase. On rejection, revise. On second rejection, escalate to Standard → step 10. -35. Call `get_skill_content` with resources: ["agent-base-protocol", "filesystem-safety-protocol"] and prepend them to the delegation prompt. -36. Delegate to the assigned agent. +37. Call `get_skill_content` with resources: ["agent-base-protocol", "filesystem-safety-protocol"] and prepend them to the delegation prompt. +38. Delegate to the assigned agent. - Same dispatch rule as step 23: call agent by registered tool name, not generalist. + Same dispatch rule as step 24: call agent by registered tool name, not generalist. -37. Parse Task Report from the agent's response. Call transition_phase to persist results. +39. Parse Task Report from the agent's response. 
Call transition_phase to persist results. You MUST call transition_phase after the implementing agent returns. Extract files_created, files_modified, files_deleted, and downstream_context from the @@ -143,15 +145,15 @@ EXPRESS MCP FALLBACK: If MCP state tools (create_session, transition_phase, arch state has no record of what was delivered. Do NOT skip to code review or archive without calling transition_phase first. -38. Delegate to the code reviewer agent. +40. Delegate to the code reviewer agent. If Critical/Major findings: re-delegate to implementing agent (1 retry). Orchestrator MUST NOT write code directly. If retry fails, escalate to user. -39. Call archive_session. -40. Present summary. +41. Call archive_session. +42. Present summary. EXPRESS RESUME (when resuming an Express session from get_session_status) -If phase is pending: re-generate and present brief (step 33). On approval, proceed to delegation (step 36). -If phase is in_progress: re-delegate with same scope (step 36). -If phase is completed but session is in_progress: run code review (step 38), then archive (step 39). +If phase is pending: re-generate and present brief (step 35). On approval, proceed to delegation (step 38). +If phase is in_progress: re-delegate with same scope (step 38). +If phase is completed but session is in_progress: run code review (step 40), then archive (step 41). diff --git a/commands/maestro/orchestrate.toml b/commands/maestro/orchestrate.toml index 24e80006..ae4aa3a6 100644 --- a/commands/maestro/orchestrate.toml +++ b/commands/maestro/orchestrate.toml @@ -18,10 +18,12 @@ This preamble maps generic step references to Gemini CLI tool syntax. 
| Load template/reference/protocol | `get_skill_content(resources: [""])` | | Delegate to agent | Call agent tool by name: `coder(query: "...")`, `tester(query: "...")`, `design_system_engineer(query: "...")` | | MCP tools | `mcp_maestro_` (Gemini CLI also accepts bare names like `resolve_settings`) | -| Enter Plan Mode | `enter_plan_mode` — if unavailable, tell user: "Run `gemini --settings` and set `experimental.plan` to `true`, then restart." Offer to continue without Plan Mode using `ask_user` for approvals. | +| Enter Plan Mode | `enter_plan_mode` — if unavailable, tell user: "Run `gemini --settings` and set `experimental.plan` to `true`, then restart." Offer to continue without Plan Mode using `ask_user` for approvals; if the user prefers to restart instead, Request Restart. | | Exit Plan Mode | `exit_plan_mode` with `plan_filename` | | User prompt (choose) | `ask_user` with `type: 'choice'` | | User prompt (approve) | `ask_user` with `type: 'yesno'` | +| Define Agent Models | `ask_user` with `type: 'choice'` to select a mode (`quality`, `balanced`, `economic`, `skip`), then call `setup_models` with the selected mode. Unless the selected mode is `skip`, Request Restart. | +| Request Restart | STOP here and wait for the user to restart Gemini CLI and signal to continue. Do NOT proceed until they do.
| ## Execute diff --git a/plugins/maestro/src/config/agent-modes.json b/plugins/maestro/src/config/agent-modes.json new file mode 100644 index 00000000..4c3f63ef --- /dev/null +++ b/plugins/maestro/src/config/agent-modes.json @@ -0,0 +1,77 @@ +{ + "quality": { + "accessibility_specialist": "gemini-3.1-pro-preview", + "analytics_engineer": "gemini-3.1-pro-preview", + "api_designer": "gemini-3.1-pro-preview", + "architect": "gemini-3.1-pro-preview", + "code_reviewer": "gemini-3.1-pro-preview", + "coder": "gemini-3.1-pro-preview", + "compliance_reviewer": "gemini-3.1-pro-preview", + "content_strategist": "gemini-3.1-pro-preview", + "copywriter": "gemini-3-flash-preview", + "data_engineer": "gemini-3.1-pro-preview", + "debugger": "gemini-3.1-pro-preview", + "design_system_engineer": "gemini-3.1-pro-preview", + "devops_engineer": "gemini-3.1-pro-preview", + "i18n_specialist": "gemini-3.1-pro-preview", + "performance_engineer": "gemini-3.1-pro-preview", + "product_manager": "gemini-3.1-pro-preview", + "refactor": "gemini-3.1-pro-preview", + "security_engineer": "gemini-3.1-pro-preview", + "seo_specialist": "gemini-3.1-pro-preview", + "technical_writer": "gemini-3-flash-preview", + "tester": "gemini-3.1-pro-preview", + "ux_designer": "gemini-3.1-pro-preview", + "codebase_investigator": "gemini-3.1-pro-preview" + }, + "balanced": { + "accessibility_specialist": "gemini-3-flash-preview", + "analytics_engineer": "gemini-3-flash-preview", + "api_designer": "gemini-3.1-pro-preview", + "architect": "gemini-3.1-pro-preview", + "code_reviewer": "gemini-3-flash-preview", + "coder": "gemini-3.1-flash-lite-preview", + "compliance_reviewer": "gemini-3-flash-preview", + "content_strategist": "gemini-3-flash-preview", + "copywriter": "gemini-3.1-flash-lite-preview", + "data_engineer": "gemini-3-flash-preview", + "debugger": "gemini-3.1-pro-preview", + "design_system_engineer": "gemini-3-flash-preview", + "devops_engineer": "gemini-3-flash-preview", + "i18n_specialist": 
"gemini-3-flash-preview", + "performance_engineer": "gemini-3-flash-preview", + "product_manager": "gemini-3.1-pro-preview", + "refactor": "gemini-3.1-flash-lite-preview", + "security_engineer": "gemini-3-flash-preview", + "seo_specialist": "gemini-3-flash-preview", + "technical_writer": "gemini-3-flash-preview", + "tester": "gemini-3-flash-preview", + "ux_designer": "gemini-3-flash-preview", + "codebase_investigator": "gemini-3-flash-preview" + }, + "economic": { + "accessibility_specialist": "gemini-3.1-flash-lite-preview", + "analytics_engineer": "gemini-3.1-flash-lite-preview", + "api_designer": "gemini-3-flash-preview", + "architect": "gemini-3-flash-preview", + "code_reviewer": "gemini-3-flash-preview", + "coder": "gemini-3.1-flash-lite-preview", + "compliance_reviewer": "gemini-3-flash-preview", + "content_strategist": "gemini-3.1-flash-lite-preview", + "copywriter": "gemini-3.1-flash-lite-preview", + "data_engineer": "gemini-3-flash-preview", + "debugger": "gemini-3-flash-preview", + "design_system_engineer": "gemini-3-flash-preview", + "devops_engineer": "gemini-3.1-flash-lite-preview", + "i18n_specialist": "gemini-3.1-flash-lite-preview", + "performance_engineer": "gemini-3.1-flash-lite-preview", + "product_manager": "gemini-3-flash-preview", + "refactor": "gemini-3.1-flash-lite-preview", + "security_engineer": "gemini-3-flash-preview", + "seo_specialist": "gemini-3.1-flash-lite-preview", + "technical_writer": "gemini-3.1-flash-lite-preview", + "tester": "gemini-3-flash-preview", + "ux_designer": "gemini-3-flash-preview", + "codebase_investigator": "gemini-3.1-flash-lite-preview" + } +} \ No newline at end of file diff --git a/plugins/maestro/src/core/agent-registry.js b/plugins/maestro/src/core/agent-registry.js index 428aa021..c6bca8bc 100644 --- a/plugins/maestro/src/core/agent-registry.js +++ b/plugins/maestro/src/core/agent-registry.js @@ -4,6 +4,7 @@ const KNOWN_AGENTS = Object.freeze([ 'architect', 'api_designer', 'code_reviewer', + 
/**
 * Report whether a value is a non-null, non-array object, i.e. something that
 * can safely be treated as a JSON "object" node.
 *
 * @param {*} value - Any value.
 * @returns {boolean} True for plain-object-like values.
 */
function isPlainObject(value) {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Load the existing settings file, if any.
 *
 * A missing file or an empty/whitespace-only file yields an empty settings
 * object. Content that is not valid JSON, or whose top-level value is not an
 * object (null, array, scalar), is rejected as corrupted so that it is never
 * silently overwritten.
 *
 * @param {string} settingsPath - Absolute path to .gemini/settings.json.
 * @returns {Object} The parsed settings object.
 * @throws {Error} If the file cannot be read or does not hold a JSON object.
 */
function readSettings(settingsPath) {
  let content;
  try {
    content = fs.readFileSync(settingsPath, 'utf8');
  } catch (err) {
    // Reading directly (instead of existsSync + read) avoids a check-then-use race.
    if (err.code === 'ENOENT') {
      return {};
    }
    throw new Error(`Failed to read .gemini/settings.json: ${err.message}`, { cause: err });
  }

  if (content.trim() === '') {
    return {};
  }

  let parsed;
  try {
    parsed = JSON.parse(content);
  } catch (err) {
    throw new Error('Existing .gemini/settings.json is corrupted', { cause: err });
  }

  if (!isPlainObject(parsed)) {
    throw new Error('Existing .gemini/settings.json is corrupted');
  }
  return parsed;
}

/**
 * Build a new settings object with agents enabled and one model override per
 * agent in `mapping`. Nothing in `settings` is mutated.
 *
 * Existing keys are preserved at every level, including sibling keys inside an
 * agent's existing `modelConfig`; only `modelConfig.model` is replaced.
 *
 * @param {Object} settings - Current settings object.
 * @param {Object<string, string>} mapping - Agent name to model name.
 * @returns {Object} The updated settings object.
 */
function applyModelOverrides(settings, mapping) {
  const experimental = isPlainObject(settings.experimental) ? settings.experimental : {};
  const agents = isPlainObject(settings.agents) ? settings.agents : {};
  const overrides = { ...(isPlainObject(agents.overrides) ? agents.overrides : {}) };

  for (const [agent, model] of Object.entries(mapping)) {
    const existing = isPlainObject(overrides[agent]) ? overrides[agent] : {};
    const existingModelConfig = isPlainObject(existing.modelConfig) ? existing.modelConfig : {};
    overrides[agent] = {
      ...existing,
      // Merge rather than replace so other modelConfig keys survive.
      modelConfig: { ...existingModelConfig, model },
    };
  }

  return {
    ...settings,
    experimental: { ...experimental, enableAgents: true },
    agents: { ...agents, overrides },
  };
}

/**
 * Handle setup_models tool call.
 *
 * For `skip` nothing is read or written. For any other valid mode, the
 * agent-to-model mapping for that mode is loaded from
 * `<extensionRoot>/src/config/agent-modes.json` and merged into
 * `<projectRoot>/.gemini/settings.json`: `experimental.enableAgents` is set to
 * true and `agents.overrides[agent].modelConfig.model` is set for every agent
 * in the mapping.
 *
 * @param {Object} params - Tool arguments.
 * @param {string} params.mode - The operating mode (quality, balanced, economic, skip).
 * @param {string} projectRoot - The current project root.
 * @returns {Promise<Object>} `{ status: 'skipped_model_setup' }` for `skip`,
 *   otherwise `{ status: 'success', mode }`.
 * @throws {Error} If the mode is invalid, agent-modes.json cannot be read or
 *   lacks the mode, or the existing settings file is unreadable or corrupted.
 */
async function handleSetupModels(params, projectRoot) {
  const mode = (params || {}).mode;

  if (mode === 'skip') {
    return { status: 'skipped_model_setup' };
  }

  if (!['quality', 'balanced', 'economic'].includes(mode)) {
    throw new Error(`Invalid mode: ${mode}`);
  }

  const modesPath = path.join(resolveExtensionRoot(), 'src', 'config', 'agent-modes.json');

  let modes;
  try {
    modes = JSON.parse(fs.readFileSync(modesPath, 'utf8'));
  } catch (err) {
    throw new Error(`Failed to read agent-modes.json: ${err.message}`, { cause: err });
  }

  const mapping = isPlainObject(modes) ? modes[mode] : undefined;
  if (!isPlainObject(mapping)) {
    throw new Error(`Unknown mode: ${mode}`);
  }

  const settingsPath = path.join(projectRoot, '.gemini', 'settings.json');
  const settings = applyModelOverrides(readSettings(settingsPath), mapping);

  // NOTE(review): atomicWriteSync is defined elsewhere; whether it creates
  // missing parent directories is not visible here, so make sure .gemini/ exists.
  fs.mkdirSync(path.dirname(settingsPath), { recursive: true });
  atomicWriteSync(settingsPath, JSON.stringify(settings, null, 2));

  return { status: 'success', mode };
}
--- a/plugins/maestro/src/platforms/gemini/commands/maestro/orchestrate.toml +++ b/plugins/maestro/src/platforms/gemini/commands/maestro/orchestrate.toml @@ -18,10 +18,12 @@ This preamble maps generic step references to Gemini CLI tool syntax. | Load template/reference/protocol | `get_skill_content(resources: [""])` | | Delegate to agent | Call agent tool by name: `coder(query: "...")`, `tester(query: "...")`, `design_system_engineer(query: "...")` | | MCP tools | `mcp_maestro_` (Gemini CLI also accepts bare names like `resolve_settings`) | -| Enter Plan Mode | `enter_plan_mode` — if unavailable, tell user: "Run `gemini --settings` and set `experimental.plan` to `true`, then restart." Offer to continue without Plan Mode using `ask_user` for approvals. | +| Enter Plan Mode | `enter_plan_mode` — if unavailable, tell user: "Run `gemini --settings` and set `experimental.plan` to `true`, then restart." Offer to continue without Plan Mode using `ask_user` for approvals; if the user prefers to restart instead, Request Restart. | | Exit Plan Mode | `exit_plan_mode` with `plan_filename` | | User prompt (choose) | `ask_user` with `type: 'choice'` | | User prompt (approve) | `ask_user` with `type: 'yesno'` | +| Define Agent Models | `ask_user` with `type: 'choice'` to select a mode (`quality`, `balanced`, `economic`, `skip`), then call `setup_models` with the selected mode. Unless the selected mode is `skip`, Request Restart. | +| Request Restart | STOP here and wait for the user to restart Gemini CLI and signal to continue. Do NOT proceed until they do. | ## Execute diff --git a/plugins/maestro/src/references/orchestration-steps.md b/plugins/maestro/src/references/orchestration-steps.md index e6f79e3b..959ed087 100644 --- a/plugins/maestro/src/references/orchestration-steps.md +++ b/plugins/maestro/src/references/orchestration-steps.md @@ -8,13 +8,14 @@ STARTUP (Turn 1 — tool calls only, no text output) 6. STOP. Turn 1 is ONLY steps 1-5. No text, no design questions, no file reads. CLASSIFICATION (Turn 2) - 7. Load the architecture reference: ["architecture"].
Do NOT load templates yet — they are loaded at their consumption points (steps 13, 15, 20). + 7. Load the architecture reference: ["architecture"]. Do NOT load templates yet — they are loaded at their consumption points (steps 14, 16, 21). 8. Classify task as simple/medium/complex. Present classification with rationale. - 9. Route: simple → Express (step 31). Medium/complex → continue to step 10. + 9. Route: simple → Express (step 32). Medium/complex → continue to step 10. DESIGN (Phase 1) 10. Enter Plan Mode. If unavailable, follow the runtime preamble's Plan Mode fallback instructions. -11. Call `get_skill_content` with resources: ["design-dialogue"]. Follow the loaded protocol for: +11. Define Agent Models. +12. Call `get_skill_content` with resources: ["design-dialogue"]. Follow the loaded protocol for: - Design depth selector (first design question) - Repository grounding (for existing codebases, skip for greenfield) - One question at a time via user prompt @@ -31,51 +32,51 @@ DESIGN (Phase 1) WRONG: user requests "fan site" → options include React, Next.js, Astro CORRECT: user requests "fan site" → recommended option is vanilla HTML/CSS/JS -12. Present design sections one at a time, per the design-dialogue skill's convergence protocol. +13. Present design sections one at a time, per the design-dialogue skill's convergence protocol. Each section must be presented individually and approved via user prompt before proceeding to the next. Do NOT present the full design as a single block. Quick depth may combine sections. Standard/Deep MUST validate individually. -13. Call `get_skill_content` with resources: ["design-document"]. Write approved design document to /plans/ (or Plan Mode tmp path). -14. If Plan Mode is active, exit Plan Mode with the plan path. Copy approved document to /plans/. +14. Call `get_skill_content` with resources: ["design-document"]. Write approved design document to /plans/ (or Plan Mode tmp path). +15. 
If Plan Mode is active, exit Plan Mode with the plan path. Copy approved document to /plans/. PLANNING (Phase 2) -15. Call `get_skill_content` with resources: ["implementation-planning", "implementation-plan"]. Follow the loaded skill protocol. -16. Call validate_plan with the generated plan and task_complexity. +16. Call `get_skill_content` with resources: ["implementation-planning", "implementation-plan"]. Follow the loaded skill protocol. +17. Call validate_plan with the generated plan and task_complexity. You MUST call validate_plan BEFORE presenting the plan for approval. Do NOT - present the plan, write it to state_dir, or proceed to step 17 without first + present the plan, write it to state_dir, or proceed to step 18 without first calling validate_plan and resolving any error-severity violations. validate_plan enforces server-side: phase count limits, dependency cycles, unknown agents, file ownership conflicts, and agent-deliverable compatibility (read-only agents cannot be assigned to file-creating phases). If it returns violations with severity "error", fix them in the plan and re-validate. -17. Present plan for user approval (Approve / Revise / Abort via user prompt). -18. Write approved implementation plan to /plans/. +18. Present plan for user approval (Approve / Revise / Abort via user prompt). +19. Write approved implementation plan to /plans/. EXECUTION SETUP (Phase 3 — pre-delegation) -19. Call `get_skill_content` with resources: ["execution"]. Follow its Execution Mode Gate. +20. Call `get_skill_content` with resources: ["execution"]. Follow its Execution Mode Gate. Present ONLY "Parallel" and "Sequential" as execution mode options. Do NOT present "Ask" as a user-facing choice — "ask" is a setting value that means "prompt the user", not an execution mode the user selects. -20. Call `get_skill_content` with resources: ["session-management", "session-state"]. -21. Create session via create_session with resolved execution_mode. 
Do NOT create before mode is resolved. -22. Call `get_skill_content` with resources: ["delegation", "validation", "agent-base-protocol", "filesystem-safety-protocol"]. +21. Call `get_skill_content` with resources: ["session-management", "session-state"]. +22. Create session via create_session with resolved execution_mode. Do NOT create before mode is resolved. +23. Call `get_skill_content` with resources: ["delegation", "validation", "agent-base-protocol", "filesystem-safety-protocol"]. EXECUTION (Phase 3 — delegation loop) -23. For each phase (or parallel batch): call `get_agent` for the assigned agent, then delegate using the returned methodology and tool restrictions. +24. For each phase (or parallel batch): call `get_agent` for the assigned agent, then delegate using the returned methodology and tool restrictions. Dispatch by calling the agent's registered tool directly. Do NOT use the built-in generalist tool or invoke agents by bare name. Each Maestro agent carries specialized methodology, tool restrictions, temperature, and turn limits from its frontmatter that the generalist ignores. -24. After each agent returns, parse Task Report + Downstream Context from response. -25. Call transition_phase to persist results. +25. After each agent returns, parse Task Report + Downstream Context from response. +26. Call transition_phase to persist results. For parallel batches: call transition_phase INDIVIDUALLY for EVERY completed phase in the batch. The MCP tool writes files_created, files_modified, @@ -85,24 +86,24 @@ EXECUTION (Phase 3 — delegation loop) merge all agents' files into one call — the archive attributes files per phase, so empty payloads mean lost traceability. -26. Repeat steps 23-25 until all phases complete. +27. Repeat steps 24-26 until all phases complete. COMPLETION (Phase 4) -27. Call `get_skill_content` with resources: ["code-review"]. -28. If execution changed non-documentation files, delegate to the code reviewer agent. 
Block on Critical/Major findings. +28. Call `get_skill_content` with resources: ["code-review"]. +29. If execution changed non-documentation files, delegate to the code reviewer agent. Block on Critical/Major findings. If Critical/Major findings: re-delegate to the implementing agent to fix. The orchestrator MUST NOT write code directly. -29. If MAESTRO_AUTO_ARCHIVE is true (or unset), call archive_session. If false, inform user session is complete but not archived. -30. Present final summary with files changed, phase outcomes, and next steps. +30. If MAESTRO_AUTO_ARCHIVE is true (or unset), call archive_session. If false, inform user session is complete but not archived. +31. Present final summary with files changed, phase outcomes, and next steps. RECOVERY (referenced from any step on user request) If the user says the flow moved too fast: return to the most recent unanswered approval gate. If the user asks for implementation before approval: remind them Maestro requires approval first. If the user asks to skip execution-mode: remind them parallel/sequential is required unless MAESTRO_EXECUTION_MODE pins it. If an answer invalidates a prior choice: restate the updated assumption and re-run the relevant gate. -If delegation collapses to parent session without fallback approval: return to step 19 or re-scope the child-agent work packages. +If delegation collapses to parent session without fallback approval: return to step 20 or re-scope the child-agent work packages. EXPRESS WORKFLOW (simple tasks only — jumped to from step 9) @@ -110,17 +111,18 @@ EXPRESS MODE GATE BYPASS: Express bypasses the execution-mode gate entirely. Exp EXPRESS MCP FALLBACK: If MCP state tools (create_session, transition_phase, archive_session) are unavailable, fall back to direct file writes on /state/active-session.md. -31. Verify classification is simple. If task requires multiple phases or agents, override to medium → step 10. +32. Verify classification is simple. 
If task requires multiple phases or agents, override to medium → step 10. Express sessions MUST have exactly one implementation phase with exactly one agent. -32. Ask 1-2 clarifying questions from Area 1 only. +33. Define Agent Models. +34. Ask 1-2 clarifying questions from Area 1 only. Each question MUST use the user prompt tool (not plain text). Use the choose variant with 2-4 options where possible. Do NOT ask questions as plain text in the model response — the user prompt tool is the only input mechanism. -33. Present structured Express brief as plain text, then ask for approval. +35. Present structured Express brief as plain text, then ask for approval. The brief MUST be plain text output in the model response. The approval MUST be a SEPARATE user prompt tool call — not embedded in the @@ -128,14 +130,14 @@ EXPRESS MCP FALLBACK: If MCP state tools (create_session, transition_phase, arch These are two distinct actions: first emit the brief as text, then call the user prompt tool for approval. Do NOT combine them into one text block. -34. On approval, create session with workflow_mode: "express", exactly 1 phase. +36. On approval, create session with workflow_mode: "express", exactly 1 phase. On rejection, revise. On second rejection, escalate to Standard → step 10. -35. Call `get_skill_content` with resources: ["agent-base-protocol", "filesystem-safety-protocol"] and prepend them to the delegation prompt. -36. Delegate to the assigned agent. +37. Call `get_skill_content` with resources: ["agent-base-protocol", "filesystem-safety-protocol"] and prepend them to the delegation prompt. +38. Delegate to the assigned agent. - Same dispatch rule as step 23: call agent by registered tool name, not generalist. + Same dispatch rule as step 24: call agent by registered tool name, not generalist. -37. Parse Task Report from the agent's response. Call transition_phase to persist results. +39. Parse Task Report from the agent's response. 
Call transition_phase to persist results. You MUST call transition_phase after the implementing agent returns. Extract files_created, files_modified, files_deleted, and downstream_context from the @@ -143,15 +145,15 @@ EXPRESS MCP FALLBACK: If MCP state tools (create_session, transition_phase, arch state has no record of what was delivered. Do NOT skip to code review or archive without calling transition_phase first. -38. Delegate to the code reviewer agent. +40. Delegate to the code reviewer agent. If Critical/Major findings: re-delegate to implementing agent (1 retry). Orchestrator MUST NOT write code directly. If retry fails, escalate to user. -39. Call archive_session. -40. Present summary. +41. Call archive_session. +42. Present summary. EXPRESS RESUME (when resuming an Express session from get_session_status) -If phase is pending: re-generate and present brief (step 33). On approval, proceed to delegation (step 36). -If phase is in_progress: re-delegate with same scope (step 36). -If phase is completed but session is in_progress: run code review (step 38), then archive (step 39). +If phase is pending: re-generate and present brief (step 35). On approval, proceed to delegation (step 38). +If phase is in_progress: re-delegate with same scope (step 38). +If phase is completed but session is in_progress: run code review (step 40), then archive (step 41). 
diff --git a/src/config/agent-modes.json b/src/config/agent-modes.json new file mode 100644 index 00000000..4c3f63ef --- /dev/null +++ b/src/config/agent-modes.json @@ -0,0 +1,77 @@ +{ + "quality": { + "accessibility_specialist": "gemini-3.1-pro-preview", + "analytics_engineer": "gemini-3.1-pro-preview", + "api_designer": "gemini-3.1-pro-preview", + "architect": "gemini-3.1-pro-preview", + "code_reviewer": "gemini-3.1-pro-preview", + "coder": "gemini-3.1-pro-preview", + "compliance_reviewer": "gemini-3.1-pro-preview", + "content_strategist": "gemini-3.1-pro-preview", + "copywriter": "gemini-3-flash-preview", + "data_engineer": "gemini-3.1-pro-preview", + "debugger": "gemini-3.1-pro-preview", + "design_system_engineer": "gemini-3.1-pro-preview", + "devops_engineer": "gemini-3.1-pro-preview", + "i18n_specialist": "gemini-3.1-pro-preview", + "performance_engineer": "gemini-3.1-pro-preview", + "product_manager": "gemini-3.1-pro-preview", + "refactor": "gemini-3.1-pro-preview", + "security_engineer": "gemini-3.1-pro-preview", + "seo_specialist": "gemini-3.1-pro-preview", + "technical_writer": "gemini-3-flash-preview", + "tester": "gemini-3.1-pro-preview", + "ux_designer": "gemini-3.1-pro-preview", + "codebase_investigator": "gemini-3.1-pro-preview" + }, + "balanced": { + "accessibility_specialist": "gemini-3-flash-preview", + "analytics_engineer": "gemini-3-flash-preview", + "api_designer": "gemini-3.1-pro-preview", + "architect": "gemini-3.1-pro-preview", + "code_reviewer": "gemini-3-flash-preview", + "coder": "gemini-3.1-flash-lite-preview", + "compliance_reviewer": "gemini-3-flash-preview", + "content_strategist": "gemini-3-flash-preview", + "copywriter": "gemini-3.1-flash-lite-preview", + "data_engineer": "gemini-3-flash-preview", + "debugger": "gemini-3.1-pro-preview", + "design_system_engineer": "gemini-3-flash-preview", + "devops_engineer": "gemini-3-flash-preview", + "i18n_specialist": "gemini-3-flash-preview", + "performance_engineer": "gemini-3-flash-preview", 
+ "product_manager": "gemini-3.1-pro-preview", + "refactor": "gemini-3.1-flash-lite-preview", + "security_engineer": "gemini-3-flash-preview", + "seo_specialist": "gemini-3-flash-preview", + "technical_writer": "gemini-3-flash-preview", + "tester": "gemini-3-flash-preview", + "ux_designer": "gemini-3-flash-preview", + "codebase_investigator": "gemini-3-flash-preview" + }, + "economic": { + "accessibility_specialist": "gemini-3.1-flash-lite-preview", + "analytics_engineer": "gemini-3.1-flash-lite-preview", + "api_designer": "gemini-3-flash-preview", + "architect": "gemini-3-flash-preview", + "code_reviewer": "gemini-3-flash-preview", + "coder": "gemini-3.1-flash-lite-preview", + "compliance_reviewer": "gemini-3-flash-preview", + "content_strategist": "gemini-3.1-flash-lite-preview", + "copywriter": "gemini-3.1-flash-lite-preview", + "data_engineer": "gemini-3-flash-preview", + "debugger": "gemini-3-flash-preview", + "design_system_engineer": "gemini-3-flash-preview", + "devops_engineer": "gemini-3.1-flash-lite-preview", + "i18n_specialist": "gemini-3.1-flash-lite-preview", + "performance_engineer": "gemini-3.1-flash-lite-preview", + "product_manager": "gemini-3-flash-preview", + "refactor": "gemini-3.1-flash-lite-preview", + "security_engineer": "gemini-3-flash-preview", + "seo_specialist": "gemini-3.1-flash-lite-preview", + "technical_writer": "gemini-3.1-flash-lite-preview", + "tester": "gemini-3-flash-preview", + "ux_designer": "gemini-3-flash-preview", + "codebase_investigator": "gemini-3.1-flash-lite-preview" + } +} \ No newline at end of file diff --git a/src/core/agent-registry.js b/src/core/agent-registry.js index 428aa021..c6bca8bc 100644 --- a/src/core/agent-registry.js +++ b/src/core/agent-registry.js @@ -4,6 +4,7 @@ const KNOWN_AGENTS = Object.freeze([ 'architect', 'api_designer', 'code_reviewer', + 'codebase_investigator', // Native to gemini-cli 'coder', 'data_engineer', 'debugger', diff --git a/src/mcp/handlers/setup-models.js 
/**
 * Tell whether a value is a non-null, non-array object (i.e. something that
 * can safely receive property assignments and be spread as a record).
 *
 * @param {*} value - Any value, typically the result of JSON.parse.
 * @returns {boolean} True for plain JSON-style objects.
 */
function isPlainObject(value) {
  return value !== null && typeof value === 'object' && !Array.isArray(value);
}

/**
 * Merge an agent -> model mapping into `settings.agents.overrides`, in place.
 *
 * Existing per-agent override keys AND existing `modelConfig` keys are kept;
 * only `modelConfig.model` is replaced.
 *
 * @param {Object} settings - Parsed settings object (mutated).
 * @param {Object} mapping - Agent name -> model identifier.
 * @returns {void}
 */
function applyModelOverrides(settings, mapping) {
  // A non-object value here would make the property assignments below throw
  // in strict mode, so it is replaced with a fresh container.
  if (!isPlainObject(settings.agents)) {
    settings.agents = {};
  }
  if (!isPlainObject(settings.agents.overrides)) {
    settings.agents.overrides = {};
  }

  const overrides = settings.agents.overrides;
  for (const [agent, model] of Object.entries(mapping)) {
    const current = isPlainObject(overrides[agent]) ? overrides[agent] : {};
    const currentModelConfig = isPlainObject(current.modelConfig)
      ? current.modelConfig
      : {};

    overrides[agent] = {
      ...current,
      // Spread the previous modelConfig first so sibling keys survive and
      // only the model identifier is overwritten.
      modelConfig: { ...currentModelConfig, model },
    };
  }
}

/**
 * Handle setup_models tool call.
 *
 * Reads the agent -> model mapping for the requested mode from
 * `src/config/agent-modes.json` under the extension root, merges it into
 * `<projectRoot>/.gemini/settings.json` (creating the file if absent), and
 * forces `experimental.enableAgents` to true. Unrelated settings are kept.
 *
 * @param {Object} params - Tool arguments.
 * @param {string} params.mode - The operating mode (quality, balanced, economic, skip).
 * @param {string} projectRoot - The current project root.
 * @returns {Promise<{status: string, mode?: string}>} `{ status: 'skipped_model_setup' }`
 *   for mode "skip", otherwise `{ status: 'success', mode }`.
 * @throws {Error} If the mode is invalid or missing from agent-modes.json, if
 *   agent-modes.json cannot be read or parsed, or if the existing settings
 *   file cannot be read, is not valid JSON, or is not a JSON object.
 */
async function handleSetupModels(params, projectRoot) {
  // Tolerate a missing params object so the caller gets the "Invalid mode"
  // error below rather than a TypeError.
  const mode = params ? params.mode : undefined;

  if (mode === 'skip') {
    return { status: 'skipped_model_setup' };
  }

  if (!['quality', 'balanced', 'economic'].includes(mode)) {
    throw new Error(`Invalid mode: ${mode}`);
  }

  const settingsPath = path.join(projectRoot, '.gemini', 'settings.json');

  const extensionRoot = resolveExtensionRoot();
  const modesPath = path.join(extensionRoot, 'src', 'config', 'agent-modes.json');

  let modes;
  try {
    modes = JSON.parse(fs.readFileSync(modesPath, 'utf8'));
  } catch (err) {
    throw new Error(`Failed to read agent-modes.json: ${err.message}`, { cause: err });
  }

  const mapping = isPlainObject(modes) ? modes[mode] : undefined;
  if (!isPlainObject(mapping)) {
    throw new Error(`Unknown mode: ${mode}`);
  }

  let settings = {};
  if (fs.existsSync(settingsPath)) {
    try {
      const content = fs.readFileSync(settingsPath, 'utf8');
      // An empty or whitespace-only file is treated as "no settings yet".
      settings = content.trim() === '' ? {} : JSON.parse(content);
    } catch (err) {
      throw new Error(`Existing .gemini/settings.json is corrupted: ${err.message}`, {
        cause: err,
      });
    }

    // Valid JSON that is not an object (array, null, number, ...) cannot hold
    // settings keys; refusing is safer than writing it back and reporting success.
    if (!isPlainObject(settings)) {
      throw new Error(
        'Existing .gemini/settings.json is corrupted: top-level value must be a JSON object'
      );
    }
  }

  // Ensure experimental.enableAgents is true
  if (!isPlainObject(settings.experimental)) {
    settings.experimental = {};
  }
  settings.experimental.enableAgents = true;

  // Extend agents.overrides without discarding what is already configured
  applyModelOverrides(settings, mapping);

  // NOTE(review): it is not visible here whether atomicWriteSync creates
  // missing parent directories; creating `.gemini` up front is idempotent
  // and makes first-time setup work either way.
  fs.mkdirSync(path.dirname(settingsPath), { recursive: true });
  atomicWriteSync(settingsPath, JSON.stringify(settings, null, 2));

  return { status: 'success', mode };
}
| Load template/reference/protocol | `get_skill_content(resources: [""])` | | Delegate to agent | Call agent tool by name: `coder(query: "...")`, `tester(query: "...")`, `design_system_engineer(query: "...")` | | MCP tools | `mcp_maestro_` (Gemini CLI also accepts bare names like `resolve_settings`) | -| Enter Plan Mode | `enter_plan_mode` — if unavailable, tell user: "Run `gemini --settings` and set `experimental.plan` to `true`, then restart." Offer to continue without Plan Mode using `ask_user` for approvals. | +| Enter Plan Mode | `enter_plan_mode` — if unavailable, tell user: "Run `gemini --settings` and set `experimental.plan` to `true`, then Request Restart." Offer to continue without Plan Mode using `ask_user` for approvals. | | Exit Plan Mode | `exit_plan_mode` with `plan_filename` | | User prompt (choose) | `ask_user` with `type: 'choice'` | | User prompt (approve) | `ask_user` with `type: 'yesno'` | +| Define Agent Models | `ask_user` for valid mode (`quality`, `balanced`, `economic`, `skip`) and call `setup_models` with the selected mode. Then Request Restart. | +| Request Restart | STOP here and wait for the user to restart and signal to continue. Do NOT proceed. | ## Execute diff --git a/src/references/orchestration-steps.md b/src/references/orchestration-steps.md index e6f79e3b..959ed087 100644 --- a/src/references/orchestration-steps.md +++ b/src/references/orchestration-steps.md @@ -8,13 +8,14 @@ STARTUP (Turn 1 — tool calls only, no text output) 6. STOP. Turn 1 is ONLY steps 1-5. No text, no design questions, no file reads. CLASSIFICATION (Turn 2) - 7. Load the architecture reference: ["architecture"]. Do NOT load templates yet — they are loaded at their consumption points (steps 13, 15, 20). + 7. Load the architecture reference: ["architecture"]. Do NOT load templates yet — they are loaded at their consumption points (steps 14, 16, 21). 8. Classify task as simple/medium/complex. Present classification with rationale. - 9. 
Route: simple → Express (step 31). Medium/complex → continue to step 10. + 9. Route: simple → Express (step 32). Medium/complex → continue to step 10. DESIGN (Phase 1) 10. Enter Plan Mode. If unavailable, follow the runtime preamble's Plan Mode fallback instructions. -11. Call `get_skill_content` with resources: ["design-dialogue"]. Follow the loaded protocol for: +11. Define Agent Models. +12. Call `get_skill_content` with resources: ["design-dialogue"]. Follow the loaded protocol for: - Design depth selector (first design question) - Repository grounding (for existing codebases, skip for greenfield) - One question at a time via user prompt @@ -31,51 +32,51 @@ DESIGN (Phase 1) WRONG: user requests "fan site" → options include React, Next.js, Astro CORRECT: user requests "fan site" → recommended option is vanilla HTML/CSS/JS -12. Present design sections one at a time, per the design-dialogue skill's convergence protocol. +13. Present design sections one at a time, per the design-dialogue skill's convergence protocol. Each section must be presented individually and approved via user prompt before proceeding to the next. Do NOT present the full design as a single block. Quick depth may combine sections. Standard/Deep MUST validate individually. -13. Call `get_skill_content` with resources: ["design-document"]. Write approved design document to /plans/ (or Plan Mode tmp path). -14. If Plan Mode is active, exit Plan Mode with the plan path. Copy approved document to /plans/. +14. Call `get_skill_content` with resources: ["design-document"]. Write approved design document to /plans/ (or Plan Mode tmp path). +15. If Plan Mode is active, exit Plan Mode with the plan path. Copy approved document to /plans/. PLANNING (Phase 2) -15. Call `get_skill_content` with resources: ["implementation-planning", "implementation-plan"]. Follow the loaded skill protocol. -16. Call validate_plan with the generated plan and task_complexity. +16. 
Call `get_skill_content` with resources: ["implementation-planning", "implementation-plan"]. Follow the loaded skill protocol. +17. Call validate_plan with the generated plan and task_complexity. You MUST call validate_plan BEFORE presenting the plan for approval. Do NOT - present the plan, write it to state_dir, or proceed to step 17 without first + present the plan, write it to state_dir, or proceed to step 18 without first calling validate_plan and resolving any error-severity violations. validate_plan enforces server-side: phase count limits, dependency cycles, unknown agents, file ownership conflicts, and agent-deliverable compatibility (read-only agents cannot be assigned to file-creating phases). If it returns violations with severity "error", fix them in the plan and re-validate. -17. Present plan for user approval (Approve / Revise / Abort via user prompt). -18. Write approved implementation plan to /plans/. +18. Present plan for user approval (Approve / Revise / Abort via user prompt). +19. Write approved implementation plan to /plans/. EXECUTION SETUP (Phase 3 — pre-delegation) -19. Call `get_skill_content` with resources: ["execution"]. Follow its Execution Mode Gate. +20. Call `get_skill_content` with resources: ["execution"]. Follow its Execution Mode Gate. Present ONLY "Parallel" and "Sequential" as execution mode options. Do NOT present "Ask" as a user-facing choice — "ask" is a setting value that means "prompt the user", not an execution mode the user selects. -20. Call `get_skill_content` with resources: ["session-management", "session-state"]. -21. Create session via create_session with resolved execution_mode. Do NOT create before mode is resolved. -22. Call `get_skill_content` with resources: ["delegation", "validation", "agent-base-protocol", "filesystem-safety-protocol"]. +21. Call `get_skill_content` with resources: ["session-management", "session-state"]. +22. Create session via create_session with resolved execution_mode. 
Do NOT create before mode is resolved. +23. Call `get_skill_content` with resources: ["delegation", "validation", "agent-base-protocol", "filesystem-safety-protocol"]. EXECUTION (Phase 3 — delegation loop) -23. For each phase (or parallel batch): call `get_agent` for the assigned agent, then delegate using the returned methodology and tool restrictions. +24. For each phase (or parallel batch): call `get_agent` for the assigned agent, then delegate using the returned methodology and tool restrictions. Dispatch by calling the agent's registered tool directly. Do NOT use the built-in generalist tool or invoke agents by bare name. Each Maestro agent carries specialized methodology, tool restrictions, temperature, and turn limits from its frontmatter that the generalist ignores. -24. After each agent returns, parse Task Report + Downstream Context from response. -25. Call transition_phase to persist results. +25. After each agent returns, parse Task Report + Downstream Context from response. +26. Call transition_phase to persist results. For parallel batches: call transition_phase INDIVIDUALLY for EVERY completed phase in the batch. The MCP tool writes files_created, files_modified, @@ -85,24 +86,24 @@ EXECUTION (Phase 3 — delegation loop) merge all agents' files into one call — the archive attributes files per phase, so empty payloads mean lost traceability. -26. Repeat steps 23-25 until all phases complete. +27. Repeat steps 24-26 until all phases complete. COMPLETION (Phase 4) -27. Call `get_skill_content` with resources: ["code-review"]. -28. If execution changed non-documentation files, delegate to the code reviewer agent. Block on Critical/Major findings. +28. Call `get_skill_content` with resources: ["code-review"]. +29. If execution changed non-documentation files, delegate to the code reviewer agent. Block on Critical/Major findings. If Critical/Major findings: re-delegate to the implementing agent to fix. The orchestrator MUST NOT write code directly. -29. 
If MAESTRO_AUTO_ARCHIVE is true (or unset), call archive_session. If false, inform user session is complete but not archived. -30. Present final summary with files changed, phase outcomes, and next steps. +30. If MAESTRO_AUTO_ARCHIVE is true (or unset), call archive_session. If false, inform user session is complete but not archived. +31. Present final summary with files changed, phase outcomes, and next steps. RECOVERY (referenced from any step on user request) If the user says the flow moved too fast: return to the most recent unanswered approval gate. If the user asks for implementation before approval: remind them Maestro requires approval first. If the user asks to skip execution-mode: remind them parallel/sequential is required unless MAESTRO_EXECUTION_MODE pins it. If an answer invalidates a prior choice: restate the updated assumption and re-run the relevant gate. -If delegation collapses to parent session without fallback approval: return to step 19 or re-scope the child-agent work packages. +If delegation collapses to parent session without fallback approval: return to step 20 or re-scope the child-agent work packages. EXPRESS WORKFLOW (simple tasks only — jumped to from step 9) @@ -110,17 +111,18 @@ EXPRESS MODE GATE BYPASS: Express bypasses the execution-mode gate entirely. Exp EXPRESS MCP FALLBACK: If MCP state tools (create_session, transition_phase, archive_session) are unavailable, fall back to direct file writes on /state/active-session.md. -31. Verify classification is simple. If task requires multiple phases or agents, override to medium → step 10. +32. Verify classification is simple. If task requires multiple phases or agents, override to medium → step 10. Express sessions MUST have exactly one implementation phase with exactly one agent. -32. Ask 1-2 clarifying questions from Area 1 only. +33. Define Agent Models. +34. Ask 1-2 clarifying questions from Area 1 only. Each question MUST use the user prompt tool (not plain text). 
Use the choose variant with 2-4 options where possible. Do NOT ask questions as plain text in the model response — the user prompt tool is the only input mechanism. -33. Present structured Express brief as plain text, then ask for approval. +35. Present structured Express brief as plain text, then ask for approval. The brief MUST be plain text output in the model response. The approval MUST be a SEPARATE user prompt tool call — not embedded in the @@ -128,14 +130,14 @@ EXPRESS MCP FALLBACK: If MCP state tools (create_session, transition_phase, arch These are two distinct actions: first emit the brief as text, then call the user prompt tool for approval. Do NOT combine them into one text block. -34. On approval, create session with workflow_mode: "express", exactly 1 phase. +36. On approval, create session with workflow_mode: "express", exactly 1 phase. On rejection, revise. On second rejection, escalate to Standard → step 10. -35. Call `get_skill_content` with resources: ["agent-base-protocol", "filesystem-safety-protocol"] and prepend them to the delegation prompt. -36. Delegate to the assigned agent. +37. Call `get_skill_content` with resources: ["agent-base-protocol", "filesystem-safety-protocol"] and prepend them to the delegation prompt. +38. Delegate to the assigned agent. - Same dispatch rule as step 23: call agent by registered tool name, not generalist. + Same dispatch rule as step 24: call agent by registered tool name, not generalist. -37. Parse Task Report from the agent's response. Call transition_phase to persist results. +39. Parse Task Report from the agent's response. Call transition_phase to persist results. You MUST call transition_phase after the implementing agent returns. Extract files_created, files_modified, files_deleted, and downstream_context from the @@ -143,15 +145,15 @@ EXPRESS MCP FALLBACK: If MCP state tools (create_session, transition_phase, arch state has no record of what was delivered. 
Do NOT skip to code review or archive without calling transition_phase first. -38. Delegate to the code reviewer agent. +40. Delegate to the code reviewer agent. If Critical/Major findings: re-delegate to implementing agent (1 retry). Orchestrator MUST NOT write code directly. If retry fails, escalate to user. -39. Call archive_session. -40. Present summary. +41. Call archive_session. +42. Present summary. EXPRESS RESUME (when resuming an Express session from get_session_status) -If phase is pending: re-generate and present brief (step 33). On approval, proceed to delegation (step 36). -If phase is in_progress: re-delegate with same scope (step 36). -If phase is completed but session is in_progress: run code review (step 38), then archive (step 39). +If phase is pending: re-generate and present brief (step 35). On approval, proceed to delegation (step 38). +If phase is in_progress: re-delegate with same scope (step 38). +If phase is completed but session is in_progress: run code review (step 40), then archive (step 41). 
diff --git a/tests/transforms/mcp-pack-composition.test.js b/tests/transforms/mcp-pack-composition.test.js index 43e687ca..8629e9d7 100644 --- a/tests/transforms/mcp-pack-composition.test.js +++ b/tests/transforms/mcp-pack-composition.test.js @@ -20,6 +20,7 @@ describe('mcp pack composition', () => { 'assess_task_complexity', 'validate_plan', 'resolve_settings', + 'setup_models', 'create_session', 'get_session_status', 'update_session', diff --git a/tests/transforms/mcp-setup-models.test.js b/tests/transforms/mcp-setup-models.test.js new file mode 100644 index 00000000..9a4ef464 --- /dev/null +++ b/tests/transforms/mcp-setup-models.test.js @@ -0,0 +1,118 @@ +'use strict'; + +const { describe, it, after } = require('node:test'); +const assert = require('node:assert/strict'); +const fs = require('fs'); +const path = require('path'); +const os = require('os'); +const { handleSetupModels } = require('../../src/mcp/handlers/setup-models'); +const { KNOWN_AGENTS } = require('../../src/core/agent-registry'); + +// handleSetupModels writes through atomicWriteSync from ../../core/atomic-write. +// atomicWriteSync is not mocked here; instead, +// these tests use a real temporary directory to ensure everything works as expected.
+ +// Mock MAESTRO_EXTENSION_PATH to the actual repo root during tests +const REPO_ROOT = path.resolve(__dirname, '../../'); +process.env.MAESTRO_EXTENSION_PATH = REPO_ROOT; + +describe('handleSetupModels', () => { + const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'maestro-test-')); + const dotGemini = path.join(tempDir, '.gemini'); + const settingsPath = path.join(dotGemini, 'settings.json'); + + it('should return skipped_model_setup when mode is skip', async () => { + const result = await handleSetupModels({ mode: 'skip' }, tempDir); + assert.deepEqual(result, { status: 'skipped_model_setup' }); + }); + + it('should throw if mode is invalid', async () => { + await assert.rejects( + () => handleSetupModels({ mode: 'invalid' }, tempDir), + /Invalid mode: invalid/ + ); + }); + + it('should create a fresh settings file if it does not exist', async () => { + if (fs.existsSync(settingsPath)) fs.unlinkSync(settingsPath); + if (!fs.existsSync(dotGemini)) fs.mkdirSync(dotGemini, { recursive: true }); + + const result = await handleSetupModels({ mode: 'balanced' }, tempDir); + assert.equal(result.status, 'success'); + assert.equal(result.mode, 'balanced'); + + assert.ok(fs.existsSync(settingsPath)); + const settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8')); + assert.equal(settings.experimental.enableAgents, true); + assert.ok(settings.agents.overrides); + assert.ok(settings.agents.overrides.architect); + }); + + it('should preserve existing settings properties after merge', async () => { + const existingSettings = { + existingProp: 'value', + experimental: { + otherProp: true + }, + agents: { + overrides: { + architect: { + customProp: 'custom' + } + } + } + }; + fs.writeFileSync(settingsPath, JSON.stringify(existingSettings)); + + const result = await handleSetupModels({ mode: 'quality' }, tempDir); + assert.equal(result.status, 'success'); + + const settings = JSON.parse(fs.readFileSync(settingsPath, 'utf8')); + assert.equal(settings.existingProp, 
'value'); + assert.equal(settings.experimental.otherProp, true); + assert.equal(settings.experimental.enableAgents, true); + assert.equal(settings.agents.overrides.architect.customProp, 'custom'); + assert.ok(settings.agents.overrides.architect.modelConfig.model); + }); + + it('should throw meaningful error if settings file is corrupted', async () => { + fs.writeFileSync(settingsPath, 'corrupted json {'); + await assert.rejects( + () => handleSetupModels({ mode: 'balanced' }, tempDir), + /Existing .gemini\/settings.json is corrupted/ + ); + }); + + it('should throw meaningful error if agent-modes.json is missing', async () => { + const originalPath = process.env.MAESTRO_EXTENSION_PATH; + process.env.MAESTRO_EXTENSION_PATH = '/non/existent/path'; + try { + await assert.rejects( + () => handleSetupModels({ mode: 'balanced' }, tempDir), + /Failed to read agent-modes.json/ + ); + } finally { + process.env.MAESTRO_EXTENSION_PATH = originalPath; + } + }); + + after(() => { + fs.rmSync(tempDir, { recursive: true, force: true }); + }); +}); + +describe('agent-modes.json validation', () => { + it('should ensure all agents in all modes exist in KNOWN_AGENTS', () => { + const modesPath = path.resolve(__dirname, '../../src/config/agent-modes.json'); + const modes = JSON.parse(fs.readFileSync(modesPath, 'utf8')); + + for (const [mode, mapping] of Object.entries(modes)) { + for (const agent of Object.keys(mapping)) { + assert.ok( + KNOWN_AGENTS.includes(agent), + `Agent "${agent}" in mode "${mode}" is not in KNOWN_AGENTS` + ); + } + } + }); +}); diff --git a/tests/transforms/mcp-workspace-pack.test.js b/tests/transforms/mcp-workspace-pack.test.js index 250f5cd9..0e295158 100644 --- a/tests/transforms/mcp-workspace-pack.test.js +++ b/tests/transforms/mcp-workspace-pack.test.js @@ -42,6 +42,7 @@ describe('workspace tool pack', () => { 'assess_task_complexity', 'validate_plan', 'resolve_settings', + 'setup_models', ] ); });