diff --git a/crates/puffer-core/runtime/claude_tools/workflow/support.rs b/crates/puffer-core/runtime/claude_tools/workflow/support.rs index e0677f2b5..359714569 100644 --- a/crates/puffer-core/runtime/claude_tools/workflow/support.rs +++ b/crates/puffer-core/runtime/claude_tools/workflow/support.rs @@ -3,15 +3,14 @@ use super::store::{ find_team_for_session, git_ahead_count, git_dirty, git_head_commit, git_toplevel, is_git_repo, load_store, messages_path, next_task_id, now_ms, register_team_member, remove_claude_team_artifacts, resolve_recipients, save_store, shutdown_requests_path, - task_output_path, tasks_path, team_lead_agent_id, teams_path, todos_path, - validate_ask_user_questions, workflow_root, worktrees_path, write_claude_team_file, AgentInput, - AgentStore, AskUserQuestionInput, ClaudeTeamFile, ClaudeTeamMember, ConfigInput, - EnterWorktreeInput, ExitWorktreeInput, MessageStore, PendingShutdownRequest, PowerShellInput, - SendMessageInput, ShutdownRequestStore, StoredAgent, StoredMessage, StoredTask, StoredTeam, - StoredTodo, StoredWorktree, TaskStore, TeamCreateInput, TeamStore, TodoStore, TodoWriteInput, - WorktreeStore, + task_output_path, tasks_path, team_lead_agent_id, teams_path, validate_ask_user_questions, + workflow_root, worktrees_path, write_claude_team_file, AgentInput, AgentStore, + AskUserQuestionInput, ClaudeTeamFile, ClaudeTeamMember, ConfigInput, EnterWorktreeInput, + ExitWorktreeInput, MessageStore, PendingShutdownRequest, PowerShellInput, SendMessageInput, + ShutdownRequestStore, StoredAgent, StoredMessage, StoredTask, StoredTeam, StoredWorktree, + TaskStore, TeamCreateInput, TeamStore, WorktreeStore, }; -use super::task_runtime::{terminal_task_status, validate_todos, wait_for_child_output}; +use super::task_runtime::{terminal_task_status, wait_for_child_output}; use crate::config_settings::{ config_setting_path, config_setting_scope, get_config_value, persist_config_setting, scope_label, set_config_value, @@ -618,33 +617,6 @@ pub(super) fn execute_team_delete( }))?) } -/// Executes the live `TodoWrite` workflow tool. -pub(super) fn execute_todo_write( - state: &mut AppState, - _cwd: &Path, - input: Value, -) -> Result { - let parsed: TodoWriteInput = - serde_json::from_value(input).context("invalid TodoWrite input")?; - validate_todos(&parsed.todos)?; - let mut store = load_store::(&todos_path(state.session.cwd.as_path()))?; - let old = store.todos.clone(); - store.todos = parsed - .todos - .into_iter() - .map(|todo| StoredTodo { - content: todo.content, - status: todo.status, - active_form: todo.active_form, - }) - .collect(); - save_store(&todos_path(state.session.cwd.as_path()), &store)?; - Ok(serde_json::to_string_pretty(&json!({ - "oldTodos": old, - "newTodos": store.todos - }))?) -} - /// Persists one background Bash task into the shared workflow task store. pub(super) fn register_background_shell_task( cwd: &Path, diff --git a/crates/puffer-core/runtime/claude_tools/workflow/task_runtime.rs b/crates/puffer-core/runtime/claude_tools/workflow/task_runtime.rs index 833a18d11..d58d42792 100644 --- a/crates/puffer-core/runtime/claude_tools/workflow/task_runtime.rs +++ b/crates/puffer-core/runtime/claude_tools/workflow/task_runtime.rs @@ -1,7 +1,8 @@ use super::store::{ - load_store, now_ms, process_is_running, save_store, tasks_path, StoredTask, TaskStore, - TodoInputItem, + load_store, now_ms, process_is_running, save_store, tasks_path, StoredTask, StoredTodo, + TaskStore, TodoInputItem, }; +use crate::{AppState, MessageRole}; use anyhow::{Context, Result}; use puffer_config::ConfigPaths; use serde_json::Value; @@ -12,6 +13,8 @@ use std::thread; use std::time::{Duration, Instant}; const MAX_PROCESS_OUTPUT_CHARS: usize = 30_000; +pub(super) const VERIFICATION_NUDGE: &str = + "NOTE: You just closed out 3+ tasks and none of them was a verification step. Before writing your final summary, spawn the verification agent (subagent_type=\"verification\"). You cannot self-assign PARTIAL by listing caveats in your summary — only the verifier issues a verdict."; /// Returns the persisted runtime-agent output path for a task id. pub(super) fn runtime_agent_output_path(session_cwd: &Path, task_id: &str) -> std::path::PathBuf { @@ -84,6 +87,43 @@ pub(super) fn validate_todos(todos: &[TodoInputItem]) -> anyhow::Result<()> { Ok(()) } +/// Returns true when the completed todo list should surface the verification reminder. +pub(super) fn should_emit_verification_nudge_for_todos(todos: &[StoredTodo]) -> bool { + todos.len() >= 3 + && todos.iter().all(|todo| todo.status == "completed") + && !todos + .iter() + .any(|todo| contains_verification_marker(&todo.content)) +} + +/// Returns true when the completed visible task list should surface the verification reminder. +pub(super) fn should_emit_verification_nudge_for_tasks(tasks: &[StoredTask]) -> bool { + let visible = tasks + .iter() + .filter(|task| { + !task + .metadata + .get("_internal") + .and_then(Value::as_bool) + .unwrap_or(false) + }) + .collect::>(); + visible.len() >= 3 + && visible.iter().all(|task| task.status == "completed") + && !visible + .iter() + .any(|task| contains_verification_marker(&task.subject)) +} + +/// Returns true when the current runtime context belongs to a nested subagent. +pub(super) fn is_subagent_context(state: &AppState) -> bool { + state.transcript.first().is_some_and(|message| { + message.role == MessageRole::System + && (message.text.contains("You are a coding subagent") + || message.text.contains("You are a verification specialist")) + }) +} + /// Captures child-process output together with timeout state. pub(super) struct TimedProcessOutput { pub(super) stdout: String, @@ -211,9 +251,18 @@ fn truncate_process_output(output: String) -> String { output.chars().take(MAX_PROCESS_OUTPUT_CHARS).collect() } +fn contains_verification_marker(text: &str) -> bool { + text.to_ascii_lowercase().contains("verif") +} + #[cfg(test)] mod tests { - use super::{truncate_process_output, MAX_PROCESS_OUTPUT_CHARS}; + use super::{ + should_emit_verification_nudge_for_tasks, should_emit_verification_nudge_for_todos, + truncate_process_output, MAX_PROCESS_OUTPUT_CHARS, + }; + use crate::runtime::claude_tools::workflow::store::{StoredTask, StoredTodo}; + use serde_json::{Map, Value}; #[test] fn truncate_process_output_leaves_short_text_unchanged() { @@ -230,4 +279,117 @@ mod tests { assert_eq!(truncated.chars().count(), MAX_PROCESS_OUTPUT_CHARS); assert!(truncated.chars().all(|ch| ch == 'x')); } + + #[test] + fn verification_nudge_for_todos_requires_large_completed_non_verification_list() { + let todos = vec![ + StoredTodo { + content: "Ship feature".to_string(), + status: "completed".to_string(), + active_form: "Shipping feature".to_string(), + }, + StoredTodo { + content: "Run tests".to_string(), + status: "completed".to_string(), + active_form: "Running tests".to_string(), + }, + StoredTodo { + content: "Write summary".to_string(), + status: "completed".to_string(), + active_form: "Writing summary".to_string(), + }, + ]; + assert!(should_emit_verification_nudge_for_todos(&todos)); + + let mut with_verification = todos.clone(); + with_verification[2].content = "Verification sweep".to_string(); + assert!(!should_emit_verification_nudge_for_todos( + &with_verification + )); + } + + #[test] + fn verification_nudge_for_tasks_ignores_internal_entries() { + let mut internal_metadata = Map::new(); + internal_metadata.insert("_internal".to_string(), Value::Bool(true)); + let tasks = vec![ + StoredTask { + task_id: "1".to_string(), + subject: "Ship feature".to_string(), + description: String::new(), + active_form: String::new(), + status: "completed".to_string(), + owner: None, + blocks: Vec::new(), + blocked_by: Vec::new(), + metadata: Map::new(), + output: None, + task_type: None, + command: None, + process_id: None, + output_file: None, + started_at_ms: None, + updated_at_ms: None, + exit_code: None, + }, + StoredTask { + task_id: "2".to_string(), + subject: "Run tests".to_string(), + description: String::new(), + active_form: String::new(), + status: "completed".to_string(), + owner: None, + blocks: Vec::new(), + blocked_by: Vec::new(), + metadata: Map::new(), + output: None, + task_type: None, + command: None, + process_id: None, + output_file: None, + started_at_ms: None, + updated_at_ms: None, + exit_code: None, + }, + StoredTask { + task_id: "3".to_string(), + subject: "Write summary".to_string(), + description: String::new(), + active_form: String::new(), + status: "completed".to_string(), + owner: None, + blocks: Vec::new(), + blocked_by: Vec::new(), + metadata: Map::new(), + output: None, + task_type: None, + command: None, + process_id: None, + output_file: None, + started_at_ms: None, + updated_at_ms: None, + exit_code: None, + }, + StoredTask { + task_id: "4".to_string(), + subject: "Internal bookkeeping".to_string(), + description: String::new(), + active_form: String::new(), + status: "pending".to_string(), + owner: None, + blocks: Vec::new(), + blocked_by: Vec::new(), + metadata: internal_metadata, + output: None, + task_type: None, + command: None, + process_id: None, + output_file: None, + started_at_ms: None, + updated_at_ms: None, + exit_code: None, + }, + ]; + assert!(should_emit_verification_nudge_for_tasks(&tasks)); + } } diff --git a/crates/puffer-core/runtime/claude_tools/workflow/task_tools.rs b/crates/puffer-core/runtime/claude_tools/workflow/task_tools.rs index a3c9aa45e..5bc6de6de 100644 --- a/crates/puffer-core/runtime/claude_tools/workflow/task_tools.rs +++ b/crates/puffer-core/runtime/claude_tools/workflow/task_tools.rs @@ -5,9 +5,10 @@ use super::store::{ TaskStopInput, TaskStore, TaskUpdateInput, }; use super::task_runtime::{ - read_runtime_agent_output, read_task_output, refresh_stored_task, runtime_agent_output_path, - runtime_agent_terminal_status, terminal_task_status, wait_for_runtime_agent_output, - wait_for_stored_task, + is_subagent_context, read_runtime_agent_output, read_task_output, refresh_stored_task, + runtime_agent_output_path, runtime_agent_terminal_status, + should_emit_verification_nudge_for_tasks, terminal_task_status, wait_for_runtime_agent_output, + wait_for_stored_task, VERIFICATION_NUDGE, }; use crate::AppState; use anyhow::{anyhow, bail, Context, Result}; @@ -252,12 +253,21 @@ pub(super) fn execute_task_update( } } task.updated_at_ms = Some(now_ms()); + let verification_nudge_needed = !is_subagent_context(state) + && status_change + .as_ref() + .and_then(|change| change.get("to")) + .and_then(Value::as_str) + == Some("completed") + && should_emit_verification_nudge_for_tasks(&store.tasks); save_store(&tasks_path(state.session.cwd.as_path()), &store)?; Ok(serde_json::to_string_pretty(&json!({ "success": true, "taskId": task_id, "updatedFields": updated_fields, "statusChange": status_change, + "verificationNudgeNeeded": verification_nudge_needed, + "note": verification_nudge_needed.then_some(VERIFICATION_NUDGE), }))?) } diff --git a/crates/puffer-core/runtime/claude_tools/workflow/todo_write.rs b/crates/puffer-core/runtime/claude_tools/workflow/todo_write.rs index 68d6cbaf2..734f758c9 100644 --- a/crates/puffer-core/runtime/claude_tools/workflow/todo_write.rs +++ b/crates/puffer-core/runtime/claude_tools/workflow/todo_write.rs @@ -1,9 +1,37 @@ use crate::AppState; -use anyhow::Result; -use serde_json::Value; +use anyhow::{Context, Result}; +use serde_json::{json, Value}; use std::path::Path; +use super::store::{load_store, save_store, todos_path, StoredTodo, TodoStore, TodoWriteInput}; +use super::task_runtime::{ + is_subagent_context, should_emit_verification_nudge_for_todos, validate_todos, + VERIFICATION_NUDGE, +}; + /// Executes the Claude-compatible `TodoWrite` tool scaffold. pub fn execute_todo_write(state: &mut AppState, cwd: &Path, input: Value) -> Result { - super::support::execute_todo_write(state, cwd, input) + let parsed: TodoWriteInput = + serde_json::from_value(input).context("invalid TodoWrite input")?; + validate_todos(&parsed.todos)?; + let mut store = load_store::(&todos_path(cwd))?; + let old = store.todos.clone(); + store.todos = parsed + .todos + .into_iter() + .map(|todo| StoredTodo { + content: todo.content, + status: todo.status, + active_form: todo.active_form, + }) + .collect(); + let verification_nudge_needed = + !is_subagent_context(state) && should_emit_verification_nudge_for_todos(&store.todos); + save_store(&todos_path(cwd), &store)?; + Ok(serde_json::to_string_pretty(&json!({ + "oldTodos": old, + "newTodos": store.todos, + "verificationNudgeNeeded": verification_nudge_needed, + "note": verification_nudge_needed.then_some(VERIFICATION_NUDGE), + }))?) } diff --git a/crates/puffer-core/runtime/tests/tool_execution/workflow.rs b/crates/puffer-core/runtime/tests/tool_execution/workflow.rs index 8a59f5067..bd3fd9f7a 100644 --- a/crates/puffer-core/runtime/tests/tool_execution/workflow.rs +++ b/crates/puffer-core/runtime/tests/tool_execution/workflow.rs @@ -1,4 +1,5 @@ use super::*; +use crate::MessageRole; use puffer_config::ConfigPaths; use std::fs; use std::time::Duration; @@ -25,6 +26,80 @@ fn todo_write_rejects_multiple_in_progress_items() { assert!(error.to_string().contains("at most one in_progress")); } +#[test] +fn todo_write_emits_verification_nudge_for_main_thread_completion() { + let mut state = temp_state(); + let cwd = state.cwd.clone(); + let output = crate::runtime::claude_tools::workflow::todo_write::execute_todo_write( + &mut state, + &cwd, + json!({ + "todos": [ + {"content": "Ship feature", "status": "completed", "activeForm": "Shipping feature"}, + {"content": "Run tests", "status": "completed", "activeForm": "Running tests"}, + {"content": "Write summary", "status": "completed", "activeForm": "Writing summary"} + ] + }), + ) + .unwrap(); + let parsed: Value = serde_json::from_str(&output).unwrap(); + assert_eq!(parsed["verificationNudgeNeeded"], true); + assert!(parsed["note"] + .as_str() + .unwrap_or_default() + .contains("subagent_type=\"verification\"")); +} + +#[test] +fn todo_write_still_emits_verification_nudge_for_main_thread_team_lead() { + let mut state = temp_state(); + state.active_team_name = Some("alpha-team".to_string()); + let cwd = state.cwd.clone(); + let output = crate::runtime::claude_tools::workflow::todo_write::execute_todo_write( + &mut state, + &cwd, + json!({ + "todos": [ + {"content": "Ship feature", "status": "completed", "activeForm": "Shipping feature"}, + {"content": "Run tests", "status": "completed", "activeForm": "Running tests"}, + {"content": "Write summary", "status": "completed", "activeForm": "Writing summary"} + ] + }), + ) + .unwrap(); + let parsed: Value = serde_json::from_str(&output).unwrap(); + assert_eq!(parsed["verificationNudgeNeeded"], true); + assert!(parsed["note"] + .as_str() + .unwrap_or_default() + .contains("subagent_type=\"verification\"")); +} + +#[test] +fn todo_write_skips_verification_nudge_for_subagent_context() { + let mut state = temp_state(); + state.push_message( + MessageRole::System, + "You are a coding subagent for Puffer Code.", + ); + let cwd = state.cwd.clone(); + let output = crate::runtime::claude_tools::workflow::todo_write::execute_todo_write( + &mut state, + &cwd, + json!({ + "todos": [ + {"content": "Ship feature", "status": "completed", "activeForm": "Shipping feature"}, + {"content": "Run tests", "status": "completed", "activeForm": "Running tests"}, + {"content": "Write summary", "status": "completed", "activeForm": "Writing summary"} + ] + }), + ) + .unwrap(); + let parsed: Value = serde_json::from_str(&output).unwrap(); + assert_eq!(parsed["verificationNudgeNeeded"], false); + assert!(parsed["note"].is_null()); +} + #[test] fn config_tool_supports_editor_mode() { let mut state = temp_state(); @@ -471,6 +546,146 @@ fn task_update_sets_timestamps_for_progress() { assert!(task["updated_at_ms"].is_number()); } +#[test] +fn task_update_emits_verification_nudge_when_last_visible_task_completes() { + let mut state = temp_state(); + let cwd = state.cwd.clone(); + let mut task_ids = Vec::new(); + for subject in ["Ship feature", "Run tests", "Write summary"] { + let created = crate::runtime::claude_tools::workflow::task_create::execute_task_create( + &mut state, + &cwd, + json!({ + "subject": subject, + "description": subject + }), + ) + .unwrap(); + let created: Value = serde_json::from_str(&created).unwrap(); + task_ids.push(created["task"]["id"].as_str().unwrap().to_string()); + } + + for task_id in &task_ids[0..2] { + crate::runtime::claude_tools::workflow::task_update::execute_task_update( + &mut state, + &cwd, + json!({ + "taskId": task_id, + "status": "completed" + }), + ) + .unwrap(); + } + + let output = crate::runtime::claude_tools::workflow::task_update::execute_task_update( + &mut state, + &cwd, + json!({ + "taskId": task_ids[2], + "status": "completed" + }), + ) + .unwrap(); + let parsed: Value = serde_json::from_str(&output).unwrap(); + assert_eq!(parsed["verificationNudgeNeeded"], true); + assert!(parsed["note"] + .as_str() + .unwrap_or_default() + .contains("subagent_type=\"verification\"")); +} + +#[test] +fn task_update_still_emits_verification_nudge_for_main_thread_team_lead() { + let mut state = temp_state(); + state.active_team_name = Some("alpha-team".to_string()); + let cwd = state.cwd.clone(); + let mut task_ids = Vec::new(); + for subject in ["Ship feature", "Run tests", "Write summary"] { + let created = crate::runtime::claude_tools::workflow::task_create::execute_task_create( + &mut state, + &cwd, + json!({ + "subject": subject, + "description": subject + }), + ) + .unwrap(); + let created: Value = serde_json::from_str(&created).unwrap(); + task_ids.push(created["task"]["id"].as_str().unwrap().to_string()); + } + + let mut last_output = None; + for task_id in task_ids { + last_output = Some( + crate::runtime::claude_tools::workflow::task_update::execute_task_update( + &mut state, + &cwd, + json!({ + "taskId": task_id, + "status": "completed" + }), + ) + .unwrap(), + ); + } + + let parsed: Value = serde_json::from_str(&last_output.unwrap()).unwrap(); + assert_eq!(parsed["verificationNudgeNeeded"], true); + assert!(parsed["note"] + .as_str() + .unwrap_or_default() + .contains("subagent_type=\"verification\"")); + + let tasks_path = ConfigPaths::discover(&cwd) + .workspace_config_dir + .join("runtime/claude_workflow/tasks.json"); + let persisted: Value = serde_json::from_str(&fs::read_to_string(tasks_path).unwrap()).unwrap(); + assert_eq!(persisted["tasks"].as_array().unwrap().len(), 3); +} + +#[test] +fn task_update_skips_verification_nudge_for_subagent_context() { + let mut state = temp_state(); + state.push_message( + MessageRole::System, + "You are a coding subagent for Puffer Code.", + ); + let cwd = state.cwd.clone(); + let mut task_ids = Vec::new(); + for subject in ["Ship feature", "Run tests", "Write summary"] { + let created = crate::runtime::claude_tools::workflow::task_create::execute_task_create( + &mut state, + &cwd, + json!({ + "subject": subject, + "description": subject + }), + ) + .unwrap(); + let created: Value = serde_json::from_str(&created).unwrap(); + task_ids.push(created["task"]["id"].as_str().unwrap().to_string()); + } + + let mut last_output = None; + for task_id in task_ids { + last_output = Some( + crate::runtime::claude_tools::workflow::task_update::execute_task_update( + &mut state, + &cwd, + json!({ + "taskId": task_id, + "status": "completed" + }), + ) + .unwrap(), + ); + } + + let parsed: Value = serde_json::from_str(&last_output.unwrap()).unwrap(); + assert_eq!(parsed["verificationNudgeNeeded"], false); + assert!(parsed["note"].is_null()); +} + #[test] fn task_output_waits_for_agent_completion() { let mut state = temp_state();