diff --git a/CHANGELOG.md b/CHANGELOG.md index 25e18a7..246311d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,8 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- **Unified trace recorder.** Records every chat conversation and `/search` session as JSON-Lines under `app_data_dir/traces/{chat,search}/.jsonl`. Off by default; toggle from Settings or set `[debug] trace_enabled = true` in `config.toml`. + ### Changed +- **BREAKING**: Renamed `[debug] search_trace_enabled` to `trace_enabled` (now covers both chat and search). Rename the field in your `config.toml` after upgrading. Trace file layout also changed to `traces/{chat,search}/.jsonl`. +- The `ask_ollama`, `search_pipeline`, and `capture_full_screen_command` Tauri commands now require a `conversationId: String` argument (and `ask_ollama` additionally requires `isFirstTurn: bool` and `slashCommand: Option`). The frontend's `useOllama` hook generates a stable trace id per session and threads it transparently. External callers that invoked these commands directly must update their `invoke()` calls. A new fire-and-forget `record_conversation_end` command lets the frontend signal end-of-conversation (used by `useOllama.reset()` and `useOllama.loadMessages()`) so the chat-domain trace file gets a clean closing line. - **BREAKING**: Renamed the `[model]` section in `config.toml` to `[inference]`. The section still contains a single field, `ollama_url`, but the name now reflects what it actually configures (the inference daemon endpoint, not a model). There is no backward-compatibility shim: if you had a custom `[model]` section, rename it to `[inference]` after upgrading. - Active model selection is now strictly Option-typed end to end. 
Ollama's `/api/tags` is the single source of truth: when nothing is installed and nothing is persisted, Thuki refuses to dispatch requests and surfaces a "Pick a model" prompt instead of falling back to a hardcoded slug. The previous `DEFAULT_MODEL_NAME` constant has been removed. diff --git a/docs/configurations.md b/docs/configurations.md index 2c7ca13..cdbad2d 100644 --- a/docs/configurations.md +++ b/docs/configurations.md @@ -75,10 +75,8 @@ judge_timeout_s = 30 router_timeout_s = 45 [debug] -# When true, writes a forensic JSON-Lines trace file for every /search turn to -# ~/Library/Application Support/com.quietnode.thuki/traces/. -# Also toggleable from the Settings panel (Web tab, Diagnostics section). -search_trace_enabled = false +# Records every chat conversation and /search session to disk for later inspection. +trace_enabled = false ``` ## Reading the reference tables @@ -180,11 +178,11 @@ For security, both URLs default to your local machine (`127.0.0.1`) and should s ### `[debug]` -Diagnostics toggles. `search_trace_enabled` is exposed in the Settings panel (Web tab, Diagnostics section) so you can flip it without editing `config.toml`. +Records every chat conversation and `/search` session as JSON-Lines under `app_data_dir/traces/{chat,search}/.jsonl`. Off by default; toggleable from Settings. Trace files stay on your disk and are never uploaded. -| Field | Default | Tunable? | Why not tunable | Bounds | Description | -| :--------------------- | :------ | :------- | :-------------- | :----- | :---------- | -| `search_trace_enabled` | `false` | Yes | — | — | When on, Thuki writes a forensic JSON-Lines trace file for every `/search` turn to `~/Library/Application Support/com.quietnode.thuki/traces/`. Each file records every query sent to SearXNG, every page the reader fetched, and every AI decision in that turn. Useful for diagnosing why a search went wrong; leave off for normal use. | +| Field | Default | Tunable? 
| Why not tunable | Bounds | Description | +| :-------------- | :------ | :------- | :-------------- | :----- | :--------------------------------------------------------------------------- | +| `trace_enabled` | `false` | Yes | — | — | Records every chat conversation and `/search` session to disk for debugging. | ### `[activation]` (not in TOML) diff --git a/src-tauri/src/commands.rs b/src-tauri/src/commands.rs index 6134b68..c0af475 100644 --- a/src-tauri/src/commands.rs +++ b/src-tauri/src/commands.rs @@ -209,6 +209,13 @@ pub enum StreamChunk { Cancelled, /// A structured, user-friendly error occurred during processing. Error(OllamaError), + /// Emitted exactly once per turn, after the backend has cleared every + /// pre-`ConversationStart` gate (no-model bail, model lookup, etc.) and + /// committed to opening the trace for this `conversation_id`. Carries + /// no payload; the frontend uses it as the unambiguous signal to + /// retire its `is_first_turn` flag without relying on token-arrival + /// ordering. Does not appear in the trace itself. + TurnAccepted, } /// A single message in the Ollama `/api/chat` conversation format. @@ -463,10 +470,65 @@ pub async fn stream_ollama_chat( accumulated } +/// Mirrors a streaming chunk into the chat-domain trace recorder. Pulled out +/// of [`ask_ollama`] so the per-token routing logic and the token-count +/// increment are exercised by the unit-test suite rather than the +/// coverage-off Tauri command body. `Done`, `Cancelled`, and `Error` chunks +/// are intentionally noops here: those terminal events are summarized by +/// `AssistantComplete` after the stream returns. 
+pub(crate) fn record_chunk_to_trace( + chunk: &StreamChunk, + recorder: &std::sync::Arc, + token_count: &AtomicU64, +) { + match chunk { + StreamChunk::Token(text) => { + token_count.fetch_add(1, Ordering::Relaxed); + recorder.record(crate::trace::RecorderEvent::AssistantTokens { + chunk: text.clone(), + }); + } + StreamChunk::ThinkingToken(text) => { + recorder.record(crate::trace::RecorderEvent::AssistantThinking { + chunk: text.clone(), + }); + } + StreamChunk::Done + | StreamChunk::Cancelled + | StreamChunk::Error(_) + | StreamChunk::TurnAccepted => {} + } +} + +/// Emits `ConversationStart` to the trace recorder iff this is the first +/// turn of the conversation. Pulled out of [`ask_ollama`] and the search +/// pipeline so the gate is covered by tests instead of the coverage-off +/// Tauri command body. +pub(crate) fn record_conversation_start_if_first_turn( + recorder: &std::sync::Arc, + is_first_turn: bool, + model: String, + system_prompt: String, +) { + if is_first_turn { + recorder.record(crate::trace::RecorderEvent::ConversationStart { + model, + system_prompt, + }); + } +} + /// Streams a chat response from the local Ollama backend. Appends the user /// message and assistant response to conversation history after completion /// or cancellation (retaining context for follow-up requests). Uses an epoch /// counter to prevent stale writes after a reset. +/// +/// `conversation_id` flows from the frontend (`useConversationHistory.ts`). +/// `is_first_turn` lets the frontend tell the backend "emit +/// `ConversationStart` before this turn's `UserMessage`" without the backend +/// needing to track per-conversation state. Both feed the unified trace +/// recorder when `[debug] trace_enabled = true`; off by default they collapse +/// to noop calls. 
#[cfg_attr(coverage_nightly, coverage(off))] #[cfg_attr(not(coverage), tauri::command)] #[allow(clippy::too_many_arguments)] @@ -475,6 +537,9 @@ pub async fn ask_ollama( quoted_text: Option, image_paths: Option>, think: bool, + conversation_id: String, + is_first_turn: bool, + slash_command: Option, on_event: Channel, client: State<'_, reqwest::Client>, generation: State<'_, GenerationState>, @@ -482,6 +547,7 @@ pub async fn ask_ollama( config: State<'_, parking_lot::RwLock>, active_model: State<'_, crate::models::ActiveModelState>, capabilities_cache: State<'_, ModelCapabilitiesCache>, + trace_recorder: State<'_, std::sync::Arc>, ) -> Result<(), String> { // Snapshot the config once so all downstream reads (endpoint, prompt, model) // see a consistent view even if the user edits Settings mid-stream. @@ -507,6 +573,35 @@ pub async fn ask_ollama( let cancel_token = CancellationToken::new(); generation.set_token(cancel_token.clone()); + // Bind the trace recorder to this conversation. When tracing is on, + // every event for this turn flows to + // `traces/chat/.jsonl` via the registry. When off, + // each `record()` is a constant-time noop. The bound recorder is + // cheap to clone and is captured by the streaming-pump closure so + // per-token emits skip the registry lookup on the hot path. + let live: std::sync::Arc = + std::sync::Arc::clone(trace_recorder.inner()); + let live_inner: std::sync::Arc = live; + let bound_recorder = std::sync::Arc::new(crate::trace::BoundRecorder::new( + live_inner, + crate::trace::ConversationId::new(conversation_id), + )); + + // Emit ConversationStart at the moment we know the model + resolved + // system prompt. The frontend's `is_first_turn` flag prevents this + // event from firing on subsequent turns of the same conversation. 
+ record_conversation_start_if_first_turn( + &bound_recorder, + is_first_turn, + model_name.clone(), + config.prompt.resolved_system.clone(), + ); + // Tell the frontend the trace was opened for this conversation_id. + // Sent unconditionally (regardless of `is_first_turn`) so the hook + // can retire its flag the moment ANY turn lands, even if a previous + // first-turn attempt was cancelled before any token arrived. + let _ = on_event.send(StreamChunk::TurnAccepted); + // Build user message content. When quoted text is present, label it // explicitly so the model knows the highlighted text is the primary // subject and any attached images provide surrounding context. @@ -517,6 +612,16 @@ pub async fn ask_ollama( _ => message, }; + // Emit UserMessage before any image base64 work, so the trace + // captures the user's intent even if encoding fails. Image paths + // are recorded as strings (matching the IPC contract); image bytes + // never enter the JSONL. + bound_recorder.record(crate::trace::RecorderEvent::UserMessage { + content: content.clone(), + attached_images: image_paths.clone().unwrap_or_default(), + slash_command: slash_command.clone(), + }); + // Base64-encode attached images for the Ollama multimodal API. let images = match image_paths { Some(ref paths) if !paths.is_empty() => { @@ -580,6 +685,14 @@ pub async fn ask_ollama( )) }; + let stream_started_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + let token_count_atomic = std::sync::Arc::new(AtomicU64::new(0)); + let token_count_for_pump = std::sync::Arc::clone(&token_count_atomic); + let recorder_for_pump = std::sync::Arc::clone(&bound_recorder); + let accumulated = stream_ollama_chat( OllamaChatParams { endpoint, @@ -592,11 +705,25 @@ pub async fn ask_ollama( &client, cancel_token.clone(), |chunk| { + // Mirror the user-visible chunk into the trace before + // forwarding it to the frontend. 
Token / ThinkingToken + // chunks land as discrete trace events; terminal chunks are + // summarized below by `AssistantComplete`. + record_chunk_to_trace(&chunk, &recorder_for_pump, &token_count_for_pump); let _ = on_event.send(chunk); }, ) .await; + let stream_ended_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + bound_recorder.record(crate::trace::RecorderEvent::AssistantComplete { + total_tokens: token_count_atomic.load(Ordering::Relaxed), + latency_ms: stream_ended_ms.saturating_sub(stream_started_ms), + }); + // Persist user + assistant messages to in-memory history when the epoch // has not changed (no reset during streaming) and we received content. // This includes cancelled generations so that subsequent requests retain @@ -658,6 +785,34 @@ pub fn reset_conversation(history: State<'_, ConversationHistory>) { history.messages.lock().unwrap().clear(); } +/// Frontend-driven `ConversationEnd` emission. +/// +/// The chat-domain trace lifecycle is owned by the frontend because +/// Thuki's window-close intercept hides instead of quits, and the same +/// conversation can resume on the next hotkey activation. Emitting +/// `ConversationEnd` from the backend on window-hide would falsely mark +/// every still-open conversation ended on every dismiss. The frontend +/// invokes this command exactly when the user-perceived conversation +/// terminates: clicking "New conversation", loading a different +/// conversation from history, or quitting from the tray. +/// +/// The command is a thin trace-only signal; it does NOT mutate +/// `ConversationHistory` (that is `reset_conversation`'s job) and does +/// NOT touch the SQLite-backed history UI. 
+#[cfg_attr(coverage_nightly, coverage(off))] +#[cfg_attr(not(coverage), tauri::command)] +pub fn record_conversation_end( + conversation_id: String, + reason: String, + trace_recorder: State<'_, std::sync::Arc>, +) { + use crate::trace::TraceRecorder; + trace_recorder.record( + &crate::trace::ConversationId::new(conversation_id), + crate::trace::RecorderEvent::ConversationEnd { reason }, + ); +} + #[cfg(test)] mod tests { use super::*; @@ -2122,4 +2277,110 @@ mod tests { assert_eq!(err.kind, OllamaErrorKind::ModelNotFound); assert!(!err.message.contains("picker chip")); } + + // ─── Trace orchestration helpers ──────────────────────────────────────── + + /// Builds a `BoundRecorder` over a `MockRecorder` so each helper test + /// can inspect what got recorded without going through the file system. + fn mock_bound_recorder( + conv_id: &str, + ) -> ( + Arc, + Arc, + ) { + let mock = Arc::new(crate::trace::recorder::MockRecorder::new()); + let inner: Arc = mock.clone(); + let bound = Arc::new(crate::trace::BoundRecorder::new( + inner, + crate::trace::ConversationId::new(conv_id), + )); + (bound, mock) + } + + #[test] + fn record_chunk_to_trace_emits_assistant_tokens_and_increments_count() { + let (bound, mock) = mock_bound_recorder("conv-token"); + let counter = AtomicU64::new(0); + record_chunk_to_trace(&StreamChunk::Token("hi".to_string()), &bound, &counter); + record_chunk_to_trace(&StreamChunk::Token(" there".to_string()), &bound, &counter); + assert_eq!(counter.load(Ordering::Relaxed), 2); + let snapshot = mock.snapshot(); + assert_eq!(snapshot.len(), 2); + for (id, _) in &snapshot { + assert_eq!(id.as_str(), "conv-token"); + } + assert!(matches!( + snapshot[0].1, + crate::trace::RecorderEvent::AssistantTokens { ref chunk } if chunk == "hi" + )); + assert!(matches!( + snapshot[1].1, + crate::trace::RecorderEvent::AssistantTokens { ref chunk } if chunk == " there" + )); + } + + #[test] + fn record_chunk_to_trace_emits_assistant_thinking_without_increment() { + 
let (bound, mock) = mock_bound_recorder("conv-think"); + let counter = AtomicU64::new(0); + record_chunk_to_trace( + &StreamChunk::ThinkingToken("planning".to_string()), + &bound, + &counter, + ); + assert_eq!(counter.load(Ordering::Relaxed), 0); + let snapshot = mock.snapshot(); + assert_eq!(snapshot.len(), 1); + assert!(matches!( + snapshot[0].1, + crate::trace::RecorderEvent::AssistantThinking { ref chunk } if chunk == "planning" + )); + } + + #[test] + fn record_chunk_to_trace_skips_terminal_chunks() { + let (bound, mock) = mock_bound_recorder("conv-term"); + let counter = AtomicU64::new(0); + record_chunk_to_trace(&StreamChunk::Done, &bound, &counter); + record_chunk_to_trace(&StreamChunk::Cancelled, &bound, &counter); + record_chunk_to_trace( + &StreamChunk::Error(no_model_selected_error()), + &bound, + &counter, + ); + assert_eq!(counter.load(Ordering::Relaxed), 0); + assert_eq!(mock.snapshot().len(), 0); + } + + #[test] + fn record_conversation_start_if_first_turn_emits_when_true() { + let (bound, mock) = mock_bound_recorder("conv-start"); + record_conversation_start_if_first_turn( + &bound, + true, + "model-a".to_string(), + "you are helpful".to_string(), + ); + let snapshot = mock.snapshot(); + assert_eq!(snapshot.len(), 1); + assert!(matches!( + snapshot[0].1, + crate::trace::RecorderEvent::ConversationStart { + ref model, + ref system_prompt, + } if model == "model-a" && system_prompt == "you are helpful" + )); + } + + #[test] + fn record_conversation_start_if_first_turn_skips_when_false() { + let (bound, mock) = mock_bound_recorder("conv-skip"); + record_conversation_start_if_first_turn( + &bound, + false, + "model-a".to_string(), + "ignored".to_string(), + ); + assert_eq!(mock.snapshot().len(), 0); + } } diff --git a/src-tauri/src/config/defaults.rs b/src-tauri/src/config/defaults.rs index 6c825e3..c7eac25 100644 --- a/src-tauri/src/config/defaults.rs +++ b/src-tauri/src/config/defaults.rs @@ -168,16 +168,21 @@ pub const PIPELINE_INPUT_CHAR_BUDGET: 
usize = 200_000; /// slow service. pub const BOUNDS_TIMEOUT_S: (u64, u64) = (1, 300); -/// Whether the `/search` pipeline writes a forensic per-turn trace file. +/// Whether the unified trace recorder writes forensic per-conversation +/// trace files for the chat layer AND the `/search` pipeline. /// /// Off by default. Intended for local quality investigation only: when on, -/// the pipeline records every LLM request/response, every SearXNG query and -/// raw response body, every reader batch (per-URL latency, raw body, full -/// extracted text), and every judge verdict to a JSON-Lines file under -/// `~/Library/Application Support/com.quietnode.thuki/traces/`. Toggleable -/// from the Settings panel (Web tab, Diagnostics section). Off in shipped -/// builds by default. -pub const DEFAULT_DEBUG_SEARCH_TRACE_ENABLED: bool = false; +/// the recorder writes every chat turn (user message, assistant streaming +/// tokens, screen captures, conversation lifecycle) AND every search-pipeline +/// step (LLM requests/responses, SearXNG queries, reader batches, judge +/// verdicts) to JSON-Lines files under +/// `~/Library/Application Support/com.quietnode.thuki/traces/`. Files are +/// grouped by domain (`traces/chat/.jsonl` and +/// `traces/search/.jsonl`) so an analysis agent can be +/// pointed at exactly the slice it cares about. Toggleable from the +/// Settings panel (Web tab, Diagnostics section). Off in shipped builds +/// by default. +pub const DEFAULT_DEBUG_TRACE_ENABLED: bool = false; // Ollama API baked-in limits: not exposed in config.toml because they bound // attacker-controlled data (response bodies from the local Ollama daemon) and @@ -255,7 +260,7 @@ pub const ALLOWED_FIELDS: &[(&str, &str)] = &[ ("search", "router_timeout_s"), ("search", "pipeline_wall_clock_budget_s"), // [debug] - ("debug", "search_trace_enabled"), + ("debug", "trace_enabled"), ]; /// Authoritative allowlist of section names accepted by `reset_config`. 
diff --git a/src-tauri/src/config/schema.rs b/src-tauri/src/config/schema.rs index 9ddd2a5..b91ee76 100644 --- a/src-tauri/src/config/schema.rs +++ b/src-tauri/src/config/schema.rs @@ -14,14 +14,13 @@ use serde::{Deserialize, Serialize}; use super::defaults::{ - DEFAULT_DEBUG_SEARCH_TRACE_ENABLED, DEFAULT_JUDGE_TIMEOUT_S, - DEFAULT_KEEP_WARM_INACTIVITY_MINUTES, DEFAULT_MAX_CHAT_HEIGHT, DEFAULT_MAX_IMAGES, - DEFAULT_MAX_ITERATIONS, DEFAULT_NUM_CTX, DEFAULT_OLLAMA_URL, DEFAULT_OVERLAY_WIDTH, - DEFAULT_PIPELINE_WALL_CLOCK_BUDGET_S, DEFAULT_QUOTE_MAX_CONTEXT_LENGTH, - DEFAULT_QUOTE_MAX_DISPLAY_CHARS, DEFAULT_QUOTE_MAX_DISPLAY_LINES, - DEFAULT_READER_BATCH_TIMEOUT_S, DEFAULT_READER_PER_URL_TIMEOUT_S, DEFAULT_READER_URL, - DEFAULT_ROUTER_TIMEOUT_S, DEFAULT_SEARCH_TIMEOUT_S, DEFAULT_SEARXNG_MAX_RESULTS, - DEFAULT_SEARXNG_URL, DEFAULT_TOP_K_URLS, + DEFAULT_DEBUG_TRACE_ENABLED, DEFAULT_JUDGE_TIMEOUT_S, DEFAULT_KEEP_WARM_INACTIVITY_MINUTES, + DEFAULT_MAX_CHAT_HEIGHT, DEFAULT_MAX_IMAGES, DEFAULT_MAX_ITERATIONS, DEFAULT_NUM_CTX, + DEFAULT_OLLAMA_URL, DEFAULT_OVERLAY_WIDTH, DEFAULT_PIPELINE_WALL_CLOCK_BUDGET_S, + DEFAULT_QUOTE_MAX_CONTEXT_LENGTH, DEFAULT_QUOTE_MAX_DISPLAY_CHARS, + DEFAULT_QUOTE_MAX_DISPLAY_LINES, DEFAULT_READER_BATCH_TIMEOUT_S, + DEFAULT_READER_PER_URL_TIMEOUT_S, DEFAULT_READER_URL, DEFAULT_ROUTER_TIMEOUT_S, + DEFAULT_SEARCH_TIMEOUT_S, DEFAULT_SEARXNG_MAX_RESULTS, DEFAULT_SEARXNG_URL, DEFAULT_TOP_K_URLS, }; /// Static, user-tunable inference daemon configuration. @@ -196,23 +195,19 @@ impl Default for SearchSection { } /// Developer and power-user debugging knobs. -/// -/// `search_trace_enabled` is exposed in the Settings GUI (Web tab, Diagnostics -/// section) so users can toggle it without editing `config.toml`. Off in -/// shipped builds by default. 
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)] #[serde(default)] pub struct DebugSection { - /// When on, the `/search` pipeline writes a forensic JSON-Lines trace - /// file per turn under `~/Library/Application Support/com.quietnode.thuki/traces/`. - /// Toggleable from the Settings panel. Off by default. - pub search_trace_enabled: bool, + /// Records every chat conversation and `/search` session to JSON-Lines + /// files under `app_data_dir/traces/{chat,search}/.jsonl`. + /// Off by default; toggleable from Settings. + pub trace_enabled: bool, } impl Default for DebugSection { fn default() -> Self { Self { - search_trace_enabled: DEFAULT_DEBUG_SEARCH_TRACE_ENABLED, + trace_enabled: DEFAULT_DEBUG_TRACE_ENABLED, } } } diff --git a/src-tauri/src/config/tests.rs b/src-tauri/src/config/tests.rs index fa6c93d..21d76c4 100644 --- a/src-tauri/src/config/tests.rs +++ b/src-tauri/src/config/tests.rs @@ -13,14 +13,14 @@ use std::path::PathBuf; use super::defaults::{ - DEFAULT_DEBUG_SEARCH_TRACE_ENABLED, DEFAULT_JUDGE_TIMEOUT_S, - DEFAULT_KEEP_WARM_INACTIVITY_MINUTES, DEFAULT_MAX_CHAT_HEIGHT, DEFAULT_MAX_IMAGES, - DEFAULT_MAX_ITERATIONS, DEFAULT_NUM_CTX, DEFAULT_OLLAMA_URL, DEFAULT_OVERLAY_WIDTH, - DEFAULT_QUOTE_MAX_CONTEXT_LENGTH, DEFAULT_QUOTE_MAX_DISPLAY_CHARS, - DEFAULT_QUOTE_MAX_DISPLAY_LINES, DEFAULT_READER_BATCH_TIMEOUT_S, - DEFAULT_READER_PER_URL_TIMEOUT_S, DEFAULT_READER_URL, DEFAULT_ROUTER_TIMEOUT_S, - DEFAULT_SEARCH_TIMEOUT_S, DEFAULT_SEARXNG_MAX_RESULTS, DEFAULT_SEARXNG_URL, - DEFAULT_SYSTEM_PROMPT_BASE, DEFAULT_TOP_K_URLS, SLASH_COMMAND_PROMPT_APPENDIX, + DEFAULT_DEBUG_TRACE_ENABLED, DEFAULT_JUDGE_TIMEOUT_S, DEFAULT_KEEP_WARM_INACTIVITY_MINUTES, + DEFAULT_MAX_CHAT_HEIGHT, DEFAULT_MAX_IMAGES, DEFAULT_MAX_ITERATIONS, DEFAULT_NUM_CTX, + DEFAULT_OLLAMA_URL, DEFAULT_OVERLAY_WIDTH, DEFAULT_QUOTE_MAX_CONTEXT_LENGTH, + DEFAULT_QUOTE_MAX_DISPLAY_CHARS, DEFAULT_QUOTE_MAX_DISPLAY_LINES, + DEFAULT_READER_BATCH_TIMEOUT_S, DEFAULT_READER_PER_URL_TIMEOUT_S, 
DEFAULT_READER_URL, + DEFAULT_ROUTER_TIMEOUT_S, DEFAULT_SEARCH_TIMEOUT_S, DEFAULT_SEARXNG_MAX_RESULTS, + DEFAULT_SEARXNG_URL, DEFAULT_SYSTEM_PROMPT_BASE, DEFAULT_TOP_K_URLS, + SLASH_COMMAND_PROMPT_APPENDIX, }; use super::error::ConfigError; use super::loader::{compose_system_prompt, load_from_path}; @@ -1049,25 +1049,22 @@ fn config_error_io_error_serializes_io_source_as_display_string() { #[test] fn debug_section_default_matches_compiled_defaults() { let d = DebugSection::default(); - assert_eq!(d.search_trace_enabled, DEFAULT_DEBUG_SEARCH_TRACE_ENABLED); + assert_eq!(d.trace_enabled, DEFAULT_DEBUG_TRACE_ENABLED); } #[test] fn app_config_default_includes_debug_section_with_compiled_defaults() { let c = AppConfig::default(); - assert_eq!( - c.debug.search_trace_enabled, - DEFAULT_DEBUG_SEARCH_TRACE_ENABLED - ); + assert_eq!(c.debug.trace_enabled, DEFAULT_DEBUG_TRACE_ENABLED); } #[test] -fn debug_search_trace_enabled_round_trips_through_load() { +fn debug_trace_enabled_round_trips_through_load() { let dir = fresh_temp_dir(); let path = config_path_in(&dir); - std::fs::write(&path, "[debug]\nsearch_trace_enabled = true\n").unwrap(); + std::fs::write(&path, "[debug]\ntrace_enabled = true\n").unwrap(); let loaded = load_from_path(&path).unwrap(); - assert!(loaded.debug.search_trace_enabled); + assert!(loaded.debug.trace_enabled); } #[test] @@ -1081,7 +1078,7 @@ fn toml_without_debug_section_deserializes_to_defaults() { .unwrap(); let loaded = load_from_path(&path).unwrap(); assert_eq!( - loaded.debug.search_trace_enabled, DEFAULT_DEBUG_SEARCH_TRACE_ENABLED, + loaded.debug.trace_enabled, DEFAULT_DEBUG_TRACE_ENABLED, "missing [debug] section must deserialize to defaults via #[serde(default)]" ); } diff --git a/src-tauri/src/lib.rs b/src-tauri/src/lib.rs index 663c551..9b0778b 100644 --- a/src-tauri/src/lib.rs +++ b/src-tauri/src/lib.rs @@ -25,6 +25,7 @@ pub mod onboarding; pub mod screenshot; pub mod search; pub mod settings_commands; +pub mod trace; pub mod warmup; 
#[cfg(target_os = "macos")] @@ -845,6 +846,43 @@ fn spawn_periodic_image_cleanup(app_handle: tauri::AppHandle) { }); } +// ─── Trace recorder bootstrap helpers ──────────────────────────────────────── + +/// Builds the inner recorder for the live trace wrapper based on the +/// current `[debug] trace_enabled` value. +/// +/// Returns a `NoopRecorder` when off (zero-cost path), a +/// `RegistryRecorder` rooted at `app_data_dir()/traces/` when on. The +/// caller is responsible for installing the result either as the +/// initial state of a `LiveTraceRecorder` (at startup) or replacing +/// the live recorder's inner (on Settings save). +/// +/// Emits a one-line stderr warning when transitioning to the on state +/// so a developer running `bun run dev` can see at a glance that +/// tracing is active and where the files are landing. +#[cfg_attr(coverage_nightly, coverage(off))] +pub fn build_trace_inner( + app_handle: &tauri::AppHandle, + enabled: bool, +) -> Arc { + if !enabled { + return Arc::new(trace::NoopRecorder); + } + let traces_root = app_handle + .path() + .app_data_dir() + .map(|d| d.join("traces")) + .unwrap_or_else(|_| std::env::temp_dir().join("thuki").join("traces")); + eprintln!( + "thuki: [trace] trace_enabled = ON. Writing forensic JSONL to {}.", + traces_root.display() + ); + eprintln!( + "thuki: [trace] Files may contain sensitive text. Disable in config.toml when not actively debugging." + ); + Arc::new(trace::RegistryRecorder::new(traces_root)) +} + // ─── Application entry point ───────────────────────────────────────────────── /// Initialises and runs the Tauri application. 
@@ -995,6 +1033,27 @@ pub fn run() { app.manage(commands::GenerationState::new()); app.manage(commands::ConversationHistory::new()); + // ── Unified trace recorder ───────────────────────────── + // Off by default: when `[debug] trace_enabled = false` in + // config.toml the live recorder wraps a `NoopRecorder` and + // every chat / search / screenshot event is a constant-time + // call. When on, it wraps a `RegistryRecorder` that routes + // events to per-conversation JSONL files under + // `app_data_dir()/traces/{chat,search}/`. + // + // Wrapped in a `LiveTraceRecorder` so toggling + // `[debug] trace_enabled` from the Settings panel hot-swaps + // the inner without requiring an app restart. See + // `trace::live` for the swap contract and + // `settings_commands::set_config_field` for the hook site. + let trace_enabled = app + .state::>() + .read() + .debug + .trace_enabled; + let initial_inner = build_trace_inner(app.handle(), trace_enabled); + app.manage(Arc::new(trace::LiveTraceRecorder::new(initial_inner))); + // ── SQLite database for conversation history ────────── let app_data_dir = app .path() @@ -1045,6 +1104,8 @@ pub fn run() { search::search_pipeline, #[cfg(not(coverage))] commands::reset_conversation, + #[cfg(not(coverage))] + commands::record_conversation_end, settings_commands::get_config, settings_commands::set_config_field, settings_commands::reset_config, diff --git a/src-tauri/src/screenshot.rs b/src-tauri/src/screenshot.rs index 23d953d..99db107 100644 --- a/src-tauri/src/screenshot.rs +++ b/src-tauri/src/screenshot.rs @@ -400,8 +400,12 @@ fn capture_full_screen_pixels() -> Result<(u32, u32, Vec), String> { /// happen on a blocking thread to avoid stalling the UI. 
#[cfg_attr(coverage_nightly, coverage(off))] #[cfg_attr(not(coverage), tauri::command)] -pub async fn capture_full_screen_command(app_handle: tauri::AppHandle) -> Result { - use tauri::Manager; +pub async fn capture_full_screen_command( + app_handle: tauri::AppHandle, + conversation_id: String, + trace_recorder: tauri::State<'_, std::sync::Arc>, +) -> Result { + use crate::trace::TraceRecorder; let base_dir = app_handle .path() .app_data_dir() @@ -422,7 +426,7 @@ pub async fn capture_full_screen_command(app_handle: tauri::AppHandle) -> Result // Phase 2: Encode to PNG and save via the images pipeline on a blocking // thread so the main thread stays responsive. - tokio::task::spawn_blocking(move || { + let saved_path = tokio::task::spawn_blocking(move || { let buf = image::ImageBuffer::, Vec>::from_raw(width, height, rgba_bytes) .ok_or_else(|| "Failed to create image buffer from captured pixels.".to_string())?; @@ -436,7 +440,20 @@ pub async fn capture_full_screen_command(app_handle: tauri::AppHandle) -> Result crate::images::save_image(&base_dir, &png) }) .await - .map_err(|e| format!("image encoding task failed: {e}"))? + .map_err(|e| format!("image encoding task failed: {e}"))??; + + // Mirror the capture into the unified trace recorder. Records the + // saved-file path (paths only, never image bytes) and the displays + // count (currently always 1 because `capture_full_screen_pixels` + // returns a merged image; future multi-monitor work can refine). 
+ trace_recorder.record( + &crate::trace::ConversationId::new(conversation_id), + crate::trace::RecorderEvent::ScreenCaptured { + image_path: saved_path.clone(), + displays: 1, + }, + ); + Ok(saved_path) } // ─── Tests ────────────────────────────────────────────────────────────────── diff --git a/src-tauri/src/search/config.rs b/src-tauri/src/search/config.rs index 561b2b1..ac927aa 100644 --- a/src-tauri/src/search/config.rs +++ b/src-tauri/src/search/config.rs @@ -66,7 +66,7 @@ pub struct SearchRuntimeConfig { pub pipeline_input_char_budget: usize, /// Whether the forensic per-turn search trace recorder is on. Off in /// shipped builds; toggled from the Settings panel (Web tab, Diagnostics - /// section). See [`crate::search::recorder`] for the file format. + /// section). See [`crate::trace::recorder`] for the file format. pub trace_enabled: bool, } @@ -90,7 +90,7 @@ impl SearchRuntimeConfig { router_timeout_s: cfg.search.router_timeout_s, pipeline_wall_clock_budget_s: cfg.search.pipeline_wall_clock_budget_s, pipeline_input_char_budget: defaults::PIPELINE_INPUT_CHAR_BUDGET, - trace_enabled: cfg.debug.search_trace_enabled, + trace_enabled: cfg.debug.trace_enabled, } } @@ -126,7 +126,7 @@ impl Default for SearchRuntimeConfig { router_timeout_s: defaults::DEFAULT_ROUTER_TIMEOUT_S, pipeline_wall_clock_budget_s: defaults::DEFAULT_PIPELINE_WALL_CLOCK_BUDGET_S, pipeline_input_char_budget: defaults::PIPELINE_INPUT_CHAR_BUDGET, - trace_enabled: defaults::DEFAULT_DEBUG_SEARCH_TRACE_ENABLED, + trace_enabled: defaults::DEFAULT_DEBUG_TRACE_ENABLED, } } } @@ -172,10 +172,7 @@ mod tests { ); // Test-only override: production value is DEFAULT_READER_BATCH_TIMEOUT_S. 
assert_eq!(cfg.reader_batch_timeout_s, TEST_READER_BATCH_TIMEOUT_S); - assert_eq!( - cfg.trace_enabled, - defaults::DEFAULT_DEBUG_SEARCH_TRACE_ENABLED - ); + assert_eq!(cfg.trace_enabled, defaults::DEFAULT_DEBUG_TRACE_ENABLED); } #[test] @@ -192,7 +189,7 @@ mod tests { app.search.judge_timeout_s = 13; app.search.router_timeout_s = 14; app.search.pipeline_wall_clock_budget_s = 120; - app.debug.search_trace_enabled = true; + app.debug.trace_enabled = true; let cfg = SearchRuntimeConfig::from_app_config(&app); assert_eq!(cfg.searxng_url, "http://10.0.0.1:9000"); diff --git a/src-tauri/src/search/llm.rs b/src-tauri/src/search/llm.rs index a866c89..8ad49aa 100644 --- a/src-tauri/src/search/llm.rs +++ b/src-tauri/src/search/llm.rs @@ -18,10 +18,10 @@ use tokio_util::sync::CancellationToken; use crate::commands::ChatMessage; -use super::recorder::{PipelineRecorder, RecorderEvent}; use super::types::{ Action, JudgeVerdict, RouterJudgeOutput, SearchError, SearxResult, Sufficiency, }; +use crate::trace::{BoundRecorder, RecorderEvent}; /// Synthesis system prompt: instructs the answering LLM to cite sources and /// avoid meta-commentary over the reference material. 
@@ -235,7 +235,7 @@ async fn request_json( timeout_secs: u64, num_ctx: u32, num_predict: i32, - recorder: &Arc, + recorder: &Arc, stage: &str, ) -> Result { let body = OllamaJsonRequest { @@ -370,7 +370,7 @@ pub async fn call_router_merged( cancel_token: &CancellationToken, timeout_secs: u64, num_ctx: u32, - recorder: &Arc, + recorder: &Arc, ) -> Result { if cancel_token.is_cancelled() { return Err(SearchError::Cancelled); @@ -553,7 +553,7 @@ pub async fn call_judge( timeout_secs: u64, num_ctx: u32, stage: JudgeStage, - recorder: &Arc, + recorder: &Arc, ) -> Result { if cancel_token.is_cancelled() { return Err(SearchError::Cancelled); @@ -1071,17 +1071,19 @@ mod prompt_tests { #[cfg(test)] mod router_judge_tests { use super::*; - use crate::search::recorder::NoopRecorder; + use crate::trace::ConversationId; use wiremock::matchers::{method, path}; use wiremock::{Mock, MockServer, ResponseTemplate}; - /// Local helper: a noop recorder wrapped in `Arc`, + /// Local helper: a noop recorder bound to a sentinel conversation id, /// used everywhere this module exercises `call_router_merged` / - /// `call_judge` without asserting on trace output. Forensic instrumentation - /// is covered separately in [`crate::search::recorder`] and the dedicated - /// LLM-tracing tests at the bottom of this module. - fn noop_recorder() -> Arc { - Arc::new(NoopRecorder) + /// `call_judge` without asserting on trace output. Forensic + /// instrumentation is covered separately in [`crate::trace::recorder`] + /// and the dedicated LLM-tracing tests at the bottom of this module. + fn noop_recorder() -> Arc { + Arc::new(BoundRecorder::noop_for(ConversationId::new( + "test-conv-llm", + ))) } // ── build_judge_user_message ───────────────────────────────────────────── @@ -1655,7 +1657,7 @@ mod router_judge_tests { // chunk-stage trace labels. 
The Snippet variant is exercised by the // surrounding tests; this one keeps both arms covered so the labels // stay in sync with the trace-format documentation in - // `crate::search::recorder`. + // `crate::trace::recorder`. let server = MockServer::start().await; Mock::given(method("POST")) .and(path("/api/chat")) diff --git a/src-tauri/src/search/mod.rs b/src-tauri/src/search/mod.rs index 1afa2cf..2ab7a40 100644 --- a/src-tauri/src/search/mod.rs +++ b/src-tauri/src/search/mod.rs @@ -14,13 +14,13 @@ use std::sync::Arc; -use tauri::{ipc::Channel, Manager, State}; +use tauri::{ipc::Channel, State}; use tokio_util::sync::CancellationToken; use crate::commands::{ConversationHistory, GenerationState}; use crate::config::AppConfig; use crate::models::ActiveModelState; -use recorder::{FileRecorder, NoopRecorder, PipelineRecorder}; +use crate::trace::{BoundRecorder, ConversationId, LiveTraceRecorder, TraceRecorder}; pub mod chunker; pub mod config; @@ -30,7 +30,6 @@ mod llm; pub mod pipeline; pub mod probe; pub mod reader; -pub mod recorder; mod rerank; mod searxng; mod types; @@ -60,13 +59,16 @@ pub use types::{ #[allow(clippy::too_many_arguments)] pub async fn search_pipeline( message: String, + conversation_id: String, + is_first_turn: bool, + displayed_content: Option, on_event: Channel, client: State<'_, reqwest::Client>, generation: State<'_, GenerationState>, history: State<'_, ConversationHistory>, app_config: State<'_, parking_lot::RwLock>, active_model_state: State<'_, ActiveModelState>, - app: tauri::AppHandle, + trace_recorder: State<'_, Arc>, ) -> Result<(), String> { // Snapshot the config once so the entire pipeline sees a consistent view // even if the user edits Settings while a search is in flight. @@ -89,9 +91,11 @@ pub async fn search_pipeline( // model. The frontend strip already steers the user to the picker // before this point, so this branch is defense-in-depth for the // race where the user's last installed model was removed mid-run. 
- let _ = on_event.send(SearchEvent::Error { - message: "No model selected. Pick a model in the picker.".to_string(), - }); + // Emit a dedicated typed event (not a generic Error) so the frontend + // can keep `is_first_turn` armed: this bail returns before + // `ConversationStart` is recorded, so the next attempt must still + // open the trace as a first turn. + let _ = on_event.send(SearchEvent::NoModelSelected); return Ok(()); }; @@ -118,21 +122,56 @@ pub async fn search_pipeline( let today = pipeline::today_iso(); - // Build the per-turn forensic recorder. When the dev-only debug flag is - // off (production default) this is a zero-cost noop. When on, every - // pipeline step records into a single JSON-Lines file under - // `app_data_dir()/traces/`. - let turn_id = recorder::new_turn_id(); - let trace_dir = app - .path() - .app_data_dir() - .unwrap_or_else(|_| std::env::temp_dir().join("thuki")) - .join("traces"); - let recorder: Arc = if runtime_config.trace_enabled { - Arc::new(FileRecorder::new(&trace_dir, &turn_id)) - } else { - Arc::new(NoopRecorder) - }; + // Pull the per-conversation forensic recorder from the global + // trace registry. When the dev-only `[debug] trace_enabled` flag is + // off (production default) the registry is a `NoopRecorder` so this + // resolves to a zero-cost noop wrapped in `BoundRecorder`. When on, + // every pipeline step records into the conversation's + // `traces/search/.jsonl` file via the registry's + // lazy-insert path. + let conv_id = ConversationId::new(conversation_id); + let live: Arc = Arc::clone(trace_recorder.inner()); + // Coerce the concrete `Arc` to the + // `Arc` shape `BoundRecorder` expects. The + // coercion happens at the binding site; calling `record()` on + // the bound recorder still goes through the live wrapper, so a + // mid-stream trace toggle takes effect on the next event. 
+ let live_inner: Arc = live; + let recorder = Arc::new(BoundRecorder::new(live_inner, conv_id)); + + // Mirror the user-perceived turn into the chat-domain trace so the + // `traces/chat/.jsonl` file is the canonical + // user-facing timeline regardless of whether a turn used `/search` + // or hit `ask_ollama` directly. Symmetric with what + // `commands::ask_ollama` records at its hook sites; the deep + // search-pipeline internals (LLM calls, judge verdicts, SearXNG + // queries) stay in the search-domain file via the same conv id. + crate::commands::record_conversation_start_if_first_turn( + &recorder, + is_first_turn, + model_name.clone(), + app_config.prompt.resolved_system.clone(), + ); + // Tell the frontend the trace was opened. Sent unconditionally so + // the hook can retire its `is_first_turn` flag even if a previous + // first-turn attempt was cancelled before any token arrived. + let _ = on_event.send(SearchEvent::TurnAccepted); + // `displayed_content` is what the user actually typed on screen + // (e.g. "/search who is Elon Musk?"); `message` is the stripped + // query the search engine receives. The chat file uses the + // displayed text for symmetry with non-search turns, where + // `user_message.content` is the literal user input. 
+ let user_visible_content = displayed_content.as_deref().unwrap_or(&message).to_owned(); + recorder.record(crate::trace::RecorderEvent::UserMessage { + content: user_visible_content, + attached_images: Vec::new(), + slash_command: Some("/search".to_owned()), + }); + let stream_started_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + let token_count = std::sync::Arc::new(std::sync::atomic::AtomicU64::new(0)); let router = pipeline::DefaultRouterJudge::new( ollama_endpoint.clone(), @@ -154,6 +193,8 @@ pub async fn search_pipeline( Arc::clone(&recorder), ); + let recorder_for_pump = Arc::clone(&recorder); + let token_count_for_pump = Arc::clone(&token_count); let result = pipeline::run_agentic( &ollama_endpoint, &searxng_endpoint, @@ -166,6 +207,20 @@ pub async fn search_pipeline( message, &today, &|event| { + // Mirror synthesized-answer tokens into the chat-domain + // trace so the chat file's `assistant_tokens` stream + // matches what the user reads on screen, exactly like a + // non-search turn. Other `SearchEvent` variants (status + // pills, source URLs, warnings) stay in the search-domain + // file; they were intentionally dropped from the chat + // mirror to keep chat turns shape-symmetric across normal + // and `/search` paths. + if let SearchEvent::Token { content } = &event { + token_count_for_pump.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + recorder_for_pump.record(crate::trace::RecorderEvent::AssistantTokens { + chunk: content.clone(), + }); + } let _ = on_event.send(event); }, &router, @@ -192,6 +247,20 @@ pub async fn search_pipeline( } } + // Close the chat-domain user-perceived turn even on error paths so + // the chat file's `assistant_complete` always pairs with the + // earlier `user_message`. `total_tokens` reflects the synthesized + // tokens streamed to the user (zero on early-bail paths like + // `SandboxUnavailable`). 
+ let stream_ended_ms = std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + recorder.record(crate::trace::RecorderEvent::AssistantComplete { + total_tokens: token_count.load(std::sync::atomic::Ordering::Relaxed), + latency_ms: stream_ended_ms.saturating_sub(stream_started_ms), + }); + generation.clear_token(); Ok(()) } diff --git a/src-tauri/src/search/pipeline.rs b/src-tauri/src/search/pipeline.rs index e48b191..07573fd 100644 --- a/src-tauri/src/search/pipeline.rs +++ b/src-tauri/src/search/pipeline.rs @@ -37,7 +37,6 @@ use super::llm::{ JudgeSource, JudgeStage, }; use super::reader; -use super::recorder::PipelineRecorder; use super::rerank; use super::searxng; use super::types::{ @@ -45,6 +44,7 @@ use super::types::{ SearchEvent, SearchMetadata, SearchResultPreview, SearchTraceCounts, SearchTraceKind, SearchTraceStatus, SearchTraceStep, SearchWarning, SearxResult, Sufficiency, }; +use crate::trace::BoundRecorder; /// Build the `JudgeSource` list fed to the sufficiency judge and the synthesis /// prompt from reranked chunks, deduplicated by source URL. @@ -446,7 +446,7 @@ async fn run_streaming_branch( metadata: Option, on_event: &impl Fn(SearchEvent), num_ctx: u32, - recorder: &Arc, + recorder: &Arc, stage: &str, ) { // Snapshot the request body before streaming starts so the trace can show @@ -534,6 +534,13 @@ pub(super) fn translate_chunk(chunk: StreamChunk) -> SearchEvent { StreamChunk::Done => SearchEvent::Done { metadata: None }, StreamChunk::Cancelled => SearchEvent::Cancelled, StreamChunk::Error(e) => SearchEvent::Error { message: e.message }, + // `TurnAccepted` is a top-level handshake emitted by `commands:: + // ask_ollama` and `search::search_pipeline` themselves; the + // synthesis-pump path that feeds `translate_chunk` only ever + // receives the streaming variants above. 
Forward it as the + // matching pipeline event so the type stays exhaustive without + // smuggling the chunk into a Token. + StreamChunk::TurnAccepted => SearchEvent::TurnAccepted, } } @@ -616,7 +623,7 @@ pub struct DefaultRouterJudge { today: String, router_timeout_secs: u64, num_ctx: u32, - recorder: Arc, + recorder: Arc, } impl DefaultRouterJudge { @@ -645,7 +652,7 @@ impl DefaultRouterJudge { today: String, router_timeout_secs: u64, num_ctx: u32, - recorder: Arc, + recorder: Arc, ) -> Self { Self { endpoint, @@ -698,7 +705,7 @@ pub struct DefaultJudge { cancel: CancellationToken, judge_timeout_secs: u64, num_ctx: u32, - recorder: Arc, + recorder: Arc, } impl DefaultJudge { @@ -721,7 +728,7 @@ impl DefaultJudge { cancel: CancellationToken, judge_timeout_secs: u64, num_ctx: u32, - recorder: Arc, + recorder: Arc, ) -> Self { Self { endpoint, @@ -804,9 +811,9 @@ struct SearchExecutionContext<'a> { on_event: &'a (dyn Fn(SearchEvent) + Sync), runtime_config: &'a config::SearchRuntimeConfig, num_ctx: u32, - /// Forensic per-turn recorder. [`super::recorder::NoopRecorder`] in + /// Forensic per-turn recorder. Wraps a [`crate::trace::NoopRecorder`] in /// production unless `runtime_config.trace_enabled` is set. - recorder: &'a Arc, + recorder: &'a Arc, } /// Per-turn values reused across extracted search stages. 
@@ -856,7 +863,7 @@ fn judge_output_trace_json(verdict: &JudgeVerdict) -> serde_json::Value { #[cfg_attr(coverage_nightly, coverage(off))] #[allow(clippy::too_many_arguments)] fn record_streaming_llm_call( - recorder: &Arc, + recorder: &Arc, stage: &str, endpoint: &str, request_body: serde_json::Value, @@ -865,7 +872,7 @@ fn record_streaming_llm_call( started: std::time::Instant, saw_done: bool, ) { - recorder.record(crate::search::recorder::RecorderEvent::StreamingLlmCall { + recorder.record(crate::trace::RecorderEvent::StreamingLlmCall { stage: stage.to_string(), endpoint: endpoint.to_string(), request_body, @@ -882,11 +889,11 @@ fn record_streaming_llm_call( #[cfg_attr(coverage_nightly, coverage(off))] fn record_judge_verdict( - recorder: &Arc, + recorder: &Arc, stage: impl Into, verdict: &JudgeVerdict, ) { - recorder.record(crate::search::recorder::RecorderEvent::JudgeVerdict { + recorder.record(crate::trace::RecorderEvent::JudgeVerdict { stage: stage.into(), raw: verdict.reasoning.clone(), normalized: judge_output_trace_json(verdict), @@ -895,14 +902,14 @@ fn record_judge_verdict( #[cfg_attr(coverage_nightly, coverage(off))] fn record_turn_start( - recorder: &Arc, + recorder: &Arc, turn_id: &str, user_query: &str, model: &str, runtime_config: &config::SearchRuntimeConfig, history: &ConversationHistory, ) { - recorder.record(crate::search::recorder::RecorderEvent::TurnStart { + recorder.record(crate::trace::RecorderEvent::TurnStart { turn_id: turn_id.to_string(), query: user_query.to_string(), model: model.to_string(), @@ -913,11 +920,11 @@ fn record_turn_start( #[cfg_attr(coverage_nightly, coverage(off))] fn record_turn_cancelled_before_router( - recorder: &Arc, + recorder: &Arc, turn_id: &str, turn_started: std::time::Instant, ) { - recorder.record(crate::search::recorder::RecorderEvent::TurnEnd { + recorder.record(crate::trace::RecorderEvent::TurnEnd { turn_id: turn_id.to_string(), final_action: "cancelled_before_router".to_string(), final_source_urls: 
Vec::new(), @@ -928,12 +935,12 @@ fn record_turn_cancelled_before_router( #[cfg_attr(coverage_nightly, coverage(off))] fn record_router_error_turn_end( - recorder: &Arc, + recorder: &Arc, turn_id: &str, turn_started: std::time::Instant, error: &SearchError, ) { - recorder.record(crate::search::recorder::RecorderEvent::TurnEnd { + recorder.record(crate::trace::RecorderEvent::TurnEnd { turn_id: turn_id.to_string(), final_action: format!("router_error:{error:?}"), final_source_urls: Vec::new(), @@ -943,8 +950,8 @@ fn record_router_error_turn_end( } #[cfg_attr(coverage_nightly, coverage(off))] -fn record_router_verdict(recorder: &Arc, output: &RouterJudgeOutput) { - recorder.record(crate::search::recorder::RecorderEvent::JudgeVerdict { +fn record_router_verdict(recorder: &Arc, output: &RouterJudgeOutput) { + recorder.record(crate::trace::RecorderEvent::JudgeVerdict { stage: "router".into(), raw: format!("{:?}", output.action), normalized: router_output_trace_json(output), @@ -953,13 +960,13 @@ fn record_router_verdict(recorder: &Arc, output: &RouterJu #[cfg_attr(coverage_nightly, coverage(off))] fn record_turn_end( - recorder: &Arc, + recorder: &Arc, turn_id: &str, turn_started: std::time::Instant, result: &Result<(), SearchError>, action: &Action, ) { - recorder.record(crate::search::recorder::RecorderEvent::TurnEnd { + recorder.record(crate::trace::RecorderEvent::TurnEnd { turn_id: turn_id.to_string(), final_action: format_final_action(result, action), final_source_urls: Vec::new(), @@ -1798,14 +1805,14 @@ pub async fn run_agentic( judge: &dyn JudgeCaller, runtime_config: &config::SearchRuntimeConfig, num_ctx: u32, - recorder: &Arc, + recorder: &Arc, ) -> Result<(), SearchError> { let trimmed = query.trim(); if trimmed.is_empty() { return Err(SearchError::EmptyQuery); } let user_query = trimmed.to_string(); - let turn_id = crate::search::recorder::new_turn_id(); + let turn_id = crate::trace::new_turn_id(); let turn_started = std::time::Instant::now(); 
record_turn_start( recorder, @@ -2734,6 +2741,17 @@ mod tests { ); } + #[test] + fn translate_chunk_turn_accepted_maps_to_turn_accepted_event() { + // Defensive: the synthesis-pump path that feeds `translate_chunk` + // does not emit `TurnAccepted` in production, but the match must + // stay exhaustive without smuggling the chunk into a Token. + assert_eq!( + translate_chunk(StreamChunk::TurnAccepted), + SearchEvent::TurnAccepted, + ); + } + // ── snapshot_history ──────────────────────────────────────────────────── #[test] @@ -2835,7 +2853,9 @@ mod tests { None, &cb, DEFAULT_NUM_CTX, - &(Arc::new(crate::search::recorder::NoopRecorder) as Arc), + &(Arc::new(crate::trace::BoundRecorder::noop_for( + crate::trace::ConversationId::new("test-conv-pipeline"), + ))), "test", ) .await; @@ -2849,7 +2869,9 @@ mod tests { #[test] fn default_router_judge_constructs_without_panic() { let cancel = CancellationToken::new(); - let recorder: Arc = Arc::new(crate::search::recorder::NoopRecorder); + let recorder: Arc = Arc::new(BoundRecorder::noop_for( + crate::trace::ConversationId::new("test-conv-pipeline"), + )); let _judge = DefaultRouterJudge::new( "http://127.0.0.1:11434/api/chat".into(), "mistral".into(), @@ -2865,7 +2887,9 @@ mod tests { #[test] fn default_judge_constructs_without_panic() { let cancel = CancellationToken::new(); - let recorder: Arc = Arc::new(crate::search::recorder::NoopRecorder); + let recorder: Arc = Arc::new(BoundRecorder::noop_for( + crate::trace::ConversationId::new("test-conv-pipeline"), + )); let _judge = DefaultJudge::new( "http://127.0.0.1:11434/api/chat".into(), "mistral".into(), @@ -3148,26 +3172,35 @@ mod tests { mod agentic_tests { use super::*; use crate::config::defaults::DEFAULT_NUM_CTX; - use crate::search::recorder::NoopRecorder; + use crate::trace::ConversationId; - /// Constructs a noop recorder wrapped as `Arc` for - /// tests that exercise `run_agentic` without asserting on trace output. 
- /// Recording assertions live in the recorder module's own test suite. - fn noop_recorder() -> Arc { - Arc::new(NoopRecorder) + /// Sentinel conversation id used by every pipeline test. Pipeline + /// tests do not assert on the conv-id field directly; they only + /// need a stable value so the BoundRecorder routes through. + const TEST_CONV_ID: &str = "test-conv-pipeline"; + + /// Constructs a noop recorder bound to the test conversation id for + /// tests that exercise `run_agentic` without asserting on trace + /// output. Recording assertions live in the trace module's own + /// test suite. + fn noop_recorder() -> Arc { + Arc::new(BoundRecorder::noop_for(ConversationId::new(TEST_CONV_ID))) } - /// Constructs a mock recorder + the `Arc` view that - /// the pipeline needs. Returns both so tests can pass the dyn-trait view - /// to `run_agentic` while still introspecting the captured events through - /// the concrete type. + /// Constructs a mock recorder + an `Arc` wrapping it + /// that the pipeline needs. Returns both so tests can pass the + /// bound recorder to `run_agentic` while still introspecting the + /// captured events through the concrete `MockRecorder`. fn mock_recorder_pair() -> ( - Arc, - Arc, + Arc, + Arc, ) { - let mock = Arc::new(crate::search::recorder::MockRecorder::new()); - let view: Arc = mock.clone(); - (mock, view) + let mock = Arc::new(crate::trace::recorder::MockRecorder::new()); + let bound = Arc::new(BoundRecorder::new( + mock.clone(), + ConversationId::new(TEST_CONV_ID), + )); + (mock, bound) } // ── mock implementations ──────────────────────────────────────────────── @@ -3617,8 +3650,8 @@ mod agentic_tests { let (query, model) = snap .iter() .rev() - .find_map(|e| match e { - crate::search::recorder::RecorderEvent::TurnStart { query, model, .. } => { + .find_map(|(_cid, e)| match e { + crate::trace::RecorderEvent::TurnStart { query, model, .. 
} => { Some((query.as_str(), model.as_str())) } _ => None, @@ -3630,8 +3663,8 @@ mod agentic_tests { // Iterate forward so TurnStart hits `_ => None` before TurnEnd matches. let error = snap .iter() - .find_map(|e| match e { - crate::search::recorder::RecorderEvent::TurnEnd { error, .. } => Some(error), + .find_map(|(_cid, e)| match e { + crate::trace::RecorderEvent::TurnEnd { error, .. } => Some(error), _ => None, }) .expect("expected TurnEnd event in recorder snapshot"); @@ -3676,8 +3709,8 @@ mod agentic_tests { // ensuring both arms of the find_map closure are covered. let error = snap .iter() - .find_map(|e| match e { - crate::search::recorder::RecorderEvent::TurnEnd { error, .. } => Some(error), + .find_map(|(_cid, e)| match e { + crate::trace::RecorderEvent::TurnEnd { error, .. } => Some(error), _ => None, }) .expect("expected TurnEnd event in recorder snapshot"); diff --git a/src-tauri/src/search/reader.rs b/src-tauri/src/search/reader.rs index 8b3caf5..02dcb8e 100644 --- a/src-tauri/src/search/reader.rs +++ b/src-tauri/src/search/reader.rs @@ -31,7 +31,7 @@ use tokio_util::sync::CancellationToken; use crate::config::defaults::DEFAULT_READER_RETRY_DELAY_MS; use crate::search::chunker::Page; use crate::search::errors::{is_transient_connect_error, retry_once}; -use crate::search::recorder::{PipelineRecorder, ReaderUrlOutcome, RecorderEvent}; +use crate::trace::{BoundRecorder, ReaderUrlOutcome, RecorderEvent}; /// Errors callers must handle. #[derive(Debug, thiserror::Error, PartialEq)] @@ -187,14 +187,15 @@ impl ReaderClient { /// /// `recorder` receives a single [`RecorderEvent::ReaderBatch`] when the /// batch finishes (successfully or via error), capturing per-URL outcomes - /// and extracted text. Pass [`crate::search::recorder::NoopRecorder`] - /// when tracing is off; the noop is a constant-time call. + /// and extracted text. 
The recorder is a [`crate::trace::BoundRecorder`] + /// already bound to the active conversation id; when tracing is off it + /// is backed by a [`crate::trace::NoopRecorder`] (constant-time call). pub async fn fetch_batch_with_progress( &self, urls: &[String], cancel: &CancellationToken, on_url_fetched: &(dyn Fn(String) + Send + Sync), - recorder: &Arc, + recorder: &Arc, ) -> Result { if urls.is_empty() { return Ok(ReaderBatchResult::default()); @@ -375,15 +376,17 @@ mod tests { use super::*; use crate::config::defaults::DEFAULT_READER_PER_URL_TIMEOUT_S; use crate::search::config::TEST_READER_BATCH_TIMEOUT_S; - use crate::search::recorder::NoopRecorder; + use crate::trace::ConversationId; use wiremock::matchers::{method, path}; use wiremock::{Mock, MockServer, ResponseTemplate}; - /// Local helper: a noop recorder wrapped in `Arc`, + /// Local helper: a noop recorder bound to a sentinel conversation id, /// used everywhere this module exercises `fetch_batch_with_progress` /// without asserting on trace output. - fn noop_recorder() -> Arc { - Arc::new(NoopRecorder) + fn noop_recorder() -> Arc { + Arc::new(BoundRecorder::noop_for(ConversationId::new( + "test-conv-reader", + ))) } async fn client_for(server: &MockServer) -> ReaderClient { diff --git a/src-tauri/src/search/recorder.rs b/src-tauri/src/search/recorder.rs deleted file mode 100644 index e7cc936..0000000 --- a/src-tauri/src/search/recorder.rs +++ /dev/null @@ -1,852 +0,0 @@ -//! Forensic per-turn trace recorder for the `/search` pipeline. -//! -//! When [`crate::config::schema::DebugSection::search_trace_enabled`] is on, -//! the pipeline records every LLM request/response, every SearXNG query and -//! raw response, every reader batch (per-URL latency + raw body + full -//! extracted text), every chunker/rerank step, and every judge verdict to a -//! single JSON-Lines file under `app_data_dir()/traces/`. One file per -//! pipeline turn. -//! -//! Off in shipped builds. 
Intended for local quality investigation: open a -//! `.jsonl` trace and grep/jq across LLM prompts vs. judge verdicts to -//! understand why the pipeline produced a given answer. -//! -//! # Architecture -//! -//! - [`PipelineRecorder`] is the trait every callsite calls into. The pipeline -//! threads `&Arc` through its execution context; no -//! call site distinguishes the live recorder from the noop. -//! - [`NoopRecorder`] is the production default. Every method is a no-op. -//! - [`FileRecorder`] is the dev-mode implementation. Lazy directory + file -//! creation on first record, `parking_lot::Mutex` around a buffered writer, -//! per-line flush so partial files are still grep-friendly if the daemon -//! crashes. I/O errors are warned once via `eprintln!` then become silent -//! no-ops for the rest of the turn — trace failures must never affect the -//! user-visible pipeline. -//! - [`MockRecorder`] (test-only) collects every event into an in-memory -//! `Vec` for instrumentation-seam assertions. -//! -//! # Schema -//! -//! Each line is a self-describing JSON object: -//! -//! ```json -//! {"v": 1, "seq": 0, "ts_ms": 1714762800000, "kind": "turn_start", ...payload} -//! ``` -//! -//! `seq` is monotonic per recorder; `ts_ms` is wall-clock UNIX millis. The -//! `v` field allows future schema evolution. - -use std::fs::{File, OpenOptions}; -use std::io::{BufWriter, Write}; -use std::path::{Path, PathBuf}; -use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; -use std::time::{SystemTime, UNIX_EPOCH}; - -use parking_lot::Mutex; -use serde::Serialize; -use serde_json::Value; - -/// Schema version embedded in every record. Bump when the wire shape changes. -pub const TRACE_SCHEMA_VERSION: u32 = 1; - -/// Trait every pipeline callsite uses to emit forensic events. Implementors -/// must be cheap when tracing is off (the [`NoopRecorder`] case dominates -/// production usage). -pub trait PipelineRecorder: Send + Sync { - /// Records a single event. 
Implementors MUST NOT panic, MUST NOT block - /// for arbitrary durations, and MUST NOT propagate errors. Trace I/O - /// failures must be swallowed (with a single rate-limited warning) so - /// the pipeline is unaffected. - fn record(&self, event: RecorderEvent); -} - -/// Production default: every method is a no-op. Constant time, zero -/// allocations. -#[derive(Debug, Default, Clone, Copy)] -pub struct NoopRecorder; - -impl PipelineRecorder for NoopRecorder { - fn record(&self, _event: RecorderEvent) {} -} - -/// Forensic trace events emitted by the search pipeline. -/// -/// Each variant maps to one JSON-Lines record. Field shapes are intentionally -/// permissive (`Value`, `String`) because the trace is a forensic dump, not -/// a typed contract. Downstream consumers (jq, ad-hoc scripts) read fields -/// by name. -#[derive(Debug, Clone)] -pub enum RecorderEvent { - /// Pipeline turn starts. Captures the user query, model slug, runtime - /// search-config snapshot (so a trace is reproducible without the - /// config file), and conversation-history length. - TurnStart { - turn_id: String, - query: String, - model: String, - runtime_config: Value, - history_len: usize, - }, - /// Single Ollama `/api/chat` non-streaming JSON request. Captures the - /// stage label (router / snippet_judge / chunk_judge / synthesis / - /// answer_from_context), the full request body the pipeline sent, the - /// raw response string Ollama returned, and the request latency. `error` - /// is `Some` when the call failed (timeout, transport, parse). - LlmCall { - stage: String, - endpoint: String, - request_body: Value, - response_raw: Option, - latency_ms: u64, - error: Option, - }, - /// Streaming Ollama `/api/chat` request used by the synthesis path. - /// `tokens` counts streamed pieces; `final_text` is the accumulated - /// assistant message; per-token records are NOT emitted (would flood - /// the file without adding signal). 
- StreamingLlmCall { - stage: String, - endpoint: String, - request_body: Value, - final_text: Option, - tokens: u64, - latency_ms: u64, - error: Option, - }, - /// SearXNG search request: query string, full URL, raw HTTP response - /// body, and the normalized result list the pipeline forwarded to the - /// reranker. - SearxngQuery { - query: String, - url: String, - status: Option, - response_raw: Option, - normalized_results: Value, - latency_ms: u64, - error: Option, - }, - /// Reader sidecar batch: per-URL fetch results including raw HTTP body - /// and extracted text. Latency is per-URL plus the overall batch. - ReaderBatch { - urls: Vec, - per_url: Vec, - batch_latency_ms: u64, - batch_error: Option, - }, - /// Page chunked into N pieces. `chunks` are the full chunk texts so a - /// downstream consumer can reconstruct what the judge actually saw. - ChunkerBatch { - page_url: String, - chunk_count: usize, - chunks: Vec, - }, - /// Reranker output: top-k chunks with scores, in selection order. - RerankResult { - query: String, - input_count: usize, - top_k: Vec, - }, - /// Parsed judge verdict at a given stage (snippet or chunk). Captures - /// the raw string the LLM returned plus the normalized verdict struct. - JudgeVerdict { - stage: String, - raw: String, - normalized: Value, - }, - /// Warning surfaced by the pipeline (e.g. JudgeFailed, BudgetExhausted). - Warning { kind: String, payload: Value }, - /// Mirrors a [`crate::search::types::SearchEvent`] emission so the trace - /// reflects the user-visible event stream alongside backend internals. - SearchEventEmitted { event: Value }, - /// Pipeline turn ends. Captures the final action taken and total - /// wall-clock latency. `error` is `Some` only on hard failure paths. - TurnEnd { - turn_id: String, - final_action: String, - final_source_urls: Vec, - total_latency_ms: u64, - error: Option, - }, -} - -/// Per-URL outcome inside a [`RecorderEvent::ReaderBatch`]. 
-#[derive(Debug, Clone, Serialize)] -pub struct ReaderUrlOutcome { - pub url: String, - pub status: Option, - pub latency_ms: u64, - pub raw_body: Option, - pub extracted_text: Option, - pub error: Option, -} - -/// Single ranked chunk inside a [`RecorderEvent::RerankResult`]. -#[derive(Debug, Clone, Serialize)] -pub struct RerankedChunk { - pub source_url: String, - pub score: f64, - pub text: String, -} - -/// Builds a fresh, sortable, collision-resistant turn id. -/// -/// Format: `-`. Seconds are eyeball-readable when -/// browsing the traces directory; the v4 UUID guarantees uniqueness across -/// concurrent turns within the same second. -pub fn new_turn_id() -> String { - let secs = SystemTime::now() - .duration_since(UNIX_EPOCH) - .map(|d| d.as_secs()) - .unwrap_or(0); - format!("{secs}-{}", uuid::Uuid::new_v4()) -} - -/// Lazy file-backed recorder. Constructor is cheap (stashes path + turn id); -/// the directory and file are only created on the first `record()` call. -/// -/// Uses `parking_lot::Mutex` to match the rest of the codebase. The mutex is -/// held for the duration of one `serde_json::to_writer` + `writeln` + `flush`, -/// which is short enough to keep contention negligible even under concurrent -/// turns. -#[derive(Debug)] -pub struct FileRecorder { - /// Resolved trace file path. Computed at construction so the path is - /// stable for the lifetime of the recorder. - path: PathBuf, - /// Open writer, lazily initialized on first record. `None` after a - /// previous I/O failure (we degrade to noop, see `failed`). - state: Mutex>>, - /// Monotonic record sequence. Survives re-entry across multiple records - /// even though only one record is in flight at a time. - seq: AtomicU64, - /// Latched once a write or open error occurred. Subsequent records are - /// silent no-ops; the warning is rate-limited to a single `eprintln!`. - failed: AtomicBool, - /// Set once the failure warning has been printed. Prevents log spam. 
- warned: AtomicBool, -} - -impl FileRecorder { - /// Construct a recorder targeting `/.jsonl`. The directory - /// is NOT created here; that happens on first record so we don't pay - /// I/O cost (or surface errors) for turns that never write. - pub fn new(dir: impl AsRef, turn_id: &str) -> Self { - let path = dir.as_ref().join(format!("{turn_id}.jsonl")); - Self { - path, - state: Mutex::new(None), - seq: AtomicU64::new(0), - failed: AtomicBool::new(false), - warned: AtomicBool::new(false), - } - } - - /// Path the recorder writes to. Useful for tests and debug output. - pub fn path(&self) -> &Path { - &self.path - } - - /// Latches the recorder as failed and emits a single rate-limited - /// stderr warning. Filesystem-I/O wrapper: this is the only call site - /// that mutates the failed/warned flags, so the entire body is just - /// I/O-failure plumbing. Logic is exercised through every error branch - /// below; coverage is excluded because triggering each individual - /// `eprintln!` reliably across CI would require contrived - /// platform-specific filesystem behaviour. - #[cfg_attr(coverage_nightly, coverage(off))] - fn fail_with(&self, msg: String) { - self.failed.store(true, Ordering::SeqCst); - if self - .warned - .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst) - .is_ok() - { - eprintln!("thuki: [search trace] {msg}"); - } - } - - /// Test-only entrypoint mirroring [`fail_with`] without the warn-once - /// gate, exposed only so the rate-limiting unit test can verify the - /// `warned` flag stays latched across repeated calls. - #[cfg(test)] - fn warn_once(&self, msg: &str) { - self.fail_with(msg.to_string()); - } - - /// Ensures the file is open. On first call, creates the parent directory - /// and opens the file in append mode. Returns `None` and marks the - /// recorder failed on any I/O error. - /// - /// Filesystem-I/O wrapper: the body is just an open + mkdir thin shim - /// around `std::fs`. 
Logic is exercised end-to-end through the - /// `file_recorder_*` integration tests. - #[cfg_attr(coverage_nightly, coverage(off))] - fn ensure_open<'a>( - &self, - guard: &'a mut Option>, - ) -> Option<&'a mut BufWriter> { - if guard.is_some() { - return guard.as_mut(); - } - if let Some(parent) = self.path.parent() { - if let Err(e) = std::fs::create_dir_all(parent) { - self.fail_with(format!( - "could not create trace dir {}: {e}", - parent.display() - )); - return None; - } - } - match OpenOptions::new() - .create(true) - .append(true) - .open(&self.path) - { - Ok(f) => { - *guard = Some(BufWriter::new(f)); - guard.as_mut() - } - Err(e) => { - self.fail_with(format!( - "could not open trace file {}: {e}", - self.path.display() - )); - None - } - } - } -} - -impl PipelineRecorder for FileRecorder { - /// Filesystem-I/O wrapper: orchestrates [`serialize_event`] (covered) - /// and [`ensure_open`] (covered via integration tests). The body itself - /// is plumbing around `std::io::Write` that cannot be exercised - /// reliably for every error branch on every CI runner. - #[cfg_attr(coverage_nightly, coverage(off))] - fn record(&self, event: RecorderEvent) { - if self.failed.load(Ordering::SeqCst) { - return; - } - let seq = self.seq.fetch_add(1, Ordering::SeqCst); - let ts_ms = SystemTime::now() - .duration_since(UNIX_EPOCH) - .map(|d| d.as_millis() as u64) - .unwrap_or(0); - // `serialize_event` produces a JSON line from a typed `RecorderEvent`. - // `serde_json::to_value` cannot fail for our enum (no `Map<_, _>` keys - // are non-strings, no `f64` is non-finite), so we treat any error as - // unreachable-but-recoverable: latch the recorder as failed and skip - // the line. The `unreachable!`-equivalent path is covered indirectly - // through `fail_with`, which is itself a coverage-excluded I/O wrapper. 
- let line = serialize_event(seq, ts_ms, &event) - .unwrap_or_else(|e| serialize_failure_fallback(self, e)); - if line.is_empty() { - return; - } - let mut guard = self.state.lock(); - let Some(writer) = self.ensure_open(&mut guard) else { - return; - }; - if let Err(e) = writer - .write_all(line.as_bytes()) - .and_then(|()| writer.flush()) - { - self.fail_with(format!( - "could not write trace line to {}: {e}", - self.path.display() - )); - } - } -} - -/// Fallback used when `serialize_event` returns an error. Defense-in-depth -/// for a path that cannot fire under our typed events; coverage is excluded -/// because reaching it would require corrupting the in-memory `RecorderEvent` -/// representation. -#[cfg_attr(coverage_nightly, coverage(off))] -fn serialize_failure_fallback(recorder: &FileRecorder, err: serde_json::Error) -> String { - recorder.fail_with(format!("could not serialize event: {err}")); - String::new() -} - -/// Serializes a single event into a newline-terminated JSON record. -fn serialize_event(seq: u64, ts_ms: u64, event: &RecorderEvent) -> serde_json::Result { - let mut value = serde_json::to_value(event)?; - if let Some(map) = value.as_object_mut() { - map.insert("v".into(), Value::from(TRACE_SCHEMA_VERSION)); - map.insert("seq".into(), Value::from(seq)); - map.insert("ts_ms".into(), Value::from(ts_ms)); - } - let mut s = serde_json::to_string(&value)?; - s.push('\n'); - Ok(s) -} - -/// Manual `Serialize` for [`RecorderEvent`]: emits a `kind` discriminator -/// plus a flat payload. Avoids the default tagged-enum representation so -/// downstream consumers can grep `"kind":"llm_call"` directly. 
-impl Serialize for RecorderEvent { - fn serialize(&self, serializer: S) -> Result { - use serde::ser::SerializeMap; - let mut map = serializer.serialize_map(None)?; - match self { - RecorderEvent::TurnStart { - turn_id, - query, - model, - runtime_config, - history_len, - } => { - map.serialize_entry("kind", "turn_start")?; - map.serialize_entry("turn_id", turn_id)?; - map.serialize_entry("query", query)?; - map.serialize_entry("model", model)?; - map.serialize_entry("runtime_config", runtime_config)?; - map.serialize_entry("history_len", history_len)?; - } - RecorderEvent::LlmCall { - stage, - endpoint, - request_body, - response_raw, - latency_ms, - error, - } => { - map.serialize_entry("kind", "llm_call")?; - map.serialize_entry("stage", stage)?; - map.serialize_entry("endpoint", endpoint)?; - map.serialize_entry("request_body", request_body)?; - map.serialize_entry("response_raw", response_raw)?; - map.serialize_entry("latency_ms", latency_ms)?; - map.serialize_entry("error", error)?; - } - RecorderEvent::StreamingLlmCall { - stage, - endpoint, - request_body, - final_text, - tokens, - latency_ms, - error, - } => { - map.serialize_entry("kind", "streaming_llm_call")?; - map.serialize_entry("stage", stage)?; - map.serialize_entry("endpoint", endpoint)?; - map.serialize_entry("request_body", request_body)?; - map.serialize_entry("final_text", final_text)?; - map.serialize_entry("tokens", tokens)?; - map.serialize_entry("latency_ms", latency_ms)?; - map.serialize_entry("error", error)?; - } - RecorderEvent::SearxngQuery { - query, - url, - status, - response_raw, - normalized_results, - latency_ms, - error, - } => { - map.serialize_entry("kind", "searxng_query")?; - map.serialize_entry("query", query)?; - map.serialize_entry("url", url)?; - map.serialize_entry("status", status)?; - map.serialize_entry("response_raw", response_raw)?; - map.serialize_entry("normalized_results", normalized_results)?; - map.serialize_entry("latency_ms", latency_ms)?; - 
map.serialize_entry("error", error)?; - } - RecorderEvent::ReaderBatch { - urls, - per_url, - batch_latency_ms, - batch_error, - } => { - map.serialize_entry("kind", "reader_batch")?; - map.serialize_entry("urls", urls)?; - map.serialize_entry("per_url", per_url)?; - map.serialize_entry("batch_latency_ms", batch_latency_ms)?; - map.serialize_entry("batch_error", batch_error)?; - } - RecorderEvent::ChunkerBatch { - page_url, - chunk_count, - chunks, - } => { - map.serialize_entry("kind", "chunker_batch")?; - map.serialize_entry("page_url", page_url)?; - map.serialize_entry("chunk_count", chunk_count)?; - map.serialize_entry("chunks", chunks)?; - } - RecorderEvent::RerankResult { - query, - input_count, - top_k, - } => { - map.serialize_entry("kind", "rerank_result")?; - map.serialize_entry("query", query)?; - map.serialize_entry("input_count", input_count)?; - map.serialize_entry("top_k", top_k)?; - } - RecorderEvent::JudgeVerdict { - stage, - raw, - normalized, - } => { - map.serialize_entry("kind", "judge_verdict")?; - map.serialize_entry("stage", stage)?; - map.serialize_entry("raw", raw)?; - map.serialize_entry("normalized", normalized)?; - } - RecorderEvent::Warning { kind, payload } => { - map.serialize_entry("kind", "warning")?; - map.serialize_entry("warning_kind", kind)?; - map.serialize_entry("payload", payload)?; - } - RecorderEvent::SearchEventEmitted { event } => { - map.serialize_entry("kind", "search_event")?; - map.serialize_entry("event", event)?; - } - RecorderEvent::TurnEnd { - turn_id, - final_action, - final_source_urls, - total_latency_ms, - error, - } => { - map.serialize_entry("kind", "turn_end")?; - map.serialize_entry("turn_id", turn_id)?; - map.serialize_entry("final_action", final_action)?; - map.serialize_entry("final_source_urls", final_source_urls)?; - map.serialize_entry("total_latency_ms", total_latency_ms)?; - map.serialize_entry("error", error)?; - } - } - map.end() - } -} - -/// In-memory recorder used by tests to assert what the 
pipeline emitted. -/// Lives in a `cfg(test)` block so production builds never link it. -#[cfg(test)] -#[derive(Debug, Default)] -pub(crate) struct MockRecorder { - events: Mutex>, -} - -#[cfg(test)] -impl MockRecorder { - pub(crate) fn new() -> Self { - Self::default() - } - - pub(crate) fn snapshot(&self) -> Vec { - self.events.lock().clone() - } - - pub(crate) fn len(&self) -> usize { - self.events.lock().len() - } -} - -#[cfg(test)] -impl PipelineRecorder for MockRecorder { - fn record(&self, event: RecorderEvent) { - self.events.lock().push(event); - } -} - -#[cfg(test)] -mod tests { - use super::*; - use serde_json::json; - use std::sync::Arc; - - fn fresh_dir() -> PathBuf { - let dir = std::env::temp_dir().join(format!("thuki-trace-tests-{}", uuid::Uuid::new_v4())); - std::fs::create_dir_all(&dir).unwrap(); - dir - } - - fn read_lines(path: &Path) -> Vec { - let s = std::fs::read_to_string(path).expect("read trace file"); - s.lines() - .map(|l| serde_json::from_str::(l).expect("valid json line")) - .collect() - } - - #[test] - fn noop_recorder_swallows_every_event() { - let r = NoopRecorder; - r.record(RecorderEvent::Warning { - kind: "x".into(), - payload: json!({}), - }); - // Nothing observable; the test simply asserts no panic and no I/O. - } - - #[test] - fn new_turn_id_format_is_unix_secs_dash_uuid() { - let id = new_turn_id(); - let (secs, uuid) = id.split_once('-').expect("contains dash"); - assert!(secs.parse::().is_ok(), "secs prefix must parse: {id}"); - assert_eq!(uuid.len(), 36, "uuid v4 length: {id}"); - } - - #[test] - fn new_turn_id_unique_across_calls() { - let a = new_turn_id(); - let b = new_turn_id(); - assert_ne!(a, b); - } - - #[test] - fn file_recorder_lazy_creates_dir_and_file_on_first_record() { - let dir = fresh_dir().join("nested").join("traces"); - assert!(!dir.exists(), "dir must not exist before record"); - let r = FileRecorder::new(&dir, "test-turn"); - // Constructor must not touch the filesystem. 
- assert!(!dir.exists(), "constructor must be lazy"); - r.record(RecorderEvent::Warning { - kind: "k".into(), - payload: json!({"a": 1}), - }); - assert!(dir.exists(), "dir created on first record"); - assert!(r.path().exists(), "file created on first record"); - } - - #[test] - fn file_recorder_writes_one_jsonl_line_per_record_with_schema_metadata() { - let dir = fresh_dir(); - let r = FileRecorder::new(&dir, "turn-a"); - r.record(RecorderEvent::TurnStart { - turn_id: "turn-a".into(), - query: "what is rust?".into(), - model: "qwen3:30b".into(), - runtime_config: json!({"max_iterations": 3}), - history_len: 2, - }); - r.record(RecorderEvent::Warning { - kind: "JudgeFailed".into(), - payload: json!({"reason": "parse"}), - }); - let lines = read_lines(r.path()); - assert_eq!(lines.len(), 2, "two records => two lines"); - assert_eq!(lines[0]["v"], TRACE_SCHEMA_VERSION); - assert_eq!(lines[0]["seq"], 0); - assert_eq!(lines[0]["kind"], "turn_start"); - assert_eq!(lines[0]["query"], "what is rust?"); - assert_eq!(lines[0]["history_len"], 2); - assert_eq!(lines[1]["seq"], 1); - assert_eq!(lines[1]["kind"], "warning"); - assert_eq!(lines[1]["warning_kind"], "JudgeFailed"); - assert!(lines[0]["ts_ms"].as_u64().unwrap() > 0); - } - - #[test] - fn file_recorder_serializes_every_event_kind() { - let dir = fresh_dir(); - let r = FileRecorder::new(&dir, "turn-all"); - r.record(RecorderEvent::TurnStart { - turn_id: "t".into(), - query: "q".into(), - model: "m".into(), - runtime_config: json!({}), - history_len: 0, - }); - r.record(RecorderEvent::LlmCall { - stage: "router".into(), - endpoint: "http://x/api/chat".into(), - request_body: json!({"messages": []}), - response_raw: Some("{}".into()), - latency_ms: 12, - error: None, - }); - r.record(RecorderEvent::StreamingLlmCall { - stage: "synthesis".into(), - endpoint: "http://x/api/chat".into(), - request_body: json!({}), - final_text: Some("answer".into()), - tokens: 7, - latency_ms: 100, - error: None, - }); - 
r.record(RecorderEvent::SearxngQuery { - query: "rust".into(), - url: "http://s/search".into(), - status: Some(200), - response_raw: Some("{}".into()), - normalized_results: json!([]), - latency_ms: 5, - error: None, - }); - r.record(RecorderEvent::ReaderBatch { - urls: vec!["http://a".into()], - per_url: vec![ReaderUrlOutcome { - url: "http://a".into(), - status: Some(200), - latency_ms: 9, - raw_body: Some("".into()), - extracted_text: Some("hi".into()), - error: None, - }], - batch_latency_ms: 11, - batch_error: None, - }); - r.record(RecorderEvent::ChunkerBatch { - page_url: "http://a".into(), - chunk_count: 1, - chunks: vec!["chunk".into()], - }); - r.record(RecorderEvent::RerankResult { - query: "rust".into(), - input_count: 1, - top_k: vec![RerankedChunk { - source_url: "http://a".into(), - score: 0.8, - text: "chunk".into(), - }], - }); - r.record(RecorderEvent::JudgeVerdict { - stage: "snippet".into(), - raw: "{}".into(), - normalized: json!({"sufficiency": "Sufficient"}), - }); - r.record(RecorderEvent::SearchEventEmitted { - event: json!({"type": "Done"}), - }); - r.record(RecorderEvent::TurnEnd { - turn_id: "t".into(), - final_action: "answer".into(), - final_source_urls: vec!["http://a".into()], - total_latency_ms: 1234, - error: None, - }); - let lines = read_lines(r.path()); - let kinds: Vec<&str> = lines.iter().map(|l| l["kind"].as_str().unwrap()).collect(); - assert_eq!( - kinds, - vec![ - "turn_start", - "llm_call", - "streaming_llm_call", - "searxng_query", - "reader_batch", - "chunker_batch", - "rerank_result", - "judge_verdict", - "search_event", - "turn_end" - ] - ); - } - - #[test] - fn file_recorder_path_returns_target_path_with_jsonl_extension() { - let dir = fresh_dir(); - let r = FileRecorder::new(&dir, "turn-x"); - assert_eq!(r.path(), dir.join("turn-x.jsonl")); - } - - #[test] - fn file_recorder_uses_arc_dyn_via_trait_object() { - let dir = fresh_dir(); - let r: Arc = Arc::new(FileRecorder::new(&dir, "turn-arc")); - 
r.record(RecorderEvent::Warning { - kind: "k".into(), - payload: json!({}), - }); - // Coverage: verifies the Arc shape compiles and dispatches. - } - - #[test] - fn file_recorder_open_failure_is_silent_and_latches() { - // Pass a path whose parent we make impossible to create: a regular - // file at the parent location. mkdir -p fails because the path - // component already exists as a file. - let dir = fresh_dir(); - let blocking_file = dir.join("blocker"); - std::fs::write(&blocking_file, b"x").unwrap(); - let r = FileRecorder::new(blocking_file.join("inside"), "turn-fail"); - r.record(RecorderEvent::Warning { - kind: "k".into(), - payload: json!({}), - }); - assert!( - r.failed.load(Ordering::SeqCst), - "open failure must latch the recorder" - ); - // Second call exits at the latch check, exercising that branch. - r.record(RecorderEvent::Warning { - kind: "k2".into(), - payload: json!({}), - }); - } - - #[test] - fn file_recorder_open_failure_when_target_path_is_a_directory_is_silent() { - // mkdir -p of the parent succeeds, but `OpenOptions::open` on the - // jsonl path fails because that path already exists and is a - // directory. Exercises the OpenOptions Err arm in `ensure_open` - // (distinct from the create_dir_all Err arm covered elsewhere). - let dir = fresh_dir(); - let r = FileRecorder::new(&dir, "turn-dir-collision"); - // Pre-create the JSONL path AS A DIRECTORY, so OpenOptions append - // hits "Is a directory" when it tries to open it as a file. 
- std::fs::create_dir_all(r.path()).unwrap(); - r.record(RecorderEvent::Warning { - kind: "k".into(), - payload: json!({}), - }); - assert!( - r.failed.load(Ordering::SeqCst), - "open failure on directory-collision must latch the recorder" - ); - } - - #[test] - fn file_recorder_warn_once_only_prints_first_failure() { - let dir = fresh_dir(); - let blocking_file = dir.join("blocker2"); - std::fs::write(&blocking_file, b"x").unwrap(); - let r = FileRecorder::new(blocking_file.join("nested"), "turn-warn"); - // First record triggers warn + latch. - r.record(RecorderEvent::Warning { - kind: "k".into(), - payload: json!({}), - }); - assert!(r.warned.load(Ordering::SeqCst)); - // Direct repeated calls to warn_once must not flip the flag again. - r.warn_once("ignored"); - r.warn_once("ignored2"); - assert!(r.warned.load(Ordering::SeqCst)); - } - - #[test] - fn mock_recorder_collects_events_in_order() { - let m = MockRecorder::new(); - m.record(RecorderEvent::Warning { - kind: "a".into(), - payload: json!({}), - }); - m.record(RecorderEvent::Warning { - kind: "b".into(), - payload: json!({}), - }); - assert_eq!(m.len(), 2); - let snap = m.snapshot(); - // Round-trip through JSON serialization keeps the assertion exhaustive - // without an unreachable fallback match arm: we built two `Warning` - // events with `kind: a` and `kind: b`, so the on-the-wire shape must - // reflect that ordering exactly. 
- let dump: Vec = snap - .iter() - .map(|e| serde_json::to_value(e).unwrap()) - .collect(); - assert_eq!(dump[0]["kind"], "warning"); - assert_eq!(dump[0]["warning_kind"], "a"); - assert_eq!(dump[1]["kind"], "warning"); - assert_eq!(dump[1]["warning_kind"], "b"); - } -} diff --git a/src-tauri/src/search/searxng.rs b/src-tauri/src/search/searxng.rs index a8cf1a3..324ba66 100644 --- a/src-tauri/src/search/searxng.rs +++ b/src-tauri/src/search/searxng.rs @@ -15,8 +15,8 @@ use std::time::Duration; use crate::config::defaults::{DEFAULT_MAX_QUERY_CHARS, DEFAULT_MAX_SNIPPET_CHARS}; -use super::recorder::{PipelineRecorder, RecorderEvent}; use super::types::{SearchError, SearxResponse, SearxResult}; +use crate::trace::{BoundRecorder, RecorderEvent}; /// Maximum character length retained per snippet/title. Uses character count /// (not bytes) so multi-byte text is not mid-codepoint-truncated. Re-exported @@ -55,7 +55,7 @@ pub async fn search( query: &str, timeout_s: u64, max_results: usize, - recorder: &Arc, + recorder: &Arc, ) -> Result, SearchError> { let trimmed = query.trim(); if trimmed.is_empty() { @@ -179,7 +179,7 @@ pub async fn search_all_with_endpoint( queries: &[String], timeout_s: u64, max_results: usize, - recorder: &Arc, + recorder: &Arc, ) -> Result, SearchError> { if queries.is_empty() { return Ok(Vec::new()); @@ -222,7 +222,9 @@ pub async fn search_all_with_base( } let endpoint = format!("{}/search", base.trim_end_matches('/')); - let recorder: Arc = Arc::new(super::recorder::NoopRecorder); + let recorder: Arc = Arc::new(BoundRecorder::noop_for( + crate::trace::ConversationId::new("searxng-search-all"), + )); search_all_with_endpoint( &endpoint, queries, @@ -271,13 +273,15 @@ fn truncate_chars(s: &str, max: usize) -> String { #[cfg(test)] mod tests { use super::*; - use crate::search::recorder::NoopRecorder; + use crate::trace::ConversationId; - /// Local helper: a noop recorder wrapped in `Arc`, + /// Local helper: a noop recorder bound to a sentinel 
conversation id, /// used everywhere this module exercises `search` / /// `search_all_with_endpoint` without asserting on trace output. - fn noop_recorder() -> Arc { - Arc::new(NoopRecorder) + fn noop_recorder() -> Arc { + Arc::new(BoundRecorder::noop_for(ConversationId::new( + "test-conv-searxng", + ))) } #[test] @@ -618,12 +622,14 @@ mod tests { #[cfg(test)] mod parallel_tests { use super::*; - use crate::search::recorder::NoopRecorder; + use crate::trace::ConversationId; use wiremock::matchers::{method, path, query_param}; use wiremock::{Mock, MockServer, ResponseTemplate}; - fn noop_recorder() -> Arc { - Arc::new(NoopRecorder) + fn noop_recorder() -> Arc { + Arc::new(BoundRecorder::noop_for(ConversationId::new( + "test-conv-searxng-parallel", + ))) } fn fixture(q: &str, url: &str) -> serde_json::Value { diff --git a/src-tauri/src/search/types.rs b/src-tauri/src/search/types.rs index 64bd293..8d4c703 100644 --- a/src-tauri/src/search/types.rs +++ b/src-tauri/src/search/types.rs @@ -165,6 +165,18 @@ pub enum SearchEvent { /// running. The frontend renders a static setup-guidance card instead of a /// generic error bubble. SandboxUnavailable, + /// No active model is selected. Mirrors the chat-side + /// `OllamaErrorKind::NoModelSelected` bail so the frontend can keep its + /// `is_first_turn` flag pristine when the backend bails before + /// `ConversationStart` fires. + NoModelSelected, + /// Emitted exactly once per turn, after the search backend has cleared + /// every pre-`ConversationStart` gate (no-model bail, sandbox probe) + /// and committed to opening the trace. Frontend uses it to retire + /// `is_first_turn` independently of token-arrival ordering, so a + /// cancel-before-first-token does not leave the flag in a state that + /// would emit a duplicate `ConversationStart` on the next turn. + TurnAccepted, /// Emitted after each retrieval iteration completes (after the judge /// verdict is received). 
Allows the frontend to stream trace rows live /// as the pipeline progresses rather than receiving all traces at once. diff --git a/src-tauri/src/settings_commands.rs b/src-tauri/src/settings_commands.rs index fdbda23..2fe2740 100644 --- a/src-tauri/src/settings_commands.rs +++ b/src-tauri/src/settings_commands.rs @@ -94,6 +94,14 @@ fn is_allowed_section(section: &str) -> bool { ALLOWED_SECTIONS.contains(§ion) } +/// Returns true when the post-write `AppConfig` flips `[debug] trace_enabled` +/// relative to the pre-write snapshot. Pulled out so the predicate is +/// covered by tests instead of riding inside the coverage-off Tauri command +/// bodies that own the hot-swap. +pub(crate) fn trace_enabled_changed(prior_enabled: bool, resolved: &AppConfig) -> bool { + resolved.debug.trace_enabled != prior_enabled +} + // ─── Tauri command surface ────────────────────────────────────────────────── /// Returns the current resolved `AppConfig` snapshot. @@ -120,14 +128,27 @@ pub fn set_config_field( value: JsonValue, app: AppHandle, state: State<'_, RwLock>, + trace_recorder: State<'_, std::sync::Arc>, ) -> Result { let path = config_path(&app)?; + let prior_trace_enabled = state.read().debug.trace_enabled; let resolved = { let mut guard = state.write(); let resolved = write_field_to_disk(&path, §ion, &key, value)?; *guard = resolved.clone(); resolved }; + // Hot-swap the live trace recorder on `[debug] trace_enabled` flips + // so the user does not need to restart Thuki for the toggle to + // take effect. Off → On installs a fresh `RegistryRecorder` rooted + // at `app_data_dir()/traces/`; On → Off installs a `NoopRecorder`, + // which lets in-flight streaming tasks finish writing through their + // cached `Arc` clones (via `Arc` semantics) while new + // events fall through to noop. 
+ if trace_enabled_changed(prior_trace_enabled, &resolved) { + let new_inner = crate::build_trace_inner(&app, resolved.debug.trace_enabled); + trace_recorder.replace(new_inner); + } emit_config_updated(&app); Ok(resolved) } @@ -183,14 +204,24 @@ pub fn reset_config( section: Option, app: AppHandle, state: State<'_, RwLock>, + trace_recorder: State<'_, std::sync::Arc>, ) -> Result { let path = config_path(&app)?; + let prior_trace_enabled = state.read().debug.trace_enabled; let resolved = { let mut guard = state.write(); let resolved = reset_section_on_disk(&path, section.as_deref())?; *guard = resolved.clone(); resolved }; + // Hot-swap the live trace recorder if `reset_config` flipped the + // `[debug] trace_enabled` value (resetting the whole file or just + // the `[debug]` section both restore the compiled default of + // `false`, so an On → Off transition is the realistic case). + if trace_enabled_changed(prior_trace_enabled, &resolved) { + let new_inner = crate::build_trace_inner(&app, resolved.debug.trace_enabled); + trace_recorder.replace(new_inner); + } emit_config_updated(&app); Ok(resolved) } @@ -254,14 +285,23 @@ pub(crate) fn reset_section_on_disk( pub fn reload_config_from_disk( app: AppHandle, state: State<'_, RwLock>, + trace_recorder: State<'_, std::sync::Arc>, ) -> Result { let path = config_path(&app)?; + let prior_trace_enabled = state.read().debug.trace_enabled; let resolved = { let mut guard = state.write(); let resolved = config::load_from_path(&path)?; *guard = resolved.clone(); resolved }; + // Hot-swap the live trace recorder if a manual edit to config.toml + // flipped `[debug] trace_enabled` and the user clicked "Refresh + // from disk" to pick it up. 
+ if trace_enabled_changed(prior_trace_enabled, &resolved) { + let new_inner = crate::build_trace_inner(&app, resolved.debug.trace_enabled); + trace_recorder.replace(new_inner); + } emit_config_updated(&app); Ok(resolved) } diff --git a/src-tauri/src/settings_commands/tests.rs b/src-tauri/src/settings_commands/tests.rs index 5c6c42b..101551c 100644 --- a/src-tauri/src/settings_commands/tests.rs +++ b/src-tauri/src/settings_commands/tests.rs @@ -14,10 +14,10 @@ use toml_edit::DocumentMut; use super::{ coerce_json_to_toml, is_allowed_field, is_allowed_section, json_type_name, json_value_to_toml_item, patch_document, read_document, reset_section_on_disk, - write_field_to_disk, + trace_enabled_changed, write_field_to_disk, }; use crate::config::defaults::{ALLOWED_FIELDS, ALLOWED_SECTIONS}; -use crate::config::ConfigError; +use crate::config::{AppConfig, ConfigError}; // ─── Test fixtures ────────────────────────────────────────────────────────── @@ -724,6 +724,31 @@ fn reset_section_on_disk_whole_file_propagates_io_error_when_parent_dir_is_reado matches!(err, ConfigError::IoError { .. 
}); } +// ─── trace_enabled_changed ─────────────────────────────────────────────────── + +#[test] +fn trace_enabled_changed_detects_off_to_on() { + let mut cfg = AppConfig::default(); + cfg.debug.trace_enabled = true; + assert!(trace_enabled_changed(false, &cfg)); +} + +#[test] +fn trace_enabled_changed_detects_on_to_off() { + let mut cfg = AppConfig::default(); + cfg.debug.trace_enabled = false; + assert!(trace_enabled_changed(true, &cfg)); +} + +#[test] +fn trace_enabled_changed_returns_false_when_value_unchanged() { + let mut cfg = AppConfig::default(); + cfg.debug.trace_enabled = true; + assert!(!trace_enabled_changed(true, &cfg)); + cfg.debug.trace_enabled = false; + assert!(!trace_enabled_changed(false, &cfg)); +} + // ─── Helpers ───────────────────────────────────────────────────────────────── fn matches_type_mismatch(err: &ConfigError, section: &str, key: &str) { diff --git a/src-tauri/src/trace/ids.rs b/src-tauri/src/trace/ids.rs new file mode 100644 index 0000000..d2c77fc --- /dev/null +++ b/src-tauri/src/trace/ids.rs @@ -0,0 +1,238 @@ +//! Identity types used by the unified trace recorder. +//! +//! `ConversationId` is a newtype around `String` so the type system +//! distinguishes a conversation id from a turn id at every API boundary +//! that touches both. The chat layer is the only producer; SQLite stores +//! the same value for the user-facing conversation history UI. +//! +//! `new_turn_id()` is the canonical search-pipeline turn-id generator, +//! kept here so both the chat-domain and search-domain code paths source +//! their identifiers from a single module. + +use std::time::{SystemTime, UNIX_EPOCH}; + +use serde::{Deserialize, Serialize}; + +/// Stable identifier for a single user-facing conversation. The frontend +/// generates the value (matching the SQLite schema's `conversation_id` +/// column) and threads it through every Tauri command that records trace +/// events for the conversation. 
+/// +/// Newtype over `String` so a function expecting a conversation id cannot +/// silently accept a turn id, message id, or arbitrary user input. The +/// conversion to/from `String` is explicit at every boundary. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Serialize, Deserialize)] +// `into` + `from` make every (de)serialization detour through +// `From<String>` / `From<ConversationId> for String`, which forces +// sanitization on the deserialize path. Wire shape remains a bare string, +// matching the prior `serde(transparent)` contract. +#[serde(into = "String", from = "String")] +pub struct ConversationId(String); + +/// Sentinel returned by [`ConversationId::new`] when sanitization strips the +/// input down to nothing (empty IPC payload, all-`/` traversal, etc.). All +/// such inputs collide on the same on-disk file, which is the safe outcome: +/// no traversal, no info leak, attacker-controlled input still gets a stable +/// per-process bucket. +pub const SANITIZED_FALLBACK: &str = "invalid-conversation-id"; + +/// Replaces every path-traversal vector with a safe character. Defense in +/// depth: production callers route `crypto.randomUUID()` through this +/// constructor, but the IPC boundary accepts arbitrary strings, so the +/// recorder must never let an attacker steer the path that `FileRecorder` +/// joins onto `app_data_dir/traces/{chat,search}/`. +fn sanitize(raw: String) -> String { + // Step 1: collapse the path separators and NUL into a benign filler. + let separator_safe: String = raw + .chars() + .map(|c| match c { + '/' | '\\' | '\0' => '_', + c => c, + }) + .collect(); + // Step 2: rewrite `..` so `Path::join` cannot climb out of the trace + // directory even if the caller stitched `..` segments around the + // already-replaced separators. 
+ let traversal_safe = separator_safe.replace("..", "__"); + if traversal_safe.is_empty() { + SANITIZED_FALLBACK.to_string() + } else { + traversal_safe + } +} + +impl ConversationId { + /// Constructs a `ConversationId`, sanitizing the input so it can be + /// joined onto a filesystem path without escaping the trace directory. + /// See [`sanitize`] for the exact rewrite rules. + pub fn new(s: impl Into<String>) -> Self { + Self(sanitize(s.into())) + } + + /// Returns the underlying string slice. Useful for logging, display, + /// and the rare callsite that needs to feed the id back into a Tauri + /// IPC call without taking ownership. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl std::fmt::Display for ConversationId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0) + } +} + +impl From<String> for ConversationId { + fn from(s: String) -> Self { + Self::new(s) + } +} + +impl From<&str> for ConversationId { + fn from(s: &str) -> Self { + Self::new(s) + } +} + +impl From<ConversationId> for String { + fn from(id: ConversationId) -> String { + id.0 + } +} + +/// Builds a fresh, sortable, collision-resistant turn id for the search +/// pipeline. +/// +/// Format: `<unix_secs>-<uuid_v4>`. Seconds prefix is eyeball-readable when +/// browsing the traces directory; the v4 UUID guarantees uniqueness across +/// concurrent turns within the same second. 
+pub fn new_turn_id() -> String { + let secs = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + format!("{secs}-{}", uuid::Uuid::new_v4()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn conversation_id_roundtrips_through_string() { + let id = ConversationId::new("conv-abc123"); + assert_eq!(id.as_str(), "conv-abc123"); + assert_eq!(id.to_string(), "conv-abc123"); + } + + #[test] + fn conversation_id_from_string_matches_from_str() { + let a = ConversationId::from("conv-x".to_string()); + let b = ConversationId::from("conv-x"); + assert_eq!(a, b); + } + + #[test] + fn conversation_id_serializes_as_bare_string() { + let id = ConversationId::new("conv-y"); + let json = serde_json::to_string(&id).unwrap(); + // `serde(into = "String")` keeps the wire shape as a bare string, + // so consumers reading JSONL never see a `{ "0": "..." }` wrapper. + assert_eq!(json, "\"conv-y\""); + } + + #[test] + fn conversation_id_deserializes_from_bare_string() { + let id: ConversationId = serde_json::from_str("\"conv-z\"").unwrap(); + assert_eq!(id.as_str(), "conv-z"); + } + + #[test] + fn sanitize_strips_forward_slash_path_traversal() { + // Joining `../etc/passwd` onto `traces/chat/` would resolve to + // `traces/etc/passwd`, escaping the trace directory. Sanitize + // collapses every separator into `_` so the join stays inside. 
+ let id = ConversationId::new("../etc/passwd"); + assert!(!id.as_str().contains('/'), "id leaked separator: {id}"); + assert!(!id.as_str().contains(".."), "id leaked traversal: {id}"); + } + + #[test] + fn sanitize_strips_backslash_traversal() { + let id = ConversationId::new("..\\..\\Windows\\System32"); + assert!(!id.as_str().contains('\\')); + assert!(!id.as_str().contains("..")); + } + + #[test] + fn sanitize_strips_nul_byte() { + let id = ConversationId::new("conv\0evil"); + assert!(!id.as_str().contains('\0')); + } + + #[test] + fn sanitize_collapses_chained_dot_dot() { + // `....` must collapse fully so no `..` segment survives a single + // sweep. `str::replace` is non-overlapping so we verify the output. + let id = ConversationId::new("...."); + assert_eq!(id.as_str(), "____"); + } + + #[test] + fn sanitize_empty_input_falls_back_to_sentinel() { + let id = ConversationId::new(""); + assert_eq!(id.as_str(), SANITIZED_FALLBACK); + } + + #[test] + fn sanitize_preserves_uuid_shape() { + // Production input is `crypto.randomUUID()`; this must round-trip + // bit-for-bit so trace files keep their canonical filenames. + let uuid = "550e8400-e29b-41d4-a716-446655440000"; + let id = ConversationId::new(uuid); + assert_eq!(id.as_str(), uuid); + } + + #[test] + fn from_str_and_from_string_route_through_sanitize() { + let a = ConversationId::from("../evil".to_string()); + let b = ConversationId::from("../evil"); + assert_eq!(a, b); + assert!(!a.as_str().contains("..")); + assert!(!a.as_str().contains('/')); + } + + #[test] + fn deserialize_routes_through_sanitize() { + // A trace file whose `conversation_id` somehow contained a + // traversal payload must NOT be able to weaponize a re-read. 
+        let id: ConversationId = serde_json::from_str("\"../etc\"").unwrap();
+        assert!(!id.as_str().contains(".."));
+        assert!(!id.as_str().contains('/'));
+    }
+
+    #[test]
+    fn into_string_returns_sanitized_payload() {
+        // `serde(into = "String")` relies on this conversion; the
+        // returned string must equal the in-memory storage.
+        let id = ConversationId::new("../foo");
+        let s: String = id.clone().into();
+        assert_eq!(s, id.as_str());
+    }
+
+    #[test]
+    fn new_turn_id_format_is_unix_secs_dash_uuid() {
+        let id = new_turn_id();
+        let (secs, uuid) = id.split_once('-').expect("contains dash");
+        assert!(secs.parse::<u64>().is_ok(), "secs prefix must parse: {id}");
+        assert_eq!(uuid.len(), 36, "uuid v4 length: {id}");
+    }
+
+    #[test]
+    fn new_turn_id_unique_across_calls() {
+        let a = new_turn_id();
+        let b = new_turn_id();
+        assert_ne!(a, b);
+    }
+}
diff --git a/src-tauri/src/trace/live.rs b/src-tauri/src/trace/live.rs
new file mode 100644
index 0000000..e3087d9
--- /dev/null
+++ b/src-tauri/src/trace/live.rs
+//! Hot-swappable trace recorder. The single managed-state instance
+//! whose internal recorder can flip between `RegistryRecorder` and
+//! `NoopRecorder` at runtime when the user toggles
+//! `[debug] trace_enabled` from the Settings panel.
+//!
+//! # Why a wrapper
+//!
+//! `Arc<dyn TraceRecorder>` is locked in once at startup: the chat
+//! command, the search pipeline, and the screenshot command all hold
+//! their own `Arc` clones of the initial managed state, and Tauri's
+//! managed-state surface has no way to swap a value out from under
+//! existing State<'_> handles. `LiveTraceRecorder` solves this by
+//! being the trait implementation itself; it forwards every
+//! `record()` call to whatever inner recorder is currently installed.
+//! Swap = write-lock + replace.
+//!
+//! # Concurrency
+//!
+//! `record()` takes a read-lock just long enough to clone the inner
+//! `Arc`, then drops the lock before forwarding the call. This keeps
+//! 
the read path lock-free in practice (the lock is uncontended for
+//! the duration of an `Arc::clone`) and means an in-flight record
+//! cannot deadlock against an in-flight swap. Streaming tasks that
+//! cached an `Arc<dyn TraceRecorder>` whose inner is this `LiveTraceRecorder`
+//! pick up the new behaviour on the next `record()` call after the
+//! swap.
+
+use std::sync::Arc;
+
+use parking_lot::RwLock;
+
+use super::ids::ConversationId;
+use super::recorder::{NoopRecorder, RecorderEvent, TraceRecorder};
+
+/// Trace recorder whose backing implementation can be replaced at
+/// runtime. Installed as Tauri managed state in `lib.rs::run()`; swapped
+/// from `settings_commands::set_config_field` when the user flips
+/// `[debug] trace_enabled` in the Settings panel.
+///
+/// Manual `Debug` impl: `dyn TraceRecorder` does not require `Debug`,
+/// so the inner recorder is rendered as an opaque marker (mirrors the
+/// pattern in `BoundRecorder`).
+pub struct LiveTraceRecorder {
+    inner: RwLock<Arc<dyn TraceRecorder>>,
+}
+
+impl std::fmt::Debug for LiveTraceRecorder {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("LiveTraceRecorder")
+            .field("inner", &"<dyn TraceRecorder>")
+            .finish()
+    }
+}
+
+impl LiveTraceRecorder {
+    /// Wraps `initial` as the starting recorder. Production sites pass
+    /// either a `RegistryRecorder` (when `trace_enabled = true` at
+    /// startup) or a `NoopRecorder` (the default).
+    pub fn new(initial: Arc<dyn TraceRecorder>) -> Self {
+        Self {
+            inner: RwLock::new(initial),
+        }
+    }
+
+    /// Convenience constructor used at app startup when tracing is off.
+    /// Equivalent to `LiveTraceRecorder::new(Arc::new(NoopRecorder))`.
+    pub fn noop() -> Self {
+        Self::new(Arc::new(NoopRecorder))
+    }
+
+    /// Replaces the inner recorder. Subsequent `record()` calls go to
+    /// `new_inner`; in-flight records that had already cloned the prior
+    /// inner finish writing through it (Arc semantics keep the previous
+    /// recorder alive until the last clone drops).
+    pub fn replace(&self, new_inner: Arc<dyn TraceRecorder>) {
+        *self.inner.write() = new_inner;
+    }
+}
+
+impl TraceRecorder for LiveTraceRecorder {
+    fn record(&self, conversation_id: &ConversationId, event: RecorderEvent) {
+        // Clone the Arc under read-lock, then drop the lock before
+        // forwarding the call. Keeps swap and record from blocking
+        // each other for the duration of the actual file I/O.
+        let inner = {
+            let guard = self.inner.read();
+            Arc::clone(&*guard)
+        };
+        inner.record(conversation_id, event);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use parking_lot::Mutex;
+    use serde_json::json;
+
+    /// Test-local recorder that counts the events it sees, tagged with
+    /// a label so the swap test can tell which inner was active for
+    /// each emit.
+    #[derive(Debug, Default)]
+    struct LabeledCounter {
+        label: &'static str,
+        seen: Mutex<Vec<(String, &'static str)>>,
+    }
+    impl LabeledCounter {
+        fn new(label: &'static str) -> Self {
+            Self {
+                label,
+                seen: Mutex::new(Vec::new()),
+            }
+        }
+    }
+    impl TraceRecorder for LabeledCounter {
+        fn record(&self, conversation_id: &ConversationId, _event: RecorderEvent) {
+            self.seen
+                .lock()
+                .push((conversation_id.as_str().to_owned(), self.label));
+        }
+    }
+
+    fn cid(s: &str) -> ConversationId {
+        ConversationId::new(s)
+    }
+
+    fn warning() -> RecorderEvent {
+        RecorderEvent::Warning {
+            kind: "k".into(),
+            payload: json!({}),
+        }
+    }
+
+    #[test]
+    fn record_routes_to_initial_inner() {
+        let counter = Arc::new(LabeledCounter::new("initial"));
+        let live = LiveTraceRecorder::new(counter.clone());
+        live.record(&cid("conv-a"), warning());
+        let seen = counter.seen.lock().clone();
+        assert_eq!(seen, vec![("conv-a".to_owned(), "initial")]);
+    }
+
+    #[test]
+    fn replace_swaps_inner_and_subsequent_records_use_the_new_recorder() {
+        let first = Arc::new(LabeledCounter::new("before"));
+        let second = Arc::new(LabeledCounter::new("after"));
+        let live = LiveTraceRecorder::new(first.clone());
+        live.record(&cid("conv-1"), warning());
+        
live.replace(second.clone()); + live.record(&cid("conv-1"), warning()); + live.record(&cid("conv-2"), warning()); + assert_eq!( + first.seen.lock().clone(), + vec![("conv-1".to_owned(), "before")], + "pre-swap event must route to the original inner only" + ); + assert_eq!( + second.seen.lock().clone(), + vec![ + ("conv-1".to_owned(), "after"), + ("conv-2".to_owned(), "after"), + ], + "post-swap events must route to the new inner only" + ); + } + + #[test] + fn replace_can_be_invoked_multiple_times() { + let a = Arc::new(LabeledCounter::new("a")); + let b = Arc::new(LabeledCounter::new("b")); + let c = Arc::new(LabeledCounter::new("c")); + let live = LiveTraceRecorder::new(a.clone()); + live.record(&cid("x"), warning()); + live.replace(b.clone()); + live.record(&cid("x"), warning()); + live.replace(c.clone()); + live.record(&cid("x"), warning()); + live.replace(a.clone()); + live.record(&cid("x"), warning()); + assert_eq!(a.seen.lock().len(), 2, "first + final emits land on a"); + assert_eq!(b.seen.lock().len(), 1, "single emit lands on b"); + assert_eq!(c.seen.lock().len(), 1, "single emit lands on c"); + } + + #[test] + fn noop_constructor_swallows_events_until_replaced() { + let live = LiveTraceRecorder::noop(); + // No panic, no observable side effect. + live.record(&cid("conv-noop"), warning()); + + let counter = Arc::new(LabeledCounter::new("active")); + live.replace(counter.clone()); + live.record(&cid("conv-noop"), warning()); + assert_eq!( + counter.seen.lock().clone(), + vec![("conv-noop".to_owned(), "active")], + "post-replace events route to the new recorder; pre-replace was a true noop" + ); + } + + #[test] + fn live_recorder_implements_trace_recorder_trait_object() { + // Compiles only if `LiveTraceRecorder: TraceRecorder`. Exists + // so a future refactor cannot accidentally break the trait + // impl that lib.rs depends on for `Arc` + // coercion. 
+ let live: Arc = Arc::new(LiveTraceRecorder::noop()); + live.record(&cid("trait-obj"), warning()); + } + + #[test] + fn debug_impl_renders_opaque_inner() { + // The manual Debug impl swaps the trait-object inner for an + // opaque marker so debug output stays diagnostic-friendly + // without leaking the recorder's internal state. Mirrors the + // pattern in `BoundRecorder::Debug`. + let live = LiveTraceRecorder::noop(); + let rendered = format!("{live:?}"); + assert!( + rendered.contains("LiveTraceRecorder"), + "debug output must label the type: {rendered}" + ); + assert!( + rendered.contains(""), + "debug output must use the opaque inner marker: {rendered}" + ); + } +} diff --git a/src-tauri/src/trace/mod.rs b/src-tauri/src/trace/mod.rs new file mode 100644 index 0000000..fd034b0 --- /dev/null +++ b/src-tauri/src/trace/mod.rs @@ -0,0 +1,178 @@ +//! Unified forensic trace recorder for the chat layer and the `/search` +//! pipeline. +//! +//! Three submodules: +//! - [`ids`]: `ConversationId` newtype and `new_turn_id` generator. +//! - [`recorder`]: `TraceRecorder` trait, `RecorderEvent` enum (chat + +//! search variants), `TraceDomain`, `FileRecorder`, `NoopRecorder`. +//! - [`registry`]: `RegistryRecorder` production composition that owns +//! one `FileRecorder` per `(domain, conversation_id)` pair. +//! +//! [`BoundRecorder`] is a thin wrapper that closes over a +//! `ConversationId` so call sites can emit events with a single-arg +//! `record(event)` instead of threading the id through every signature. +//! The chat layer and the search pipeline both hold an +//! `Arc` for the conversation they belong to. +//! +//! See `recorder.rs` module-level docs for the JSONL schema and the +//! late-event tolerance contract. 
+
+use std::sync::Arc;
+
+pub mod ids;
+pub mod live;
+pub mod recorder;
+pub mod registry;
+
+pub use ids::{new_turn_id, ConversationId};
+pub use live::LiveTraceRecorder;
+pub use recorder::{
+    FileRecorder, NoopRecorder, ReaderUrlOutcome, RecorderEvent, RerankedChunk, TraceDomain,
+    TraceRecorder, TRACE_SCHEMA_VERSION,
+};
+pub use registry::RegistryRecorder;
+
+/// Recorder bound to a single `ConversationId`. Wraps an
+/// `Arc<dyn TraceRecorder>` so call sites can emit events without
+/// threading the conversation id through every function signature.
+///
+/// Constructed by `commands::ask_ollama` and `search::search_pipeline`
+/// once at the start of each turn from managed state, then handed down
+/// through the streaming or pipeline machinery as
+/// `Arc<BoundRecorder>`. Cheap to clone (single `Arc`).
+///
+/// Manual `Debug` impl: `dyn TraceRecorder` does not require `Debug`
+/// (and the noop / file recorder don't all implement it through a
+/// trait-object boundary), so the inner recorder is rendered as an
+/// opaque marker in debug output.
+pub struct BoundRecorder {
+    inner: Arc<dyn TraceRecorder>,
+    conversation_id: ConversationId,
+}
+
+impl std::fmt::Debug for BoundRecorder {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("BoundRecorder")
+            .field("inner", &"<dyn TraceRecorder>")
+            .field("conversation_id", &self.conversation_id)
+            .finish()
+    }
+}
+
+impl BoundRecorder {
+    /// Wraps `inner` and binds it to `conversation_id`. Every
+    /// subsequent `record(event)` call routes through `inner` with
+    /// the bound id.
+    pub fn new(inner: Arc<dyn TraceRecorder>, conversation_id: ConversationId) -> Self {
+        Self {
+            inner,
+            conversation_id,
+        }
+    }
+
+    /// Convenience constructor for tests and for the production path
+    /// where `[debug] trace_enabled` is false: builds a `BoundRecorder`
+    /// backed by a `NoopRecorder` so the rest of the code can hold an
+    /// `Arc<BoundRecorder>` unconditionally.
+ pub fn noop_for(conversation_id: ConversationId) -> Self { + Self::new(Arc::new(NoopRecorder), conversation_id) + } + + /// Records a single event for the bound conversation. + pub fn record(&self, event: RecorderEvent) { + self.inner.record(&self.conversation_id, event); + } + + /// The conversation id this recorder was bound to. Useful for + /// logging and for emitting downstream events that need to share + /// the same id. + pub fn conversation_id(&self) -> &ConversationId { + &self.conversation_id + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + + #[test] + fn noop_for_constructs_bound_recorder_with_id() { + let r = BoundRecorder::noop_for(ConversationId::new("conv-noop")); + assert_eq!(r.conversation_id().as_str(), "conv-noop"); + // No panic, no I/O. + r.record(RecorderEvent::Warning { + kind: "k".into(), + payload: json!({}), + }); + } + + #[test] + fn debug_impl_renders_opaque_inner_and_visible_conv_id() { + // The manual Debug impl swaps the trait-object inner for an + // opaque marker so the conversation id stays diagnostic-friendly + // without leaking the recorder's internal fields. Exercises the + // impl path that derive(Debug) cannot synthesize for the + // dyn-trait field. + let r = BoundRecorder::noop_for(ConversationId::new("conv-debug")); + let rendered = format!("{r:?}"); + assert!( + rendered.contains("BoundRecorder"), + "debug output must label the type: {rendered}" + ); + assert!( + rendered.contains(""), + "debug output must use the opaque inner marker: {rendered}" + ); + assert!( + rendered.contains("conv-debug"), + "debug output must show the bound conversation id: {rendered}" + ); + } + + #[test] + fn bound_record_threads_conv_id_to_inner_recorder() { + // Use a `MockRecorder` indirection by way of a small captor type. + // The point is: the inner recorder receives the bound conv_id + // even though the BoundRecorder caller only passed an event. 
+ struct Captor { + seen: parking_lot::Mutex>, + } + impl TraceRecorder for Captor { + fn record(&self, conversation_id: &ConversationId, event: RecorderEvent) { + let kind = match &event { + RecorderEvent::Warning { kind, .. } => kind.clone(), + _ => "other".into(), + }; + self.seen + .lock() + .push((conversation_id.as_str().to_owned(), kind)); + } + } + + let captor = Arc::new(Captor { + seen: parking_lot::Mutex::new(Vec::new()), + }); + let bound = BoundRecorder::new(captor.clone(), ConversationId::new("conv-thread")); + bound.record(RecorderEvent::Warning { + kind: "alpha".into(), + payload: json!({}), + }); + bound.record(RecorderEvent::Warning { + kind: "beta".into(), + payload: json!({}), + }); + // Non-Warning event exercises the captor's `_ => "other"` fallback + // arm so coverage hits both match arms in the test instrumentation. + bound.record(RecorderEvent::AssistantTokens { chunk: "hi".into() }); + let seen = captor.seen.lock().clone(); + assert_eq!( + seen, + vec![ + ("conv-thread".to_owned(), "alpha".to_owned()), + ("conv-thread".to_owned(), "beta".to_owned()), + ("conv-thread".to_owned(), "other".to_owned()), + ] + ); + } +} diff --git a/src-tauri/src/trace/recorder.rs b/src-tauri/src/trace/recorder.rs new file mode 100644 index 0000000..9d28e54 --- /dev/null +++ b/src-tauri/src/trace/recorder.rs @@ -0,0 +1,1443 @@ +//! Forensic per-conversation trace recorder shared by the chat path and +//! the `/search` pipeline. +//! +//! When [`crate::config::schema::DebugSection::trace_enabled`] is on, +//! every chat turn AND every search turn writes a forensic JSON-Lines +//! record into a per-conversation file under +//! `app_data_dir()/traces//.jsonl`. Files are +//! grouped by domain so an analysis agent can be pointed at exactly the +//! slice it cares about: `traces/chat/` for conversation patterns, +//! `traces/search/` for retrieval quality, or `traces/` for end-to-end +//! debugging. +//! +//! Off in shipped builds. 
Intended for local quality investigation: open +//! a `.jsonl` file and grep/jq across LLM prompts vs. judge verdicts vs. +//! user-visible assistant tokens to understand exactly what happened. +//! +//! # Architecture +//! +//! - [`TraceRecorder`] is the trait every callsite calls into. The chat +//! layer and the search pipeline both thread `&Arc` +//! through their execution contexts; no call site distinguishes the +//! live recorder from the noop. +//! - [`NoopRecorder`] is the production default. Every method is a no-op. +//! - [`FileRecorder`] writes events for ONE `(domain, conversation_id)` +//! pair into ONE file. Lazy directory + file creation on first record, +//! `parking_lot::Mutex` around a buffered writer, per-line flush so +//! partial files are still grep-friendly if the daemon crashes. I/O +//! errors are warned once via `eprintln!` then become silent no-ops for +//! the rest of the file's lifetime: trace failures must never affect +//! the user-visible pipeline. +//! - [`crate::trace::registry::RegistryRecorder`] is the production +//! composition. It owns one `Arc` per +//! `(TraceDomain, ConversationId)`, lazily inserts on first event, +//! evicts on `ConversationEnd`, and tolerates late-arriving events +//! after eviction (file reopens in append mode). +//! - [`MockRecorder`] (test-only) collects every event into an in-memory +//! `Vec<(ConversationId, RecorderEvent)>` for instrumentation-seam +//! assertions. +//! +//! # Schema +//! +//! Each line is a self-describing JSON object: +//! +//! ```json +//! {"v":2,"seq":0,"ts_ms":1714762800000,"domain":"chat","conversation_id":"conv-abc","kind":"user_message",...payload} +//! ``` +//! +//! `seq` is monotonic per `FileRecorder`; `ts_ms` is wall-clock UNIX +//! millis. The `v` field allows future schema evolution. Schema version +//! 2 added the `domain` and `conversation_id` top-level fields plus the +//! chat-domain variants. Consumers that hardcoded v1 must update. +//! +//! 
# Late-event tolerance +//! +//! `FileRecorder` opens its file in append mode. If a stray event arrives +//! after `ConversationEnd` (e.g. a cancelled stream's final +//! `AssistantTokens` arriving after the frontend's +//! `record_conversation_end` call), the registry lazily re-creates a +//! `FileRecorder` for the evicted key and writes a benign trailing line. +//! Consumers MUST tolerate post-end lines: the canonical end of a +//! conversation is the LAST line with `kind: "conversation_end"`, not +//! the first. + +use std::fs::{File, OpenOptions}; +use std::io::{BufWriter, Write}; +use std::path::{Path, PathBuf}; +use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::time::{SystemTime, UNIX_EPOCH}; + +use parking_lot::Mutex; +use serde::Serialize; +use serde_json::Value; + +use super::ids::ConversationId; + +/// Schema version embedded in every record. Bump when the wire shape changes. +/// +/// `2` adds the `domain` and `conversation_id` top-level fields plus the +/// chat-domain variants. Files produced by the v1 recorder (one file per +/// search turn, no chat events) and files produced by v2 (one file per +/// conversation, both chat and search events) are not interleaved on +/// disk because the v2 layout uses different directories. +pub const TRACE_SCHEMA_VERSION: u32 = 2; + +/// Coarse-grained classification of every recorded event. Determines +/// which subdirectory under `traces/` the event lands in. +/// +/// `Screenshot` events are routed to the `chat` domain because they are +/// always a side-effect of a user-visible chat turn (the `/screen` slash +/// command). They retain a distinct `domain()` value for ergonomic +/// filtering by analysis tooling but share a file with the parent chat +/// conversation. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize)] +#[serde(rename_all = "snake_case")] +pub enum TraceDomain { + /// Chat-layer events: user messages, assistant streaming, screen + /// captures, conversation lifecycle. 
One file per conversation, + /// rooted at `traces/chat/.jsonl`. + Chat, + /// Search-pipeline events: router/judge LLM calls, SearXNG queries, + /// reader batches, chunker output, rerank results, judge verdicts. + /// One file per conversation, rooted at + /// `traces/search/.jsonl`. + Search, +} + +impl TraceDomain { + /// Subdirectory name (under `traces/`) where files for this domain + /// live. Used by [`FileRecorder::for_conversation`] when assembling + /// the destination path. + pub fn dir(self) -> &'static str { + match self { + TraceDomain::Chat => "chat", + TraceDomain::Search => "search", + } + } +} + +/// Trait every chat or search callsite uses to emit forensic events. +/// Implementors must be cheap when tracing is off (the [`NoopRecorder`] +/// case dominates production usage). +pub trait TraceRecorder: Send + Sync { + /// Records a single event for the given conversation. Implementors + /// MUST NOT panic, MUST NOT block for arbitrary durations, and MUST + /// NOT propagate errors. Trace I/O failures must be swallowed (with + /// a single rate-limited warning) so the chat path and the search + /// pipeline are unaffected. + fn record(&self, conversation_id: &ConversationId, event: RecorderEvent); +} + +/// Production default: every method is a no-op. Constant time, zero +/// allocations. +#[derive(Debug, Default, Clone, Copy)] +pub struct NoopRecorder; + +impl TraceRecorder for NoopRecorder { + fn record(&self, _conversation_id: &ConversationId, _event: RecorderEvent) {} +} + +/// Forensic trace events emitted by the chat layer or the search pipeline. +/// +/// Each variant maps to one JSON-Lines record. Field shapes are +/// intentionally permissive (`Value`, `String`) for the search-domain +/// variants because the trace is a forensic dump, not a typed contract. +/// Chat-domain variants carry typed payloads because the chat layer's +/// surface is small and stable. 
+/// +/// Every variant has a [`Self::domain`] method that returns its +/// [`TraceDomain`], used by the registry to pick the destination file. +#[derive(Debug, Clone)] +pub enum RecorderEvent { + // ─── Search domain (existing variants from PR #126) ───────────────── + /// Search-pipeline turn starts. Captures the user query, model slug, + /// runtime search-config snapshot (so a trace is reproducible without + /// the config file), and conversation-history length. + TurnStart { + turn_id: String, + query: String, + model: String, + runtime_config: Value, + history_len: usize, + }, + /// Single Ollama `/api/chat` non-streaming JSON request issued by the + /// search pipeline. Captures the stage label + /// (router / snippet_judge / chunk_judge / synthesis / + /// answer_from_context), the full request body the pipeline sent, + /// the raw response string Ollama returned, and the request latency. + /// `error` is `Some` when the call failed (timeout, transport, parse). + LlmCall { + stage: String, + endpoint: String, + request_body: Value, + response_raw: Option, + latency_ms: u64, + error: Option, + }, + /// Streaming Ollama `/api/chat` request used by the search synthesis + /// path. `tokens` counts streamed pieces; `final_text` is the + /// accumulated assistant message; per-token records are NOT emitted + /// (would flood the file without adding signal). + StreamingLlmCall { + stage: String, + endpoint: String, + request_body: Value, + final_text: Option, + tokens: u64, + latency_ms: u64, + error: Option, + }, + /// SearXNG search request: query string, full URL, raw HTTP response + /// body, and the normalized result list the pipeline forwarded to the + /// reranker. + SearxngQuery { + query: String, + url: String, + status: Option, + response_raw: Option, + normalized_results: Value, + latency_ms: u64, + error: Option, + }, + /// Reader sidecar batch: per-URL fetch results including raw HTTP + /// body and extracted text. 
Latency is per-URL plus the overall batch. + ReaderBatch { + urls: Vec, + per_url: Vec, + batch_latency_ms: u64, + batch_error: Option, + }, + /// Page chunked into N pieces. `chunks` are the full chunk texts so + /// a downstream consumer can reconstruct what the judge actually saw. + ChunkerBatch { + page_url: String, + chunk_count: usize, + chunks: Vec, + }, + /// Reranker output: top-k chunks with scores, in selection order. + RerankResult { + query: String, + input_count: usize, + top_k: Vec, + }, + /// Parsed judge verdict at a given stage (snippet or chunk). Captures + /// the raw string the LLM returned plus the normalized verdict struct. + JudgeVerdict { + stage: String, + raw: String, + normalized: Value, + }, + /// Warning surfaced by the search pipeline (e.g. JudgeFailed, + /// BudgetExhausted). + Warning { kind: String, payload: Value }, + /// Mirrors a [`crate::search::types::SearchEvent`] emission so the + /// trace reflects the user-visible event stream alongside backend + /// internals. + SearchEventEmitted { event: Value }, + /// Search-pipeline turn ends. Captures the final action taken and + /// total wall-clock latency. `error` is `Some` only on hard failure + /// paths. + TurnEnd { + turn_id: String, + final_action: String, + final_source_urls: Vec, + total_latency_ms: u64, + error: Option, + }, + + // ─── Chat domain (new in v2) ──────────────────────────────────────── + /// First event in a chat-domain file. Captures the model slug and + /// the resolved system prompt at the moment the conversation began, + /// so the trace is reproducible without snapshotting the live + /// `AppConfig`. + ConversationStart { + model: String, + system_prompt: String, + }, + /// User-authored message that triggered an assistant turn. Includes + /// any attached image paths (paths only, not bytes) and the slash + /// command that routed the message, if any. 
+ UserMessage { + content: String, + attached_images: Vec, + slash_command: Option, + }, + /// Streaming chunk of the assistant's hidden reasoning ("thinking") + /// output. Emitted only when the active model returns thinking + /// tokens AND the user has thinking mode enabled. + AssistantThinking { chunk: String }, + /// Streaming chunk of the assistant's user-visible response. + AssistantTokens { chunk: String }, + /// Assistant turn ends. Captures the total streamed token count and + /// the wall-clock latency from `UserMessage` to stream-completion. + AssistantComplete { total_tokens: u64, latency_ms: u64 }, + /// User invoked the `/screen` slash command and the backend wrote a + /// JPEG snapshot to `image_path`. `displays` is the number of + /// monitors captured (multi-monitor setups produce one merged image). + ScreenCaptured { image_path: String, displays: u8 }, + /// Final event in a chat-domain file. Emitted by the frontend when + /// the user resets the conversation or by the backend on app quit + /// (reason = "quit"). Window-hide does NOT emit this event because + /// Thuki's window-close intercept hides instead of quits and the + /// same conversation can resume on next hotkey activation. + /// + /// Late events arriving after `ConversationEnd` are tolerated; see + /// the module-level "Late-event tolerance" doc. + ConversationEnd { reason: String }, +} + +impl RecorderEvent { + /// Returns the [`TraceDomain`] this event belongs to. Used by the + /// registry to pick the destination file. + pub fn domain(&self) -> TraceDomain { + match self { + RecorderEvent::TurnStart { .. } + | RecorderEvent::LlmCall { .. } + | RecorderEvent::StreamingLlmCall { .. } + | RecorderEvent::SearxngQuery { .. } + | RecorderEvent::ReaderBatch { .. } + | RecorderEvent::ChunkerBatch { .. } + | RecorderEvent::RerankResult { .. } + | RecorderEvent::JudgeVerdict { .. } + | RecorderEvent::Warning { .. } + | RecorderEvent::SearchEventEmitted { .. } + | RecorderEvent::TurnEnd { .. 
} => TraceDomain::Search, + RecorderEvent::ConversationStart { .. } + | RecorderEvent::UserMessage { .. } + | RecorderEvent::AssistantThinking { .. } + | RecorderEvent::AssistantTokens { .. } + | RecorderEvent::AssistantComplete { .. } + | RecorderEvent::ScreenCaptured { .. } + | RecorderEvent::ConversationEnd { .. } => TraceDomain::Chat, + } + } + + /// Returns true if this event terminates a conversation in the chat + /// domain. Used by the registry to evict the file handle from its + /// per-conversation cache. + pub fn is_conversation_end(&self) -> bool { + matches!(self, RecorderEvent::ConversationEnd { .. }) + } + + /// Returns true if this event terminates a turn in the search domain. + /// Used by the registry to evict the search-domain file handle from + /// its per-conversation cache so long-running sessions with many + /// `/search` turns do not accumulate one file handle per turn until + /// the process exits. + pub fn is_turn_end(&self) -> bool { + matches!(self, RecorderEvent::TurnEnd { .. }) + } +} + +/// Per-URL outcome inside a [`RecorderEvent::ReaderBatch`]. +#[derive(Debug, Clone, Serialize)] +pub struct ReaderUrlOutcome { + pub url: String, + pub status: Option, + pub latency_ms: u64, + pub raw_body: Option, + pub extracted_text: Option, + pub error: Option, +} + +/// Single ranked chunk inside a [`RecorderEvent::RerankResult`]. +#[derive(Debug, Clone, Serialize)] +pub struct RerankedChunk { + pub source_url: String, + pub score: f64, + pub text: String, +} + +/// Lazy file-backed recorder for ONE `(domain, conversation_id)` pair. +/// Constructor is cheap (stashes path); the directory and file are only +/// created on the first `record()` call. +/// +/// Uses `parking_lot::Mutex` to match the rest of the codebase. The +/// mutex is held for the duration of one `serde_json::to_writer` + +/// `writeln` + `flush`, which is short enough to keep contention +/// negligible even under concurrent conversations writing through the +/// registry. 
+#[derive(Debug)] +pub struct FileRecorder { + /// Resolved trace file path. Computed at construction so the path + /// is stable for the lifetime of the recorder. + path: PathBuf, + /// Open writer, lazily initialized on first record. `None` after a + /// previous I/O failure (we degrade to noop, see `failed`). + state: Mutex>>, + /// Monotonic record sequence. Survives re-entry across multiple + /// records even though only one record is in flight at a time. + seq: AtomicU64, + /// Latched once a write or open error occurred. Subsequent records + /// are silent no-ops; the warning is rate-limited to a single + /// `eprintln!`. + failed: AtomicBool, + /// Set once the failure warning has been printed. Prevents log spam. + warned: AtomicBool, +} + +impl FileRecorder { + /// Construct a recorder targeting + /// `//.jsonl`. The + /// directory is NOT created here; that happens on first record so + /// we don't pay I/O cost (or surface errors) for conversations that + /// never write. + /// + /// The recorder writes in append mode, so a recorder constructed + /// for an existing path simply appends to the existing file. This + /// is the mechanism that gives the registry late-event tolerance: + /// after a `ConversationEnd` evicts the handle, a stray event for + /// the same conversation rebuilds the recorder and appends a benign + /// post-end line. + pub fn for_conversation( + traces_root: impl AsRef, + domain: TraceDomain, + conversation_id: &ConversationId, + ) -> Self { + let path = traces_root + .as_ref() + .join(domain.dir()) + .join(format!("{conversation_id}.jsonl")); + Self { + path, + state: Mutex::new(None), + seq: AtomicU64::new(0), + failed: AtomicBool::new(false), + warned: AtomicBool::new(false), + } + } + + /// Path the recorder writes to. Useful for tests and debug output. + pub fn path(&self) -> &Path { + &self.path + } + + /// Latches the recorder as failed and emits a single rate-limited + /// stderr warning. 
Filesystem-I/O wrapper: this is the only call + /// site that mutates the failed/warned flags, so the entire body is + /// just I/O-failure plumbing. Logic is exercised through every + /// error branch below; coverage is excluded because triggering each + /// individual `eprintln!` reliably across CI would require contrived + /// platform-specific filesystem behaviour. + #[cfg_attr(coverage_nightly, coverage(off))] + fn fail_with(&self, msg: String) { + self.failed.store(true, Ordering::SeqCst); + if self + .warned + .compare_exchange(false, true, Ordering::SeqCst, Ordering::SeqCst) + .is_ok() + { + eprintln!("thuki: [trace] {msg}"); + } + } + + /// Test-only entrypoint mirroring [`fail_with`] without the warn-once + /// gate, exposed only so the rate-limiting unit test can verify the + /// `warned` flag stays latched across repeated calls. + #[cfg(test)] + fn warn_once(&self, msg: &str) { + self.fail_with(msg.to_string()); + } + + /// Ensures the file is open. On first call, creates the parent + /// directory and opens the file in append mode. Returns `None` and + /// marks the recorder failed on any I/O error. + /// + /// Filesystem-I/O wrapper: the body is just an open + mkdir thin + /// shim around `std::fs`. Logic is exercised end-to-end through the + /// `file_recorder_*` integration tests. 
+    #[cfg_attr(coverage_nightly, coverage(off))]
+    fn ensure_open<'a>(
+        &self,
+        guard: &'a mut Option<BufWriter<File>>,
+    ) -> Option<&'a mut BufWriter<File>> {
+        if guard.is_some() {
+            return guard.as_mut();
+        }
+        if let Some(parent) = self.path.parent() {
+            if let Err(e) = std::fs::create_dir_all(parent) {
+                self.fail_with(format!(
+                    "could not create trace dir {}: {e}",
+                    parent.display()
+                ));
+                return None;
+            }
+        }
+        match OpenOptions::new()
+            .create(true)
+            .append(true)
+            .open(&self.path)
+        {
+            Ok(f) => {
+                *guard = Some(BufWriter::new(f));
+                guard.as_mut()
+            }
+            Err(e) => {
+                self.fail_with(format!(
+                    "could not open trace file {}: {e}",
+                    self.path.display()
+                ));
+                None
+            }
+        }
+    }
+
+    /// Flushes any buffered writes to disk. Called by the registry on
+    /// `ConversationEnd` so the post-end file is fully durable before
+    /// the handle is evicted.
+    #[cfg_attr(coverage_nightly, coverage(off))]
+    pub(crate) fn flush(&self) {
+        let mut guard = self.state.lock();
+        if let Some(writer) = guard.as_mut() {
+            let _ = writer.flush();
+        }
+    }
+}
+
+impl TraceRecorder for FileRecorder {
+    /// Filesystem-I/O wrapper: orchestrates [`serialize_event()`] (covered)
+    /// and [`Self::ensure_open()`] (covered via integration tests). The body
+    /// itself is plumbing around `std::io::Write` that cannot be
+    /// exercised reliably for every error branch on every CI runner.
+ #[cfg_attr(coverage_nightly, coverage(off))] + fn record(&self, conversation_id: &ConversationId, event: RecorderEvent) { + if self.failed.load(Ordering::SeqCst) { + return; + } + let seq = self.seq.fetch_add(1, Ordering::SeqCst); + let ts_ms = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_millis() as u64) + .unwrap_or(0); + let line = serialize_event(seq, ts_ms, conversation_id, &event) + .unwrap_or_else(|e| serialize_failure_fallback(self, e)); + if line.is_empty() { + return; + } + let mut guard = self.state.lock(); + let Some(writer) = self.ensure_open(&mut guard) else { + return; + }; + if let Err(e) = writer + .write_all(line.as_bytes()) + .and_then(|()| writer.flush()) + { + self.fail_with(format!( + "could not write trace line to {}: {e}", + self.path.display() + )); + } + } +} + +/// Fallback used when `serialize_event` returns an error. Defense-in-depth +/// for a path that cannot fire under our typed events; coverage is +/// excluded because reaching it would require corrupting the in-memory +/// `RecorderEvent` representation. +#[cfg_attr(coverage_nightly, coverage(off))] +fn serialize_failure_fallback(recorder: &FileRecorder, err: serde_json::Error) -> String { + recorder.fail_with(format!("could not serialize event: {err}")); + String::new() +} + +/// Serializes a single event into a newline-terminated JSON record. +/// +/// Stamps the four standard top-level fields onto every line: +/// `v` (schema version), `seq` (per-recorder monotonic counter), +/// `ts_ms` (wall-clock UNIX millis), `domain` (chat or search), plus +/// `conversation_id` (the ID passed to `record`). The remaining fields +/// come from the event variant via the `Serialize` impl below. 
+fn serialize_event(
+    seq: u64,
+    ts_ms: u64,
+    conversation_id: &ConversationId,
+    event: &RecorderEvent,
+) -> serde_json::Result<String> {
+    let mut value = serde_json::to_value(event)?;
+    if let Some(map) = value.as_object_mut() {
+        map.insert("v".into(), Value::from(TRACE_SCHEMA_VERSION));
+        map.insert("seq".into(), Value::from(seq));
+        map.insert("ts_ms".into(), Value::from(ts_ms));
+        map.insert(
+            "domain".into(),
+            serde_json::to_value(event.domain()).unwrap_or(Value::Null),
+        );
+        map.insert(
+            "conversation_id".into(),
+            Value::from(conversation_id.as_str()),
+        );
+    }
+    let mut s = serde_json::to_string(&value)?;
+    s.push('\n');
+    Ok(s)
+}
+
+/// Manual `Serialize` for [`RecorderEvent`]: emits a `kind` discriminator
+/// plus a flat payload. Avoids the default tagged-enum representation so
+/// downstream consumers can grep `"kind":"llm_call"` directly.
+impl Serialize for RecorderEvent {
+    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
+        use serde::ser::SerializeMap;
+        let mut map = serializer.serialize_map(None)?;
+        match self {
+            // ─── Search domain ──────────────────────────────────────
+            RecorderEvent::TurnStart {
+                turn_id,
+                query,
+                model,
+                runtime_config,
+                history_len,
+            } => {
+                map.serialize_entry("kind", "turn_start")?;
+                map.serialize_entry("turn_id", turn_id)?;
+                map.serialize_entry("query", query)?;
+                map.serialize_entry("model", model)?;
+                map.serialize_entry("runtime_config", runtime_config)?;
+                map.serialize_entry("history_len", history_len)?;
+            }
+            RecorderEvent::LlmCall {
+                stage,
+                endpoint,
+                request_body,
+                response_raw,
+                latency_ms,
+                error,
+            } => {
+                map.serialize_entry("kind", "llm_call")?;
+                map.serialize_entry("stage", stage)?;
+                map.serialize_entry("endpoint", endpoint)?;
+                map.serialize_entry("request_body", request_body)?;
+                map.serialize_entry("response_raw", response_raw)?;
+                map.serialize_entry("latency_ms", latency_ms)?;
+                map.serialize_entry("error", error)?;
+            }
+            RecorderEvent::StreamingLlmCall {
+                stage,
+                endpoint,
+ request_body, + final_text, + tokens, + latency_ms, + error, + } => { + map.serialize_entry("kind", "streaming_llm_call")?; + map.serialize_entry("stage", stage)?; + map.serialize_entry("endpoint", endpoint)?; + map.serialize_entry("request_body", request_body)?; + map.serialize_entry("final_text", final_text)?; + map.serialize_entry("tokens", tokens)?; + map.serialize_entry("latency_ms", latency_ms)?; + map.serialize_entry("error", error)?; + } + RecorderEvent::SearxngQuery { + query, + url, + status, + response_raw, + normalized_results, + latency_ms, + error, + } => { + map.serialize_entry("kind", "searxng_query")?; + map.serialize_entry("query", query)?; + map.serialize_entry("url", url)?; + map.serialize_entry("status", status)?; + map.serialize_entry("response_raw", response_raw)?; + map.serialize_entry("normalized_results", normalized_results)?; + map.serialize_entry("latency_ms", latency_ms)?; + map.serialize_entry("error", error)?; + } + RecorderEvent::ReaderBatch { + urls, + per_url, + batch_latency_ms, + batch_error, + } => { + map.serialize_entry("kind", "reader_batch")?; + map.serialize_entry("urls", urls)?; + map.serialize_entry("per_url", per_url)?; + map.serialize_entry("batch_latency_ms", batch_latency_ms)?; + map.serialize_entry("batch_error", batch_error)?; + } + RecorderEvent::ChunkerBatch { + page_url, + chunk_count, + chunks, + } => { + map.serialize_entry("kind", "chunker_batch")?; + map.serialize_entry("page_url", page_url)?; + map.serialize_entry("chunk_count", chunk_count)?; + map.serialize_entry("chunks", chunks)?; + } + RecorderEvent::RerankResult { + query, + input_count, + top_k, + } => { + map.serialize_entry("kind", "rerank_result")?; + map.serialize_entry("query", query)?; + map.serialize_entry("input_count", input_count)?; + map.serialize_entry("top_k", top_k)?; + } + RecorderEvent::JudgeVerdict { + stage, + raw, + normalized, + } => { + map.serialize_entry("kind", "judge_verdict")?; + map.serialize_entry("stage", stage)?; + 
map.serialize_entry("raw", raw)?; + map.serialize_entry("normalized", normalized)?; + } + RecorderEvent::Warning { kind, payload } => { + map.serialize_entry("kind", "warning")?; + map.serialize_entry("warning_kind", kind)?; + map.serialize_entry("payload", payload)?; + } + RecorderEvent::SearchEventEmitted { event } => { + map.serialize_entry("kind", "search_event")?; + map.serialize_entry("event", event)?; + } + RecorderEvent::TurnEnd { + turn_id, + final_action, + final_source_urls, + total_latency_ms, + error, + } => { + map.serialize_entry("kind", "turn_end")?; + map.serialize_entry("turn_id", turn_id)?; + map.serialize_entry("final_action", final_action)?; + map.serialize_entry("final_source_urls", final_source_urls)?; + map.serialize_entry("total_latency_ms", total_latency_ms)?; + map.serialize_entry("error", error)?; + } + + // ─── Chat domain ────────────────────────────────────────── + RecorderEvent::ConversationStart { + model, + system_prompt, + } => { + map.serialize_entry("kind", "conversation_start")?; + map.serialize_entry("model", model)?; + map.serialize_entry("system_prompt", system_prompt)?; + } + RecorderEvent::UserMessage { + content, + attached_images, + slash_command, + } => { + map.serialize_entry("kind", "user_message")?; + map.serialize_entry("content", content)?; + map.serialize_entry("attached_images", attached_images)?; + map.serialize_entry("slash_command", slash_command)?; + } + RecorderEvent::AssistantThinking { chunk } => { + map.serialize_entry("kind", "assistant_thinking")?; + map.serialize_entry("chunk", chunk)?; + } + RecorderEvent::AssistantTokens { chunk } => { + map.serialize_entry("kind", "assistant_tokens")?; + map.serialize_entry("chunk", chunk)?; + } + RecorderEvent::AssistantComplete { + total_tokens, + latency_ms, + } => { + map.serialize_entry("kind", "assistant_complete")?; + map.serialize_entry("total_tokens", total_tokens)?; + map.serialize_entry("latency_ms", latency_ms)?; + } + RecorderEvent::ScreenCaptured { + 
image_path, + displays, + } => { + map.serialize_entry("kind", "screen_captured")?; + map.serialize_entry("image_path", image_path)?; + map.serialize_entry("displays", displays)?; + } + RecorderEvent::ConversationEnd { reason } => { + map.serialize_entry("kind", "conversation_end")?; + map.serialize_entry("reason", reason)?; + } + } + map.end() + } +} + +/// In-memory recorder used by tests to assert what the chat layer or +/// search pipeline emitted. Lives in a `cfg(test)` block so production +/// builds never link it. +#[cfg(test)] +#[derive(Debug, Default)] +pub(crate) struct MockRecorder { + events: Mutex>, +} + +#[cfg(test)] +impl MockRecorder { + pub(crate) fn new() -> Self { + Self::default() + } + + pub(crate) fn snapshot(&self) -> Vec<(ConversationId, RecorderEvent)> { + self.events.lock().clone() + } + + pub(crate) fn len(&self) -> usize { + self.events.lock().len() + } +} + +#[cfg(test)] +impl TraceRecorder for MockRecorder { + fn record(&self, conversation_id: &ConversationId, event: RecorderEvent) { + self.events.lock().push((conversation_id.clone(), event)); + } +} + +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + use std::sync::Arc; + + fn fresh_dir() -> PathBuf { + let dir = std::env::temp_dir().join(format!("thuki-trace-tests-{}", uuid::Uuid::new_v4())); + std::fs::create_dir_all(&dir).unwrap(); + dir + } + + fn read_lines(path: &Path) -> Vec { + let s = std::fs::read_to_string(path).expect("read trace file"); + s.lines() + .map(|l| serde_json::from_str::(l).expect("valid json line")) + .collect() + } + + fn cid(s: &str) -> ConversationId { + ConversationId::new(s) + } + + #[test] + fn trace_domain_dir_strings_match_layout() { + assert_eq!(TraceDomain::Chat.dir(), "chat"); + assert_eq!(TraceDomain::Search.dir(), "search"); + } + + #[test] + fn trace_domain_serializes_as_snake_case() { + assert_eq!( + serde_json::to_string(&TraceDomain::Chat).unwrap(), + "\"chat\"" + ); + assert_eq!( + 
serde_json::to_string(&TraceDomain::Search).unwrap(), + "\"search\"" + ); + } + + #[test] + fn search_variants_belong_to_search_domain() { + let cases = vec![ + RecorderEvent::TurnStart { + turn_id: "t".into(), + query: "q".into(), + model: "m".into(), + runtime_config: json!({}), + history_len: 0, + }, + RecorderEvent::LlmCall { + stage: "router".into(), + endpoint: "e".into(), + request_body: json!({}), + response_raw: None, + latency_ms: 0, + error: None, + }, + RecorderEvent::StreamingLlmCall { + stage: "synthesis".into(), + endpoint: "e".into(), + request_body: json!({}), + final_text: None, + tokens: 0, + latency_ms: 0, + error: None, + }, + RecorderEvent::SearxngQuery { + query: "q".into(), + url: "u".into(), + status: None, + response_raw: None, + normalized_results: json!([]), + latency_ms: 0, + error: None, + }, + RecorderEvent::ReaderBatch { + urls: vec![], + per_url: vec![], + batch_latency_ms: 0, + batch_error: None, + }, + RecorderEvent::ChunkerBatch { + page_url: "u".into(), + chunk_count: 0, + chunks: vec![], + }, + RecorderEvent::RerankResult { + query: "q".into(), + input_count: 0, + top_k: vec![], + }, + RecorderEvent::JudgeVerdict { + stage: "snippet".into(), + raw: "{}".into(), + normalized: json!({}), + }, + RecorderEvent::Warning { + kind: "k".into(), + payload: json!({}), + }, + RecorderEvent::SearchEventEmitted { event: json!({}) }, + RecorderEvent::TurnEnd { + turn_id: "t".into(), + final_action: "answer".into(), + final_source_urls: vec![], + total_latency_ms: 0, + error: None, + }, + ]; + for e in cases { + assert_eq!(e.domain(), TraceDomain::Search, "{:?} should be search", e); + } + } + + #[test] + fn chat_variants_belong_to_chat_domain() { + let cases = vec![ + RecorderEvent::ConversationStart { + model: "m".into(), + system_prompt: "s".into(), + }, + RecorderEvent::UserMessage { + content: "hi".into(), + attached_images: vec![], + slash_command: None, + }, + RecorderEvent::AssistantThinking { + chunk: "thinking".into(), + }, + 
RecorderEvent::AssistantTokens { + chunk: "answer".into(), + }, + RecorderEvent::AssistantComplete { + total_tokens: 1, + latency_ms: 1, + }, + RecorderEvent::ScreenCaptured { + image_path: "p".into(), + displays: 1, + }, + RecorderEvent::ConversationEnd { + reason: "quit".into(), + }, + ]; + for e in cases { + assert_eq!(e.domain(), TraceDomain::Chat, "{:?} should be chat", e); + } + } + + #[test] + fn is_conversation_end_only_true_for_conversation_end_variant() { + assert!(RecorderEvent::ConversationEnd { reason: "x".into() }.is_conversation_end()); + assert!(!RecorderEvent::AssistantTokens { chunk: "x".into() }.is_conversation_end()); + assert!(!RecorderEvent::Warning { + kind: "x".into(), + payload: json!({}), + } + .is_conversation_end()); + } + + #[test] + fn is_turn_end_only_true_for_turn_end_variant() { + assert!(RecorderEvent::TurnEnd { + turn_id: "t".into(), + final_action: "answered".into(), + final_source_urls: vec![], + total_latency_ms: 0, + error: None, + } + .is_turn_end()); + assert!(!RecorderEvent::ConversationEnd { reason: "x".into() }.is_turn_end()); + assert!(!RecorderEvent::AssistantTokens { chunk: "x".into() }.is_turn_end()); + } + + #[test] + fn noop_recorder_swallows_every_event() { + let r = NoopRecorder; + r.record( + &cid("conv-x"), + RecorderEvent::Warning { + kind: "x".into(), + payload: json!({}), + }, + ); + // No panic, no I/O. 
+ } + + #[test] + fn file_recorder_lazy_creates_dir_and_file_on_first_record() { + let root = fresh_dir().join("nested").join("traces"); + let r = FileRecorder::for_conversation(&root, TraceDomain::Chat, &cid("conv-a")); + assert!(!root.exists(), "constructor must be lazy"); + r.record( + &cid("conv-a"), + RecorderEvent::UserMessage { + content: "hi".into(), + attached_images: vec![], + slash_command: None, + }, + ); + assert!(r.path().exists(), "file created on first record"); + assert!(r.path().parent().unwrap().exists(), "domain dir created"); + assert_eq!( + r.path() + .parent() + .unwrap() + .file_name() + .unwrap() + .to_string_lossy(), + "chat" + ); + } + + #[test] + fn file_recorder_writes_one_jsonl_line_per_record_with_schema_metadata() { + let root = fresh_dir(); + let r = FileRecorder::for_conversation(&root, TraceDomain::Chat, &cid("conv-meta")); + r.record( + &cid("conv-meta"), + RecorderEvent::ConversationStart { + model: "qwen3:4b".into(), + system_prompt: "You are Thuki...".into(), + }, + ); + r.record( + &cid("conv-meta"), + RecorderEvent::UserMessage { + content: "summarize".into(), + attached_images: vec![], + slash_command: None, + }, + ); + let lines = read_lines(r.path()); + assert_eq!(lines.len(), 2); + for (i, line) in lines.iter().enumerate() { + assert_eq!(line["v"], TRACE_SCHEMA_VERSION); + assert_eq!(line["seq"], i as u64); + assert_eq!(line["domain"], "chat"); + assert_eq!(line["conversation_id"], "conv-meta"); + assert!(line["ts_ms"].as_u64().unwrap() > 0); + } + assert_eq!(lines[0]["kind"], "conversation_start"); + assert_eq!(lines[0]["model"], "qwen3:4b"); + assert_eq!(lines[1]["kind"], "user_message"); + assert_eq!(lines[1]["content"], "summarize"); + } + + #[test] + fn file_recorder_serializes_every_search_variant() { + let root = fresh_dir(); + let r = FileRecorder::for_conversation(&root, TraceDomain::Search, &cid("conv-all")); + r.record( + &cid("conv-all"), + RecorderEvent::TurnStart { + turn_id: "t".into(), + query: 
"q".into(), + model: "m".into(), + runtime_config: json!({}), + history_len: 0, + }, + ); + r.record( + &cid("conv-all"), + RecorderEvent::LlmCall { + stage: "router".into(), + endpoint: "http://x/api/chat".into(), + request_body: json!({"messages": []}), + response_raw: Some("{}".into()), + latency_ms: 12, + error: None, + }, + ); + r.record( + &cid("conv-all"), + RecorderEvent::StreamingLlmCall { + stage: "synthesis".into(), + endpoint: "http://x/api/chat".into(), + request_body: json!({}), + final_text: Some("answer".into()), + tokens: 7, + latency_ms: 100, + error: None, + }, + ); + r.record( + &cid("conv-all"), + RecorderEvent::SearxngQuery { + query: "rust".into(), + url: "http://s/search".into(), + status: Some(200), + response_raw: Some("{}".into()), + normalized_results: json!([]), + latency_ms: 5, + error: None, + }, + ); + r.record( + &cid("conv-all"), + RecorderEvent::ReaderBatch { + urls: vec!["http://a".into()], + per_url: vec![ReaderUrlOutcome { + url: "http://a".into(), + status: Some(200), + latency_ms: 9, + raw_body: Some("".into()), + extracted_text: Some("hi".into()), + error: None, + }], + batch_latency_ms: 11, + batch_error: None, + }, + ); + r.record( + &cid("conv-all"), + RecorderEvent::ChunkerBatch { + page_url: "http://a".into(), + chunk_count: 1, + chunks: vec!["chunk".into()], + }, + ); + r.record( + &cid("conv-all"), + RecorderEvent::RerankResult { + query: "rust".into(), + input_count: 1, + top_k: vec![RerankedChunk { + source_url: "http://a".into(), + score: 0.8, + text: "chunk".into(), + }], + }, + ); + r.record( + &cid("conv-all"), + RecorderEvent::JudgeVerdict { + stage: "snippet".into(), + raw: "{}".into(), + normalized: json!({"sufficiency": "Sufficient"}), + }, + ); + r.record( + &cid("conv-all"), + RecorderEvent::Warning { + kind: "JudgeFailed".into(), + payload: json!({"reason": "parse"}), + }, + ); + r.record( + &cid("conv-all"), + RecorderEvent::SearchEventEmitted { + event: json!({"type": "Done"}), + }, + ); + r.record( + 
&cid("conv-all"), + RecorderEvent::TurnEnd { + turn_id: "t".into(), + final_action: "answer".into(), + final_source_urls: vec!["http://a".into()], + total_latency_ms: 1234, + error: None, + }, + ); + let lines = read_lines(r.path()); + let kinds: Vec<&str> = lines.iter().map(|l| l["kind"].as_str().unwrap()).collect(); + assert_eq!( + kinds, + vec![ + "turn_start", + "llm_call", + "streaming_llm_call", + "searxng_query", + "reader_batch", + "chunker_batch", + "rerank_result", + "judge_verdict", + "warning", + "search_event", + "turn_end" + ] + ); + for line in &lines { + assert_eq!(line["domain"], "search"); + assert_eq!(line["conversation_id"], "conv-all"); + } + } + + #[test] + fn file_recorder_serializes_every_chat_variant() { + let root = fresh_dir(); + let r = FileRecorder::for_conversation(&root, TraceDomain::Chat, &cid("conv-chat")); + r.record( + &cid("conv-chat"), + RecorderEvent::ConversationStart { + model: "qwen3:4b".into(), + system_prompt: "sys".into(), + }, + ); + r.record( + &cid("conv-chat"), + RecorderEvent::UserMessage { + content: "hello".into(), + attached_images: vec!["/tmp/img.jpg".into()], + slash_command: Some("/screen".into()), + }, + ); + r.record( + &cid("conv-chat"), + RecorderEvent::AssistantThinking { + chunk: "thinking...".into(), + }, + ); + r.record( + &cid("conv-chat"), + RecorderEvent::AssistantTokens { + chunk: "Hi there".into(), + }, + ); + r.record( + &cid("conv-chat"), + RecorderEvent::AssistantComplete { + total_tokens: 42, + latency_ms: 500, + }, + ); + r.record( + &cid("conv-chat"), + RecorderEvent::ScreenCaptured { + image_path: "/tmp/snap.jpg".into(), + displays: 2, + }, + ); + r.record( + &cid("conv-chat"), + RecorderEvent::ConversationEnd { + reason: "quit".into(), + }, + ); + let lines = read_lines(r.path()); + let kinds: Vec<&str> = lines.iter().map(|l| l["kind"].as_str().unwrap()).collect(); + assert_eq!( + kinds, + vec![ + "conversation_start", + "user_message", + "assistant_thinking", + "assistant_tokens", + 
"assistant_complete", + "screen_captured", + "conversation_end" + ] + ); + assert_eq!(lines[1]["attached_images"], json!(["/tmp/img.jpg"])); + assert_eq!(lines[1]["slash_command"], "/screen"); + assert_eq!(lines[5]["displays"], 2); + assert_eq!(lines[6]["reason"], "quit"); + } + + #[test] + fn file_recorder_path_includes_domain_subfolder_and_conv_id_jsonl() { + let root = fresh_dir(); + let r = FileRecorder::for_conversation(&root, TraceDomain::Chat, &cid("conv-x")); + assert_eq!(r.path(), root.join("chat").join("conv-x.jsonl")); + let r2 = FileRecorder::for_conversation(&root, TraceDomain::Search, &cid("conv-y")); + assert_eq!(r2.path(), root.join("search").join("conv-y.jsonl")); + } + + #[test] + fn file_recorder_uses_arc_dyn_via_trait_object() { + let root = fresh_dir(); + let r: Arc = Arc::new(FileRecorder::for_conversation( + &root, + TraceDomain::Chat, + &cid("conv-arc"), + )); + r.record( + &cid("conv-arc"), + RecorderEvent::Warning { + kind: "k".into(), + payload: json!({}), + }, + ); + } + + #[test] + fn file_recorder_open_failure_is_silent_and_latches() { + let root = fresh_dir(); + let blocking_file = root.join("blocker"); + std::fs::write(&blocking_file, b"x").unwrap(); + // Pass a root path that cannot be created (regular file in the way). + let r = FileRecorder::for_conversation( + blocking_file.join("inside"), + TraceDomain::Chat, + &cid("conv-fail"), + ); + r.record( + &cid("conv-fail"), + RecorderEvent::Warning { + kind: "k".into(), + payload: json!({}), + }, + ); + assert!( + r.failed.load(Ordering::SeqCst), + "open failure must latch the recorder" + ); + // Second call exits at the latch check, exercising that branch. 
+ r.record( + &cid("conv-fail"), + RecorderEvent::Warning { + kind: "k2".into(), + payload: json!({}), + }, + ); + } + + #[test] + fn file_recorder_open_failure_when_target_path_is_a_directory_is_silent() { + let root = fresh_dir(); + let r = FileRecorder::for_conversation(&root, TraceDomain::Chat, &cid("conv-dir")); + // Pre-create the JSONL path AS A DIRECTORY so OpenOptions append + // hits "Is a directory" when it tries to open it as a file. + std::fs::create_dir_all(r.path()).unwrap(); + r.record( + &cid("conv-dir"), + RecorderEvent::Warning { + kind: "k".into(), + payload: json!({}), + }, + ); + assert!( + r.failed.load(Ordering::SeqCst), + "open failure on directory-collision must latch the recorder" + ); + } + + #[test] + fn file_recorder_warn_once_only_prints_first_failure() { + let root = fresh_dir(); + let blocking_file = root.join("blocker2"); + std::fs::write(&blocking_file, b"x").unwrap(); + let r = FileRecorder::for_conversation( + blocking_file.join("nested"), + TraceDomain::Chat, + &cid("conv-warn"), + ); + r.record( + &cid("conv-warn"), + RecorderEvent::Warning { + kind: "k".into(), + payload: json!({}), + }, + ); + assert!(r.warned.load(Ordering::SeqCst)); + r.warn_once("ignored"); + r.warn_once("ignored2"); + assert!(r.warned.load(Ordering::SeqCst)); + } + + #[test] + fn file_recorder_flush_is_safe_before_first_record() { + let root = fresh_dir(); + let r = FileRecorder::for_conversation(&root, TraceDomain::Chat, &cid("conv-empty")); + // No record calls yet; state is None. flush() must be a no-op. + r.flush(); + assert!( + !r.path().exists(), + "flush before record must not create file" + ); + } + + #[test] + fn file_recorder_flush_after_record_is_safe() { + let root = fresh_dir(); + let r = FileRecorder::for_conversation(&root, TraceDomain::Chat, &cid("conv-flush")); + r.record( + &cid("conv-flush"), + RecorderEvent::AssistantTokens { chunk: "x".into() }, + ); + r.flush(); + // File was written and flushed; subsequent read sees the line. 
+        assert_eq!(read_lines(r.path()).len(), 1);
+    }
+
+    #[test]
+    fn file_recorder_appends_when_constructed_for_existing_path() {
+        let root = fresh_dir();
+        let r1 = FileRecorder::for_conversation(&root, TraceDomain::Chat, &cid("conv-resume"));
+        r1.record(
+            &cid("conv-resume"),
+            RecorderEvent::UserMessage {
+                content: "first".into(),
+                attached_images: vec![],
+                slash_command: None,
+            },
+        );
+        r1.flush();
+        // Second recorder for the same conv: simulates registry re-create
+        // after eviction. Must append, not truncate.
+        let r2 = FileRecorder::for_conversation(&root, TraceDomain::Chat, &cid("conv-resume"));
+        r2.record(
+            &cid("conv-resume"),
+            RecorderEvent::UserMessage {
+                content: "second".into(),
+                attached_images: vec![],
+                slash_command: None,
+            },
+        );
+        let lines = read_lines(r1.path());
+        assert_eq!(lines.len(), 2, "second recorder must append, not truncate");
+        assert_eq!(lines[0]["content"], "first");
+        assert_eq!(lines[1]["content"], "second");
+    }
+
+    #[test]
+    fn mock_recorder_collects_events_in_order_with_conv_id() {
+        let m = MockRecorder::new();
+        m.record(
+            &cid("conv-a"),
+            RecorderEvent::Warning {
+                kind: "a".into(),
+                payload: json!({}),
+            },
+        );
+        m.record(
+            &cid("conv-b"),
+            RecorderEvent::Warning {
+                kind: "b".into(),
+                payload: json!({}),
+            },
+        );
+        assert_eq!(m.len(), 2);
+        let snap = m.snapshot();
+        assert_eq!(snap[0].0, cid("conv-a"));
+        assert_eq!(snap[1].0, cid("conv-b"));
+        let dump: Vec<Value> = snap
+            .iter()
+            .map(|(_, e)| serde_json::to_value(e).unwrap())
+            .collect();
+        assert_eq!(dump[0]["warning_kind"], "a");
+        assert_eq!(dump[1]["warning_kind"], "b");
+    }
+}
diff --git a/src-tauri/src/trace/registry.rs b/src-tauri/src/trace/registry.rs
new file mode 100644
index 0000000..55148c6
--- /dev/null
+++ b/src-tauri/src/trace/registry.rs
@@ -0,0 +1,552 @@
+//! Multi-conversation registry that fans events out to per-conversation
+//! `FileRecorder` instances.
+//!
+//!
+//! `RegistryRecorder` is the production composition installed in Tauri
+//! managed state when [`crate::config::schema::DebugSection::trace_enabled`]
+//! is true. It owns one `Arc<FileRecorder>` per `(TraceDomain,
+//! ConversationId)` pair and routes every incoming event to the right
+//! file based on the event's `domain()` and the `conversation_id`
+//! passed to `record()`.
+//!
+//! # Concurrency
+//!
+//! The registry uses a `parking_lot::RwLock` so the fast path
+//! (the recorder for this conversation already exists) is a cheap
+//! read-lock + clone of an `Arc<FileRecorder>`. The slow path
+//! (first event for a `(domain, conv_id)` pair) takes the write-lock,
+//! double-checks the entry, and lazily inserts a new `FileRecorder`.
+//!
+//! The `Arc<FileRecorder>` returned from the registry can be cached by
+//! callers in their per-conversation context to skip the registry
+//! lookup entirely on hot paths (e.g., per-token `AssistantTokens`
+//! emission). `commands::ask_ollama` does exactly this.
+//!
+//! # Eviction and late-event tolerance
+//!
+//! Two events trigger eviction:
+//! - `RecorderEvent::ConversationEnd` evicts the chat-domain entry.
+//! - `RecorderEvent::TurnEnd` evicts the search-domain entry, so a
+//!   long-lived chat session with many `/search` turns does not
+//!   accumulate one open file handle per turn until process exit.
+//!
+//! In both cases the registry flushes the file before dropping its
+//! strong reference. In-flight `Arc<FileRecorder>` clones held by the
+//! emitting task keep the file handle alive until they drop; `Arc`
+//! semantics handle the ordering with no explicit synchronization.
+//!
+//! Late events arriving after the eviction-triggering event (e.g. a
+//! cancelled stream's final `AssistantTokens` arriving after the
+//! frontend's `record_conversation_end` call) lazily re-insert a new
+//! `FileRecorder` for the evicted key. Because `FileRecorder` opens its
+//! file in append mode, the late event lands as a benign trailing line
+//! in the existing file. Consumers MUST tolerate post-end lines: the
+//! canonical end of a conversation is the LAST line with
+//! `kind: "conversation_end"` (chat) or the LAST `kind: "turn_end"`
+//! for that turn id (search), not the first.
+
+use std::collections::HashMap;
+use std::path::PathBuf;
+use std::sync::Arc;
+
+use parking_lot::RwLock;
+
+use super::ids::ConversationId;
+use super::recorder::{FileRecorder, RecorderEvent, TraceDomain, TraceRecorder};
+
+/// Production trace recorder. Wraps a per-`(domain, conversation_id)`
+/// map of file handles rooted at `traces_root`.
+#[derive(Debug)]
+pub struct RegistryRecorder {
+    /// Root path under which the per-domain subdirectories live. Each
+    /// `FileRecorder` resolves its own path as
+    /// `<traces_root>/<domain>/<conversation_id>.jsonl`.
+    traces_root: PathBuf,
+    /// Per-conversation file handles. `parking_lot::RwLock` matches the
+    /// rest of the codebase and gives us a non-poisoning lock; the read
+    /// path dominates because each conversation only inserts once.
+    files: RwLock<HashMap<(TraceDomain, ConversationId), Arc<FileRecorder>>>,
+}
+
+impl RegistryRecorder {
+    /// Constructs an empty registry rooted at `traces_root`. No
+    /// directories are created here; each `FileRecorder` lazily
+    /// creates its own parent directory on first record.
+    pub fn new(traces_root: impl Into<PathBuf>) -> Self {
+        Self {
+            traces_root: traces_root.into(),
+            files: RwLock::new(HashMap::new()),
+        }
+    }
+
+    /// Returns the recorder for `(domain, conversation_id)`, creating
+    /// it lazily if needed. Public for hot-path callers (e.g.
+    /// `commands::ask_ollama`) that want to cache the `Arc` once and
+    /// skip the registry lookup on every subsequent emit.
+    ///
+    /// Equivalent to a `record()` of `()`: read-locks the map, returns
+    /// the existing handle on hit, and on miss takes the write-lock,
+    /// double-checks the entry, and lazily inserts.
+    pub fn recorder_for(
+        &self,
+        domain: TraceDomain,
+        conversation_id: &ConversationId,
+    ) -> Arc<FileRecorder> {
+        let key = (domain, conversation_id.clone());
+        if let Some(existing) = self.files.read().get(&key).cloned() {
+            return existing;
+        }
+        let mut write = self.files.write();
+        write
+            .entry(key)
+            .or_insert_with(|| {
+                Arc::new(FileRecorder::for_conversation(
+                    &self.traces_root,
+                    domain,
+                    conversation_id,
+                ))
+            })
+            .clone()
+    }
+
+    /// Number of `(domain, conversation_id)` entries currently held in
+    /// the map. Used by tests to assert eviction; not part of the
+    /// production hot path.
+    #[cfg(test)]
+    pub(crate) fn len(&self) -> usize {
+        self.files.read().len()
+    }
+
+    /// Returns true if `(domain, conversation_id)` currently has a
+    /// cached file handle in the registry. Tests use this to assert
+    /// that `ConversationEnd` evicted the entry.
+    #[cfg(test)]
+    pub(crate) fn contains(&self, domain: TraceDomain, conversation_id: &ConversationId) -> bool {
+        self.files
+            .read()
+            .contains_key(&(domain, conversation_id.clone()))
+    }
+}
+
+impl TraceRecorder for RegistryRecorder {
+    /// Routes the event to the right file and, on the per-domain
+    /// terminal events, flushes + evicts the entry from the map.
+    /// Chat domain evicts on `ConversationEnd`; search domain evicts on
+    /// `TurnEnd`.
+    fn record(&self, conversation_id: &ConversationId, event: RecorderEvent) {
+        let domain = event.domain();
+        let evict = event.is_conversation_end() || event.is_turn_end();
+        let recorder = self.recorder_for(domain, conversation_id);
+        recorder.record(conversation_id, event);
+        if evict {
+            // Flush BEFORE evicting so the post-end file is fully
+            // durable on disk before the registry drops its strong
+            // reference. In-flight `Arc<FileRecorder>` clones in
+            // streaming tasks keep the file alive until they drop,
+            // but our flush gives
+            // the post-end durability guarantee independent of those.
+            recorder.flush();
+            self.files
+                .write()
+                .remove(&(domain, conversation_id.clone()));
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::trace::recorder::ReaderUrlOutcome;
+    use serde_json::{json, Value};
+    use std::path::Path;
+
+    fn fresh_dir() -> PathBuf {
+        let dir =
+            std::env::temp_dir().join(format!("thuki-trace-registry-{}", uuid::Uuid::new_v4()));
+        std::fs::create_dir_all(&dir).unwrap();
+        dir
+    }
+
+    fn cid(s: &str) -> ConversationId {
+        ConversationId::new(s)
+    }
+
+    fn read_lines(path: &Path) -> Vec<Value> {
+        let s = std::fs::read_to_string(path).expect("read trace file");
+        s.lines()
+            .map(|l| serde_json::from_str::<Value>(l).expect("valid json"))
+            .collect()
+    }
+
+    #[test]
+    fn lazy_insert_creates_handle_on_first_event() {
+        let root = fresh_dir();
+        let reg = RegistryRecorder::new(&root);
+        assert_eq!(reg.len(), 0);
+        reg.record(
+            &cid("conv-a"),
+            RecorderEvent::UserMessage {
+                content: "hi".into(),
+                attached_images: vec![],
+                slash_command: None,
+            },
+        );
+        assert_eq!(reg.len(), 1, "first event must lazy-insert");
+        assert!(reg.contains(TraceDomain::Chat, &cid("conv-a")));
+    }
+
+    #[test]
+    fn second_event_reuses_existing_handle() {
+        let root = fresh_dir();
+        let reg = RegistryRecorder::new(&root);
+        reg.record(
+            &cid("conv-b"),
+            RecorderEvent::UserMessage {
+                content: "1".into(),
+                attached_images: vec![],
+                slash_command: None,
+            },
+        );
+        reg.record(
+            &cid("conv-b"),
+            RecorderEvent::UserMessage {
+                content: "2".into(),
+                attached_images: vec![],
+                slash_command: None,
+            },
+        );
+        assert_eq!(reg.len(), 1, "second event must reuse handle, not insert");
+        let path = root.join("chat").join("conv-b.jsonl");
+        let lines = read_lines(&path);
+        assert_eq!(lines.len(), 2);
+    }
+
+    #[test]
+    fn two_conversations_get_two_files() {
+        let root = fresh_dir();
+        let reg = RegistryRecorder::new(&root);
+        reg.record(
+            &cid("conv-x"),
+            RecorderEvent::UserMessage {
+                content: "x1".into(),
+                attached_images: vec![],
+                slash_command: None,
+
}, + ); + reg.record( + &cid("conv-y"), + RecorderEvent::UserMessage { + content: "y1".into(), + attached_images: vec![], + slash_command: None, + }, + ); + assert_eq!(reg.len(), 2); + assert_eq!(read_lines(&root.join("chat").join("conv-x.jsonl")).len(), 1); + assert_eq!(read_lines(&root.join("chat").join("conv-y.jsonl")).len(), 1); + } + + #[test] + fn chat_and_search_for_same_conv_get_separate_files_in_separate_folders() { + let root = fresh_dir(); + let reg = RegistryRecorder::new(&root); + reg.record( + &cid("conv-z"), + RecorderEvent::UserMessage { + content: "z".into(), + attached_images: vec![], + slash_command: None, + }, + ); + reg.record( + &cid("conv-z"), + RecorderEvent::SearxngQuery { + query: "q".into(), + url: "u".into(), + status: Some(200), + response_raw: None, + normalized_results: json!([]), + latency_ms: 1, + error: None, + }, + ); + assert_eq!(reg.len(), 2, "two domains × one conv → two entries"); + assert!(reg.contains(TraceDomain::Chat, &cid("conv-z"))); + assert!(reg.contains(TraceDomain::Search, &cid("conv-z"))); + assert_eq!(read_lines(&root.join("chat").join("conv-z.jsonl")).len(), 1); + assert_eq!( + read_lines(&root.join("search").join("conv-z.jsonl")).len(), + 1 + ); + } + + #[test] + fn conversation_end_evicts_chat_entry() { + let root = fresh_dir(); + let reg = RegistryRecorder::new(&root); + reg.record( + &cid("conv-end"), + RecorderEvent::UserMessage { + content: "x".into(), + attached_images: vec![], + slash_command: None, + }, + ); + assert!(reg.contains(TraceDomain::Chat, &cid("conv-end"))); + reg.record( + &cid("conv-end"), + RecorderEvent::ConversationEnd { + reason: "user_reset".into(), + }, + ); + assert!( + !reg.contains(TraceDomain::Chat, &cid("conv-end")), + "ConversationEnd must evict the chat entry" + ); + } + + #[test] + fn late_event_after_end_appends_to_existing_file_via_lazy_recreate() { + let root = fresh_dir(); + let reg = RegistryRecorder::new(&root); + reg.record( + &cid("conv-late"), + 
RecorderEvent::UserMessage { + content: "first".into(), + attached_images: vec![], + slash_command: None, + }, + ); + reg.record( + &cid("conv-late"), + RecorderEvent::ConversationEnd { + reason: "user_reset".into(), + }, + ); + // The frontend signaled the end, but a stray AssistantTokens + // from a cancelled stream arrives late. The registry must + // tolerate this: re-insert + append, no panic, no duplicate + // file. + reg.record( + &cid("conv-late"), + RecorderEvent::AssistantTokens { + chunk: "stray".into(), + }, + ); + let path = root.join("chat").join("conv-late.jsonl"); + let lines = read_lines(&path); + assert_eq!( + lines.len(), + 3, + "late event must append, not create a second file" + ); + let kinds: Vec<&str> = lines.iter().map(|l| l["kind"].as_str().unwrap()).collect(); + assert_eq!( + kinds, + vec!["user_message", "conversation_end", "assistant_tokens"], + "consumers must tolerate post-end lines (canonical end is the LAST conversation_end)" + ); + } + + #[test] + fn recorder_for_returns_same_arc_on_repeated_lookup() { + let root = fresh_dir(); + let reg = RegistryRecorder::new(&root); + let a = reg.recorder_for(TraceDomain::Chat, &cid("conv-arc")); + let b = reg.recorder_for(TraceDomain::Chat, &cid("conv-arc")); + // Pointer equality: same Arc, no second insert. + assert!(Arc::ptr_eq(&a, &b)); + } + + #[test] + fn recorder_for_can_be_cached_for_hot_path() { + // Simulates the per-streaming-task caching pattern that + // `commands::ask_ollama` uses to bypass per-token registry + // lookup. 
+ let root = fresh_dir(); + let reg = RegistryRecorder::new(&root); + let cached = reg.recorder_for(TraceDomain::Chat, &cid("conv-hot")); + for i in 0..10 { + cached.record( + &cid("conv-hot"), + RecorderEvent::AssistantTokens { + chunk: format!("tok-{i}"), + }, + ); + } + let path = root.join("chat").join("conv-hot.jsonl"); + let lines = read_lines(&path); + assert_eq!(lines.len(), 10); + for (i, line) in lines.iter().enumerate() { + assert_eq!(line["chunk"], format!("tok-{i}")); + } + } + + #[test] + fn turn_end_evicts_search_entry() { + let root = fresh_dir(); + let reg = RegistryRecorder::new(&root); + reg.record( + &cid("conv-search-end"), + RecorderEvent::SearxngQuery { + query: "q".into(), + url: "u".into(), + status: Some(200), + response_raw: None, + normalized_results: json!([]), + latency_ms: 1, + error: None, + }, + ); + assert!(reg.contains(TraceDomain::Search, &cid("conv-search-end"))); + reg.record( + &cid("conv-search-end"), + RecorderEvent::TurnEnd { + turn_id: "t-1".into(), + final_action: "answered".into(), + final_source_urls: vec![], + total_latency_ms: 42, + error: None, + }, + ); + assert!( + !reg.contains(TraceDomain::Search, &cid("conv-search-end")), + "TurnEnd must evict the search-domain entry" + ); + } + + #[test] + fn turn_end_leaves_chat_entry_intact() { + // Search-domain TurnEnd must not evict the chat-domain entry + // for the same conversation id; the chat trace continues across + // multiple `/search` turns. 
+ let root = fresh_dir(); + let reg = RegistryRecorder::new(&root); + reg.record( + &cid("conv-mixed"), + RecorderEvent::UserMessage { + content: "before search".into(), + attached_images: vec![], + slash_command: None, + }, + ); + reg.record( + &cid("conv-mixed"), + RecorderEvent::TurnEnd { + turn_id: "t-1".into(), + final_action: "answered".into(), + final_source_urls: vec![], + total_latency_ms: 10, + error: None, + }, + ); + assert!( + reg.contains(TraceDomain::Chat, &cid("conv-mixed")), + "TurnEnd is search-domain; the chat-domain entry must remain" + ); + assert!(!reg.contains(TraceDomain::Search, &cid("conv-mixed"))); + } + + #[test] + fn many_search_turns_do_not_accumulate_entries() { + // Long-lived chat session that fires many `/search` turns: the + // map size must not grow without bound. Each TurnEnd evicts. + let root = fresh_dir(); + let reg = RegistryRecorder::new(&root); + for i in 0..20 { + let conv = format!("conv-{i}"); + reg.record( + &cid(&conv), + RecorderEvent::SearxngQuery { + query: "q".into(), + url: "u".into(), + status: Some(200), + response_raw: None, + normalized_results: json!([]), + latency_ms: 1, + error: None, + }, + ); + reg.record( + &cid(&conv), + RecorderEvent::TurnEnd { + turn_id: format!("t-{i}"), + final_action: "answered".into(), + final_source_urls: vec![], + total_latency_ms: 1, + error: None, + }, + ); + } + assert_eq!( + reg.len(), + 0, + "every search-only conversation must evict on TurnEnd" + ); + } + + #[test] + fn late_search_event_after_turn_end_appends_via_lazy_recreate() { + let root = fresh_dir(); + let reg = RegistryRecorder::new(&root); + reg.record( + &cid("conv-late-search"), + RecorderEvent::SearxngQuery { + query: "q".into(), + url: "u".into(), + status: Some(200), + response_raw: None, + normalized_results: json!([]), + latency_ms: 1, + error: None, + }, + ); + reg.record( + &cid("conv-late-search"), + RecorderEvent::TurnEnd { + turn_id: "t-late".into(), + final_action: "answered".into(), + 
final_source_urls: vec![], + total_latency_ms: 1, + error: None, + }, + ); + // Stray late event from a delayed reader callback. Must + // append, not panic, not duplicate the file. + reg.record( + &cid("conv-late-search"), + RecorderEvent::Warning { + kind: "reader_partial_failure".into(), + payload: json!({}), + }, + ); + let path = root.join("search").join("conv-late-search.jsonl"); + let lines = read_lines(&path); + assert_eq!(lines.len(), 3); + let kinds: Vec<&str> = lines.iter().map(|l| l["kind"].as_str().unwrap()).collect(); + assert_eq!(kinds, vec!["searxng_query", "turn_end", "warning"]); + } + + #[test] + fn search_event_with_reader_batch_payload_serializes_through_registry() { + let root = fresh_dir(); + let reg = RegistryRecorder::new(&root); + reg.record( + &cid("conv-r"), + RecorderEvent::ReaderBatch { + urls: vec!["http://a".into()], + per_url: vec![ReaderUrlOutcome { + url: "http://a".into(), + status: Some(200), + latency_ms: 9, + raw_body: Some("".into()), + extracted_text: Some("hi".into()), + error: None, + }], + batch_latency_ms: 11, + batch_error: None, + }, + ); + let lines = read_lines(&root.join("search").join("conv-r.jsonl")); + assert_eq!(lines[0]["kind"], "reader_batch"); + assert_eq!(lines[0]["domain"], "search"); + } +} diff --git a/src-tauri/tests/search_pipeline_e2e.rs b/src-tauri/tests/search_pipeline_e2e.rs index 76fcd88..7ec19f4 100644 --- a/src-tauri/tests/search_pipeline_e2e.rs +++ b/src-tauri/tests/search_pipeline_e2e.rs @@ -18,23 +18,32 @@ use wiremock::{Mock, MockServer, ResponseTemplate}; use thuki_agent_lib::commands::ConversationHistory; use thuki_agent_lib::config::defaults::DEFAULT_NUM_CTX; -use thuki_agent_lib::search::recorder::{FileRecorder, NoopRecorder, PipelineRecorder}; use thuki_agent_lib::search::{ run_agentic, Action, JudgeCaller, JudgeSource, JudgeVerdict, RouterJudgeCaller, RouterJudgeOutput, SearchError, SearchEvent, SearchMetadata, SearchWarning, Sufficiency, }; +use thuki_agent_lib::trace::{ + 
BoundRecorder, ConversationId, FileRecorder, NoopRecorder, TraceDomain, TraceRecorder, +}; -/// Returns a recorder that writes `