From 98a53d38b422a38f85a10fddca693e29a5fee120 Mon Sep 17 00:00:00 2001 From: Matjaz Domen Pecan Date: Wed, 29 Apr 2026 23:07:01 +0200 Subject: [PATCH 1/2] fix(rewrite): preserve newline separators in compound commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When an agent submits a multi-line bash command via a hook (Claude Code, Gemini, Cursor) and at least one segment matches a tokf filter, the rewrite engine used to drop the newlines between segments — gluing the adjacent commands together. With a `head -N` pipe in one line and any following token, the result was malformed: `head -1\necho` collapsed to `head -1echo`, which the shell parsed as `head -1 -e -c -h -o > 1echo` and silently created stray files (1echo, 1cat, 1tokf, …) in the agent's cwd. See #355. Root cause: rable parses `cmd1\ncmd2` as two top-level AST nodes rather than a single List node, so `compound_segments` emitted an empty separator for them. The reassembly in `rewrite_with_config_and_options` joined segments with that empty separator and the newline was lost. Fix: between consecutive top-level nodes, compute the separator as the literal source slice between each node's end-span and the next node's start-span. That captures `\n`, `;`, whitespace, comments, etc. byte-for-byte, so `seg + sep` round-trips the original source. Also add `TOKF_HOOK_LOG=/path/to/log`, an opt-in diagnostic that appends one YAML record per hook invocation (BEFORE / AFTER / outcome). The bug in #355 was invisible from `tokf rewrite` runs because the reporter tested without filters; live-session bisection needs this. Closes #355 Co-Authored-By: Claude Opus 4.7 (1M context) --- README.md | 36 ++++ crates/tokf-cli/src/hook/debug_log.rs | 201 ++++++++++++++++++ crates/tokf-cli/src/hook/mod.rs | 72 +++++-- crates/tokf-cli/src/rewrite/bash_ast.rs | 20 +- crates/tokf-cli/src/rewrite/bash_ast_tests.rs | 66 ++++++ crates/tokf-cli/src/rewrite/tests.rs | 33 +++ crates/tokf-cli/tests/cli_hook_handle.rs | 133 ++++++++++++ docs/diagnostics.md | 36 ++++ 8 files changed, 577 insertions(+), 20 deletions(-) create mode 100644 crates/tokf-cli/src/hook/debug_log.rs diff --git a/README.md b/README.md index 445ae02..a72bc14 100644 --- a/README.md +++ b/README.md @@ -2175,6 +2175,42 @@ tokf completions nushell | save -f ~/.config/nushell/tokf.nu source ~/.config/nushell/tokf.nu ``` +## Logging hook activity (`TOKF_HOOK_LOG`) + +When chasing a rewrite bug that only manifests during a live AI-tool session, set the `TOKF_HOOK_LOG` environment variable to a writable file path. Every `tokf hook handle` invocation then appends a YAML record covering the BEFORE / AFTER command strings and the resulting outcome. Activate it in the same process group as Claude Code (or whichever tool is running the hook) — for Claude Code, that means setting it in your shell profile (`.zshrc`/`.bashrc`) before launching the agent. + +```sh +export TOKF_HOOK_LOG=$HOME/.cache/tokf/hook.log +``` + +Each record looks like: + +```yaml +--- +ts: 1714408876 +tool: Bash +format: claude-code +outcome: Allow +before: |- + cargo test + ls | head -1 + echo hi +after: |- + tokf run cargo test + tokf run --baseline-pipe 'head -1' ls + echo hi +``` + +- `ts` is Unix epoch seconds (use `date -r ` on macOS / `date -d @` on Linux to humanise). +- `tool` is the AI tool's tool-name (`Bash`, `run_shell_command`, etc.). +- `format` is `claude-code` / `gemini` / `cursor`. +- `outcome` is one of `Allow`, `Ask`, `Deny`, `PassThrough`. +- `after: ~` (YAML null) means the hook chose not to rewrite — the agent ran the original command verbatim. + +Records are appended; nothing rotates or trims the file, so prune it yourself if it grows large. When the variable is unset (the default), nothing is written and the hook has zero filesystem overhead. + +This was added in response to issue #355, where stray `1echo` files in the agent's cwd turned out to come from a multi-line rewrite collapsing newlines into adjacent tokens — the kind of bug that's invisible in `tokf rewrite "..."` runs but obvious in a hook log. + --- diff --git a/crates/tokf-cli/src/hook/debug_log.rs b/crates/tokf-cli/src/hook/debug_log.rs new file mode 100644 index 0000000..0be9dd1 --- /dev/null +++ b/crates/tokf-cli/src/hook/debug_log.rs @@ -0,0 +1,201 @@ +//! Append-only diagnostic log for hook invocations. Activated by setting +//! the `TOKF_HOOK_LOG` env var to a writable file path. Each invocation +//! writes one YAML record covering the BEFORE / AFTER command strings and +//! the outcome. Best-effort: any I/O error is silently dropped so a +//! missing/unwritable log path never blocks the hook (#355). + +use std::fs::OpenOptions; +use std::io::Write; +use std::path::PathBuf; +use std::time::{SystemTime, UNIX_EPOCH}; + +use tokf_hook_types::HookFormat; + +use super::HookOutcome; + +const ENV_VAR: &str = "TOKF_HOOK_LOG"; + +/// Write a single hook-event record to the path in `TOKF_HOOK_LOG`, if set. +/// +/// `after` is the rewritten command string when the rewrite changed the +/// input; pass `None` when the hook passed the command through unchanged. +pub(super) fn log_event( + tool: &str, + format: HookFormat, + before: &str, + after: Option<&str>, + outcome: HookOutcome, +) { + let Some(path) = log_path() else { + return; + }; + let record = format_record(&Record { + tool, + format, + before, + after, + outcome, + now: SystemTime::now(), + }); + let _ = OpenOptions::new() + .create(true) + .append(true) + .open(&path) + .and_then(|mut f| f.write_all(record.as_bytes())); +} + +fn log_path() -> Option { + let raw = std::env::var_os(ENV_VAR)?; + if raw.is_empty() { + return None; + } + Some(PathBuf::from(raw)) +} + +struct Record<'a> { + tool: &'a str, + format: HookFormat, + before: &'a str, + after: Option<&'a str>, + outcome: HookOutcome, + now: SystemTime, +} + +fn format_record(rec: &Record<'_>) -> String { + let ts = rec + .now + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + let outcome_str = match rec.outcome { + HookOutcome::Allow => "Allow", + HookOutcome::Ask => "Ask", + HookOutcome::Deny => "Deny", + HookOutcome::PassThrough => "PassThrough", + }; + let after_block = rec.after.map_or_else( + || "after: ~\n".to_string(), + |s| format!("after: |-\n{}\n", indent(s, " ")), + ); + format!( + "---\nts: {ts}\ntool: {tool}\nformat: {format}\noutcome: {outcome}\nbefore: |-\n{before}\n{after_block}", + tool = rec.tool, + format = rec.format.as_str(), + outcome = outcome_str, + before = indent(rec.before, " "), + ) +} + +fn indent(s: &str, prefix: &str) -> String { + if s.is_empty() { + return prefix.to_string(); + } + s.lines() + .map(|line| format!("{prefix}{line}")) + .collect::>() + .join("\n") +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use std::time::Duration; + + use super::*; + + fn at_epoch(secs: u64) -> SystemTime { + UNIX_EPOCH + Duration::from_secs(secs) + } + + #[test] + fn record_with_rewrite() { + let r = format_record(&Record { + tool: "Bash", + format: HookFormat::ClaudeCode, + before: "git status", + after: Some("tokf run git status"), + outcome: HookOutcome::Allow, + now: at_epoch(1_700_000_000), + }); + assert!(r.starts_with("---\nts: 1700000000\n")); + assert!(r.contains("tool: Bash\n")); + assert!(r.contains("format: claude-code\n")); + assert!(r.contains("outcome: Allow\n")); + assert!(r.contains("before: |-\n git status\n")); + assert!(r.contains("after: |-\n tokf run git status\n")); + } + + #[test] + fn record_passthrough_has_null_after() { + let r = format_record(&Record { + tool: "Bash", + format: HookFormat::ClaudeCode, + before: "ls", + after: None, + outcome: HookOutcome::PassThrough, + now: at_epoch(1_700_000_000), + }); + assert!(r.contains("outcome: PassThrough\n")); + assert!(r.contains("after: ~\n")); + } + + #[test] + fn multiline_before_indented_under_block_scalar() { + // The most useful diagnostic case (#355): newline-bearing BEFORE + // blocks must be indented so the YAML block scalar parses cleanly. + let r = format_record(&Record { + tool: "Bash", + format: HookFormat::ClaudeCode, + before: "cargo test\nls | head -1\necho hi", + after: Some("tokf run cargo test\ntokf run --baseline-pipe 'head -1' ls\necho hi"), + outcome: HookOutcome::Allow, + now: at_epoch(0), + }); + assert!( + r.contains("before: |-\n cargo test\n ls | head -1\n echo hi\n"), + "BEFORE block not indented as expected: {r}" + ); + assert!( + r.contains( + "after: |-\n tokf run cargo test\n tokf run --baseline-pipe 'head -1' ls\n echo hi\n" + ), + "AFTER block not indented as expected: {r}" + ); + } + + #[test] + fn empty_input_does_not_panic() { + let r = format_record(&Record { + tool: "Bash", + format: HookFormat::ClaudeCode, + before: "", + after: None, + outcome: HookOutcome::PassThrough, + now: at_epoch(0), + }); + // Block scalar with empty body indents to a single prefix-only line. + assert!(r.contains("before: |-\n \n")); + } + + #[test] + fn gemini_and_cursor_format_strings() { + let r_g = format_record(&Record { + tool: "run_shell_command", + format: HookFormat::Gemini, + before: "ls", + after: None, + outcome: HookOutcome::PassThrough, + now: at_epoch(0), + }); + assert!(r_g.contains("format: gemini\n")); + let r_c = format_record(&Record { + tool: "Shell", + format: HookFormat::Cursor, + before: "ls", + after: None, + outcome: HookOutcome::PassThrough, + now: at_epoch(0), + }); + assert!(r_c.contains("format: cursor\n")); + } +} diff --git a/crates/tokf-cli/src/hook/mod.rs b/crates/tokf-cli/src/hook/mod.rs index cfcb4da..6067f95 100644 --- a/crates/tokf-cli/src/hook/mod.rs +++ b/crates/tokf-cli/src/hook/mod.rs @@ -3,6 +3,7 @@ pub mod cline; pub mod codex; pub mod copilot; pub mod cursor; +mod debug_log; pub mod gemini; pub mod instructions; pub mod opencode; @@ -199,7 +200,8 @@ fn handle_cursor_json_inner( }; process_command( - command, + &command, + "shell", json, HookFormat::Cursor, user_config, @@ -273,7 +275,8 @@ fn handle_generic( }; process_command( - command, + &command, + expected_tool, json, format, user_config, @@ -287,10 +290,12 @@ fn handle_generic( /// Shared post-deserialization logic for all hook formats. /// /// Handles both the external-engine path (verdict on every command) and -/// the no-engine path (rewrite-only, auto-allow). +/// the no-engine path (rewrite-only, auto-allow). When `TOKF_HOOK_LOG` +/// is set in the env, every invocation appends one diagnostic record. #[allow(clippy::too_many_arguments)] fn process_command( - command: String, + command: &str, + tool_name: &str, json: &str, format: HookFormat, user_config: &RewriteConfig, @@ -299,42 +304,73 @@ fn process_command( build_ask: impl FnOnce(String, Option) -> R, build_deny: impl FnOnce(String, Option) -> R, ) -> HookOutcome { + let (outcome, after) = decide( + command, + json, + format, + user_config, + search_dirs, + build_allow, + build_ask, + build_deny, + ); + debug_log::log_event(tool_name, format, command, after.as_deref(), outcome); + outcome +} + +/// Compute the hook decision and emit the response for it. Returns the +/// outcome plus the rewritten command string when one was emitted (for +/// the diagnostic log; `None` means no rewrite was sent to the agent). +#[allow(clippy::too_many_arguments)] +fn decide( + command: &str, + json: &str, + format: HookFormat, + user_config: &RewriteConfig, + search_dirs: &[PathBuf], + build_allow: impl FnOnce(String, Option) -> R, + build_ask: impl FnOnce(String, Option) -> R, + build_deny: impl FnOnce(String, Option) -> R, +) -> (HookOutcome, Option) { // When an external permission engine is configured, consult it on every // command — even ones tokf has no filter for. - if let Some(verdict) = query_external_engine(&command, json, format, user_config) { + if let Some(verdict) = query_external_engine(command, json, format, user_config) { // Deny doesn't need a rewrite — the command won't execute. if verdict.decision == PermissionDecision::Deny { - if emit_response(&build_deny(command, verdict.reason)) { - return HookOutcome::Deny; + if emit_response(&build_deny(command.to_string(), verdict.reason)) { + return (HookOutcome::Deny, None); } - return HookOutcome::PassThrough; + return (HookOutcome::PassThrough, None); } - let rewritten = rewrite::rewrite_with_config(&command, user_config, search_dirs, false); - let output_cmd = if rewritten == command { - command - } else { + let rewritten = rewrite::rewrite_with_config(command, user_config, search_dirs, false); + let rewrite_changed = rewritten != command; + let output_cmd = if rewrite_changed { rewritten + } else { + command.to_string() }; + let logged_after = rewrite_changed.then(|| output_cmd.clone()); let (response, outcome) = match verdict.decision { PermissionDecision::Ask => (build_ask(output_cmd, verdict.reason), HookOutcome::Ask), _ => (build_allow(output_cmd, verdict.reason), HookOutcome::Allow), }; if emit_response(&response) { - return outcome; + return (outcome, logged_after); } - return HookOutcome::PassThrough; + return (HookOutcome::PassThrough, logged_after); } // No external engine — only act when tokf has a matching filter. - let rewritten = rewrite::rewrite_with_config(&command, user_config, search_dirs, false); + let rewritten = rewrite::rewrite_with_config(command, user_config, search_dirs, false); if rewritten == command { - return HookOutcome::PassThrough; + return (HookOutcome::PassThrough, None); } + let logged_after = Some(rewritten.clone()); if emit_response(&build_allow(rewritten, None)) { - HookOutcome::Allow + (HookOutcome::Allow, logged_after) } else { - HookOutcome::PassThrough + (HookOutcome::PassThrough, logged_after) } } diff --git a/crates/tokf-cli/src/rewrite/bash_ast.rs b/crates/tokf-cli/src/rewrite/bash_ast.rs index dcb3705..ff05d48 100644 --- a/crates/tokf-cli/src/rewrite/bash_ast.rs +++ b/crates/tokf-cli/src/rewrite/bash_ast.rs @@ -87,10 +87,26 @@ impl ParsedCommand { /// nodes so each leaf command becomes its own segment. The **last** /// child of an inner list inherits its parent's operator, because in /// source order that operator is what visually follows it. + /// + /// Newline-separated commands (`cmd1\ncmd2`) are returned as multiple + /// **top-level** rable nodes rather than a single `List`, so the + /// separator between them is computed as the source slice between + /// each node's end-span and the next node's start-span. That captures + /// `\n`, `;`, whitespace, comments, etc. byte-for-byte, so a downstream + /// rebuild via `seg + sep` reproduces the original source. Without this + /// (issue #355) `head -1\necho` collapsed to `head -1echo`, producing + /// stray files in the agent's cwd. pub fn compound_segments(&self) -> Vec<(String, String)> { let mut result = Vec::new(); - for node in &self.nodes { - flatten_segments(node, &self.source, String::new(), &mut result); + let n = self.nodes.len(); + for (i, node) in self.nodes.iter().enumerate() { + let parent_sep = if i + 1 < n { + let next = &self.nodes[i + 1]; + self.source[node.span.end..next.span.start].to_string() + } else { + String::new() + }; + flatten_segments(node, &self.source, parent_sep, &mut result); } if result.is_empty() { return vec![(self.source.clone(), String::new())]; diff --git a/crates/tokf-cli/src/rewrite/bash_ast_tests.rs b/crates/tokf-cli/src/rewrite/bash_ast_tests.rs index 7b17e01..5521391 100644 --- a/crates/tokf-cli/src/rewrite/bash_ast_tests.rs +++ b/crates/tokf-cli/src/rewrite/bash_ast_tests.rs @@ -80,6 +80,72 @@ fn mixed_and_or_chain() { assert!(segs[2].1.is_empty()); } +// --- Regression tests for #355: top-level newline-separated commands --- +// +// rable parses `cmd1\ncmd2` as TWO top-level nodes (not a single List with +// a Newline operator), so each lands in `compound_segments` with the +// hard-coded empty separator. When the rewrite engine reassembles segments +// via `out.push_str(&rewritten); out.push_str(sep);`, the dropped newline +// glued e.g. `head -1` to a following `echo`, producing `head -1echo` as +// both a malformed flag and a redirect target file in the agent's cwd. + +#[test] +fn compound_segments_preserves_newline_separator() { + let p = ParsedCommand::parse("cmd1\ncmd2").unwrap(); + let segs = p.compound_segments(); + assert_eq!(segs.len(), 2); + assert_eq!(segs[0].0, "cmd1"); + assert!( + segs[0].1.contains('\n'), + "expected newline in separator, got {:?}", + segs[0].1 + ); + assert_eq!(segs[1].0, "cmd2"); + assert!(segs[1].1.is_empty()); +} + +#[test] +fn compound_segments_round_trip_byte_for_byte() { + // The reassembled `seg + sep` chain must reproduce the original source + // for any compound command, otherwise a downstream rewriter that + // returns segments unchanged could still corrupt the command. + for input in [ + "cmd1\ncmd2", + "cmd1\ncmd2\ncmd3", + "cmd1 && cmd2\ncmd3", + "cmd1\ncmd2 | head -1\ncmd3", + "cmd1; cmd2\ncmd3 && cmd4", + ] { + let p = ParsedCommand::parse(input).unwrap(); + let mut rebuilt = String::new(); + for (seg, sep) in p.compound_segments() { + rebuilt.push_str(&seg); + rebuilt.push_str(&sep); + } + assert_eq!(rebuilt, input, "round-trip failed for {input:?}"); + } +} + +#[test] +fn compound_segments_mixed_newline_and_and() { + // `cmd1 && cmd2\ncmd3` parses as a List(cmd1, cmd2) followed by a + // top-level Command(cmd3). The list's last item inherits a parent_sep + // computed from the gap between the list's source span and cmd3's span. + let p = ParsedCommand::parse("cmd1 && cmd2\ncmd3").unwrap(); + let segs = p.compound_segments(); + assert_eq!(segs.len(), 3); + assert_eq!(segs[0].0, "cmd1"); + assert!(segs[0].1.contains("&&")); + assert_eq!(segs[1].0, "cmd2"); + assert!( + segs[1].1.contains('\n'), + "expected newline after cmd2, got {:?}", + segs[1].1 + ); + assert_eq!(segs[2].0, "cmd3"); + assert!(segs[2].1.is_empty()); +} + // --- pipe detection --- #[test] diff --git a/crates/tokf-cli/src/rewrite/tests.rs b/crates/tokf-cli/src/rewrite/tests.rs index 33657f1..c1dc1a8 100644 --- a/crates/tokf-cli/src/rewrite/tests.rs +++ b/crates/tokf-cli/src/rewrite/tests.rs @@ -542,3 +542,36 @@ fn wrapper_build_rules_count() { regex::Regex::new(&rule.match_pattern).expect("built-in wrapper regex should compile"); } } + +// --- Regression test for #355: multi-line rewrite must preserve newlines --- + +#[test] +fn rewrite_multiline_preserves_newline_separators() { + // Reproducer for #355: when an agent submits a multi-line bash command + // and at least one segment matches a filter, the rewrite engine used to + // drop the newline between segments, producing `tokf run cargo testls` + // style runs and stray `1echo` files in cwd from `head -1\necho` glomming. + let dir = TempDir::new().unwrap(); + fs::write( + dir.path().join("cargo-test.toml"), + "command = \"cargo test\"", + ) + .unwrap(); + + let config = RewriteConfig::default(); + let result = rewrite_with_config( + "cargo test\nls | head -1\necho hi", + &config, + &[dir.path().to_path_buf()], + false, + ); + // Each segment that matches a filter is rewritten; the unchanged ones + // (echo) are preserved; and crucially the newlines between segments + // are preserved byte-for-byte. The embedded stdlib has filters for + // `cargo test` and `ls`, so both are rewritten — `ls | head -1` via + // pipe-stripping into `--baseline-pipe`. + assert_eq!( + result, "tokf run cargo test\ntokf run --baseline-pipe 'head -1' ls\necho hi", + "multi-line separator must be preserved verbatim" + ); +} diff --git a/crates/tokf-cli/tests/cli_hook_handle.rs b/crates/tokf-cli/tests/cli_hook_handle.rs index 7839544..6442198 100644 --- a/crates/tokf-cli/tests/cli_hook_handle.rs +++ b/crates/tokf-cli/tests/cli_hook_handle.rs @@ -439,3 +439,136 @@ fn hook_handle_ask_verdict_exits_zero_and_emits_ask_json() { "exit 0 alone is not enough — the JSON ask verdict must reach Claude Code" ); } + +// --- TOKF_HOOK_LOG diagnostic logging (issue #355) --- + +/// Run a hook invocation with `TOKF_HOOK_LOG` set to a file, then return +/// the file's contents. The hook is invoked from a temp cwd so embedded +/// stdlib filters apply with no project config. +fn hook_handle_with_log(json: &str) -> (bool, String) { + let dir = tempfile::TempDir::new().unwrap(); + let log_path = dir.path().join("hook.log"); + + let mut child = tokf() + .args(["hook", "handle"]) + .env("TOKF_HOOK_LOG", &log_path) + .current_dir(dir.path()) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + { + use std::io::Write; + child + .stdin + .as_mut() + .unwrap() + .write_all(json.as_bytes()) + .unwrap(); + } + let output = child.wait_with_output().unwrap(); + let success = output.status.success(); + let log = std::fs::read_to_string(&log_path).unwrap_or_default(); + (success, log) +} + +#[test] +fn hook_log_records_rewrite() { + let json = r#"{"tool_name":"Bash","tool_input":{"command":"git status"}}"#; + let (success, log) = hook_handle_with_log(json); + assert!(success); + assert!(log.starts_with("---\n"), "log not YAML: {log:?}"); + assert!(log.contains("tool: Bash\n"), "missing tool field: {log:?}"); + assert!( + log.contains("format: claude-code\n"), + "missing format field: {log:?}" + ); + assert!( + log.contains("outcome: Allow\n"), + "missing outcome field: {log:?}" + ); + assert!( + log.contains("before: |-\n git status\n"), + "missing/malformed before block: {log:?}" + ); + assert!( + log.contains("after: |-\n tokf run git status\n"), + "missing/malformed after block: {log:?}" + ); +} + +#[test] +fn hook_log_records_passthrough_with_null_after() { + // No filter for `unknown-tool`, so the hook returns PassThrough and + // the log records `after: ~` to make the no-rewrite case unambiguous. + let json = r#"{"tool_name":"Bash","tool_input":{"command":"unknown-tool foo"}}"#; + let (success, log) = hook_handle_with_log(json); + assert!(success); + assert!( + log.contains("outcome: PassThrough\n"), + "expected PassThrough outcome: {log:?}" + ); + assert!(log.contains("after: ~\n"), "expected null after: {log:?}"); +} + +#[test] +fn hook_log_preserves_multiline_command_355() { + // Regression for #355: the BEFORE block must show the original + // newline-separated command, and the AFTER block must show the + // rewritten command with newlines preserved between segments + // (not glued into `head -1echo` style malformed output). + let json = + r#"{"tool_name":"Bash","tool_input":{"command":"git status\nls | head -1\necho hi"}}"#; + let (success, log) = hook_handle_with_log(json); + assert!(success); + assert!( + log.contains("before: |-\n git status\n ls | head -1\n echo hi\n"), + "BEFORE block not preserved verbatim: {log:?}" + ); + assert!( + log.contains( + "after: |-\n tokf run git status\n tokf run --baseline-pipe 'head -1' ls\n echo hi\n" + ), + "AFTER block does not show preserved newlines: {log:?}" + ); + assert!( + !log.contains("head -1echo"), + "AFTER block contains the bug-shape malformed token: {log:?}" + ); +} + +#[test] +fn hook_log_skipped_when_env_unset() { + // Sanity: with no TOKF_HOOK_LOG env var, the hook still runs but + // creates no log file. Avoids surprise file writes for users who + // didn't opt into logging. + let dir = tempfile::TempDir::new().unwrap(); + let log_path = dir.path().join("hook.log"); + + let json = r#"{"tool_name":"Bash","tool_input":{"command":"git status"}}"#; + let mut child = tokf() + .args(["hook", "handle"]) + .env_remove("TOKF_HOOK_LOG") + .current_dir(dir.path()) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + { + use std::io::Write; + child + .stdin + .as_mut() + .unwrap() + .write_all(json.as_bytes()) + .unwrap(); + } + let output = child.wait_with_output().unwrap(); + assert!(output.status.success()); + assert!( + !log_path.exists(), + "hook should not create log file when env unset" + ); +} diff --git a/docs/diagnostics.md b/docs/diagnostics.md index 2b4ac2c..4ffade8 100644 --- a/docs/diagnostics.md +++ b/docs/diagnostics.md @@ -240,3 +240,39 @@ eval (tokf completions elvish | slurp) tokf completions nushell | save -f ~/.config/nushell/tokf.nu source ~/.config/nushell/tokf.nu ``` + +## Logging hook activity (`TOKF_HOOK_LOG`) + +When chasing a rewrite bug that only manifests during a live AI-tool session, set the `TOKF_HOOK_LOG` environment variable to a writable file path. Every `tokf hook handle` invocation then appends a YAML record covering the BEFORE / AFTER command strings and the resulting outcome. Activate it in the same process group as Claude Code (or whichever tool is running the hook) — for Claude Code, that means setting it in your shell profile (`.zshrc`/`.bashrc`) before launching the agent. + +```sh +export TOKF_HOOK_LOG=$HOME/.cache/tokf/hook.log +``` + +Each record looks like: + +```yaml +--- +ts: 1714408876 +tool: Bash +format: claude-code +outcome: Allow +before: |- + cargo test + ls | head -1 + echo hi +after: |- + tokf run cargo test + tokf run --baseline-pipe 'head -1' ls + echo hi +``` + +- `ts` is Unix epoch seconds (use `date -r ` on macOS / `date -d @` on Linux to humanise). +- `tool` is the AI tool's tool-name (`Bash`, `run_shell_command`, etc.). +- `format` is `claude-code` / `gemini` / `cursor`. +- `outcome` is one of `Allow`, `Ask`, `Deny`, `PassThrough`. +- `after: ~` (YAML null) means the hook chose not to rewrite — the agent ran the original command verbatim. + +Records are appended; nothing rotates or trims the file, so prune it yourself if it grows large. When the variable is unset (the default), nothing is written and the hook has zero filesystem overhead. + +This was added in response to issue #355, where stray `1echo` files in the agent's cwd turned out to come from a multi-line rewrite collapsing newlines into adjacent tokens — the kind of bug that's invisible in `tokf rewrite "..."` runs but obvious in a hook log. From 11a8ce229b367c720def431ac126b4775729d188 Mon Sep 17 00:00:00 2001 From: Matjaz Domen Pecan Date: Wed, 29 Apr 2026 23:16:16 +0200 Subject: [PATCH 2/2] test(rewrite,hook): cover review-surfaced edge cases for #355 Address minor findings from the multi-agent review of PR #355: - bash_ast: add heredoc and line-continuation cases to the round-trip test to lock in that the inter-node source slice doesn't drop heredoc bodies or backslash-newline continuations - hook: add empty-env-var, unwritable-log-path, and Ask-outcome integration tests to exercise more of the diagnostic-log call site (which has only one shared logging point, so this also covers Deny by construction even though no test wires Deny directly) Co-Authored-By: Claude Opus 4.7 (1M context) --- crates/tokf-cli/src/rewrite/bash_ast_tests.rs | 12 +- crates/tokf-cli/tests/cli_hook_handle.rs | 130 ++++++++++++++++++ 2 files changed, 141 insertions(+), 1 deletion(-) diff --git a/crates/tokf-cli/src/rewrite/bash_ast_tests.rs b/crates/tokf-cli/src/rewrite/bash_ast_tests.rs index 5521391..8fb5651 100644 --- a/crates/tokf-cli/src/rewrite/bash_ast_tests.rs +++ b/crates/tokf-cli/src/rewrite/bash_ast_tests.rs @@ -109,14 +109,24 @@ fn compound_segments_round_trip_byte_for_byte() { // The reassembled `seg + sep` chain must reproduce the original source // for any compound command, otherwise a downstream rewriter that // returns segments unchanged could still corrupt the command. + // + // Heredoc and line-continuation cases lock in that the inter-node + // source slice doesn't drop heredoc bodies or backslash-newline + // continuations — both are forms where the rable AST span boundaries + // are not the obvious whitespace gaps. for input in [ "cmd1\ncmd2", "cmd1\ncmd2\ncmd3", "cmd1 && cmd2\ncmd3", "cmd1\ncmd2 | head -1\ncmd3", "cmd1; cmd2\ncmd3 && cmd4", + "cat </dev/null\necho '{\"hookSpecificOutput\":{\"hookEventName\":\"PreToolUse\",\"permissionDecision\":\"ask\",\"permissionDecisionReason\":\"please confirm\",\"updatedInput\":{\"command\":\"git push\"}}}'\n", + ) + .unwrap(); + std::fs::set_permissions(&engine, std::fs::Permissions::from_mode(0o755)).unwrap(); + + let rewrites_dir = dir.path().join(".tokf"); + std::fs::create_dir_all(&rewrites_dir).unwrap(); + let rewrites = format!( + "[permissions]\nengine = \"external\"\n\n[permissions.external]\ncommand = \"{}\"\n", + engine.display(), + ); + std::fs::write(rewrites_dir.join("rewrites.toml"), rewrites).unwrap(); + + let json = r#"{"tool_name":"Bash","tool_input":{"command":"git push"}}"#; + let mut child = tokf() + .args(["hook", "handle"]) + .env("TOKF_HOOK_LOG", &log_path) + .current_dir(dir.path()) + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn() + .unwrap(); + { + use std::io::Write; + child + .stdin + .as_mut() + .unwrap() + .write_all(json.as_bytes()) + .unwrap(); + } + let output = child.wait_with_output().unwrap(); + assert!(output.status.success()); + let log = std::fs::read_to_string(&log_path).unwrap(); + assert!( + log.contains("outcome: Ask\n"), + "expected Ask outcome in log: {log}" + ); + assert!( + log.contains("before: |-\n git push\n"), + "missing/malformed before block: {log}" + ); +}