diff --git a/crates/tokf-common/Cargo.toml b/crates/tokf-common/Cargo.toml index 81f0a50..d10646e 100644 --- a/crates/tokf-common/Cargo.toml +++ b/crates/tokf-common/Cargo.toml @@ -13,17 +13,19 @@ categories = ["command-line-utilities", "development-tools"] serde = { version = "1", features = ["derive"] } serde_json = "1" sha2 = "0.10" -toml = { version = "1.0", optional = true } +# Required by `hash::epochs::*` for parsing filter TOML against frozen +# historical schemas (issue #350). Previously feature-gated under +# `validation`; promoted to a regular dep in 0.2.43. +toml = "1.0" regex = { version = "1", optional = true } unicode-normalization = "0.1" [features] default = [] -validation = ["toml", "regex"] +validation = ["regex"] [lints] workspace = true [dev-dependencies] -toml = "1.0" ts-rs = "12" diff --git a/crates/tokf-common/src/hash.rs b/crates/tokf-common/src/hash/current.rs similarity index 85% rename from crates/tokf-common/src/hash.rs rename to crates/tokf-common/src/hash/current.rs index 48f0910..dce4f3e 100644 --- a/crates/tokf-common/src/hash.rs +++ b/crates/tokf-common/src/hash/current.rs @@ -1,33 +1,19 @@ +//! The current, schema-tied `canonical_hash` function. +//! +//! Operates on a parsed [`FilterConfig`] under whatever shape it has at the +//! time the binary was compiled — so adding a default-valued field here +//! changes the output for filters that don't reference it (the regression +//! that motivated issue #350). New filters should publish under a versioned +//! epoch in [`super::epochs`] going forward; this function is retained for +//! the existing call sites that still hash freshly-parsed configs (publish +//! flow, resolver, server validation). + use std::fmt::Write as _; use sha2::{Digest, Sha256}; use crate::config::types::FilterConfig; - -/// Error returned when a [`FilterConfig`] cannot be hashed. -/// -/// Wraps the underlying serialization error without exposing `serde_json` as -/// a public dependency of this crate. 
-#[derive(Debug)] -pub struct HashError(serde_json::Error); - -impl std::fmt::Display for HashError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - self.0.fmt(f) - } -} - -impl std::error::Error for HashError { - fn source(&self) -> Option<&(dyn std::error::Error + 'static)> { - Some(&self.0) - } -} - -impl From<serde_json::Error> for HashError { - fn from(e: serde_json::Error) -> Self { - Self(e) - } -} +use crate::hash::HashError; /// Compute a deterministic SHA-256 content hash for a [`FilterConfig`]. /// diff --git a/crates/tokf-common/src/hash/epochs/e1.rs b/crates/tokf-common/src/hash/epochs/e1.rs new file mode 100644 index 0000000..e7d34ba --- /dev/null +++ b/crates/tokf-common/src/hash/epochs/e1.rs @@ -0,0 +1,332 @@ +//! Hash epoch **e1** — first stable `canonical_hash` schema. +//! +//! Source of truth: `git show 5abfaf8:crates/tokf-common/src/config/types.rs` +//! (commit `5abfaf8`, 2026-02-22, "feat(filter): canonical content hash for +//! filter identity (#126)"). That commit introduced `canonical_hash` and +//! switched `GroupConfig.labels` from `HashMap` → `BTreeMap`, making the +//! JSON serialisation order-stable. +//! +//! ## FROZEN — DO NOT MODIFY +//! +//! Any change to the structs in this module — adding a field, removing a +//! field, changing a `#[derive]`, changing a `#[serde]` annotation, even +//! reordering fields — silently invalidates every `e1:…` hash ever +//! computed. The frozen-corpus CI test under +//! `crates/tokf-common/tests/hash_corpus/e1/` catches most violations but +//! not all. If the underlying schema needs to change, that's a *new* +//! epoch (`e2`, `e3`, …), not an edit of this one. + +use std::fmt::Write as _; + +use sha2::{Digest, Sha256}; + +use crate::hash::HashError; + +const VERSION: &str = "e1"; + +/// Compute the e1 hash for a filter TOML. 
+/// +/// Parses `toml_str` into the e1-frozen `FilterConfig`, serialises via +/// `serde_json::to_vec` (struct declaration order — same as the binary at +/// commit `5abfaf8`), SHA-256s the bytes, and prefixes `"e1:"`. +/// +/// # Errors +/// +/// - `HashError` if the TOML is malformed for the e1 shape, or JSON +/// serialisation fails (the latter should not happen for any +/// well-formed parse). +pub fn hash(toml_str: &str) -> Result<String, HashError> { + let cfg: schema::FilterConfig = toml::from_str(toml_str)?; + let json = serde_json::to_vec(&cfg)?; + let digest = Sha256::digest(&json); + let mut out = String::with_capacity(VERSION.len() + 1 + 64); + let _ = write!(out, "{VERSION}:"); + for b in &digest { + let _ = write!(out, "{b:02x}"); + } + Ok(out) +} + +// ───────────────────────────────────────────────────────────────────────── +// FROZEN SCHEMA SNAPSHOT — verbatim copy of types.rs at commit 5abfaf8. +// Any modification below silently invalidates every published e1:… hash. +// ───────────────────────────────────────────────────────────────────────── + +#[allow(dead_code)] +mod schema { + use std::collections::BTreeMap; + + use serde::{Deserialize, Serialize}; + + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + #[serde(untagged)] + pub enum CommandPattern { + Single(String), + Multiple(Vec<String>), + } + + impl Default for CommandPattern { + fn default() -> Self { + Self::Single(String::new()) + } + } + + #[allow(clippy::struct_excessive_bools)] + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + pub struct FilterConfig { + pub command: CommandPattern, + pub run: Option<String>, + #[serde(default)] + pub skip: Vec<String>, + #[serde(default)] + pub keep: Vec<String>, + #[serde(default)] + pub step: Vec<Step>, + pub extract: Option<ExtractRule>, + #[serde(default)] + pub match_output: Vec<MatchOutputRule>, + #[serde(default)] + pub section: Vec<Section>
, + pub on_success: Option<OutputBranch>, + pub on_failure: Option<OutputBranch>, + pub parse: Option<ParseConfig>, + pub output: Option<OutputConfig>, + pub fallback: Option<FallbackConfig>, + #[serde(default)] + pub replace: Vec<ReplaceRule>, + #[serde(default)] + pub dedup: bool, + pub dedup_window: Option<usize>, + #[serde(default)] + pub strip_ansi: bool, + #[serde(default)] + pub trim_lines: bool, + #[serde(default)] + pub strip_empty_lines: bool, + #[serde(default)] + pub collapse_empty_lines: bool, + #[serde(default)] + pub lua_script: Option<ScriptConfig>, + #[serde(default)] + pub variant: Vec<Variant>, + } + + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + pub struct Step { + pub run: String, + #[serde(rename = "as")] + pub as_name: Option<String>, + pub pipeline: Option<String>, + } + + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + pub struct ExtractRule { + pub pattern: String, + pub output: String, + } + + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + pub struct MatchOutputRule { + pub contains: String, + pub output: String, + } + + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + pub struct Section { + pub name: Option<String>, + pub enter: Option<String>, + pub exit: Option<String>, + #[serde(rename = "match")] + pub match_pattern: Option<String>, + pub split_on: Option<String>, + pub collect_as: Option<String>, + } + + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + pub struct OutputBranch { + pub output: Option<String>, + pub aggregate: Option<AggregateRule>, + pub tail: Option<usize>, + pub head: Option<usize>, + #[serde(default)] + pub skip: Vec<String>, + pub extract: Option<ExtractRule>, + } + + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + pub struct AggregateRule { + pub from: String, + pub pattern: String, + pub sum: Option<String>, + pub count_as: Option<String>, + } + + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + pub struct ParseConfig { + pub branch: Option<LineExtract>, + pub group: Option<GroupConfig>, + } + + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + pub struct LineExtract { + pub line: usize, + pub pattern: String, + pub output: String, + } + 
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + pub struct GroupConfig { + pub key: ExtractRule, + #[serde(default)] + pub labels: BTreeMap<String, String>, + } + + #[derive(Debug, Clone, Default, PartialEq, Eq, Serialize, Deserialize)] + pub struct OutputConfig { + pub format: Option<String>, + pub group_counts_format: Option<String>, + pub empty: Option<String>, + } + + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + pub struct FallbackConfig { + pub tail: Option<usize>, + } + + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + pub struct ReplaceRule { + pub pattern: String, + pub output: String, + } + + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + #[serde(rename_all = "lowercase")] + pub enum ScriptLang { + Luau, + } + + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + pub struct ScriptConfig { + pub lang: ScriptLang, + pub file: Option<String>, + pub source: Option<String>, + } + + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + pub struct VariantDetect { + #[serde(default)] + pub files: Vec<String>, + pub output_pattern: Option<String>, + } + + #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] + pub struct Variant { + pub name: String, + pub detect: VariantDetect, + pub filter: String, + } +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use super::*; + + #[test] + fn output_is_versioned_hex() { + let h = hash(r#"command = "git push""#).unwrap(); + assert!(h.starts_with("e1:")); + assert_eq!(h.len(), 3 + 64); + assert!( + h[3..] + .chars() + .all(|c| c.is_ascii_hexdigit() && !c.is_uppercase()) + ); + } + + /// e1 was the first STABLE schema specifically because labels became a + /// `BTreeMap`. Reordering label keys in the source TOML must not change + /// the hash. 
+ #[test] + fn label_key_order_invariance() { + let a = hash( + r#" +command = "git status" +[parse.group.key] +pattern = "^(.{2}) " +output = "{1}" +[parse.group.labels] +M = "modified" +A = "added" +"#, + ) + .unwrap(); + let b = hash( + r#" +command = "git status" +[parse.group.key] +pattern = "^(.{2}) " +output = "{1}" +[parse.group.labels] +A = "added" +M = "modified" +"#, + ) + .unwrap(); + assert_eq!(a, b); + } + + /// TOML with fields that didn't exist at e1 (e.g. `inject_path`, + /// `show_history_hint`) must hash identically to TOML without them. + /// Serde silently drops unknown fields, exactly matching what the + /// binary at commit 5abfaf8 would have done. + #[test] + fn unknown_fields_are_silently_dropped() { + let with_new = hash( + r#" +command = "git push" +inject_path = true +show_history_hint = true +"#, + ) + .unwrap(); + let without_new = hash(r#"command = "git push""#).unwrap(); + assert_eq!(with_new, without_new); + } + + /// Same TOML, varied whitespace and comments → same hash. Comments and + /// formatting are erased by `toml::from_str` before serialisation, so + /// they don't contribute to the canonical hash. This is the same + /// invariant `current::canonical_hash` advertises. + #[test] + fn whitespace_and_comments_invariant() { + let a = hash(r#"command = "git push""#).unwrap(); + let b = hash("# leading comment\ncommand = \"git push\"\n\n").unwrap(); + assert_eq!(a, b); + } + + /// Malformed TOML must surface as `HashError::Parse`, not panic. + #[test] + fn malformed_toml_returns_parse_error() { + let err = hash("this is = not = valid = toml = at all").unwrap_err(); + assert!( + matches!(err, HashError::Parse(_)), + "expected Parse, got {err:?}" + ); + } + + /// Frozen reference vector. The expected value is captured ONCE at + /// authoring time and never changed; if this test fails, the schema + /// snapshot has drifted and a new epoch must be created instead of + /// editing e1. 
The corpus under + /// `crates/tokf-common/tests/hash_corpus/e1/` is the broader + /// equivalent; this is the inline smoke check. + #[test] + fn frozen_reference_minimal_filter() { + let h = hash(r#"command = "git push""#).unwrap(); + assert_eq!( + h, "e1:2c7b698282f042f3e391f54743c292357a679019220a31ff763d81150f21798d", + "e1 schema has drifted; bump to e2 instead of editing e1" + ); + } +} diff --git a/crates/tokf-common/src/hash/epochs/mod.rs b/crates/tokf-common/src/hash/epochs/mod.rs new file mode 100644 index 0000000..f1e5c5d --- /dev/null +++ b/crates/tokf-common/src/hash/epochs/mod.rs @@ -0,0 +1,24 @@ +//! Frozen historical hash epochs. +//! +//! Each module here is a byte-for-byte snapshot of `FilterConfig` (and its +//! dependent types) as they existed at a specific commit, plus a `hash()` +//! function that reproduces the canonical hash a binary at that commit +//! would have produced. +//! +//! ## Adding a new epoch +//! +//! When `FilterConfig` (or any dependent type) changes in a way that +//! affects `current::canonical_hash` output, also: +//! +//! 1. Add `eN.rs` with a verbatim copy of `types.rs` at the change commit, +//! wrapped in a private `mod schema { ... }`. Use +//! `git show <commit>:crates/tokf-common/src/config/types.rs` for fidelity. +//! 2. Append a `HashVersion` entry to `super::KNOWN_VERSIONS`. +//! 3. Add corpus fixtures under +//! `crates/tokf-common/tests/hash_corpus/eN/` covering the schema's +//! distinguishing features. +//! 4. **Never modify** an existing epoch. If the snapshot was wrong, +//! consider whether to introduce a *new* epoch with the corrected +//! schema instead — old `eN:…` hashes in the wild already exist. + +pub(super) mod e1; diff --git a/crates/tokf-common/src/hash/error.rs b/crates/tokf-common/src/hash/error.rs new file mode 100644 index 0000000..92c41f9 --- /dev/null +++ b/crates/tokf-common/src/hash/error.rs @@ -0,0 +1,38 @@ +//! Error type for the hash module. Lives here so `hash::current` and +//! 
`hash::epochs::*` can share it without each duplicating wrapper boilerplate. + +/// Error returned when a filter cannot be hashed. +/// +/// Wraps the underlying serialisation/deserialisation error without +/// exposing `serde_json` or `toml` as public dependencies of this crate. +#[derive(Debug)] +pub enum HashError { + /// Failed to parse filter TOML against an epoch's schema. + Parse(String), + /// Failed to JSON-serialise a parsed filter for hashing. Should not + /// happen for any well-formed parse. + Serialize(String), +} + +impl std::fmt::Display for HashError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Parse(m) => write!(f, "parse: {m}"), + Self::Serialize(m) => write!(f, "serialize: {m}"), + } + } +} + +impl std::error::Error for HashError {} + +impl From<serde_json::Error> for HashError { + fn from(e: serde_json::Error) -> Self { + Self::Serialize(e.to_string()) + } +} + +impl From<toml::de::Error> for HashError { + fn from(e: toml::de::Error) -> Self { + Self::Parse(e.to_string()) + } +} diff --git a/crates/tokf-common/src/hash/mod.rs b/crates/tokf-common/src/hash/mod.rs new file mode 100644 index 0000000..dfe2eea --- /dev/null +++ b/crates/tokf-common/src/hash/mod.rs @@ -0,0 +1,133 @@ +//! Versioned canonical hashes for filter content. +//! +//! Two layers, composed: +//! +//! 1. **`current`** — the schema-tied `canonical_hash(&FilterConfig)`. Used +//! at publish time and anywhere that already has a parsed `FilterConfig`. +//! Its output drifts with `FilterConfig` schema additions (issue #350) — +//! new fields with `#[serde(default)]` silently change every filter's +//! hash. +//! +//! 2. **`epochs`** — frozen byte-for-byte snapshots of the `FilterConfig` +//! schema at specific commits, each with its own hash function that +//! reproduces exactly what the binary at that commit would have produced. +//! Clients try every known epoch; whichever matches the URL hash +//! verifies the content. 
Each epoch is FROZEN once shipped; new schemas +//! become new epochs (`e2`, `e3`, …) rather than edits to existing ones. +//! +//! See `epochs/e1.rs` for the canonical pattern. + +pub mod current; +mod epochs; +mod error; + +pub use current::canonical_hash; +pub use error::HashError; + +/// A registered hash version: a stable identifier and the function that +/// computes its hash from raw filter TOML. +/// +/// Hashers must not panic; any failure mode (parse error, schema +/// mismatch, serialisation issue) must be reported as a [`HashError`]. +/// `compute_all` and `matches_any` drop `Err` results silently but do not +/// catch panics. +#[derive(Debug, Clone, Copy)] +pub struct HashVersion { + /// Stable identifier used as the hash prefix (e.g. `"e1"`). + pub id: &'static str, + hasher: fn(&str) -> Result<String, HashError>, +} + +impl HashVersion { + /// Compute this version's hash for `toml`. + /// + /// # Errors + /// + /// Returns a [`HashError`] if `toml` cannot be parsed under this + /// version's frozen schema or serialised to JSON. + pub fn hash(self, toml: &str) -> Result<String, HashError> { + (self.hasher)(toml) + } +} + +/// All known versioned hash schemes, listed in the order clients should try +/// them when matching a stored hash. Earlier entries are checked first. +/// +/// Subsequent epochs (`e2`, `e3`, …) are added to this slice as they ship; +/// the order is maintained to put the most-likely match first for the +/// current generation of filters in the wild. +pub const KNOWN_VERSIONS: &[HashVersion] = &[HashVersion { + id: "e1", + hasher: epochs::e1::hash, +}]; + +/// Compute every known versioned hash for `toml`. + +/// +/// Errors per version are dropped silently — a version may legitimately +/// fail (e.g. malformed TOML for that epoch's shape); we just exclude it +/// from the result. 
+// +// TODO(#350-followup): wire into the install flow's `verify_and_resolve_hash` +// (PR #351 path) once that branch is merged so clients can verify URL +// hashes against any known epoch without a server round-trip. +pub fn compute_all(toml: &str) -> Vec<(&'static str, String)> { + KNOWN_VERSIONS + .iter() + .filter_map(|v| v.hash(toml).ok().map(|h| (v.id, h))) + .collect() +} + +/// Find the first known epoch (if any) whose hash equals `expected`. +/// +/// Returns the epoch's `id` on match, `None` if no version matches. Used +/// by the install flow as a fast-path before falling back to server-trust +/// verification. +pub fn matches_any(toml: &str, expected: &str) -> Option<&'static str> { + KNOWN_VERSIONS + .iter() + .find_map(|v| v.hash(toml).ok().filter(|h| h == expected).map(|_| v.id)) +} + +#[cfg(test)] +#[allow(clippy::unwrap_used)] +mod tests { + use super::*; + + #[test] + fn known_versions_is_non_empty() { + // Sanity: KNOWN_VERSIONS must include at least one epoch so + // `compute_all`/`matches_any` are actually useful. If this fails, + // someone removed every epoch — verify they meant to. + assert!(!KNOWN_VERSIONS.is_empty(), "no hash versions registered"); + } + + #[test] + fn compute_all_returns_one_entry_per_version() { + let result = compute_all(r#"command = "git push""#); + assert_eq!(result.len(), KNOWN_VERSIONS.len()); + for (got, expected) in result.iter().zip(KNOWN_VERSIONS) { + assert_eq!(got.0, expected.id); + } + } + + #[test] + fn matches_any_finds_known_hash() { + let toml = r#"command = "git push""#; + let computed = KNOWN_VERSIONS[0].hash(toml).unwrap(); + assert_eq!(matches_any(toml, &computed), Some(KNOWN_VERSIONS[0].id)); + } + + #[test] + fn matches_any_returns_none_for_unknown_hash() { + assert_eq!(matches_any(r#"command = "git push""#, "e1:0000"), None); + } + + /// `compute_all` and `matches_any` must silently drop versions whose + /// hasher returns `Err` — they're best-effort lookups, not validators. 
+ #[test] + fn malformed_toml_yields_no_matches() { + let bad = "this = is = malformed = toml"; + assert!(compute_all(bad).is_empty()); + assert_eq!(matches_any(bad, "e1:anything"), None); + } +} diff --git a/crates/tokf-common/tests/hash_corpus.rs b/crates/tokf-common/tests/hash_corpus.rs new file mode 100644 index 0000000..45988b3 --- /dev/null +++ b/crates/tokf-common/tests/hash_corpus.rs @@ -0,0 +1,105 @@ +//! Frozen-corpus test for every registered `HashVersion`. +//! +//! Walks `tests/hash_corpus/<version>/` for each entry of +//! [`tokf_common::hash::KNOWN_VERSIONS`] and asserts every `.toml` +//! produces the hash recorded in its `.expected` sibling. A change +//! that breaks any expected value is either a bug in the hasher (fix +//! it) or an unintended schema drift (revert it / introduce a new +//! version). Modifying `.expected` files in place is the wrong response. + +#![allow(clippy::unwrap_used, clippy::expect_used, clippy::panic)] + +use std::path::Path; + +use tokf_common::hash; + +/// Print every corpus hash to stdout; used during authoring to capture +/// `.expected` values for new fixtures. 
Run with: +/// +/// ```sh +/// cargo test -p tokf-common --test hash_corpus -- print_all_hashes --ignored --nocapture +/// ``` +#[test] +#[ignore = "authoring helper; run explicitly to capture expected values"] +fn print_all_hashes() { + let root = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/hash_corpus"); + for version in hash::KNOWN_VERSIONS { + let dir = root.join(version.id); + if !dir.is_dir() { + continue; + } + let mut entries: Vec<_> = std::fs::read_dir(&dir) + .unwrap() + .filter_map(|e| { + let p = e.ok()?.path(); + (p.extension().is_some_and(|x| x == "toml")).then_some(p) + }) + .collect(); + entries.sort(); + for path in entries { + let toml = std::fs::read_to_string(&path).unwrap(); + let hash = version.hash(&toml).unwrap(); + println!("{}: {}", path.display(), hash); + } + } +} + +#[test] +fn corpus_round_trip() { + let root = Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/hash_corpus"); + let mut total = 0usize; + + for version in hash::KNOWN_VERSIONS { + let dir = root.join(version.id); + assert!( + dir.is_dir(), + "missing corpus directory for version {}: {}", + version.id, + dir.display() + ); + let mut for_version = 0usize; + let mut tomls = Vec::new(); + let mut expecteds = Vec::new(); + for entry in std::fs::read_dir(&dir).unwrap() { + let path = entry.unwrap().path(); + match path.extension().and_then(|e| e.to_str()) { + Some("toml") => tomls.push(path), + Some("expected") => expecteds.push(path), + _ => {} + } + } + + // Detect orphan `.expected` files (a `.toml` was deleted but the + // expected hash was left behind — the silent-clutter case). 
+ for ex in &expecteds { + let toml_sibling = ex.with_extension("toml"); + assert!( + toml_sibling.exists(), + "orphan expected file (no matching .toml): {}", + ex.display() + ); + } + + for path in tomls { + let toml = std::fs::read_to_string(&path).unwrap(); + let expected_path = path.with_extension("expected"); + let expected = std::fs::read_to_string(&expected_path) + .unwrap_or_else(|_| panic!("missing expected file: {}", expected_path.display())); + let expected = expected.trim(); + + let actual = version.hash(&toml).unwrap(); + assert_eq!( + actual, + expected, + "{} hash drift in {}", + version.id, + path.display() + ); + for_version += 1; + total += 1; + } + assert!(for_version > 0, "no corpus entries under {}", dir.display()); + } + + assert!(total > 0, "corpus is empty"); +} diff --git a/crates/tokf-common/tests/hash_corpus/README.md b/crates/tokf-common/tests/hash_corpus/README.md new file mode 100644 index 0000000..9d6fd26 --- /dev/null +++ b/crates/tokf-common/tests/hash_corpus/README.md @@ -0,0 +1,21 @@ +# Hash corpus + +Frozen test vectors for `tokf_common::hash::KNOWN_VERSIONS`. Each +sub-directory is named after a hash version (`e1`, `e2`, …). Inside, +every `.toml` file has a sibling `.expected` containing the SHA-256 hash +that version's hasher must produce for that input. + +Loaded by `tests/hash_corpus.rs` and run on every `cargo test`. + +## Rules + +1. **Never modify an existing `.expected`.** It captures the hash of a + real-world filter as published; changing it silently invalidates that + filter's identity. If a hasher would now produce a different value, + the *hasher* changed (a bug, fix it) — never the expected value. +2. **Add new fixtures freely.** Any `.toml`/`.expected` pair under an + existing version's directory is exercised automatically. Capture the + expected hash by running the fixture once through that version's + hasher and pasting the result. +3. 
**A new version goes in a new directory.** Existing directories stay + pinned to their existing schema. diff --git a/crates/tokf-common/tests/hash_corpus/e1/001_minimal.expected b/crates/tokf-common/tests/hash_corpus/e1/001_minimal.expected new file mode 100644 index 0000000..941c1d4 --- /dev/null +++ b/crates/tokf-common/tests/hash_corpus/e1/001_minimal.expected @@ -0,0 +1 @@ +e1:2c7b698282f042f3e391f54743c292357a679019220a31ff763d81150f21798d diff --git a/crates/tokf-common/tests/hash_corpus/e1/001_minimal.toml b/crates/tokf-common/tests/hash_corpus/e1/001_minimal.toml new file mode 100644 index 0000000..f6c0115 --- /dev/null +++ b/crates/tokf-common/tests/hash_corpus/e1/001_minimal.toml @@ -0,0 +1 @@ +command = "git push" diff --git a/crates/tokf-common/tests/hash_corpus/e1/002_with_labels.expected b/crates/tokf-common/tests/hash_corpus/e1/002_with_labels.expected new file mode 100644 index 0000000..cd890c4 --- /dev/null +++ b/crates/tokf-common/tests/hash_corpus/e1/002_with_labels.expected @@ -0,0 +1 @@ +e1:def55822cc8b6d41d52bd404fe4676c7224627d74056cd2b52aa1aca2aca1fc9 diff --git a/crates/tokf-common/tests/hash_corpus/e1/002_with_labels.toml b/crates/tokf-common/tests/hash_corpus/e1/002_with_labels.toml new file mode 100644 index 0000000..6f89988 --- /dev/null +++ b/crates/tokf-common/tests/hash_corpus/e1/002_with_labels.toml @@ -0,0 +1,12 @@ +# Validates that BTreeMap label ordering produces a stable hash regardless of +# how the source TOML happens to spell the keys. 
+command = "git status" + +[parse.group.key] +pattern = "^(.{2}) " +output = "{1}" + +[parse.group.labels] +M = "modified" +A = "added" +D = "deleted" diff --git a/crates/tokf-common/tests/hash_corpus/e1/003_with_lua.expected b/crates/tokf-common/tests/hash_corpus/e1/003_with_lua.expected new file mode 100644 index 0000000..9cddc30 --- /dev/null +++ b/crates/tokf-common/tests/hash_corpus/e1/003_with_lua.expected @@ -0,0 +1 @@ +e1:517ab8cd8c471be82dd3718109db7571db514a6f1808e4da2baf3e4d7b2a98bd diff --git a/crates/tokf-common/tests/hash_corpus/e1/003_with_lua.toml b/crates/tokf-common/tests/hash_corpus/e1/003_with_lua.toml new file mode 100644 index 0000000..312e51e --- /dev/null +++ b/crates/tokf-common/tests/hash_corpus/e1/003_with_lua.toml @@ -0,0 +1,5 @@ +command = "my-tool" + +[lua_script] +lang = "luau" +source = "return input:upper()" diff --git a/crates/tokf-common/tests/hash_corpus/e1/004_kitchen_sink.expected b/crates/tokf-common/tests/hash_corpus/e1/004_kitchen_sink.expected new file mode 100644 index 0000000..7f5ef03 --- /dev/null +++ b/crates/tokf-common/tests/hash_corpus/e1/004_kitchen_sink.expected @@ -0,0 +1 @@ +e1:2a6e1b1cf56a1566561630da50ee42e19bc95e652fd6e868af519fc695f00fa8 diff --git a/crates/tokf-common/tests/hash_corpus/e1/004_kitchen_sink.toml b/crates/tokf-common/tests/hash_corpus/e1/004_kitchen_sink.toml new file mode 100644 index 0000000..f89cd58 --- /dev/null +++ b/crates/tokf-common/tests/hash_corpus/e1/004_kitchen_sink.toml @@ -0,0 +1,44 @@ +command = ["pnpm test", "npm test"] +run = "pnpm test --reporter=line" +skip = ["^\\s+✓", "^Slow test file:"] +keep = ["^FAIL\\b"] +dedup = true +strip_ansi = true +trim_lines = true + +[[step]] +run = "node --version" +as = "node_version" + +[[match_output]] +contains = "no tests found" +output = "(no tests)" + +[[section]] +name = "summary" +enter = "^─+$" +collect_as = "summary_lines" + +[on_success] +output = "{summary_lines}" +tail = 20 + +[on_failure] +output = "FAILED:\n{output}" +head = 5 
+skip = ["^\\s+at "] + +[parse.branch] +line = 1 +pattern = "Running ([^ ]+)" +output = "{1}" + +[output] +format = "{branch}: {body}" + +[fallback] +tail = 10 + +[[replace]] +pattern = "/Users/[^/]+" +output = "/Users/<user>"