From ffec7736b023f8ef4efd310dd192a9a4acf32559 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Tue, 30 Sep 2025 22:13:56 +0800 Subject: [PATCH 01/32] feat(output): add yaml output --- src/cli.rs | 10 +++++++++ src/config.rs | 3 +++ src/main.rs | 1 + src/output.rs | 58 +++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+) diff --git a/src/cli.rs b/src/cli.rs index b45ef12dc..ec1affed6 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -647,6 +647,16 @@ pub struct Opts { )] search_path: Vec, + /// Print results as YAML objects so you can use it with tools like yq and nushell. + #[arg( + long, + value_name = "yaml", + conflicts_with("format"), + conflicts_with("list_details"), + help = "Print results as YAML objects so you can use it with tools like yq and nushell." + )] + pub yaml: bool, + /// By default, relative paths are prefixed with './' when -x/--exec, /// -X/--exec-batch, or -0/--print0 are given, to reduce the risk of a /// path starting with '-' being treated as a command line option. 
Use diff --git a/src/config.rs b/src/config.rs index 9e18120c4..1ceaff540 100644 --- a/src/config.rs +++ b/src/config.rs @@ -130,6 +130,9 @@ pub struct Config { /// Whether or not to use hyperlinks on paths pub hyperlink: bool, + + /// Whether or not to print the result as a JSON object + pub yaml: bool, } impl Config { diff --git a/src/main.rs b/src/main.rs index 94623eb79..35be38445 100644 --- a/src/main.rs +++ b/src/main.rs @@ -326,6 +326,7 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result(stdout: &mut W, entry: &DirEntry, config: &Config) if let Some(ref format) = config.format { print_entry_format(stdout, entry, config, format)?; + } else if config.yaml { + print_entry_yaml_obj(stdout, entry, config)?; } else if let Some(ref ls_colors) = config.ls_colors { print_entry_colorized(stdout, entry, config, ls_colors)?; } else { @@ -173,3 +176,58 @@ fn print_entry_uncolorized( print_trailing_slash(stdout, entry, config, None) } } + +fn print_entry_yaml_obj( + stdout: &mut W, + entry: &DirEntry, + config: &Config, +) -> io::Result<()> { + let path = entry.stripped_path(config); + let path_string = path.to_string_lossy(); + let file_type = entry + .file_type() + .map(|ft| { + if ft.is_dir() { + "directory" + } else if ft.is_file() { + "file" + } else if ft.is_symlink() { + "symlink" + } else { + "other" + } + }) + .unwrap_or("unknown"); + + // Manually construct a simple YAML representation + // to avoid adding a dependency on serde_yaml (deprecated). + // + // A little bit dirty, but safe enough for buffered output. 
+ let mut result = format!("- path: \"{}\"\n type: {}\n", path_string, file_type); + let metadata = entry.metadata(); + if !metadata.is_none() { + if let Some(meta) = metadata { + result.push_str(&format!(" size: {}\n", meta.len())); + result.push_str(&format!( + " mode: {:o}\n", + meta.permissions().mode() & 0o7777 + )); + if let Ok(modified) = meta.modified() { + if let Ok(duration) = modified.duration_since(std::time::UNIX_EPOCH) { + result.push_str(&format!(" modified: {}\n", duration.as_secs())); + } + } + if let Ok(accessed) = meta.accessed() { + if let Ok(duration) = accessed.duration_since(std::time::UNIX_EPOCH) { + result.push_str(&format!(" accessed: {}\n", duration.as_secs())); + } + } + if let Ok(created) = meta.created() { + if let Ok(duration) = created.duration_since(std::time::UNIX_EPOCH) { + result.push_str(&format!(" created: {}\n", duration.as_secs())); + } + } + } + } + write!(stdout, "{}", result) +} From 97d6bfbb01441ce5516fd7fae5c0602c72dc6430 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Tue, 30 Sep 2025 22:15:40 +0800 Subject: [PATCH 02/32] docs: update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d38415a51..57c311941 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Features +- Add `--yaml` flag for YAML format output. 
## Bugfixes From cdb7bc0a3b89f16867c293f05bcd4d29127487ea Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Tue, 30 Sep 2025 22:57:11 +0800 Subject: [PATCH 03/32] fix: escape path, and disable permission display on Windows --- src/output.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/output.rs b/src/output.rs index 259cc3c7c..f9747a387 100644 --- a/src/output.rs +++ b/src/output.rs @@ -1,6 +1,5 @@ use std::borrow::Cow; use std::io::{self, Write}; -use std::os::unix::fs::PermissionsExt; use lscolors::{Indicator, LsColors, Style}; @@ -9,6 +8,9 @@ use crate::dir_entry::DirEntry; use crate::fmt::FormatTemplate; use crate::hyperlink::PathUrl; +#[cfg(unix)] +use std::os::unix::fs::PermissionsExt; + fn replace_path_separator(path: &str, new_path_separator: &str) -> String { path.replace(std::path::MAIN_SEPARATOR, new_path_separator) } @@ -183,7 +185,7 @@ fn print_entry_yaml_obj( config: &Config, ) -> io::Result<()> { let path = entry.stripped_path(config); - let path_string = path.to_string_lossy(); + let path_string = path.to_string_lossy().escape_default().to_string(); let file_type = entry .file_type() .map(|ft| { @@ -208,10 +210,13 @@ fn print_entry_yaml_obj( if !metadata.is_none() { if let Some(meta) = metadata { result.push_str(&format!(" size: {}\n", meta.len())); - result.push_str(&format!( - " mode: {:o}\n", - meta.permissions().mode() & 0o7777 - )); + #[cfg(unix)] + { + result.push_str(&format!( + " mode: {:o}\n", + meta.permissions().mode() & 0o7777 + )); + } if let Ok(modified) = meta.modified() { if let Ok(duration) = modified.duration_since(std::time::UNIX_EPOCH) { result.push_str(&format!(" modified: {}\n", duration.as_secs())); From c9cbb250e1300a373701f77ff48dbc36532e4afa Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Tue, 30 Sep 2025 23:17:22 +0800 Subject: [PATCH 04/32] fix(ci): fix warnings in cargo clippy --- src/output.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/src/output.rs b/src/output.rs index f9747a387..4357d3fbe 100644 --- a/src/output.rs +++ b/src/output.rs @@ -205,9 +205,9 @@ fn print_entry_yaml_obj( // to avoid adding a dependency on serde_yaml (deprecated). // // A little bit dirty, but safe enough for buffered output. - let mut result = format!("- path: \"{}\"\n type: {}\n", path_string, file_type); + let mut result = format!("- path: \"{path_string}\"\n type: {file_type}\n"); let metadata = entry.metadata(); - if !metadata.is_none() { + if metadata.is_some() { if let Some(meta) = metadata { result.push_str(&format!(" size: {}\n", meta.len())); #[cfg(unix)] @@ -234,5 +234,5 @@ fn print_entry_yaml_obj( } } } - write!(stdout, "{}", result) + write!(stdout, "{result}") } From 192ca9226be6bcaee2802ce9ee1ef8985406757a Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Wed, 1 Oct 2025 01:47:48 +0800 Subject: [PATCH 05/32] refactor: make output stateful --- src/output.rs | 451 +++++++++++++++++++++++++++++--------------------- src/walk.rs | 35 +++- 2 files changed, 291 insertions(+), 195 deletions(-) diff --git a/src/output.rs b/src/output.rs index 4357d3fbe..29ec5ceb5 100644 --- a/src/output.rs +++ b/src/output.rs @@ -11,228 +11,305 @@ use crate::hyperlink::PathUrl; #[cfg(unix)] use std::os::unix::fs::PermissionsExt; +enum DetailFormat { + Json, + Yaml, +} + fn replace_path_separator(path: &str, new_path_separator: &str) -> String { path.replace(std::path::MAIN_SEPARATOR, new_path_separator) } -// TODO: this function is performance critical and can probably be optimized -pub fn print_entry(stdout: &mut W, entry: &DirEntry, config: &Config) -> io::Result<()> { - let mut has_hyperlink = false; - if config.hyperlink { - if let Some(url) = PathUrl::new(entry.path()) { - write!(stdout, "\x1B]8;;{url}\x1B\\")?; - has_hyperlink = true; +struct FileDetail { + path: String, + file_type: String, + size: Option, + mode: Option, + modified: Option, + accessed: Option, + created: Option, +} + +pub struct Printer<'a, W> { + 
config: &'a Config, + pub stdout: W, + started: bool, +} + +impl<'a, W: Write> Printer<'a, W> { + pub fn new(config: &'a Config, stdout: W) -> Self { + Self { + config, + stdout, + started: false, } } - if let Some(ref format) = config.format { - print_entry_format(stdout, entry, config, format)?; - } else if config.yaml { - print_entry_yaml_obj(stdout, entry, config)?; - } else if let Some(ref ls_colors) = config.ls_colors { - print_entry_colorized(stdout, entry, config, ls_colors)?; - } else { - print_entry_uncolorized(stdout, entry, config)?; - }; - - if has_hyperlink { - write!(stdout, "\x1B]8;;\x1B\\")?; + // TODO: this function is performance critical and can probably be optimized + pub fn print_entry(&mut self, entry: &DirEntry) -> io::Result<()> { + let mut has_hyperlink = false; + if self.config.hyperlink { + if let Some(url) = PathUrl::new(entry.path()) { + write!(self.stdout, "\x1B]8;;{url}\x1B\\")?; + has_hyperlink = true; + } + } + + if let Some(ref format) = self.config.format { + self.print_entry_format(entry, format)?; + } else if self.config.yaml { + self.print_entry_detail(DetailFormat::Yaml, entry)?; + } else if self.config.json { + self.print_entry_detail(DetailFormat::Json, entry)?; + } else if let Some(ref ls_colors) = self.config.ls_colors { + self.print_entry_colorized(entry, ls_colors)?; + } else { + self.print_entry_uncolorized(entry)?; + }; + + if has_hyperlink { + write!(self.stdout, "\x1B]8;;\x1B\\")?; + } + + self.started = true; + if self.config.null_separator { + write!(self.stdout, "\0") + } else if self.config.json { + Ok(()) + } else { + writeln!(self.stdout) + } } - if config.null_separator { - write!(stdout, "\0") - } else { - writeln!(stdout) + // Display a trailing slash if the path is a directory and the config option is enabled. + // If the path_separator option is set, display that instead. + // The trailing slash will not be colored. 
+ #[inline] + fn print_trailing_slash(&mut self, entry: &DirEntry, style: Option<&Style>) -> io::Result<()> { + if entry.file_type().is_some_and(|ft| ft.is_dir()) { + write!( + self.stdout, + "{}", + style + .map(Style::to_nu_ansi_term_style) + .unwrap_or_default() + .paint(&self.config.actual_path_separator) + )?; + } + Ok(()) } -} -// Display a trailing slash if the path is a directory and the config option is enabled. -// If the path_separator option is set, display that instead. -// The trailing slash will not be colored. -#[inline] -fn print_trailing_slash( - stdout: &mut W, - entry: &DirEntry, - config: &Config, - style: Option<&Style>, -) -> io::Result<()> { - if entry.file_type().is_some_and(|ft| ft.is_dir()) { - write!( - stdout, - "{}", - style - .map(Style::to_nu_ansi_term_style) - .unwrap_or_default() - .paint(&config.actual_path_separator) - )?; + // TODO: this function is performance critical and can probably be optimized + fn print_entry_format(&mut self, entry: &DirEntry, format: &FormatTemplate) -> io::Result<()> { + let output = format.generate( + entry.stripped_path(self.config), + self.config.path_separator.as_deref(), + ); + // TODO: support writing raw bytes on unix? + write!(self.stdout, "{}", output.to_string_lossy()) } - Ok(()) -} -// TODO: this function is performance critical and can probably be optimized -fn print_entry_format( - stdout: &mut W, - entry: &DirEntry, - config: &Config, - format: &FormatTemplate, -) -> io::Result<()> { - let output = format.generate( - entry.stripped_path(config), - config.path_separator.as_deref(), - ); - // TODO: support writing raw bytes on unix? 
- write!(stdout, "{}", output.to_string_lossy()) -} + // TODO: this function is performance critical and can probably be optimized + fn print_entry_colorized(&mut self, entry: &DirEntry, ls_colors: &LsColors) -> io::Result<()> { + // Split the path between the parent and the last component + let mut offset = 0; + let path = entry.stripped_path(self.config); + let path_str = path.to_string_lossy(); -// TODO: this function is performance critical and can probably be optimized -fn print_entry_colorized( - stdout: &mut W, - entry: &DirEntry, - config: &Config, - ls_colors: &LsColors, -) -> io::Result<()> { - // Split the path between the parent and the last component - let mut offset = 0; - let path = entry.stripped_path(config); - let path_str = path.to_string_lossy(); - - if let Some(parent) = path.parent() { - offset = parent.to_string_lossy().len(); - for c in path_str[offset..].chars() { - if std::path::is_separator(c) { - offset += c.len_utf8(); - } else { - break; + if let Some(parent) = path.parent() { + offset = parent.to_string_lossy().len(); + for c in path_str[offset..].chars() { + if std::path::is_separator(c) { + offset += c.len_utf8(); + } else { + break; + } } } - } - if offset > 0 { - let mut parent_str = Cow::from(&path_str[..offset]); - if let Some(ref separator) = config.path_separator { - *parent_str.to_mut() = replace_path_separator(&parent_str, separator); + if offset > 0 { + let mut parent_str = Cow::from(&path_str[..offset]); + if let Some(ref separator) = self.config.path_separator { + *parent_str.to_mut() = replace_path_separator(&parent_str, separator); + } + + let style = ls_colors + .style_for_indicator(Indicator::Directory) + .map(Style::to_nu_ansi_term_style) + .unwrap_or_default(); + write!(self.stdout, "{}", style.paint(parent_str))?; } - let style = ls_colors - .style_for_indicator(Indicator::Directory) + let style = entry + .style(ls_colors) .map(Style::to_nu_ansi_term_style) .unwrap_or_default(); - write!(stdout, "{}", 
style.paint(parent_str))?; + write!(self.stdout, "{}", style.paint(&path_str[offset..]))?; + + self.print_trailing_slash(entry, ls_colors.style_for_indicator(Indicator::Directory))?; + + Ok(()) } - let style = entry - .style(ls_colors) - .map(Style::to_nu_ansi_term_style) - .unwrap_or_default(); - write!(stdout, "{}", style.paint(&path_str[offset..]))?; + // TODO: this function is performance critical and can probably be optimized + fn print_entry_uncolorized_base(&mut self, entry: &DirEntry) -> io::Result<()> { + let path = entry.stripped_path(self.config); - print_trailing_slash( - stdout, - entry, - config, - ls_colors.style_for_indicator(Indicator::Directory), - )?; + let mut path_string = path.to_string_lossy(); + if let Some(ref separator) = self.config.path_separator { + *path_string.to_mut() = replace_path_separator(&path_string, separator); + } + write!(self.stdout, "{path_string}")?; + self.print_trailing_slash(entry, None) + } - Ok(()) -} + #[cfg(not(unix))] + fn print_entry_uncolorized(&self, entry: &DirEntry) -> io::Result<()> { + print_entry_uncolorized_base(entry) + } -// TODO: this function is performance critical and can probably be optimized -fn print_entry_uncolorized_base( - stdout: &mut W, - entry: &DirEntry, - config: &Config, -) -> io::Result<()> { - let path = entry.stripped_path(config); - - let mut path_string = path.to_string_lossy(); - if let Some(ref separator) = config.path_separator { - *path_string.to_mut() = replace_path_separator(&path_string, separator); + #[cfg(unix)] + fn print_entry_uncolorized(&mut self, entry: &DirEntry) -> io::Result<()> { + use std::os::unix::ffi::OsStrExt; + + if self.config.interactive_terminal || self.config.path_separator.is_some() { + // Fall back to the base implementation + self.print_entry_uncolorized_base(entry) + } else { + // Print path as raw bytes, allowing invalid UTF-8 filenames to be passed to other processes + self.stdout + 
.write_all(entry.stripped_path(self.config).as_os_str().as_bytes())?; + self.print_trailing_slash(entry, None) + } } - write!(stdout, "{path_string}")?; - print_trailing_slash(stdout, entry, config, None) -} -#[cfg(not(unix))] -fn print_entry_uncolorized( - stdout: &mut W, - entry: &DirEntry, - config: &Config, -) -> io::Result<()> { - print_entry_uncolorized_base(stdout, entry, config) -} + fn print_entry_yaml_obj(&mut self, detail: &FileDetail) -> io::Result<()> { + // Manually construct a simple YAML representation + // to avoid adding a dependency on serde_yaml (deprecated). + // + // A little bit dirty, but safe enough for buffered output. + let mut result = format!( + "- path: \"{}\"\n type: {}\n", + detail.path, detail.file_type + ); -#[cfg(unix)] -fn print_entry_uncolorized( - stdout: &mut W, - entry: &DirEntry, - config: &Config, -) -> io::Result<()> { - use std::os::unix::ffi::OsStrExt; - - if config.interactive_terminal || config.path_separator.is_some() { - // Fall back to the base implementation - print_entry_uncolorized_base(stdout, entry, config) - } else { - // Print path as raw bytes, allowing invalid UTF-8 filenames to be passed to other processes - stdout.write_all(entry.stripped_path(config).as_os_str().as_bytes())?; - print_trailing_slash(stdout, entry, config, None) + if let Some(size) = detail.size { + result.push_str(&format!(" size: {}\n", size)); + } + if let Some(mode) = detail.mode { + result.push_str(&format!(" mode: {:o}\n", mode)); + } + if let Some(modified) = detail.modified { + result.push_str(&format!(" modified: {}\n", modified)); + } + if let Some(accessed) = detail.accessed { + result.push_str(&format!(" accessed: {}\n", accessed)); + } + if let Some(created) = detail.created { + result.push_str(&format!(" created: {}\n", created)); + } + write!(self.stdout, "{result}") } -} -fn print_entry_yaml_obj( - stdout: &mut W, - entry: &DirEntry, - config: &Config, -) -> io::Result<()> { - let path = entry.stripped_path(config); - let 
path_string = path.to_string_lossy().escape_default().to_string(); - let file_type = entry - .file_type() - .map(|ft| { - if ft.is_dir() { - "directory" - } else if ft.is_file() { - "file" - } else if ft.is_symlink() { - "symlink" - } else { - "other" - } - }) - .unwrap_or("unknown"); - - // Manually construct a simple YAML representation - // to avoid adding a dependency on serde_yaml (deprecated). - // - // A little bit dirty, but safe enough for buffered output. - let mut result = format!("- path: \"{path_string}\"\n type: {file_type}\n"); - let metadata = entry.metadata(); - if metadata.is_some() { + fn print_entry_json_obj(&mut self, detail: &FileDetail) -> io::Result<()> { + // Manually construct a simple JSON representation. + // A little bit dirty, but safe enough for buffered output. + let mut result = format!( + " {{\"path\":\"{}\",\"type\":\"{}\"", + detail.path, detail.file_type + ); + + if let Some(size) = detail.size { + result.push_str(&format!(",\"size\":{}", size)); + } + if let Some(mode) = detail.mode { + result.push_str(&format!(",\"mode\":{:o}", mode)); + } + if let Some(modified) = detail.modified { + result.push_str(&format!(",\"modified\":{}", modified)); + } + if let Some(accessed) = detail.accessed { + result.push_str(&format!(",\"accessed\":{}", accessed)); + } + if let Some(created) = detail.created { + result.push_str(&format!(",\"created\":{}", created)); + } + result.push('}'); + if self.started { + write!(self.stdout, ",\n")?; + } + write!(self.stdout, "{result}") + } + + fn print_entry_detail(&mut self, format: DetailFormat, entry: &DirEntry) -> io::Result<()> { + let path = entry.stripped_path(self.config); + let path_string = path.to_string_lossy().escape_default().to_string(); + let file_type = entry + .file_type() + .map(|ft| { + if ft.is_dir() { + "directory" + } else if ft.is_file() { + "file" + } else if ft.is_symlink() { + "symlink" + } else { + "other" + } + }) + .unwrap_or("unknown") + .to_string(); + let metadata = 
entry.metadata(); + let mut detail = FileDetail { + path: path_string, + file_type: file_type, + size: None, + mode: None, + modified: None, + accessed: None, + created: None, + }; if let Some(meta) = metadata { - result.push_str(&format!(" size: {}\n", meta.len())); - #[cfg(unix)] - { - result.push_str(&format!( - " mode: {:o}\n", - meta.permissions().mode() & 0o7777 - )); - } - if let Ok(modified) = meta.modified() { - if let Ok(duration) = modified.duration_since(std::time::UNIX_EPOCH) { - result.push_str(&format!(" modified: {}\n", duration.as_secs())); + let size = meta.len(); + let mode = { + #[cfg(unix)] + { + Some(meta.permissions().mode() & 0o7777) } - } - if let Ok(accessed) = meta.accessed() { - if let Ok(duration) = accessed.duration_since(std::time::UNIX_EPOCH) { - result.push_str(&format!(" accessed: {}\n", duration.as_secs())); + #[cfg(not(unix))] + { + None } + }; + let modified = meta + .modified()? + .duration_since(std::time::UNIX_EPOCH) + .and_then(|d| Ok(d.as_secs())); + + let accessed = meta + .accessed()? + .duration_since(std::time::UNIX_EPOCH) + .and_then(|d| Ok(d.as_secs())); + + let created = meta + .created()? 
+ .duration_since(std::time::UNIX_EPOCH) + .and_then(|d| Ok(d.as_secs())); + + detail.size = Some(size); + detail.mode = mode; + detail.modified = modified.ok(); + detail.accessed = accessed.ok(); + detail.created = created.ok(); + } + + match format { + DetailFormat::Json => { + return self.print_entry_json_obj(&detail); } - if let Ok(created) = meta.created() { - if let Ok(duration) = created.duration_since(std::time::UNIX_EPOCH) { - result.push_str(&format!(" created: {}\n", duration.as_secs())); - } + DetailFormat::Yaml => { + return self.print_entry_yaml_obj(&detail); } - } + }; } - write!(stdout, "{result}") } diff --git a/src/walk.rs b/src/walk.rs index 0991e1c73..62a158475 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -136,8 +136,6 @@ struct ReceiverBuffer<'a, W> { interrupt_flag: &'a AtomicBool, /// Receiver for worker results. rx: Receiver, - /// Standard output. - stdout: W, /// The current buffer mode. mode: ReceiverMode, /// The deadline to switch to streaming mode. @@ -146,9 +144,11 @@ struct ReceiverBuffer<'a, W> { buffer: Vec, /// Result count. num_results: usize, + /// The stdout printer instance. + printer: output::Printer<'a, W>, } -impl<'a, W: Write> ReceiverBuffer<'a, W> { +impl<'a, W: Write + 'static> ReceiverBuffer<'a, W> { /// Create a new receiver buffer. fn new(state: &'a WorkerState, rx: Receiver, stdout: W) -> Self { let config = &state.config; @@ -162,22 +162,41 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> { quit_flag, interrupt_flag, rx, - stdout, mode: ReceiverMode::Buffering, deadline, buffer: Vec::with_capacity(MAX_BUFFER_LENGTH), num_results: 0, + printer: output::Printer::new(config, stdout), } } /// Process results until finished. 
fn process(&mut self) -> ExitCode { + if self.config.json { + if let Err(e) = write!(self.printer.stdout, "[\n") { + if e.kind() != ::std::io::ErrorKind::BrokenPipe { + print_error(format!("Could not write to output: {e}")); + return ExitCode::GeneralError; + } + } + } + let ec; loop { - if let Err(ec) = self.poll() { + if let Err(err) = self.poll() { self.quit_flag.store(true, Ordering::Relaxed); - return ec; + ec = err; + break; + } + } + if self.config.json { + if let Err(e) = write!(self.printer.stdout, "\n]\n") { + if e.kind() != ::std::io::ErrorKind::BrokenPipe { + print_error(format!("Could not write to output: {e}")); + return ExitCode::GeneralError; + } } } + return ec; } /// Receive the next worker result. @@ -250,7 +269,7 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> { /// Output a path. fn print(&mut self, entry: &DirEntry) -> Result<(), ExitCode> { - if let Err(e) = output::print_entry(&mut self.stdout, entry, self.config) { + if let Err(e) = self.printer.print_entry(entry) { if e.kind() != ::std::io::ErrorKind::BrokenPipe { print_error(format!("Could not write to output: {e}")); return Err(ExitCode::GeneralError); @@ -294,7 +313,7 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> { /// Flush stdout if necessary. fn flush(&mut self) -> Result<(), ExitCode> { - if self.stdout.flush().is_err() { + if self.printer.stdout.flush().is_err() { // Probably a broken pipe. Exit gracefully. return Err(ExitCode::GeneralError); } From 602b389efb3b6b7b71a10f5fc975f60130b6ea6f Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Wed, 1 Oct 2025 01:49:58 +0800 Subject: [PATCH 06/32] feat(output): add json output --- src/cli.rs | 11 +++++++++++ src/config.rs | 5 ++++- src/main.rs | 1 + 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/cli.rs b/src/cli.rs index ec1affed6..3ac3d5986 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -657,6 +657,17 @@ pub struct Opts { )] pub yaml: bool, + /// Print results as a JSON array so you can use it with tools like jq and nushell. 
+ #[arg( + long, + value_name = "json", + conflicts_with("format"), + conflicts_with("list_details"), + conflicts_with("yaml"), + help = "Print results as a JSON array so you can use it with tools like jq and nushell." + )] + pub json: bool, + /// By default, relative paths are prefixed with './' when -x/--exec, /// -X/--exec-batch, or -0/--print0 are given, to reduce the risk of a /// path starting with '-' being treated as a command line option. Use diff --git a/src/config.rs b/src/config.rs index 1ceaff540..e867ac938 100644 --- a/src/config.rs +++ b/src/config.rs @@ -131,7 +131,10 @@ pub struct Config { /// Whether or not to use hyperlinks on paths pub hyperlink: bool, - /// Whether or not to print the result as a JSON object + /// Whether or not to print the result as a JSON array + pub json: bool, + + /// Whether or not to print the result as YAML objects pub yaml: bool, } diff --git a/src/main.rs b/src/main.rs index 35be38445..0eaf4d6a5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -326,6 +326,7 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result Date: Wed, 1 Oct 2025 02:02:03 +0800 Subject: [PATCH 07/32] fix(ci): fix warnings in cargo clippy --- src/output.rs | 40 ++++++++++++++++++---------------------- src/walk.rs | 6 +++--- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/src/output.rs b/src/output.rs index 29ec5ceb5..e987f9c42 100644 --- a/src/output.rs +++ b/src/output.rs @@ -194,19 +194,19 @@ impl<'a, W: Write> Printer<'a, W> { ); if let Some(size) = detail.size { - result.push_str(&format!(" size: {}\n", size)); + result.push_str(&format!(" size: {size}\n")); } if let Some(mode) = detail.mode { - result.push_str(&format!(" mode: {:o}\n", mode)); + result.push_str(&format!(" mode: {mode:o}\n")); } if let Some(modified) = detail.modified { - result.push_str(&format!(" modified: {}\n", modified)); + result.push_str(&format!(" modified: {modified}\n")); } if let Some(accessed) = detail.accessed { - 
result.push_str(&format!(" accessed: {}\n", accessed)); + result.push_str(&format!(" accessed: {accessed}\n")); } if let Some(created) = detail.created { - result.push_str(&format!(" created: {}\n", created)); + result.push_str(&format!(" created: {created}\n")); } write!(self.stdout, "{result}") } @@ -220,23 +220,23 @@ impl<'a, W: Write> Printer<'a, W> { ); if let Some(size) = detail.size { - result.push_str(&format!(",\"size\":{}", size)); + result.push_str(&format!(",\"size\":{size}")); } if let Some(mode) = detail.mode { - result.push_str(&format!(",\"mode\":{:o}", mode)); + result.push_str(&format!(",\"mode\":{mode:o}")); } if let Some(modified) = detail.modified { - result.push_str(&format!(",\"modified\":{}", modified)); + result.push_str(&format!(",\"modified\":{modified}")); } if let Some(accessed) = detail.accessed { - result.push_str(&format!(",\"accessed\":{}", accessed)); + result.push_str(&format!(",\"accessed\":{accessed}")); } if let Some(created) = detail.created { - result.push_str(&format!(",\"created\":{}", created)); + result.push_str(&format!(",\"created\":{created}")); } result.push('}'); if self.started { - write!(self.stdout, ",\n")?; + writeln!(self.stdout, ",")?; } write!(self.stdout, "{result}") } @@ -262,7 +262,7 @@ impl<'a, W: Write> Printer<'a, W> { let metadata = entry.metadata(); let mut detail = FileDetail { path: path_string, - file_type: file_type, + file_type, size: None, mode: None, modified: None, @@ -284,17 +284,17 @@ impl<'a, W: Write> Printer<'a, W> { let modified = meta .modified()? .duration_since(std::time::UNIX_EPOCH) - .and_then(|d| Ok(d.as_secs())); + .map(|d| d.as_secs()); let accessed = meta .accessed()? .duration_since(std::time::UNIX_EPOCH) - .and_then(|d| Ok(d.as_secs())); + .map(|d| d.as_secs()); let created = meta .created()? 
.duration_since(std::time::UNIX_EPOCH) - .and_then(|d| Ok(d.as_secs())); + .map(|d| d.as_secs()); detail.size = Some(size); detail.mode = mode; @@ -304,12 +304,8 @@ impl<'a, W: Write> Printer<'a, W> { } match format { - DetailFormat::Json => { - return self.print_entry_json_obj(&detail); - } - DetailFormat::Yaml => { - return self.print_entry_yaml_obj(&detail); - } - }; + DetailFormat::Json => self.print_entry_json_obj(&detail), + DetailFormat::Yaml => self.print_entry_yaml_obj(&detail), + } } } diff --git a/src/walk.rs b/src/walk.rs index 62a158475..bd030bbb8 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -173,7 +173,7 @@ impl<'a, W: Write + 'static> ReceiverBuffer<'a, W> { /// Process results until finished. fn process(&mut self) -> ExitCode { if self.config.json { - if let Err(e) = write!(self.printer.stdout, "[\n") { + if let Err(e) = writeln!(self.printer.stdout, "[") { if e.kind() != ::std::io::ErrorKind::BrokenPipe { print_error(format!("Could not write to output: {e}")); return ExitCode::GeneralError; @@ -189,14 +189,14 @@ impl<'a, W: Write + 'static> ReceiverBuffer<'a, W> { } } if self.config.json { - if let Err(e) = write!(self.printer.stdout, "\n]\n") { + if let Err(e) = writeln!(self.printer.stdout, "\n]") { if e.kind() != ::std::io::ErrorKind::BrokenPipe { print_error(format!("Could not write to output: {e}")); return ExitCode::GeneralError; } } } - return ec; + ec } /// Receive the next worker result. 
From 4ea0ed3446bc4a7d6f8ebf0ae8937da074c34ca1 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Wed, 1 Oct 2025 02:03:46 +0800 Subject: [PATCH 08/32] fix: fix function calling in Windows --- src/output.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/output.rs b/src/output.rs index e987f9c42..02d7c9084 100644 --- a/src/output.rs +++ b/src/output.rs @@ -165,7 +165,7 @@ impl<'a, W: Write> Printer<'a, W> { #[cfg(not(unix))] fn print_entry_uncolorized(&self, entry: &DirEntry) -> io::Result<()> { - print_entry_uncolorized_base(entry) + self.print_entry_uncolorized_base(entry) } #[cfg(unix)] From e225946b1b192886ec5812cbb285a0abcba71170 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Wed, 1 Oct 2025 02:10:29 +0800 Subject: [PATCH 09/32] fix: fix reference mutable type annotation in Windows --- src/output.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/output.rs b/src/output.rs index 02d7c9084..2c4302de0 100644 --- a/src/output.rs +++ b/src/output.rs @@ -164,7 +164,7 @@ impl<'a, W: Write> Printer<'a, W> { } #[cfg(not(unix))] - fn print_entry_uncolorized(&self, entry: &DirEntry) -> io::Result<()> { + fn print_entry_uncolorized(&mut self, entry: &DirEntry) -> io::Result<()> { self.print_entry_uncolorized_base(entry) } From 977ee0e4b774f83b2edb9cbe7d1c742d6e05d5f3 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Fri, 10 Oct 2025 23:33:28 +0800 Subject: [PATCH 10/32] fix: resolve suggested changes --- Cargo.lock | 7 ++ Cargo.toml | 21 +++-- src/cli.rs | 34 +++---- src/config.rs | 8 +- src/main.rs | 3 +- src/output.rs | 240 +++++++++++++++++++++++++++++++------------------- src/walk.rs | 5 +- 7 files changed, 192 insertions(+), 126 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1f76c18a2..1e7f3df31 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -78,6 +78,12 @@ dependencies = [ "once_cell", ] +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "1.3.2" @@ -272,6 +278,7 @@ dependencies = [ "aho-corasick", "anyhow", "argmax", + "base64", "clap", "clap_complete", "crossbeam-channel", diff --git a/Cargo.toml b/Cargo.toml index d0a92dc09..282302b28 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,19 +5,13 @@ description = "fd is a simple, fast and user-friendly alternative to find." exclude = ["/benchmarks/*"] homepage = "https://github.com/sharkdp/fd" documentation = "https://docs.rs/fd-find" -keywords = [ - "search", - "find", - "file", - "filesystem", - "tool", -] +keywords = ["search", "find", "file", "filesystem", "tool"] license = "MIT OR Apache-2.0" name = "fd-find" readme = "README.md" repository = "https://github.com/sharkdp/fd" version = "10.3.0" -edition= "2021" +edition = "2021" rust-version = "1.77.2" [badges.appveyor] @@ -43,12 +37,13 @@ anyhow = "1.0" etcetera = "0.10" normpath = "1.1.1" crossbeam-channel = "0.5.15" -clap_complete = {version = "4.5.57", optional = true} +clap_complete = { version = "4.5.57", optional = true } faccess = "0.2.4" jiff = "0.2.14" # For now, pin the `home` crate to less than 0.5.11, to ensure it works on older versions of rust # TODO: when we upgrade past rust 1.85, remove this dependency home = "=0.5.9" +base64 = "0.22.1" [dependencies.clap] version = "4.5.46" @@ -60,7 +55,11 @@ default-features = false features = ["nu-ansi-term"] [target.'cfg(unix)'.dependencies] -nix = { version = "0.30.1", default-features = false, features = ["signal", "user", "hostname"] } +nix = { version = "0.30.1", default-features = false, features = [ + "signal", + "user", + "hostname", +] } [target.'cfg(all(unix, not(target_os = "redox")))'.dependencies] libc = "0.2" @@ -71,7 +70,7 @@ libc = "0.2" # This has to be kept in sync with src/main.rs where the allocator for # the program is set. 
[target.'cfg(all(not(windows), not(target_os = "android"), not(target_os = "macos"), not(target_os = "freebsd"), not(target_os = "openbsd"), not(target_os = "illumos"), not(all(target_env = "musl", target_pointer_width = "32")), not(target_arch = "riscv64")))'.dependencies] -tikv-jemallocator = {version = "0.6.0", optional = true} +tikv-jemallocator = { version = "0.6.0", optional = true } [dev-dependencies] diff = "0.1" diff --git a/src/cli.rs b/src/cli.rs index 3ac3d5986..9f00265ac 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -647,26 +647,17 @@ pub struct Opts { )] search_path: Vec, - /// Print results as YAML objects so you can use it with tools like yq and nushell. + /// Print results in a certain format so you can pipe it to tools. #[arg( long, - value_name = "yaml", - conflicts_with("format"), - conflicts_with("list_details"), - help = "Print results as YAML objects so you can use it with tools like yq and nushell." - )] - pub yaml: bool, - - /// Print results as a JSON array so you can use it with tools like jq and nushell. - #[arg( - long, - value_name = "json", + value_name = "output", + value_enum, + default_value_t = OutputFormat::Plain, conflicts_with("format"), conflicts_with("list_details"), - conflicts_with("yaml"), - help = "Print results as a JSON array so you can use it with tools like jq and nushell." + help = "Print results in a certain format so you can pipe it to tools." 
)] - pub json: bool, + pub output: OutputFormat, /// By default, relative paths are prefixed with './' when -x/--exec, /// -X/--exec-batch, or -0/--print0 are given, to reduce the risk of a @@ -845,6 +836,19 @@ pub enum HyperlinkWhen { Never, } +#[derive(Copy, Clone, PartialEq, Eq, Debug, ValueEnum)] +pub enum OutputFormat { + /// Plain text output (default) + Plain, + /// JSON output + Json, + /// NDJSON (Newline Delimited JSON) output + Ndjson, + /// YAML output + #[value(alias = "yml")] + Yaml, +} + // there isn't a derive api for getting grouped values yet, // so we have to use hand-rolled parsing for exec and exec-batch pub struct Exec { diff --git a/src/config.rs b/src/config.rs index e867ac938..df4e86f63 100644 --- a/src/config.rs +++ b/src/config.rs @@ -3,6 +3,7 @@ use std::{path::PathBuf, sync::Arc, time::Duration}; use lscolors::LsColors; use regex::bytes::RegexSet; +use crate::cli::OutputFormat; use crate::exec::CommandSet; use crate::filetypes::FileTypes; #[cfg(unix)] @@ -131,11 +132,8 @@ pub struct Config { /// Whether or not to use hyperlinks on paths pub hyperlink: bool, - /// Whether or not to print the result as a JSON array - pub json: bool, - - /// Whether or not to print the result as YAML objects - pub yaml: bool, + /// The output format to use + pub output: OutputFormat, } impl Config { diff --git a/src/main.rs b/src/main.rs index 0eaf4d6a5..e756ca0dd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -326,8 +326,7 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result String { path.replace(std::path::MAIN_SEPARATOR, new_path_separator) } +#[cfg(unix)] +fn encode_path(path: &std::path::Path) -> PathEncoding { + use std::os::unix::ffi::OsStrExt; + let bytes = path.as_os_str().as_bytes(); + + // Try to convert to UTF-8 first + match std::str::from_utf8(bytes) { + Ok(utf8_str) => { + let escaped: String = utf8_str.escape_default().collect(); + PathEncoding::Utf8(escaped) + } + Err(_) => { + // Invalid UTF-8, store as raw bytes + 
PathEncoding::Bytes(bytes.to_vec()) + } + } +} + +#[cfg(not(unix))] +fn encode_path(path: &std::path::Path) -> PathEncoding { + // On non-Unix systems, paths are typically UTF-8 or UTF-16 + let path_str = path.to_string_lossy(); + // Always escape the path string for safe output + // Note: if lossy conversion happened, this might lose information + let escaped: String = path_str.escape_default().collect(); + PathEncoding::Utf8(escaped) +} + +enum PathEncoding { + Utf8(String), + Bytes(Vec), +} + struct FileDetail { - path: String, + path: PathEncoding, file_type: String, size: Option, mode: Option, - modified: Option, - accessed: Option, - created: Option, + modified: Option, + accessed: Option, + created: Option, } pub struct Printer<'a, W> { @@ -55,16 +86,21 @@ impl<'a, W: Write> Printer<'a, W> { } } - if let Some(ref format) = self.config.format { - self.print_entry_format(entry, format)?; - } else if self.config.yaml { - self.print_entry_detail(DetailFormat::Yaml, entry)?; - } else if self.config.json { - self.print_entry_detail(DetailFormat::Json, entry)?; - } else if let Some(ref ls_colors) = self.config.ls_colors { - self.print_entry_colorized(entry, ls_colors)?; - } else { - self.print_entry_uncolorized(entry)?; + match ( + &self.config.format, + &self.config.output, + &self.config.ls_colors, + ) { + (Some(template), _, _) => self.print_entry_format(entry, template)?, + (None, OutputFormat::Json, _) => self.print_entry_detail(OutputFormat::Json, entry)?, + (None, OutputFormat::Yaml, _) => self.print_entry_detail(OutputFormat::Yaml, entry)?, + (None, OutputFormat::Ndjson, _) => { + self.print_entry_detail(OutputFormat::Ndjson, entry)? + } + (None, OutputFormat::Plain, Some(ls_colors)) => { + self.print_entry_colorized(entry, ls_colors)? 
+ } + (None, OutputFormat::Plain, None) => self.print_entry_uncolorized(entry)?, }; if has_hyperlink { @@ -74,7 +110,7 @@ impl<'a, W: Write> Printer<'a, W> { self.started = true; if self.config.null_separator { write!(self.stdout, "\0") - } else if self.config.json { + } else if matches!(self.config.output, OutputFormat::Json) { Ok(()) } else { writeln!(self.stdout) @@ -187,82 +223,90 @@ impl<'a, W: Write> Printer<'a, W> { // Manually construct a simple YAML representation // to avoid adding a dependency on serde_yaml (deprecated). // - // A little bit dirty, but safe enough for buffered output. - let mut result = format!( - "- path: \"{}\"\n type: {}\n", - detail.path, detail.file_type - ); + // Write YAML fragments directly to stdout (should be buffered) + write!(self.stdout, "- ")?; + + match &detail.path { + PathEncoding::Utf8(path_utf8) => { + write!(self.stdout, "path: \"{}\"\n", path_utf8)?; + } + PathEncoding::Bytes(path_bytes) => { + write!( + self.stdout, + "path_base64: \"{}\"\n", + general_purpose::STANDARD.encode(path_bytes) + )?; + } + } + + write!(self.stdout, " type: {}\n", detail.file_type)?; if let Some(size) = detail.size { - result.push_str(&format!(" size: {size}\n")); + write!(self.stdout, " size: {size}\n")?; } if let Some(mode) = detail.mode { - result.push_str(&format!(" mode: {mode:o}\n")); + write!(self.stdout, " mode: 0o{mode:o}\n")?; } - if let Some(modified) = detail.modified { - result.push_str(&format!(" modified: {modified}\n")); + if let Some(modified) = &detail.modified { + write!(self.stdout, " modified: \"{}\"\n", modified)?; } - if let Some(accessed) = detail.accessed { - result.push_str(&format!(" accessed: {accessed}\n")); + if let Some(accessed) = &detail.accessed { + write!(self.stdout, " accessed: \"{}\"\n", accessed)?; } - if let Some(created) = detail.created { - result.push_str(&format!(" created: {created}\n")); + if let Some(created) = &detail.created { + write!(self.stdout, " created: \"{}\"\n", created)?; } - 
write!(self.stdout, "{result}") + Ok(()) } fn print_entry_json_obj(&mut self, detail: &FileDetail) -> io::Result<()> { - // Manually construct a simple JSON representation. - // A little bit dirty, but safe enough for buffered output. - let mut result = format!( - " {{\"path\":\"{}\",\"type\":\"{}\"", - detail.path, detail.file_type - ); + if self.started { + writeln!(self.stdout, ",")?; + } + + write!(self.stdout, " {{")?; + + match &detail.path { + PathEncoding::Utf8(path_utf8) => { + write!(self.stdout, "\"path\":\"{}\"", path_utf8)?; + } + PathEncoding::Bytes(path_bytes) => { + write!( + self.stdout, + "\"path_b64\":\"{}\"", + general_purpose::STANDARD.encode(path_bytes) + )?; + } + } + + write!(self.stdout, ",\"type\":\"{}\"", detail.file_type)?; if let Some(size) = detail.size { - result.push_str(&format!(",\"size\":{size}")); + write!(self.stdout, ",\"size\":{size}")?; } if let Some(mode) = detail.mode { - result.push_str(&format!(",\"mode\":{mode:o}")); + write!(self.stdout, ",\"mode\":{mode:o}")?; } - if let Some(modified) = detail.modified { - result.push_str(&format!(",\"modified\":{modified}")); + if let Some(modified) = &detail.modified { + write!(self.stdout, ",\"modified\":\"{}\"", modified)?; } - if let Some(accessed) = detail.accessed { - result.push_str(&format!(",\"accessed\":{accessed}")); + if let Some(accessed) = &detail.accessed { + write!(self.stdout, ",\"accessed\":\"{}\"", accessed)?; } - if let Some(created) = detail.created { - result.push_str(&format!(",\"created\":{created}")); + if let Some(created) = &detail.created { + write!(self.stdout, ",\"created\":\"{}\"", created)?; } - result.push('}'); - if self.started { - writeln!(self.stdout, ",")?; - } - write!(self.stdout, "{result}") + write!(self.stdout, "}}") } - fn print_entry_detail(&mut self, format: DetailFormat, entry: &DirEntry) -> io::Result<()> { + fn print_entry_detail(&mut self, format: OutputFormat, entry: &DirEntry) -> io::Result<()> { let path = 
entry.stripped_path(self.config); - let path_string = path.to_string_lossy().escape_default().to_string(); - let file_type = entry - .file_type() - .map(|ft| { - if ft.is_dir() { - "directory" - } else if ft.is_file() { - "file" - } else if ft.is_symlink() { - "symlink" - } else { - "other" - } - }) - .unwrap_or("unknown") - .to_string(); + let encoded_path = encode_path(path); let metadata = entry.metadata(); + let mut detail = FileDetail { - path: path_string, - file_type, + path: encoded_path, + file_type: "unknown".to_string(), size: None, mode: None, modified: None, @@ -281,31 +325,45 @@ impl<'a, W: Write> Printer<'a, W> { None } }; - let modified = meta - .modified()? - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_secs()); - - let accessed = meta - .accessed()? - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_secs()); - - let created = meta - .created()? - .duration_since(std::time::UNIX_EPOCH) - .map(|d| d.as_secs()); + let ft = match meta.file_type() { + ft if ft.is_dir() => "directory", + ft if ft.is_file() => "file", + ft if ft.is_symlink() => "symlink", + _ => "unknown", + } + .to_string(); + let modified = meta.modified().ok().and_then(|t| { + t.duration_since(std::time::UNIX_EPOCH) + .ok() + .and_then(|d| Timestamp::from_second(d.as_secs() as i64).ok()) + }); + + let accessed = meta.accessed().ok().and_then(|t| { + t.duration_since(std::time::UNIX_EPOCH) + .ok() + .and_then(|d| Timestamp::from_second(d.as_secs() as i64).ok()) + }); + + let created = meta.created().ok().and_then(|t| { + t.duration_since(std::time::UNIX_EPOCH) + .ok() + .and_then(|d| Timestamp::from_second(d.as_secs() as i64).ok()) + }); + + detail.file_type = ft; detail.size = Some(size); detail.mode = mode; - detail.modified = modified.ok(); - detail.accessed = accessed.ok(); - detail.created = created.ok(); + detail.modified = modified; + detail.accessed = accessed; + detail.created = created; } match format { - DetailFormat::Json => 
self.print_entry_json_obj(&detail), - DetailFormat::Yaml => self.print_entry_yaml_obj(&detail), + OutputFormat::Json => self.print_entry_json_obj(&detail), + OutputFormat::Yaml => self.print_entry_yaml_obj(&detail), + OutputFormat::Ndjson => self.print_entry_json_obj(&detail), // NDJSON uses same format as JSON for individual entries + OutputFormat::Plain => unreachable!("Plain format should not call print_entry_detail"), } } } diff --git a/src/walk.rs b/src/walk.rs index bd030bbb8..04e8de866 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -15,6 +15,7 @@ use ignore::overrides::{Override, OverrideBuilder}; use ignore::{WalkBuilder, WalkParallel, WalkState}; use regex::bytes::Regex; +use crate::cli::OutputFormat; use crate::config::Config; use crate::dir_entry::DirEntry; use crate::error::print_error; @@ -172,7 +173,7 @@ impl<'a, W: Write + 'static> ReceiverBuffer<'a, W> { /// Process results until finished. fn process(&mut self) -> ExitCode { - if self.config.json { + if self.config.output == OutputFormat::Json { if let Err(e) = writeln!(self.printer.stdout, "[") { if e.kind() != ::std::io::ErrorKind::BrokenPipe { print_error(format!("Could not write to output: {e}")); @@ -188,7 +189,7 @@ impl<'a, W: Write + 'static> ReceiverBuffer<'a, W> { break; } } - if self.config.json { + if self.config.output == OutputFormat::Json { if let Err(e) = writeln!(self.printer.stdout, "\n]") { if e.kind() != ::std::io::ErrorKind::BrokenPipe { print_error(format!("Could not write to output: {e}")); From 6434ee5024bf4f78cd08fe81389610f4f6ede749 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Fri, 10 Oct 2025 23:51:37 +0800 Subject: [PATCH 11/32] fix: move JSON array printing to Printer --- src/output.rs | 29 +++++++++++++++++++++++++++++ src/walk.rs | 19 ++++--------------- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/src/output.rs b/src/output.rs index 47446edd6..505418cad 100644 --- a/src/output.rs +++ b/src/output.rs @@ -8,6 +8,7 @@ use lscolors::{Indicator, 
LsColors, Style}; use crate::cli::OutputFormat; use crate::config::Config; use crate::dir_entry::DirEntry; +use crate::exit_codes::ExitCode; use crate::fmt::FormatTemplate; use crate::hyperlink::PathUrl; @@ -76,6 +77,34 @@ impl<'a, W: Write> Printer<'a, W> { } } + /// Begin JSON array output if in JSON format. + /// Returns an error if writing to output fails. + pub fn begin(&mut self) -> Result<(), ExitCode> { + if self.config.output == OutputFormat::Json { + if let Err(e) = writeln!(self.stdout, "[") { + if e.kind() != ::std::io::ErrorKind::BrokenPipe { + crate::error::print_error(format!("Could not write to output: {e}")); + return Err(ExitCode::GeneralError); + } + } + } + Ok(()) + } + + /// End JSON array output if in JSON format. + /// Returns an error if writing to output fails. + pub fn end(&mut self) -> Result<(), ExitCode> { + if self.config.output == OutputFormat::Json { + if let Err(e) = writeln!(self.stdout, "\n]") { + if e.kind() != ::std::io::ErrorKind::BrokenPipe { + crate::error::print_error(format!("Could not write to output: {e}")); + return Err(ExitCode::GeneralError); + } + } + } + Ok(()) + } + // TODO: this function is performance critical and can probably be optimized pub fn print_entry(&mut self, entry: &DirEntry) -> io::Result<()> { let mut has_hyperlink = false; diff --git a/src/walk.rs b/src/walk.rs index 04e8de866..a259a4576 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -15,7 +15,6 @@ use ignore::overrides::{Override, OverrideBuilder}; use ignore::{WalkBuilder, WalkParallel, WalkState}; use regex::bytes::Regex; -use crate::cli::OutputFormat; use crate::config::Config; use crate::dir_entry::DirEntry; use crate::error::print_error; @@ -173,13 +172,8 @@ impl<'a, W: Write + 'static> ReceiverBuffer<'a, W> { /// Process results until finished. 
fn process(&mut self) -> ExitCode { - if self.config.output == OutputFormat::Json { - if let Err(e) = writeln!(self.printer.stdout, "[") { - if e.kind() != ::std::io::ErrorKind::BrokenPipe { - print_error(format!("Could not write to output: {e}")); - return ExitCode::GeneralError; - } - } + if let Err(err) = self.printer.begin() { + return err; } let ec; loop { @@ -189,13 +183,8 @@ impl<'a, W: Write + 'static> ReceiverBuffer<'a, W> { break; } } - if self.config.output == OutputFormat::Json { - if let Err(e) = writeln!(self.printer.stdout, "\n]") { - if e.kind() != ::std::io::ErrorKind::BrokenPipe { - print_error(format!("Could not write to output: {e}")); - return ExitCode::GeneralError; - } - } + if let Err(err) = self.printer.end() { + return err; } ec } From b2d385f76a4e06192f6e0934e84c9b18fc1d81e3 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Sat, 11 Oct 2025 09:40:18 +0800 Subject: [PATCH 12/32] feat: implement NDJSON output --- src/output.rs | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/src/output.rs b/src/output.rs index 505418cad..5067211d3 100644 --- a/src/output.rs +++ b/src/output.rs @@ -137,10 +137,16 @@ impl<'a, W: Write> Printer<'a, W> { } self.started = true; + + if matches!( + self.config.output, + OutputFormat::Json | OutputFormat::Ndjson + ) { + return Ok(()); + } + if self.config.null_separator { write!(self.stdout, "\0") - } else if matches!(self.config.output, OutputFormat::Json) { - Ok(()) } else { writeln!(self.stdout) } @@ -288,12 +294,19 @@ impl<'a, W: Write> Printer<'a, W> { Ok(()) } - fn print_entry_json_obj(&mut self, detail: &FileDetail) -> io::Result<()> { + fn print_entry_json_obj(&mut self, detail: &FileDetail, comma: bool) -> io::Result<()> { if self.started { - writeln!(self.stdout, ",")?; + if comma { + writeln!(self.stdout, ",")?; + } else { + writeln!(self.stdout)?; + } } - write!(self.stdout, " {{")?; + if comma { + write!(self.stdout, " ")?; + } + write!(self.stdout, "{{")?; 
match &detail.path { PathEncoding::Utf8(path_utf8) => { @@ -389,9 +402,9 @@ impl<'a, W: Write> Printer<'a, W> { } match format { - OutputFormat::Json => self.print_entry_json_obj(&detail), + OutputFormat::Json => self.print_entry_json_obj(&detail, true), + OutputFormat::Ndjson => self.print_entry_json_obj(&detail, false), OutputFormat::Yaml => self.print_entry_yaml_obj(&detail), - OutputFormat::Ndjson => self.print_entry_json_obj(&detail), // NDJSON uses same format as JSON for individual entries OutputFormat::Plain => unreachable!("Plain format should not call print_entry_detail"), } } From 703b32f69aff167d8f2cb7b45abf2a779f805ddd Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Sat, 11 Oct 2025 09:43:55 +0800 Subject: [PATCH 13/32] fix(ci): fix warnings in cargo clippy --- src/output.rs | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/output.rs b/src/output.rs index 5067211d3..64750d36d 100644 --- a/src/output.rs +++ b/src/output.rs @@ -262,34 +262,34 @@ impl<'a, W: Write> Printer<'a, W> { write!(self.stdout, "- ")?; match &detail.path { - PathEncoding::Utf8(path_utf8) => { - write!(self.stdout, "path: \"{}\"\n", path_utf8)?; + PathEncoding::Utf8(path) => { + writeln!(self.stdout, "path: \"{}\"", path)?; } - PathEncoding::Bytes(path_bytes) => { - write!( + PathEncoding::Bytes(bytes) => { + writeln!( self.stdout, - "path_base64: \"{}\"\n", - general_purpose::STANDARD.encode(path_bytes) + "path_base64: \"{}\"", + general_purpose::STANDARD.encode(bytes) )?; } } - write!(self.stdout, " type: {}\n", detail.file_type)?; + writeln!(self.stdout, " type: {}", detail.file_type)?; if let Some(size) = detail.size { - write!(self.stdout, " size: {size}\n")?; + writeln!(self.stdout, " size: {}", size)?; } if let Some(mode) = detail.mode { - write!(self.stdout, " mode: 0o{mode:o}\n")?; + writeln!(self.stdout, " mode: 0o{mode:o}")?; } if let Some(modified) = &detail.modified { - write!(self.stdout, " modified: \"{}\"\n", 
modified)?; + writeln!(self.stdout, " modified: \"{}\"", modified)?; } if let Some(accessed) = &detail.accessed { - write!(self.stdout, " accessed: \"{}\"\n", accessed)?; + writeln!(self.stdout, " accessed: \"{}\"", accessed)?; } if let Some(created) = &detail.created { - write!(self.stdout, " created: \"{}\"\n", created)?; + writeln!(self.stdout, " created: \"{}\"", created)?; } Ok(()) } From 650e86c464937e6960fbcee3bb74379b0c52588f Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Sun, 12 Oct 2025 13:08:35 +0800 Subject: [PATCH 14/32] tests: add tests for `--output` flags --- Cargo.lock | 43 +++++++++++++++++++++++++++++++++++++++---- Cargo.toml | 1 + tests/tests.rs | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 79 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1e7f3df31..d98af4233 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -298,6 +298,7 @@ dependencies = [ "nu-ansi-term", "regex", "regex-syntax", + "serde_json", "tempfile", "test-case", "tikv-jemallocator", @@ -377,6 +378,12 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + [[package]] name = "jiff" version = "0.2.15" @@ -595,6 +602,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + [[package]] name = "same-file" version = "1.0.6" @@ -606,24 +619,46 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" 
+dependencies = [ + "serde_core", +] + +[[package]] +name = "serde_core" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", "syn", ] +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + [[package]] name = "shlex" version = "1.3.0" diff --git a/Cargo.toml b/Cargo.toml index 282302b28..5cbae9736 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,6 +77,7 @@ diff = "0.1" tempfile = "3.21" filetime = "0.2" test-case = "3.3" +serde_json = "1.0.145" [profile.release] lto = true diff --git a/tests/tests.rs b/tests/tests.rs index 76da9fee3..2da03af89 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -2692,3 +2692,42 @@ fn test_hyperlink() { te.assert_output(&["--hyperlink=always", "a.foo"], &expected); } + +/// Test various output formats +#[test] +fn test_output_format() { + let te = TestEnv::new(DEFAULT_DIRS, DEFAULT_FILES); + + let re = te.assert_success_and_get_output(".", &["--output=json", "."]); + let stdout = String::from_utf8_lossy(&re.stdout); + let files: Vec = serde_json::from_str(&stdout).unwrap(); + + assert_eq!(files.len(), DEFAULT_FILES.len() + DEFAULT_DIRS.len()); + + te.assert_success_and_get_output(".", &["--output=ndjson", "."]); + te.assert_success_and_get_output(".", 
&["--output=plain", "."]); + te.assert_success_and_get_output(".", &["--output=yaml", "."]); + te.assert_success_and_get_output(".", &["--output=yml", "."]); +} + +/// Filenames with invalid UTF-8 sequences +#[cfg(target_os = "linux")] +#[test] +fn test_output_format_invalid_utf8() { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + + let dirs = &["test1"]; + let files = &[]; + let te = TestEnv::new(dirs, files); + + fs::File::create( + te.test_root() + .join(OsStr::from_bytes(b"test1/test_\xFEinvalid.txt")), + ) + .unwrap(); + + te.assert_success_and_get_output("test1/", &["--output=json", "", "test1/"]); + + te.assert_output(&["invalid", "test1/"], "test1/test_�invalid.txt"); +} From 2e463c74da883e4a9e1290990986ff526925a041 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Sun, 12 Oct 2025 13:24:34 +0800 Subject: [PATCH 15/32] docs: update manpage for `--output` flags --- doc/fd.1 | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/doc/fd.1 b/doc/fd.1 index 4a6aa0ece..f04f41e76 100644 --- a/doc/fd.1 +++ b/doc/fd.1 @@ -503,6 +503,23 @@ Maximum number of arguments to pass to the command given with -X. If the number greater than the given size, the command given with -X is run again with remaining arguments. A batch size of zero means there is no limit (default), but note that batching might still happen due to OS restrictions on the maximum length of command lines. +.TP +.BI "\-\-output " +Specify what marking language to use for the output. The value can be plain, json, ndjson, or +yaml. + +Currently, the default is "plain", and if the option is used without an argument "plain" +is used. The available options are: +.RS +.IP plain +Output the results as plain text (default). +.IP json +Output the results as a JSON array. +.IP ndjson +Output the results as newline-delimited JSON. +.IP yaml +Output the results as YAML. 
+.RE .SH PATTERN SYNTAX The regular expression syntax used by fd is documented here: From 10570e92ea8660c2703e4fee617a44d2d9937fed Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Wed, 15 Oct 2025 13:47:26 +0800 Subject: [PATCH 16/32] tests: fix invalid utf8 base64 test --- tests/tests.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/tests.rs b/tests/tests.rs index 2da03af89..00c78f07c 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -2727,7 +2727,11 @@ fn test_output_format_invalid_utf8() { ) .unwrap(); - te.assert_success_and_get_output("test1/", &["--output=json", "", "test1/"]); + let re = te.assert_success_and_get_output(".", &["", "--output=json", "test1/"]); + let stdout = String::from_utf8_lossy(&re.stdout); + let files: Vec = serde_json::from_str(&stdout).unwrap(); + assert_eq!(files.len(), 1); + assert_eq!(files[0]["path_b64"], "dGVzdDEvdGVzdF/+aW52YWxpZC50eHQ="); te.assert_output(&["invalid", "test1/"], "test1/test_�invalid.txt"); } From 949a5aa584413c048925961bc3d13ce3a253f082 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Wed, 15 Oct 2025 16:39:37 +0800 Subject: [PATCH 17/32] docs: update manpage to change "ndjson" to "jsonl" --- doc/fd.1 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/fd.1 b/doc/fd.1 index f04f41e76..73856cac6 100644 --- a/doc/fd.1 +++ b/doc/fd.1 @@ -505,7 +505,7 @@ batch size of zero means there is no limit (default), but note that batching mig due to OS restrictions on the maximum length of command lines. .TP .BI "\-\-output " -Specify what marking language to use for the output. The value can be plain, json, ndjson, or +Specify a structured format to use for the output. The value can be plain (default), json, jsonl, or yaml. Currently, the default is "plain", and if the option is used without an argument "plain" @@ -515,8 +515,8 @@ is used. The available options are: Output the results as plain text (default). .IP json Output the results as a JSON array. 
-.IP ndjson -Output the results as newline-delimited JSON. +.IP jsonl +Output the results as JSON Lines (also known as NDJSON). .IP yaml Output the results as YAML. .RE From cb3ef970b68f2d869d7936066144099c5a487e27 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Wed, 15 Oct 2025 16:40:47 +0800 Subject: [PATCH 18/32] fix: change FileDetail creating logic and base64 import --- src/output.rs | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/src/output.rs b/src/output.rs index 64750d36d..971f18565 100644 --- a/src/output.rs +++ b/src/output.rs @@ -1,7 +1,7 @@ use std::borrow::Cow; use std::io::{self, Write}; -use base64::{engine::general_purpose, Engine as _}; +use base64::{prelude::BASE64_STANDARD, Engine as _}; use jiff::Timestamp; use lscolors::{Indicator, LsColors, Style}; @@ -269,7 +269,7 @@ impl<'a, W: Write> Printer<'a, W> { writeln!( self.stdout, "path_base64: \"{}\"", - general_purpose::STANDARD.encode(bytes) + BASE64_STANDARD.encode(bytes) )?; } } @@ -316,7 +316,7 @@ impl<'a, W: Write> Printer<'a, W> { write!( self.stdout, "\"path_b64\":\"{}\"", - general_purpose::STANDARD.encode(path_bytes) + BASE64_STANDARD.encode(path_bytes) )?; } } @@ -346,16 +346,7 @@ impl<'a, W: Write> Printer<'a, W> { let encoded_path = encode_path(path); let metadata = entry.metadata(); - let mut detail = FileDetail { - path: encoded_path, - file_type: "unknown".to_string(), - size: None, - mode: None, - modified: None, - accessed: None, - created: None, - }; - if let Some(meta) = metadata { + let detail = if let Some(meta) = metadata { let size = meta.len(); let mode = { #[cfg(unix)] @@ -393,14 +384,26 @@ impl<'a, W: Write> Printer<'a, W> { .and_then(|d| Timestamp::from_second(d.as_secs() as i64).ok()) }); - detail.file_type = ft; - detail.size = Some(size); - detail.mode = mode; - detail.modified = modified; - detail.accessed = accessed; - detail.created = created; - } - + FileDetail { + path: encoded_path, + file_type: 
ft, + size: Some(size), + mode, + modified, + accessed, + created, + } + } else { + FileDetail { + path: encoded_path, + file_type: "unknown".to_string(), + size: None, + mode: None, + modified: None, + accessed: None, + created: None, + } + }; match format { OutputFormat::Json => self.print_entry_json_obj(&detail, true), OutputFormat::Ndjson => self.print_entry_json_obj(&detail, false), From 60ecc099c68d7ac6222ac6efa03c15524dccd203 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Thu, 16 Oct 2025 09:39:08 +0800 Subject: [PATCH 19/32] fix: change ndjson flag to commonly used jsonl --- src/cli.rs | 5 +++-- src/output.rs | 19 ++++++------------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index 9f00265ac..1bdb6aec6 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -842,8 +842,9 @@ pub enum OutputFormat { Plain, /// JSON output Json, - /// NDJSON (Newline Delimited JSON) output - Ndjson, + /// JSONL (JSON Lines, also known as Newline Delimited JSON) output + #[value(alias = "ndjson")] + Jsonl, /// YAML output #[value(alias = "yml")] Yaml, diff --git a/src/output.rs b/src/output.rs index 971f18565..9515e6565 100644 --- a/src/output.rs +++ b/src/output.rs @@ -123,8 +123,8 @@ impl<'a, W: Write> Printer<'a, W> { (Some(template), _, _) => self.print_entry_format(entry, template)?, (None, OutputFormat::Json, _) => self.print_entry_detail(OutputFormat::Json, entry)?, (None, OutputFormat::Yaml, _) => self.print_entry_detail(OutputFormat::Yaml, entry)?, - (None, OutputFormat::Ndjson, _) => { - self.print_entry_detail(OutputFormat::Ndjson, entry)? + (None, OutputFormat::Jsonl, _) => { + self.print_entry_detail(OutputFormat::Jsonl, entry)? } (None, OutputFormat::Plain, Some(ls_colors)) => { self.print_entry_colorized(entry, ls_colors)? 
@@ -138,10 +138,7 @@ impl<'a, W: Write> Printer<'a, W> { self.started = true; - if matches!( - self.config.output, - OutputFormat::Json | OutputFormat::Ndjson - ) { + if matches!(self.config.output, OutputFormat::Json) { return Ok(()); } @@ -295,12 +292,8 @@ impl<'a, W: Write> Printer<'a, W> { } fn print_entry_json_obj(&mut self, detail: &FileDetail, comma: bool) -> io::Result<()> { - if self.started { - if comma { - writeln!(self.stdout, ",")?; - } else { - writeln!(self.stdout)?; - } + if self.started && comma { + writeln!(self.stdout, ",")?; } if comma { @@ -406,7 +399,7 @@ impl<'a, W: Write> Printer<'a, W> { }; match format { OutputFormat::Json => self.print_entry_json_obj(&detail, true), - OutputFormat::Ndjson => self.print_entry_json_obj(&detail, false), + OutputFormat::Jsonl => self.print_entry_json_obj(&detail, false), OutputFormat::Yaml => self.print_entry_yaml_obj(&detail), OutputFormat::Plain => unreachable!("Plain format should not call print_entry_detail"), } From 7c9f1d87b5fbff9dfec1475f1cba62459ee7d0bb Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Wed, 29 Oct 2025 23:37:22 +0800 Subject: [PATCH 20/32] fix: replace String to &str with lifetime, adopt as_encoded_bytes --- src/output.rs | 47 +++++++++++++---------------------------------- 1 file changed, 13 insertions(+), 34 deletions(-) diff --git a/src/output.rs b/src/output.rs index 9515e6565..00f16dc7c 100644 --- a/src/output.rs +++ b/src/output.rs @@ -1,5 +1,6 @@ use std::borrow::Cow; use std::io::{self, Write}; +use std::path::Path; use base64::{prelude::BASE64_STANDARD, Engine as _}; use jiff::Timestamp; @@ -19,42 +20,21 @@ fn replace_path_separator(path: &str, new_path_separator: &str) -> String { path.replace(std::path::MAIN_SEPARATOR, new_path_separator) } -#[cfg(unix)] -fn encode_path(path: &std::path::Path) -> PathEncoding { - use std::os::unix::ffi::OsStrExt; - let bytes = path.as_os_str().as_bytes(); - - // Try to convert to UTF-8 first - match std::str::from_utf8(bytes) { - 
Ok(utf8_str) => { - let escaped: String = utf8_str.escape_default().collect(); - PathEncoding::Utf8(escaped) - } - Err(_) => { - // Invalid UTF-8, store as raw bytes - PathEncoding::Bytes(bytes.to_vec()) - } +fn encode_path(path: &Path) -> PathEncoding<'_> { + match path.to_str() { + Some(utf8) => PathEncoding::Utf8(utf8.escape_default()), + None => PathEncoding::Bytes(path.as_os_str().as_encoded_bytes()), } } -#[cfg(not(unix))] -fn encode_path(path: &std::path::Path) -> PathEncoding { - // On non-Unix systems, paths are typically UTF-8 or UTF-16 - let path_str = path.to_string_lossy(); - // Always escape the path string for safe output - // Note: if lossy conversion happened, this might lose information - let escaped: String = path_str.escape_default().collect(); - PathEncoding::Utf8(escaped) +enum PathEncoding<'a> { + Utf8(std::str::EscapeDefault<'a>), + Bytes(&'a [u8]), } -enum PathEncoding { - Utf8(String), - Bytes(Vec), -} - -struct FileDetail { - path: PathEncoding, - file_type: String, +struct FileDetail<'a> { + path: PathEncoding<'a>, + file_type: &'static str, size: Option, mode: Option, modified: Option, @@ -356,8 +336,7 @@ impl<'a, W: Write> Printer<'a, W> { ft if ft.is_file() => "file", ft if ft.is_symlink() => "symlink", _ => "unknown", - } - .to_string(); + }; let modified = meta.modified().ok().and_then(|t| { t.duration_since(std::time::UNIX_EPOCH) @@ -389,7 +368,7 @@ impl<'a, W: Write> Printer<'a, W> { } else { FileDetail { path: encoded_path, - file_type: "unknown".to_string(), + file_type: "unknown", size: None, mode: None, modified: None, From 2c1bdb50ef3eed13bd6b61913fff3b9385601a02 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Wed, 29 Oct 2025 23:48:20 +0800 Subject: [PATCH 21/32] feat: add --json flag for JSONL output --- src/cli.rs | 10 ++++++++++ src/config.rs | 3 +++ src/main.rs | 1 + src/output.rs | 18 ++++++++++++------ 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/src/cli.rs b/src/cli.rs index 1bdb6aec6..4bb3b9fee 
100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -647,6 +647,16 @@ pub struct Opts { )] search_path: Vec, + /// Print results in JSONL format. + #[arg( + long, + value_name = "json", + conflicts_with("output"), + help = "Print results in JSONL format so you can pipe it to tools.", + long_help + )] + pub json: bool, + /// Print results in a certain format so you can pipe it to tools. #[arg( long, diff --git a/src/config.rs b/src/config.rs index df4e86f63..038d19cc5 100644 --- a/src/config.rs +++ b/src/config.rs @@ -132,6 +132,9 @@ pub struct Config { /// Whether or not to use hyperlinks on paths pub hyperlink: bool, + /// Whether to print results in JSONL format + pub jsonl: bool, + /// The output format to use pub output: OutputFormat, } diff --git a/src/main.rs b/src/main.rs index d28ff0bd3..7903bacf3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -327,6 +327,7 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result Printer<'a, W> { match ( &self.config.format, &self.config.output, + &self.config.jsonl, &self.config.ls_colors, ) { - (Some(template), _, _) => self.print_entry_format(entry, template)?, - (None, OutputFormat::Json, _) => self.print_entry_detail(OutputFormat::Json, entry)?, - (None, OutputFormat::Yaml, _) => self.print_entry_detail(OutputFormat::Yaml, entry)?, - (None, OutputFormat::Jsonl, _) => { + (Some(template), _, _, _) => self.print_entry_format(entry, template)?, + (None, _, true, _) => self.print_entry_detail(OutputFormat::Jsonl, entry)?, + (None, OutputFormat::Json, false, _) => { + self.print_entry_detail(OutputFormat::Json, entry)? + } + (None, OutputFormat::Yaml, false, _) => { + self.print_entry_detail(OutputFormat::Yaml, entry)? + } + (None, OutputFormat::Jsonl, false, _) => { self.print_entry_detail(OutputFormat::Jsonl, entry)? } - (None, OutputFormat::Plain, Some(ls_colors)) => { + (None, OutputFormat::Plain, false, Some(ls_colors)) => { self.print_entry_colorized(entry, ls_colors)? 
} - (None, OutputFormat::Plain, None) => self.print_entry_uncolorized(entry)?, + (None, OutputFormat::Plain, false, None) => self.print_entry_uncolorized(entry)?, }; if has_hyperlink { From e831976e135440214db86abeb7eb7744513a0fe9 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Wed, 29 Oct 2025 23:49:20 +0800 Subject: [PATCH 22/32] docs: add --json flag to manpage --- doc/fd.1 | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/fd.1 b/doc/fd.1 index 73856cac6..067d6bdc8 100644 --- a/doc/fd.1 +++ b/doc/fd.1 @@ -504,6 +504,9 @@ greater than the given size, the command given with -X is run again with remaini batch size of zero means there is no limit (default), but note that batching might still happen due to OS restrictions on the maximum length of command lines. .TP +.BI "\-\-json " +Specify JSONL (as known as NDJSON) format to use for the output. +.TP .BI "\-\-output " Specify a structured format to use for the output. The value can be plain (default), json, jsonl, or yaml. From 49654f4b9f9ae68176fccd72af126ed91aeb11a1 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Sun, 2 Nov 2025 15:32:23 +0800 Subject: [PATCH 23/32] fix(clippy): collapse if blocks --- src/output.rs | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/src/output.rs b/src/output.rs index 853b14fa9..1117bb97f 100644 --- a/src/output.rs +++ b/src/output.rs @@ -60,13 +60,12 @@ impl<'a, W: Write> Printer<'a, W> { /// Begin JSON array output if in JSON format. /// Returns an error if writing to output fails. 
pub fn begin(&mut self) -> Result<(), ExitCode> { - if self.config.output == OutputFormat::Json { - if let Err(e) = writeln!(self.stdout, "[") { - if e.kind() != ::std::io::ErrorKind::BrokenPipe { - crate::error::print_error(format!("Could not write to output: {e}")); - return Err(ExitCode::GeneralError); - } - } + if self.config.output == OutputFormat::Json + && let Err(e) = writeln!(self.stdout, "[") + && e.kind() != ::std::io::ErrorKind::BrokenPipe + { + crate::error::print_error(format!("Could not write to output: {e}")); + return Err(ExitCode::GeneralError); } Ok(()) } @@ -74,13 +73,12 @@ impl<'a, W: Write> Printer<'a, W> { /// End JSON array output if in JSON format. /// Returns an error if writing to output fails. pub fn end(&mut self) -> Result<(), ExitCode> { - if self.config.output == OutputFormat::Json { - if let Err(e) = writeln!(self.stdout, "\n]") { - if e.kind() != ::std::io::ErrorKind::BrokenPipe { - crate::error::print_error(format!("Could not write to output: {e}")); - return Err(ExitCode::GeneralError); - } - } + if self.config.output == OutputFormat::Json + && let Err(e) = writeln!(self.stdout, "\n]") + && e.kind() != ::std::io::ErrorKind::BrokenPipe + { + crate::error::print_error(format!("Could not write to output: {e}")); + return Err(ExitCode::GeneralError); } Ok(()) } @@ -88,13 +86,12 @@ impl<'a, W: Write> Printer<'a, W> { // TODO: this function is performance critical and can probably be optimized pub fn print_entry(&mut self, entry: &DirEntry) -> io::Result<()> { let mut has_hyperlink = false; - if self.config.hyperlink { - if let Some(url) = PathUrl::new(entry.path()) { - write!(self.stdout, "\x1B]8;;{url}\x1B\\")?; - has_hyperlink = true; - } + if self.config.hyperlink + && let Some(url) = PathUrl::new(entry.path()) + { + write!(self.stdout, "\x1B]8;;{url}\x1B\\")?; + has_hyperlink = true; } - match ( &self.config.format, &self.config.output, From 56d347eae7d496d5e8ef9e7ed2b992c5877b71a5 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: 
Sat, 8 Nov 2025 20:09:21 +0800 Subject: [PATCH 24/32] fix: remove the `--output` flag --- doc/fd.1 | 16 ------- src/cli.rs | 18 -------- src/config.rs | 4 -- src/main.rs | 1 - src/output.rs | 113 ++++---------------------------------------------- src/walk.rs | 11 +---- 6 files changed, 8 insertions(+), 155 deletions(-) diff --git a/doc/fd.1 b/doc/fd.1 index bbea527cb..6c41d0f3b 100644 --- a/doc/fd.1 +++ b/doc/fd.1 @@ -514,22 +514,6 @@ due to OS restrictions on the maximum length of command lines. .BI "\-\-json " Specify JSONL (as known as NDJSON) format to use for the output. .TP -.BI "\-\-output " -Specify a structured format to use for the output. The value can be plain (default), json, jsonl, or -yaml. - -Currently, the default is "plain", and if the option is used without an argument "plain" -is used. The available options are: -.RS -.IP plain -Output the results as plain text (default). -.IP json -Output the results as a JSON array. -.IP jsonl -Output the results as JSON Lines (as known as NDJSON). -.IP yaml -Output the results as YAML. -.RE .SH PATTERN SYNTAX The regular expression syntax used by fd is documented here: diff --git a/src/cli.rs b/src/cli.rs index e816e82da..249570b2d 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -652,24 +652,11 @@ pub struct Opts { #[arg( long, value_name = "json", - conflicts_with("output"), help = "Print results in JSONL format so you can pipe it to tools.", long_help )] pub json: bool, - /// Print results in a certain format so you can pipe it to tools. - #[arg( - long, - value_name = "output", - value_enum, - default_value_t = OutputFormat::Plain, - conflicts_with("format"), - conflicts_with("list_details"), - help = "Print results in a certain format so you can pipe it to tools." - )] - pub output: OutputFormat, - /// By default, relative paths are prefixed with './' when -x/--exec, /// -X/--exec-batch, or -0/--print0 are given, to reduce the risk of a /// path starting with '-' being treated as a command line option. 
Use @@ -851,14 +838,9 @@ pub enum HyperlinkWhen { pub enum OutputFormat { /// Plain text output (default) Plain, - /// JSON output - Json, /// JSONL (JSON Lines, as known as Newline Delimited JSON) output #[value(alias = "ndjson")] Jsonl, - /// YAML output - #[value(alias = "yml")] - Yaml, } // there isn't a derive api for getting grouped values yet, diff --git a/src/config.rs b/src/config.rs index 038d19cc5..b1ecee3d7 100644 --- a/src/config.rs +++ b/src/config.rs @@ -3,7 +3,6 @@ use std::{path::PathBuf, sync::Arc, time::Duration}; use lscolors::LsColors; use regex::bytes::RegexSet; -use crate::cli::OutputFormat; use crate::exec::CommandSet; use crate::filetypes::FileTypes; #[cfg(unix)] @@ -134,9 +133,6 @@ pub struct Config { /// Whether to print results in JSONL format pub jsonl: bool, - - /// The output format to use - pub output: OutputFormat, } impl Config { diff --git a/src/main.rs b/src/main.rs index 9100c2814..f3c08b729 100644 --- a/src/main.rs +++ b/src/main.rs @@ -326,7 +326,6 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result { pub struct Printer<'a, W> { config: &'a Config, pub stdout: W, - started: bool, } impl<'a, W: Write> Printer<'a, W> { pub fn new(config: &'a Config, stdout: W) -> Self { - Self { - config, - stdout, - started: false, - } - } - - /// Begin JSON array output if in JSON format. - /// Returns an error if writing to output fails. - pub fn begin(&mut self) -> Result<(), ExitCode> { - if self.config.output == OutputFormat::Json - && let Err(e) = writeln!(self.stdout, "[") - && e.kind() != ::std::io::ErrorKind::BrokenPipe - { - crate::error::print_error(format!("Could not write to output: {e}")); - return Err(ExitCode::GeneralError); - } - Ok(()) - } - - /// End JSON array output if in JSON format. - /// Returns an error if writing to output fails. 
- pub fn end(&mut self) -> Result<(), ExitCode> { - if self.config.output == OutputFormat::Json - && let Err(e) = writeln!(self.stdout, "\n]") - && e.kind() != ::std::io::ErrorKind::BrokenPipe - { - crate::error::print_error(format!("Could not write to output: {e}")); - return Err(ExitCode::GeneralError); - } - Ok(()) + Self { config, stdout } } // TODO: this function is performance critical and can probably be optimized @@ -94,37 +62,19 @@ impl<'a, W: Write> Printer<'a, W> { } match ( &self.config.format, - &self.config.output, &self.config.jsonl, &self.config.ls_colors, ) { - (Some(template), _, _, _) => self.print_entry_format(entry, template)?, - (None, _, true, _) => self.print_entry_detail(OutputFormat::Jsonl, entry)?, - (None, OutputFormat::Json, false, _) => { - self.print_entry_detail(OutputFormat::Json, entry)? - } - (None, OutputFormat::Yaml, false, _) => { - self.print_entry_detail(OutputFormat::Yaml, entry)? - } - (None, OutputFormat::Jsonl, false, _) => { - self.print_entry_detail(OutputFormat::Jsonl, entry)? - } - (None, OutputFormat::Plain, false, Some(ls_colors)) => { - self.print_entry_colorized(entry, ls_colors)? 
- } - (None, OutputFormat::Plain, false, None) => self.print_entry_uncolorized(entry)?, + (Some(template), _, _) => self.print_entry_format(entry, template)?, + (None, true, _) => self.print_entry_detail(OutputFormat::Jsonl, entry)?, + (None, false, Some(ls_colors)) => self.print_entry_colorized(entry, ls_colors)?, + (None, false, None) => self.print_entry_uncolorized(entry)?, }; if has_hyperlink { write!(self.stdout, "\x1B]8;;\x1B\\")?; } - self.started = true; - - if matches!(self.config.output, OutputFormat::Json) { - return Ok(()); - } - if self.config.null_separator { write!(self.stdout, "\0") } else { @@ -234,54 +184,7 @@ impl<'a, W: Write> Printer<'a, W> { } } - fn print_entry_yaml_obj(&mut self, detail: &FileDetail) -> io::Result<()> { - // Manually construct a simple YAML representation - // to avoid adding a dependency on serde_yaml (deprecated). - // - // Write YAML fragments directly to stdout (should be buffered) - write!(self.stdout, "- ")?; - - match &detail.path { - PathEncoding::Utf8(path) => { - writeln!(self.stdout, "path: \"{}\"", path)?; - } - PathEncoding::Bytes(bytes) => { - writeln!( - self.stdout, - "path_base64: \"{}\"", - BASE64_STANDARD.encode(bytes) - )?; - } - } - - writeln!(self.stdout, " type: {}", detail.file_type)?; - - if let Some(size) = detail.size { - writeln!(self.stdout, " size: {}", size)?; - } - if let Some(mode) = detail.mode { - writeln!(self.stdout, " mode: 0o{mode:o}")?; - } - if let Some(modified) = &detail.modified { - writeln!(self.stdout, " modified: \"{}\"", modified)?; - } - if let Some(accessed) = &detail.accessed { - writeln!(self.stdout, " accessed: \"{}\"", accessed)?; - } - if let Some(created) = &detail.created { - writeln!(self.stdout, " created: \"{}\"", created)?; - } - Ok(()) - } - - fn print_entry_json_obj(&mut self, detail: &FileDetail, comma: bool) -> io::Result<()> { - if self.started && comma { - writeln!(self.stdout, ",")?; - } - - if comma { - write!(self.stdout, " ")?; - } + fn 
print_entry_json_obj(&mut self, detail: &FileDetail) -> io::Result<()> { write!(self.stdout, "{{")?; match &detail.path { @@ -380,9 +283,7 @@ impl<'a, W: Write> Printer<'a, W> { } }; match format { - OutputFormat::Json => self.print_entry_json_obj(&detail, true), - OutputFormat::Jsonl => self.print_entry_json_obj(&detail, false), - OutputFormat::Yaml => self.print_entry_yaml_obj(&detail), + OutputFormat::Jsonl => self.print_entry_json_obj(&detail), OutputFormat::Plain => unreachable!("Plain format should not call print_entry_detail"), } } diff --git a/src/walk.rs b/src/walk.rs index ca8c52d33..df461d895 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -172,21 +172,12 @@ impl<'a, W: Write + 'static> ReceiverBuffer<'a, W> { /// Process results until finished. fn process(&mut self) -> ExitCode { - if let Err(err) = self.printer.begin() { - return err; - } - let ec; loop { if let Err(err) = self.poll() { self.quit_flag.store(true, Ordering::Relaxed); - ec = err; - break; + return err; } } - if let Err(err) = self.printer.end() { - return err; - } - ec } /// Receive the next worker result. 
From 6db440926a287212e2c25ae0bbdc81a567f94404 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Sun, 9 Nov 2025 14:28:07 +0800 Subject: [PATCH 25/32] tests: fix `--output` tests to `--json` --- tests/tests.rs | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/tests/tests.rs b/tests/tests.rs index 9ddc51279..c6aec16ea 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -2713,16 +2713,20 @@ fn test_hyperlink() { fn test_output_format() { let te = TestEnv::new(DEFAULT_DIRS, DEFAULT_FILES); - let re = te.assert_success_and_get_output(".", &["--output=json", "."]); + let re = te.assert_success_and_get_output(".", &["--json", "."]); let stdout = String::from_utf8_lossy(&re.stdout); - let files: Vec = serde_json::from_str(&stdout).unwrap(); + let mut count = 0; + stdout.split("\n").for_each(|line| { + println!("line: {}", line); + if line.trim().is_empty() { + return; + } + let file: serde_json::Value = serde_json::from_str(line).unwrap(); + assert!(file.is_object() && file["path"].is_string()); + count += 1; + }); - assert_eq!(files.len(), DEFAULT_FILES.len() + DEFAULT_DIRS.len()); - - te.assert_success_and_get_output(".", &["--output=ndjson", "."]); - te.assert_success_and_get_output(".", &["--output=plain", "."]); - te.assert_success_and_get_output(".", &["--output=yaml", "."]); - te.assert_success_and_get_output(".", &["--output=yml", "."]); + assert_eq!(count, DEFAULT_FILES.len() + DEFAULT_DIRS.len()); } /// Filenames with invalid UTF-8 sequences @@ -2742,11 +2746,11 @@ fn test_output_format_invalid_utf8() { ) .unwrap(); - let re = te.assert_success_and_get_output(".", &["", "--output=json", "test1/"]); + let re = te.assert_success_and_get_output(".", &["", "--json", "test1/"]); let stdout = String::from_utf8_lossy(&re.stdout); - let files: Vec = serde_json::from_str(&stdout).unwrap(); - assert_eq!(files.len(), 1); - assert_eq!(files[0]["path_b64"], "dGVzdDEvdGVzdF/+aW52YWxpZC50eHQ="); + let files: serde_json::Value = 
serde_json::from_str(&stdout).unwrap(); + assert!(files.is_object()); + assert_eq!(files["path_b64"], "dGVzdDEvdGVzdF/+aW52YWxpZC50eHQ="); te.assert_output(&["invalid", "test1/"], "test1/test_�invalid.txt"); } From c2c8497ddeece634a496e9e7d1a2634106cb0947 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Thu, 13 Nov 2025 18:54:17 +0800 Subject: [PATCH 26/32] docs: add fields explaination in manual --- doc/fd.1 | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/doc/fd.1 b/doc/fd.1 index 6c41d0f3b..fd6c2f41e 100644 --- a/doc/fd.1 +++ b/doc/fd.1 @@ -512,7 +512,27 @@ batch size of zero means there is no limit (default), but note that batching mig due to OS restrictions on the maximum length of command lines. .TP .BI "\-\-json " +.RS Specify JSONL (as known as NDJSON) format to use for the output. + +Output fields: + + - "path": The file path as a UTF\-8 string. + + Note that when the path contains invalid UTF-8 sequences, it is encoded in base64 and stored in the "path_b64" field instead. + + - "type": The file type (e.g., "file", "directory"). + + - "size": The file size in bytes. + + - "mode": The file permissions in octal (e.g., 644). + + - "modified": The last modification time in ISO 8601 format (e.g., 2000-01-01T12:00:00Z). + + - "accessed": The last access time in ISO 8601 format. + + - "created": The creation time in ISO 8601 format. +.RE .TP .SH PATTERN SYNTAX The regular expression syntax used by fd is documented here: From 13d086886dd150eca659ea812251bdb07c82fdd3 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Thu, 13 Nov 2025 18:56:09 +0800 Subject: [PATCH 27/32] docs: change the flag to `--json` in CHANGELOG.md --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9ffa56549..8515a9271 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## Features -- Add `--yaml` flag for YAML format output. +- Add `--json` flag for JSONL format output. 
## Bugfixes From 47ee6ce939f32d20ae360e817e993468714c30d9 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Thu, 13 Nov 2025 18:56:55 +0800 Subject: [PATCH 28/32] fix(printer): make `Priner.stdout` private --- src/output.rs | 6 +++++- src/walk.rs | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/output.rs b/src/output.rs index 916ff4b7e..9b0963539 100644 --- a/src/output.rs +++ b/src/output.rs @@ -43,7 +43,7 @@ struct FileDetail<'a> { pub struct Printer<'a, W> { config: &'a Config, - pub stdout: W, + stdout: W, } impl<'a, W: Write> Printer<'a, W> { @@ -51,6 +51,10 @@ impl<'a, W: Write> Printer<'a, W> { Self { config, stdout } } + pub fn flush(&mut self) -> io::Result<()> { + self.stdout.flush() + } + // TODO: this function is performance critical and can probably be optimized pub fn print_entry(&mut self, entry: &DirEntry) -> io::Result<()> { let mut has_hyperlink = false; diff --git a/src/walk.rs b/src/walk.rs index df461d895..b0b3a8fb8 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -148,7 +148,7 @@ struct ReceiverBuffer<'a, W> { printer: output::Printer<'a, W>, } -impl<'a, W: Write + 'static> ReceiverBuffer<'a, W> { +impl<'a, W: Write + 'a> ReceiverBuffer<'a, W> { /// Create a new receiver buffer. fn new(state: &'a WorkerState, rx: Receiver, stdout: W) -> Self { let config = &state.config; @@ -294,7 +294,7 @@ impl<'a, W: Write + 'static> ReceiverBuffer<'a, W> { /// Flush stdout if necessary. fn flush(&mut self) -> Result<(), ExitCode> { - if self.printer.stdout.flush().is_err() { + if self.printer.flush().is_err() { // Probably a broken pipe. Exit gracefully. 
return Err(ExitCode::GeneralError); } From 8f3688607007420913057a9d3cc235fc0337ed53 Mon Sep 17 00:00:00 2001 From: Dustin Jiang Date: Thu, 20 Nov 2025 22:14:43 +0800 Subject: [PATCH 29/32] fix: use `jiff::Timestamp::try_from` to process SystemTime --- src/output.rs | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/src/output.rs b/src/output.rs index 9b0963539..55382c103 100644 --- a/src/output.rs +++ b/src/output.rs @@ -248,23 +248,20 @@ impl<'a, W: Write> Printer<'a, W> { _ => "unknown", }; - let modified = meta.modified().ok().and_then(|t| { - t.duration_since(std::time::UNIX_EPOCH) - .ok() - .and_then(|d| Timestamp::from_second(d.as_secs() as i64).ok()) - }); - - let accessed = meta.accessed().ok().and_then(|t| { - t.duration_since(std::time::UNIX_EPOCH) - .ok() - .and_then(|d| Timestamp::from_second(d.as_secs() as i64).ok()) - }); - - let created = meta.created().ok().and_then(|t| { - t.duration_since(std::time::UNIX_EPOCH) - .ok() - .and_then(|d| Timestamp::from_second(d.as_secs() as i64).ok()) - }); + let modified = meta + .modified() + .ok() + .and_then(|t| Timestamp::try_from(t).ok()); + + let accessed = meta + .accessed() + .ok() + .and_then(|t| Timestamp::try_from(t).ok()); + + let created = meta + .created() + .ok() + .and_then(|t| Timestamp::try_from(t).ok()); FileDetail { path: encoded_path, From 474cdd3a6eaa9d3bdfa3bb96594ebce198745d5f Mon Sep 17 00:00:00 2001 From: Thayne McCombs Date: Tue, 23 Dec 2025 01:36:28 -0700 Subject: [PATCH 30/32] refactor: json fmt mod This also addresses feedback from json PR: - mode is output as a string, using the octal representation - path uses the same format as the ripgrep output - use "size_bytes" instead of "size" to make the unit more clear Also, I fixed an issue where the mode included high bytes that are actually used to encode the filetype (at least on Linux). 
--- doc/fd.1 | 18 +-- src/cli.rs | 10 +- src/config.rs | 16 +-- src/fmt/json.rs | 83 ++++++++++++ src/fmt/mod.rs | 298 ++++---------------------------------------- src/fmt/template.rs | 279 +++++++++++++++++++++++++++++++++++++++++ src/main.rs | 21 ++-- src/output.rs | 146 ++-------------------- src/walk.rs | 5 +- tests/tests.rs | 51 +++++--- 10 files changed, 462 insertions(+), 465 deletions(-) create mode 100644 src/fmt/json.rs create mode 100644 src/fmt/template.rs diff --git a/doc/fd.1 b/doc/fd.1 index fd6c2f41e..e026d4f8d 100644 --- a/doc/fd.1 +++ b/doc/fd.1 @@ -513,25 +513,27 @@ due to OS restrictions on the maximum length of command lines. .TP .BI "\-\-json " .RS -Specify JSONL (as known as NDJSON) format to use for the output. +Specify JSONL (as known as NDJSON) format to use for the output. Output fields: - - "path": The file path as a UTF\-8 string. + - "path": An object containing the path of the file. When the path is valid UTF-8, it this contains a single "text" field + containing the path as a string. Otherwise it contains a single "bytes" field containing the base64 encoded bytes of the + path. - Note that when the path contains invalid UTF-8 sequences, it is encoded in base64 and stored in the "path_b64" field instead. + On windows, this may use a lossy UTF-8 encoding, since there isn't an obvious way to encode the pathname. - - "type": The file type (e.g., "file", "directory"). + - "type": The file type (e.g., "file", "directory", "symlink"). - - "size": The file size in bytes. + - "size_bytes": The file size in bytes. - "mode": The file permissions in octal (e.g., 644). - - "modified": The last modification time in ISO 8601 format (e.g., 2000-01-01T12:00:00Z). + - "modified": The last modification time in RFC3339 (ISO 8601) format (e.g., 2000-01-01T12:00:00Z). - - "accessed": The last access time in ISO 8601 format. + - "accessed": The last access time in RFC3339 format. - - "created": The creation time in ISO 8601 format. 
+ - "created": The creation time in RFC3339 format. .RE .TP .SH PATTERN SYNTAX diff --git a/src/cli.rs b/src/cli.rs index 249570b2d..2cfb5e8a2 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -653,6 +653,7 @@ pub struct Opts { long, value_name = "json", help = "Print results in JSONL format so you can pipe it to tools.", + conflicts_with_all(&["format", "list_details"]), long_help )] pub json: bool, @@ -834,15 +835,6 @@ pub enum HyperlinkWhen { Never, } -#[derive(Copy, Clone, PartialEq, Eq, Debug, ValueEnum)] -pub enum OutputFormat { - /// Plain text output (default) - Plain, - /// JSONL (JSON Lines, as known as Newline Delimited JSON) output - #[value(alias = "ndjson")] - Jsonl, -} - // there isn't a derive api for getting grouped values yet, // so we have to use hand-rolled parsing for exec and exec-batch pub struct Exec { diff --git a/src/config.rs b/src/config.rs index b1ecee3d7..57a3fa792 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,6 +1,5 @@ use std::{path::PathBuf, sync::Arc, time::Duration}; -use lscolors::LsColors; use regex::bytes::RegexSet; use crate::exec::CommandSet; @@ -8,7 +7,7 @@ use crate::filetypes::FileTypes; #[cfg(unix)] use crate::filter::OwnerFilter; use crate::filter::{SizeFilter, TimeFilter}; -use crate::fmt::FormatTemplate; +use crate::fmt::OutputFormat; /// Configuration options for *fd*. pub struct Config { @@ -70,10 +69,6 @@ pub struct Config { /// `max_buffer_time`. pub max_buffer_time: Option, - /// `None` if the output should not be colorized. Otherwise, a `LsColors` instance that defines - /// how to style different filetypes. - pub ls_colors: Option, - /// Whether or not we are writing to an interactive terminal #[cfg_attr(not(unix), allow(unused))] pub interactive_terminal: bool, @@ -87,8 +82,10 @@ pub struct Config { /// The value (if present) will be a lowercase string without leading dots. 
pub extensions: Option, - /// A format string to use to format results, similarly to exec - pub format: Option, + /// The format to use for the output + /// + /// determined by multiple options + pub format: OutputFormat, /// If a value is supplied, each item found will be used to generate and execute commands. pub command: Option>, @@ -130,9 +127,6 @@ pub struct Config { /// Whether or not to use hyperlinks on paths pub hyperlink: bool, - - /// Whether to print results in JSONL format - pub jsonl: bool, } impl Config { diff --git a/src/fmt/json.rs b/src/fmt/json.rs new file mode 100644 index 000000000..1c3250345 --- /dev/null +++ b/src/fmt/json.rs @@ -0,0 +1,83 @@ +#[cfg(unix)] +use std::os::unix::{ffi::OsStrExt, fs::MetadataExt}; +use std::{ + fs::{FileType, Metadata}, + io::Write, + path::Path, + time::SystemTime, +}; + +use base64::{Engine as _, prelude::BASE64_STANDARD}; +use jiff::Timestamp; + +pub fn output_json( + out: &mut W, + path: &Path, + filetype: Option, + metadata: Option<&Metadata>, +) -> std::io::Result<()> { + out.write_all(b"{")?; + + // Print the path as an object that either has a "text" key containing the + // utf8 path, or a "bytes" key with the base64 encoded bytes of the path + #[cfg(unix)] + match path.to_str() { + Some(text) => { + // NB: This assumes that rust's debug output for a string + // is a valid JSON string. At time of writing this is the case + // but it is possible, though unlikely, that this could change + // in the future. 
+ write!(out, r#""path":{{"text":{:?}}}"#, text)?; + } + None => { + let encoded_bytes = BASE64_STANDARD.encode(path.as_os_str().as_bytes()); + write!(out, r#""path":{{"bytes":"{}"}}"#, encoded_bytes)?; + } + }; + // On non-unix platforms, if the path isn't valid utf-8, + // we don't know what kind of encoding was used, and + // as_encoded_bytes() isn't necessarily stable between rust versions + // so the best we can really do is a lossy string + #[cfg(not(unix))] + write!(out, r#""path":{{"text":{:?}}}"#, path.to_string_lossy())?; + + // print the type of file + let ft = match filetype { + Some(ft) if ft.is_dir() => "directory", + Some(ft) if ft.is_file() => "file", + Some(ft) if ft.is_symlink() => "symlink", + _ => "unknown", + }; + write!(out, r#","type":"{}""#, ft)?; + + if let Some(meta) = metadata { + // Output the mode as octal + // We also need to mask it to just include the permission + // bits and not the file type bits (that is handled by "type" above) + #[cfg(unix)] + write!(out, r#","mode":"{:o}""#, meta.mode() & 0x7777)?; + + write!(out, r#","size_bytes":{}"#, meta.len())?; + + // would it be better to do these with os-specific functions? + if let Ok(modified) = meta.modified().map(json_timestamp) { + write!(out, r#","modified":"{}""#, modified)?; + } + if let Ok(accessed) = meta.accessed().map(json_timestamp) { + write!(out, r#","modified":"{}""#, accessed)?; + } + if let Ok(created) = meta.created().map(json_timestamp) { + write!(out, r#","modified":"{}""#, created)?; + } + } + + out.write_all(b"}") +} + +fn json_timestamp(time: SystemTime) -> Timestamp { + // System timestamps should always be valid, so assume that we can + // unwrap it + // If we ever do want to handle an error here, maybe convert to either the MAX or MIN + // timestamp depending on which side of the epoch the SystemTime is? 
+ Timestamp::try_from(time).expect("Invalid timestamp") +} diff --git a/src/fmt/mod.rs b/src/fmt/mod.rs index 87ee41923..12ce3207c 100644 --- a/src/fmt/mod.rs +++ b/src/fmt/mod.rs @@ -1,281 +1,27 @@ mod input; - -use std::borrow::Cow; -use std::ffi::{OsStr, OsString}; -use std::fmt::{self, Display, Formatter}; -use std::path::{Component, Path, Prefix}; -use std::sync::OnceLock; - -use aho_corasick::AhoCorasick; - -use self::input::{basename, dirname, remove_extension}; - -/// Designates what should be written to a buffer -/// -/// Each `Token` contains either text, or a placeholder variant, which will be used to generate -/// commands after all tokens for a given command template have been collected. -#[derive(Clone, Debug, PartialEq, Eq)] -pub enum Token { - Placeholder, - Basename, - Parent, - NoExt, - BasenameNoExt, - Text(String), -} - -impl Display for Token { - fn fmt(&self, f: &mut Formatter) -> fmt::Result { - match *self { - Token::Placeholder => f.write_str("{}")?, - Token::Basename => f.write_str("{/}")?, - Token::Parent => f.write_str("{//}")?, - Token::NoExt => f.write_str("{.}")?, - Token::BasenameNoExt => f.write_str("{/.}")?, - Token::Text(ref string) => f.write_str(string)?, - } - Ok(()) - } -} - -/// A parsed format string -/// -/// This is either a collection of `Token`s including at least one placeholder variant, -/// or a fixed text. 
-#[derive(Clone, Debug, PartialEq)] -pub enum FormatTemplate { - Tokens(Vec), - Text(String), -} - -static PLACEHOLDERS: OnceLock = OnceLock::new(); - -impl FormatTemplate { - pub fn has_tokens(&self) -> bool { - matches!(self, FormatTemplate::Tokens(_)) - } - - pub fn parse(fmt: &str) -> Self { - // NOTE: we assume that { and } have the same length - const BRACE_LEN: usize = '{'.len_utf8(); - let mut tokens = Vec::new(); - let mut remaining = fmt; - let mut buf = String::new(); - let placeholders = PLACEHOLDERS.get_or_init(|| { - AhoCorasick::new(["{{", "}}", "{}", "{/}", "{//}", "{.}", "{/.}"]).unwrap() - }); - while let Some(m) = placeholders.find(remaining) { - match m.pattern().as_u32() { - 0 | 1 => { - // we found an escaped {{ or }}, so add - // everything up to the first char to the buffer - // then skip the second one. - buf += &remaining[..m.start() + BRACE_LEN]; - remaining = &remaining[m.end()..]; - } - id if !remaining[m.end()..].starts_with('}') => { - buf += &remaining[..m.start()]; - if !buf.is_empty() { - tokens.push(Token::Text(std::mem::take(&mut buf))); - } - tokens.push(token_from_pattern_id(id)); - remaining = &remaining[m.end()..]; - } - _ => { - // We got a normal pattern, but the final "}" - // is escaped, so add up to that to the buffer, then - // skip the final } - buf += &remaining[..m.end()]; - remaining = &remaining[m.end() + BRACE_LEN..]; - } - } - } - // Add the rest of the string to the buffer, and add the final buffer to the tokens - if !remaining.is_empty() { - buf += remaining; - } - if tokens.is_empty() { - // No placeholders were found, so just return the text - return FormatTemplate::Text(buf); - } - // Add final text segment - if !buf.is_empty() { - tokens.push(Token::Text(buf)); - } - debug_assert!(!tokens.is_empty()); - FormatTemplate::Tokens(tokens) - } - - /// Generate a result string from this template. If path_separator is Some, then it will replace - /// the path separator in all placeholder tokens. 
Fixed text and tokens are not affected by - /// path separator substitution. - pub fn generate(&self, path: impl AsRef, path_separator: Option<&str>) -> OsString { - use Token::*; - let path = path.as_ref(); - - match *self { - Self::Tokens(ref tokens) => { - let mut s = OsString::new(); - for token in tokens { - match token { - Basename => s.push(Self::replace_separator(basename(path), path_separator)), - BasenameNoExt => s.push(Self::replace_separator( - &remove_extension(basename(path).as_ref()), - path_separator, - )), - NoExt => s.push(Self::replace_separator( - &remove_extension(path), - path_separator, - )), - Parent => s.push(Self::replace_separator(&dirname(path), path_separator)), - Placeholder => { - s.push(Self::replace_separator(path.as_ref(), path_separator)) - } - Text(string) => s.push(string), - } - } - s - } - Self::Text(ref text) => OsString::from(text), - } - } - - /// Replace the path separator in the input with the custom separator string. If path_separator - /// is None, simply return a borrowed Cow of the input. Otherwise, the input is - /// interpreted as a Path and its components are iterated through and re-joined into a new - /// OsString. - fn replace_separator<'a>(path: &'a OsStr, path_separator: Option<&str>) -> Cow<'a, OsStr> { - // fast-path - no replacement necessary - if path_separator.is_none() { - return Cow::Borrowed(path); - } - - let path_separator = path_separator.unwrap(); - let mut out = OsString::with_capacity(path.len()); - let mut components = Path::new(path).components().peekable(); - - while let Some(comp) = components.next() { - match comp { - // Absolute paths on Windows are tricky. A Prefix component is usually a drive - // letter or UNC path, and is usually followed by RootDir. There are also - // "verbatim" prefixes beginning with "\\?\" that skip normalization. 
We choose to - // ignore verbatim path prefixes here because they're very rare, might be - // impossible to reach here, and there's no good way to deal with them. If users - // are doing something advanced involving verbatim windows paths, they can do their - // own output filtering with a tool like sed. - Component::Prefix(prefix) => { - if let Prefix::UNC(server, share) = prefix.kind() { - // Prefix::UNC is a parsed version of '\\server\share' - out.push(path_separator); - out.push(path_separator); - out.push(server); - out.push(path_separator); - out.push(share); - } else { - // All other Windows prefix types are rendered as-is. This results in e.g. "C:" for - // drive letters. DeviceNS and Verbatim* prefixes won't have backslashes converted, - // but they're not returned by directories fd can search anyway so we don't worry - // about them. - out.push(comp.as_os_str()); - } - } - - // Root directory is always replaced with the custom separator. - Component::RootDir => out.push(path_separator), - - // Everything else is joined normally, with a trailing separator if we're not last - _ => { - out.push(comp.as_os_str()); - if components.peek().is_some() { - out.push(path_separator); - } - } - } - } - Cow::Owned(out) - } -} - -// Convert the id from an aho-corasick match to the -// appropriate token -fn token_from_pattern_id(id: u32) -> Token { - use Token::*; - match id { - 2 => Placeholder, - 3 => Basename, - 4 => Parent, - 5 => NoExt, - 6 => BasenameNoExt, - _ => unreachable!(), - } +pub mod json; +mod template; + +use lscolors::LsColors; + +pub use self::template::{FormatTemplate, Token}; + +/// Description of how the results should be formatted in the output +pub enum OutputFormat { + /// Default. 
+ /// Output as a plain path + Plain, + /// Output the path with color highlighting + Color(LsColors), + /// Use a custom template to format the results + Template(FormatTemplate), + /// Output in the json lines (jsonl, newline separated values) format + Jsonl, } -#[cfg(test)] -mod fmt_tests { - use super::*; - use std::path::PathBuf; - - #[test] - fn parse_no_placeholders() { - let templ = FormatTemplate::parse("This string has no placeholders"); - assert_eq!( - templ, - FormatTemplate::Text("This string has no placeholders".into()) - ); - } - - #[test] - fn parse_only_brace_escapes() { - let templ = FormatTemplate::parse("This string only has escapes like {{ and }}"); - assert_eq!( - templ, - FormatTemplate::Text("This string only has escapes like { and }".into()) - ); - } - - #[test] - fn all_placeholders() { - use Token::*; - - let templ = FormatTemplate::parse( - "{{path={} \ - basename={/} \ - parent={//} \ - noExt={.} \ - basenameNoExt={/.} \ - }}", - ); - assert_eq!( - templ, - FormatTemplate::Tokens(vec![ - Text("{path=".into()), - Placeholder, - Text(" basename=".into()), - Basename, - Text(" parent=".into()), - Parent, - Text(" noExt=".into()), - NoExt, - Text(" basenameNoExt=".into()), - BasenameNoExt, - Text(" }".into()), - ]) - ); - - let mut path = PathBuf::new(); - path.push("a"); - path.push("folder"); - path.push("file.txt"); - - let expanded = templ.generate(&path, Some("/")).into_string().unwrap(); - - assert_eq!( - expanded, - "{path=a/folder/file.txt \ - basename=file.txt \ - parent=a/folder \ - noExt=a/folder/file \ - basenameNoExt=file }" - ); +impl OutputFormat { + /// Return true if the output format uses ANSI colors + pub fn uses_color(&self) -> bool { + matches!(self, OutputFormat::Color(_)) } } diff --git a/src/fmt/template.rs b/src/fmt/template.rs new file mode 100644 index 000000000..7394f27e0 --- /dev/null +++ b/src/fmt/template.rs @@ -0,0 +1,279 @@ +use std::borrow::Cow; +use std::ffi::{OsStr, OsString}; +use std::fmt::{self, 
Display, Formatter}; +use std::path::{Component, Path, Prefix}; +use std::sync::OnceLock; + +use aho_corasick::AhoCorasick; + +use super::input::{basename, dirname, remove_extension}; + +/// Designates what should be written to a buffer +/// +/// Each `Token` contains either text, or a placeholder variant, which will be used to generate +/// commands after all tokens for a given command template have been collected. +#[derive(Clone, Debug, PartialEq, Eq)] +pub enum Token { + Placeholder, + Basename, + Parent, + NoExt, + BasenameNoExt, + Text(String), +} + +impl Display for Token { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + match *self { + Token::Placeholder => f.write_str("{}")?, + Token::Basename => f.write_str("{/}")?, + Token::Parent => f.write_str("{//}")?, + Token::NoExt => f.write_str("{.}")?, + Token::BasenameNoExt => f.write_str("{/.}")?, + Token::Text(ref string) => f.write_str(string)?, + } + Ok(()) + } +} + +/// A parsed format string +/// +/// This is either a collection of `Token`s including at least one placeholder variant, +/// or a fixed text. +#[derive(Clone, Debug, PartialEq)] +pub enum FormatTemplate { + Tokens(Vec), + Text(String), +} + +static PLACEHOLDERS: OnceLock = OnceLock::new(); + +impl FormatTemplate { + pub fn has_tokens(&self) -> bool { + matches!(self, FormatTemplate::Tokens(_)) + } + + pub fn parse(fmt: &str) -> Self { + // NOTE: we assume that { and } have the same length + const BRACE_LEN: usize = '{'.len_utf8(); + let mut tokens = Vec::new(); + let mut remaining = fmt; + let mut buf = String::new(); + let placeholders = PLACEHOLDERS.get_or_init(|| { + AhoCorasick::new(["{{", "}}", "{}", "{/}", "{//}", "{.}", "{/.}"]).unwrap() + }); + while let Some(m) = placeholders.find(remaining) { + match m.pattern().as_u32() { + 0 | 1 => { + // we found an escaped {{ or }}, so add + // everything up to the first char to the buffer + // then skip the second one. 
+ buf += &remaining[..m.start() + BRACE_LEN]; + remaining = &remaining[m.end()..]; + } + id if !remaining[m.end()..].starts_with('}') => { + buf += &remaining[..m.start()]; + if !buf.is_empty() { + tokens.push(Token::Text(std::mem::take(&mut buf))); + } + tokens.push(token_from_pattern_id(id)); + remaining = &remaining[m.end()..]; + } + _ => { + // We got a normal pattern, but the final "}" + // is escaped, so add up to that to the buffer, then + // skip the final } + buf += &remaining[..m.end()]; + remaining = &remaining[m.end() + BRACE_LEN..]; + } + } + } + // Add the rest of the string to the buffer, and add the final buffer to the tokens + if !remaining.is_empty() { + buf += remaining; + } + if tokens.is_empty() { + // No placeholders were found, so just return the text + return FormatTemplate::Text(buf); + } + // Add final text segment + if !buf.is_empty() { + tokens.push(Token::Text(buf)); + } + debug_assert!(!tokens.is_empty()); + FormatTemplate::Tokens(tokens) + } + + /// Generate a result string from this template. If path_separator is Some, then it will replace + /// the path separator in all placeholder tokens. Fixed text and tokens are not affected by + /// path separator substitution. 
+ pub fn generate(&self, path: impl AsRef, path_separator: Option<&str>) -> OsString { + use Token::*; + let path = path.as_ref(); + + match *self { + Self::Tokens(ref tokens) => { + let mut s = OsString::new(); + for token in tokens { + match token { + Basename => s.push(Self::replace_separator(basename(path), path_separator)), + BasenameNoExt => s.push(Self::replace_separator( + &remove_extension(basename(path).as_ref()), + path_separator, + )), + NoExt => s.push(Self::replace_separator( + &remove_extension(path), + path_separator, + )), + Parent => s.push(Self::replace_separator(&dirname(path), path_separator)), + Placeholder => { + s.push(Self::replace_separator(path.as_ref(), path_separator)) + } + Text(string) => s.push(string), + } + } + s + } + Self::Text(ref text) => OsString::from(text), + } + } + + /// Replace the path separator in the input with the custom separator string. If path_separator + /// is None, simply return a borrowed Cow of the input. Otherwise, the input is + /// interpreted as a Path and its components are iterated through and re-joined into a new + /// OsString. + fn replace_separator<'a>(path: &'a OsStr, path_separator: Option<&str>) -> Cow<'a, OsStr> { + // fast-path - no replacement necessary + if path_separator.is_none() { + return Cow::Borrowed(path); + } + + let path_separator = path_separator.unwrap(); + let mut out = OsString::with_capacity(path.len()); + let mut components = Path::new(path).components().peekable(); + + while let Some(comp) = components.next() { + match comp { + // Absolute paths on Windows are tricky. A Prefix component is usually a drive + // letter or UNC path, and is usually followed by RootDir. There are also + // "verbatim" prefixes beginning with "\\?\" that skip normalization. We choose to + // ignore verbatim path prefixes here because they're very rare, might be + // impossible to reach here, and there's no good way to deal with them. 
If users + // are doing something advanced involving verbatim windows paths, they can do their + // own output filtering with a tool like sed. + Component::Prefix(prefix) => { + if let Prefix::UNC(server, share) = prefix.kind() { + // Prefix::UNC is a parsed version of '\\server\share' + out.push(path_separator); + out.push(path_separator); + out.push(server); + out.push(path_separator); + out.push(share); + } else { + // All other Windows prefix types are rendered as-is. This results in e.g. "C:" for + // drive letters. DeviceNS and Verbatim* prefixes won't have backslashes converted, + // but they're not returned by directories fd can search anyway so we don't worry + // about them. + out.push(comp.as_os_str()); + } + } + + // Root directory is always replaced with the custom separator. + Component::RootDir => out.push(path_separator), + + // Everything else is joined normally, with a trailing separator if we're not last + _ => { + out.push(comp.as_os_str()); + if components.peek().is_some() { + out.push(path_separator); + } + } + } + } + Cow::Owned(out) + } +} + +// Convert the id from an aho-corasick match to the +// appropriate token +fn token_from_pattern_id(id: u32) -> Token { + use Token::*; + match id { + 2 => Placeholder, + 3 => Basename, + 4 => Parent, + 5 => NoExt, + 6 => BasenameNoExt, + _ => unreachable!(), + } +} + +#[cfg(test)] +mod fmt_tests { + use super::*; + use std::path::PathBuf; + + #[test] + fn parse_no_placeholders() { + let templ = FormatTemplate::parse("This string has no placeholders"); + assert_eq!( + templ, + FormatTemplate::Text("This string has no placeholders".into()) + ); + } + + #[test] + fn parse_only_brace_escapes() { + let templ = FormatTemplate::parse("This string only has escapes like {{ and }}"); + assert_eq!( + templ, + FormatTemplate::Text("This string only has escapes like { and }".into()) + ); + } + + #[test] + fn all_placeholders() { + use Token::*; + + let templ = FormatTemplate::parse( + "{{path={} \ + basename={/} \ 
+ parent={//} \ + noExt={.} \ + basenameNoExt={/.} \ + }}", + ); + assert_eq!( + templ, + FormatTemplate::Tokens(vec![ + Text("{path=".into()), + Placeholder, + Text(" basename=".into()), + Basename, + Text(" parent=".into()), + Parent, + Text(" noExt=".into()), + NoExt, + Text(" basenameNoExt=".into()), + BasenameNoExt, + Text(" }".into()), + ]) + ); + + let mut path = PathBuf::new(); + path.push("a"); + path.push("folder"); + path.push("file.txt"); + + let expanded = templ.generate(&path, Some("/")).into_string().unwrap(); + + assert_eq!( + expanded, + "{path=a/folder/file.txt \ + basename=file.txt \ + parent=a/folder \ + noExt=a/folder/file \ + basenameNoExt=file }" + ); + } +} diff --git a/src/main.rs b/src/main.rs index f3c08b729..9e161cae5 100644 --- a/src/main.rs +++ b/src/main.rs @@ -32,6 +32,7 @@ use crate::filetypes::FileTypes; #[cfg(unix)] use crate::filter::OwnerFilter; use crate::filter::TimeFilter; +use crate::fmt::{FormatTemplate, OutputFormat}; use crate::regex_helper::{pattern_has_uppercase_char, pattern_matches_strings_with_leading_dot}; // We use jemalloc for performance reasons, see https://github.com/sharkdp/fd/pull/481 @@ -232,11 +233,18 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result true, HyperlinkWhen::Never => false, @@ -265,7 +273,6 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result Result Result String { path.replace(std::path::MAIN_SEPARATOR, new_path_separator) } -fn encode_path(path: &Path) -> PathEncoding<'_> { - match path.to_str() { - Some(utf8) => PathEncoding::Utf8(utf8.escape_default()), - None => PathEncoding::Bytes(path.as_os_str().as_encoded_bytes()), - } -} - -enum PathEncoding<'a> { - Utf8(std::str::EscapeDefault<'a>), - Bytes(&'a [u8]), -} - -struct FileDetail<'a> { - path: PathEncoding<'a>, - file_type: &'static str, - size: Option, - mode: Option, - modified: Option, - accessed: Option, - created: Option, -} - pub struct Printer<'a, W> { config: &'a Config, stdout: 
W, @@ -64,15 +36,11 @@ impl<'a, W: Write> Printer<'a, W> { write!(self.stdout, "\x1B]8;;{url}\x1B\\")?; has_hyperlink = true; } - match ( - &self.config.format, - &self.config.jsonl, - &self.config.ls_colors, - ) { - (Some(template), _, _) => self.print_entry_format(entry, template)?, - (None, true, _) => self.print_entry_detail(OutputFormat::Jsonl, entry)?, - (None, false, Some(ls_colors)) => self.print_entry_colorized(entry, ls_colors)?, - (None, false, None) => self.print_entry_uncolorized(entry)?, + match &self.config.format { + Plain => self.print_entry_uncolorized(entry)?, + Color(colors) => self.print_entry_colorized(entry, colors)?, + Template(template) => self.print_entry_format(entry, template)?, + Jsonl => self.print_entry_json(entry)?, }; if has_hyperlink { @@ -188,104 +156,14 @@ impl<'a, W: Write> Printer<'a, W> { } } - fn print_entry_json_obj(&mut self, detail: &FileDetail) -> io::Result<()> { - write!(self.stdout, "{{")?; - - match &detail.path { - PathEncoding::Utf8(path_utf8) => { - write!(self.stdout, "\"path\":\"{}\"", path_utf8)?; - } - PathEncoding::Bytes(path_bytes) => { - write!( - self.stdout, - "\"path_b64\":\"{}\"", - BASE64_STANDARD.encode(path_bytes) - )?; - } - } - - write!(self.stdout, ",\"type\":\"{}\"", detail.file_type)?; - - if let Some(size) = detail.size { - write!(self.stdout, ",\"size\":{size}")?; - } - if let Some(mode) = detail.mode { - write!(self.stdout, ",\"mode\":{mode:o}")?; - } - if let Some(modified) = &detail.modified { - write!(self.stdout, ",\"modified\":\"{}\"", modified)?; - } - if let Some(accessed) = &detail.accessed { - write!(self.stdout, ",\"accessed\":\"{}\"", accessed)?; - } - if let Some(created) = &detail.created { - write!(self.stdout, ",\"created\":\"{}\"", created)?; - } - write!(self.stdout, "}}") - } - - fn print_entry_detail(&mut self, format: OutputFormat, entry: &DirEntry) -> io::Result<()> { + /// Print the entry as a jsonl line + fn print_entry_json(&mut self, entry: &DirEntry) -> io::Result<()> 
{ let path = entry.stripped_path(self.config); - let encoded_path = encode_path(path); + // Should we have an option to avoid doing a stat call? + // Is it worth doing json output if all you have is the path and file type? let metadata = entry.metadata(); - let detail = if let Some(meta) = metadata { - let size = meta.len(); - let mode = { - #[cfg(unix)] - { - Some(meta.permissions().mode() & 0o7777) - } - #[cfg(not(unix))] - { - None - } - }; - let ft = match meta.file_type() { - ft if ft.is_dir() => "directory", - ft if ft.is_file() => "file", - ft if ft.is_symlink() => "symlink", - _ => "unknown", - }; - - let modified = meta - .modified() - .ok() - .and_then(|t| Timestamp::try_from(t).ok()); - - let accessed = meta - .accessed() - .ok() - .and_then(|t| Timestamp::try_from(t).ok()); - - let created = meta - .created() - .ok() - .and_then(|t| Timestamp::try_from(t).ok()); - - FileDetail { - path: encoded_path, - file_type: ft, - size: Some(size), - mode, - modified, - accessed, - created, - } - } else { - FileDetail { - path: encoded_path, - file_type: "unknown", - size: None, - mode: None, - modified: None, - accessed: None, - created: None, - } - }; - match format { - OutputFormat::Jsonl => self.print_entry_json_obj(&detail), - OutputFormat::Plain => unreachable!("Plain format should not call print_entry_detail"), - } + crate::fmt::json::output_json(&mut self.stdout, path, entry.file_type(), metadata)?; + Ok(()) } } diff --git a/src/walk.rs b/src/walk.rs index b0b3a8fb8..587671362 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -21,6 +21,7 @@ use crate::error::print_error; use crate::exec; use crate::exit_codes::{ExitCode, merge_exitcodes}; use crate::filesystem; +use crate::fmt::OutputFormat; use crate::output; /// The receiver thread can either be buffering results or directly streaming to the console. 
@@ -597,7 +598,7 @@ impl WorkerState { } if config.is_printing() - && let Some(ls_colors) = &config.ls_colors + && let OutputFormat::Color(ls_colors) = &config.format { // Compute colors in parallel entry.style(ls_colors); @@ -624,7 +625,7 @@ impl WorkerState { let config = &self.config; let walker = self.build_walker(paths)?; - if config.ls_colors.is_some() && config.is_printing() { + if config.format.uses_color() && config.is_printing() { let quit_flag = Arc::clone(&self.quit_flag); let interrupt_flag = Arc::clone(&self.interrupt_flag); diff --git a/tests/tests.rs b/tests/tests.rs index c6aec16ea..343f49bdc 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -2708,31 +2708,50 @@ fn test_hyperlink() { te.assert_output(&["--hyperlink=always", "a.foo"], &expected); } -/// Test various output formats +/// Test json output #[test] -fn test_output_format() { +fn test_json() { let te = TestEnv::new(DEFAULT_DIRS, DEFAULT_FILES); - let re = te.assert_success_and_get_output(".", &["--json", "."]); + let re = te.assert_success_and_get_output(".", &["--json", "foo"]); let stdout = String::from_utf8_lossy(&re.stdout); - let mut count = 0; - stdout.split("\n").for_each(|line| { - println!("line: {}", line); - if line.trim().is_empty() { - return; - } - let file: serde_json::Value = serde_json::from_str(line).unwrap(); - assert!(file.is_object() && file["path"].is_string()); - count += 1; - }); + let found_files: std::collections::HashSet<_> = stdout + .split("\n") + .flat_map(|line| { + if line.is_empty() { + return None; + } + let file: serde_json::Value = serde_json::from_str(line).unwrap(); + assert!(file.is_object(), "Match is not object"); + assert!(file["path"].is_object(), "Path is not an object"); + Some( + file["path"]["text"] + .as_str() + .expect("path.text is not a string") + .to_owned(), + ) + }) + .collect(); - assert_eq!(count, DEFAULT_FILES.len() + DEFAULT_DIRS.len()); + let expected = [ + "a.foo", + "one/b.foo", + "one/two/c.foo", + "one/two/C.Foo2", + 
"one/two/three/directory_foo", + "one/two/three/d.foo", + ]; + + assert_eq!(found_files.len(), expected.len()); + for f in expected { + assert!(found_files.contains(f), "didn't find {f}"); + } } /// Filenames with invalid UTF-8 sequences #[cfg(target_os = "linux")] #[test] -fn test_output_format_invalid_utf8() { +fn test_json_invalid_utf8() { use std::ffi::OsStr; use std::os::unix::ffi::OsStrExt; @@ -2750,7 +2769,7 @@ fn test_output_format_invalid_utf8() { let stdout = String::from_utf8_lossy(&re.stdout); let files: serde_json::Value = serde_json::from_str(&stdout).unwrap(); assert!(files.is_object()); - assert_eq!(files["path_b64"], "dGVzdDEvdGVzdF/+aW52YWxpZC50eHQ="); + assert_eq!(files["path"]["bytes"], "dGVzdDEvdGVzdF/+aW52YWxpZC50eHQ="); te.assert_output(&["invalid", "test1/"], "test1/test_�invalid.txt"); } From 3a620b09ab55bab13ef9aca3a4d2e717f8c72831 Mon Sep 17 00:00:00 2001 From: Thayne McCombs Date: Tue, 23 Dec 2025 01:36:28 -0700 Subject: [PATCH 31/32] refactor: json fmt mod This also addresses feedback from json PR: - mode is output as a string, using the octal representation - path uses the same format as the ripgrep output - use "size_bytes" instead of "size" to make the unit more clear Also, I fixed an issue where the mode included high bytes that are actually used to encode the filetype (at least on Linux). 
--- tests/tests.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tests/tests.rs b/tests/tests.rs index 49e69810b..bb342f9eb 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -2713,7 +2713,9 @@ fn test_hyperlink() { fn test_json() { let te = TestEnv::new(DEFAULT_DIRS, DEFAULT_FILES); - let re = te.assert_success_and_get_output(".", &["--json", "foo"]); + // We use path-separator=/ so that the paths are the same on windows as on + // unix + let re = te.assert_success_and_get_output(".", &["--json", "--path-separator=/", "foo"]); let stdout = String::from_utf8_lossy(&re.stdout); let found_files: std::collections::HashSet<_> = stdout .split("\n") From 2adb30dfdecd61aba5127b747b325ae998b14d77 Mon Sep 17 00:00:00 2001 From: Thayne McCombs Date: Mon, 29 Dec 2025 01:10:49 -0700 Subject: [PATCH 32/32] fix: Use path separator in json output Unless we use binary output --- doc/fd.1 | 2 ++ src/fmt/json.rs | 19 ++++++++++++------- src/output.rs | 8 +++++++- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/doc/fd.1 b/doc/fd.1 index e026d4f8d..cd72d5f16 100644 --- a/doc/fd.1 +++ b/doc/fd.1 @@ -523,6 +523,8 @@ Output fields: On windows, this may use a lossy UTF-8 encoding, since there isn't an obvious way to encode the pathname. + If a custom path separator is given, it is used in the "text" field, but not in the "bytes" field. + - "type": The file type (e.g., "file", "directory", "symlink"). - "size_bytes": The file size in bytes. 
diff --git a/src/fmt/json.rs b/src/fmt/json.rs index 1c3250345..e51597005 100644 --- a/src/fmt/json.rs +++ b/src/fmt/json.rs @@ -1,11 +1,10 @@ +use std::borrow::Cow; +use std::fs::{FileType, Metadata}; +use std::io::Write; #[cfg(unix)] use std::os::unix::{ffi::OsStrExt, fs::MetadataExt}; -use std::{ - fs::{FileType, Metadata}, - io::Write, - path::Path, - time::SystemTime, -}; +use std::path::{MAIN_SEPARATOR, Path}; +use std::time::SystemTime; use base64::{Engine as _, prelude::BASE64_STANDARD}; use jiff::Timestamp; @@ -15,6 +14,7 @@ pub fn output_json( path: &Path, filetype: Option, metadata: Option<&Metadata>, + path_separator: &Option, ) -> std::io::Result<()> { out.write_all(b"{")?; @@ -23,11 +23,16 @@ pub fn output_json( #[cfg(unix)] match path.to_str() { Some(text) => { + let final_path: Cow = if let Some(sep) = path_separator { + text.replace(MAIN_SEPARATOR, sep).into() + } else { + text.into() + }; // NB: This assumes that rust's debug output for a string // is a valid JSON string. At time of writing this is the case // but it is possible, though unlikely, that this could change // in the future. - write!(out, r#""path":{{"text":{:?}}}"#, text)?; + write!(out, r#""path":{{"text":{:?}}}"#, final_path)?; } None => { let encoded_bytes = BASE64_STANDARD.encode(path.as_os_str().as_bytes()); diff --git a/src/output.rs b/src/output.rs index 481585120..5a3706c3d 100644 --- a/src/output.rs +++ b/src/output.rs @@ -163,7 +163,13 @@ impl<'a, W: Write> Printer<'a, W> { // Is it worth doing json output if all you have is the path and file type? let metadata = entry.metadata(); - crate::fmt::json::output_json(&mut self.stdout, path, entry.file_type(), metadata)?; + crate::fmt::json::output_json( + &mut self.stdout, + path, + entry.file_type(), + metadata, + &self.config.path_separator, + )?; Ok(()) } }