From d33fbbc964b321025e2995aad745775062b65d26 Mon Sep 17 00:00:00 2001 From: Matteo Manuelli Date: Wed, 19 Nov 2025 16:25:00 +0100 Subject: [PATCH 1/4] Add support for filtering orphaned files with --owner orphan Extends the --owner filter to support finding files with no valid user/group by using the 'orphan' keyword. This provides equivalent functionality to find's -nouser and -nogroup flags. Examples: fd --owner orphan # equivalent to find -nouser fd --owner :orphan # equivalent to find -nogroup fd --owner orphan:orphan # both -nouser and -nogroup Implementation: - Added Check::Orphaned variant to owner filter - Extended OwnerFilter::from_string to recognize 'orphan' keyword - Updated OwnerFilter::matches to check if uid/gid maps to valid user/group - Added unit tests for orphan parsing --- src/cli.rs | 3 +++ src/filter/owner.rs | 19 ++++++++++++++++++- 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/cli.rs b/src/cli.rs index d5174689d..ed480f644 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -441,11 +441,14 @@ pub struct Opts { /// Filter files by their user and/or group. /// Format: [(user|uid)][:(group|gid)]. Either side is optional. /// Precede either side with a '!' to exclude files instead. + /// Use 'orphan' to match files with no valid user/group. 
/// /// Examples: /// {n} --owner john /// {n} --owner :students /// {n} --owner '!john:students' + /// {n} --owner orphan + /// {n} --owner :orphan #[cfg(unix)] #[arg(long, short = 'o', value_parser = OwnerFilter::from_string, value_name = "user:group", help = "Filter by owning user and/or group", diff --git a/src/filter/owner.rs b/src/filter/owner.rs index 0f56fda71..06c857e91 100644 --- a/src/filter/owner.rs +++ b/src/filter/owner.rs @@ -13,6 +13,7 @@ enum Check { Equal(T), NotEq(T), Ignore, + Orphaned, } impl OwnerFilter { @@ -69,7 +70,17 @@ impl OwnerFilter { pub fn matches(&self, md: &fs::Metadata) -> bool { use std::os::unix::fs::MetadataExt; - self.uid.check(md.uid()) && self.gid.check(md.gid()) + let uid_match = match self.uid { + Check::Orphaned => User::from_uid(md.uid().into()).ok().flatten().is_none(), + _ => self.uid.check(md.uid()), + }; + + let gid_match = match self.gid { + Check::Orphaned => Group::from_gid(md.gid().into()).ok().flatten().is_none(), + _ => self.gid.check(md.gid()), + }; + + uid_match && gid_match } } @@ -79,6 +90,7 @@ impl Check { Check::Equal(x) => v == *x, Check::NotEq(x) => v != *x, Check::Ignore => true, + Check::Orphaned => unreachable!("Orphaned check handled in OwnerFilter::matches"), } } @@ -88,6 +100,7 @@ impl Check { { let (s, equality) = match s { Some("") | None => return Ok(Check::Ignore), + Some("orphan") => return Ok(Check::Orphaned), Some(s) if s.starts_with('!') => (&s[1..], false), Some(s) => (s, true), }; @@ -134,6 +147,10 @@ mod owner_parsing { both_negate:"!4:!3" => Ok(OwnerFilter { uid: NotEq(4), gid: NotEq(3) }), uid_not_gid:"6:!8" => Ok(OwnerFilter { uid: Equal(6), gid: NotEq(8) }), + orphan_uid: "orphan" => Ok(OwnerFilter { uid: Orphaned, gid: Ignore }), + orphan_gid: ":orphan" => Ok(OwnerFilter { uid: Ignore, gid: Orphaned }), + orphan_both:"orphan:orphan"=> Ok(OwnerFilter { uid: Orphaned, gid: Orphaned }), + more_colons:"3:5:" => Err(_), only_colons:"::" => Err(_), } From 
cd9fc6247306758b4e703a0dea6b0b8d9488b5bf Mon Sep 17 00:00:00 2001 From: Matteo Manuelli Date: Thu, 20 Nov 2025 12:33:35 +0100 Subject: [PATCH 2/4] Address review feedback: improve design and add caching Changes based on maintainer feedback: - Changed keyword from 'orphan' to '-' to avoid conflicts with actual usernames - Refactored Check::Orphan to include validator function (fn(u32) -> bool) - Simplified matches() to use check() uniformly - no special case handling - Added caching with LazyLock<Mutex<HashSet>> to avoid repeated uid/gid lookups - Updated documentation (man page and CHANGELOG) The new design stores the validation logic in the Orphan variant itself, making it self-contained and consistent with other Check variants. All checks now go through the generic check() method. --- CHANGELOG.md | 2 ++ doc/fd.1 | 5 +++- src/cli.rs | 6 ++--- src/filter/owner.rs | 59 ++++++++++++++++++++++++++++++--------------- 4 files changed, 48 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44555fb5a..26a509df9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,8 @@ ## Features +- Add support for filtering orphaned files with `--owner "-"` (equivalent to find's `-nouser`/`-nogroup`) + ## Bugfixes diff --git a/doc/fd.1 b/doc/fd.1 index df42b1724..906f8246a 100644 --- a/doc/fd.1 +++ b/doc/fd.1 @@ -378,12 +378,15 @@ Examples: .TP .BI "-o, \-\-owner " [user][:group] Filter files by their user and/or group. Format: [(user|uid)][:(group|gid)]. Either side -is optional. Precede either side with a '!' to exclude files instead. +is optional. Precede either side with a '!' to exclude files instead. Use '\-' to match +files with no valid user/group (equivalent to find's \-nouser/\-nogroup). Examples: \-\-owner john \-\-owner :students \-\-owner "!john:students" + \-\-owner "\-" + \-\-owner ":\-" .TP .BI "-C, \-\-base\-directory " path Change the current working directory of fd to the provided path. 
This means that search results will diff --git a/src/cli.rs b/src/cli.rs index ed480f644..c5d8b398c 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -441,14 +441,14 @@ pub struct Opts { /// Filter files by their user and/or group. /// Format: [(user|uid)][:(group|gid)]. Either side is optional. /// Precede either side with a '!' to exclude files instead. - /// Use 'orphan' to match files with no valid user/group. + /// Use '-' to match files with no valid user/group. /// /// Examples: /// {n} --owner john /// {n} --owner :students /// {n} --owner '!john:students' - /// {n} --owner orphan - /// {n} --owner :orphan + /// {n} --owner "-" + /// {n} --owner ":-" #[cfg(unix)] #[arg(long, short = 'o', value_parser = OwnerFilter::from_string, value_name = "user:group", help = "Filter by owning user and/or group", diff --git a/src/filter/owner.rs b/src/filter/owner.rs index 06c857e91..1cab3cd05 100644 --- a/src/filter/owner.rs +++ b/src/filter/owner.rs @@ -1,6 +1,35 @@ use anyhow::{Result, anyhow}; use nix::unistd::{Group, User}; +use std::collections::HashSet; use std::fs; +use std::sync::{LazyLock, Mutex}; + +static VALID_UIDS: LazyLock>> = LazyLock::new(|| Mutex::new(HashSet::new())); +static VALID_GIDS: LazyLock>> = LazyLock::new(|| Mutex::new(HashSet::new())); + +fn is_valid_uid(uid: u32) -> bool { + let mut cache = VALID_UIDS.lock().unwrap(); + if cache.contains(&uid) { + return true; + } + let valid = matches!(User::from_uid(uid.into()), Ok(Some(_))); + if valid { + cache.insert(uid); + } + valid +} + +fn is_valid_gid(gid: u32) -> bool { + let mut cache = VALID_GIDS.lock().unwrap(); + if cache.contains(&gid) { + return true; + } + let valid = matches!(Group::from_gid(gid.into()), Ok(Some(_))); + if valid { + cache.insert(gid); + } + valid +} #[derive(Clone, Copy, Debug, PartialEq, Eq)] pub struct OwnerFilter { @@ -13,7 +42,7 @@ enum Check { Equal(T), NotEq(T), Ignore, - Orphaned, + Orphan(fn(T) -> bool), } impl OwnerFilter { @@ -44,7 +73,7 @@ impl OwnerFilter { 
.map(|user| user.uid.as_raw()) .ok_or_else(|| anyhow!("'{}' is not a recognized user name", s)) } - })?; + }, is_valid_uid)?; let gid = Check::parse(snd, |s| { if let Ok(gid) = s.parse() { Ok(gid) @@ -53,7 +82,7 @@ impl OwnerFilter { .map(|group| group.gid.as_raw()) .ok_or_else(|| anyhow!("'{}' is not a recognized group name", s)) } - })?; + }, is_valid_gid)?; Ok(OwnerFilter { uid, gid }) } @@ -70,17 +99,7 @@ impl OwnerFilter { pub fn matches(&self, md: &fs::Metadata) -> bool { use std::os::unix::fs::MetadataExt; - let uid_match = match self.uid { - Check::Orphaned => User::from_uid(md.uid().into()).ok().flatten().is_none(), - _ => self.uid.check(md.uid()), - }; - - let gid_match = match self.gid { - Check::Orphaned => Group::from_gid(md.gid().into()).ok().flatten().is_none(), - _ => self.gid.check(md.gid()), - }; - - uid_match && gid_match + self.uid.check(md.uid()) && self.gid.check(md.gid()) } } @@ -90,17 +109,17 @@ impl Check { Check::Equal(x) => v == *x, Check::NotEq(x) => v != *x, Check::Ignore => true, - Check::Orphaned => unreachable!("Orphaned check handled in OwnerFilter::matches"), + Check::Orphan(validator) => !validator(v), } } - fn parse(s: Option<&str>, f: F) -> Result + fn parse(s: Option<&str>, f: F, validator: fn(T) -> bool) -> Result where F: Fn(&str) -> Result, { let (s, equality) = match s { Some("") | None => return Ok(Check::Ignore), - Some("orphan") => return Ok(Check::Orphaned), + Some("-") => return Ok(Check::Orphan(validator)), Some(s) if s.starts_with('!') => (&s[1..], false), Some(s) => (s, true), }; @@ -147,9 +166,9 @@ mod owner_parsing { both_negate:"!4:!3" => Ok(OwnerFilter { uid: NotEq(4), gid: NotEq(3) }), uid_not_gid:"6:!8" => Ok(OwnerFilter { uid: Equal(6), gid: NotEq(8) }), - orphan_uid: "orphan" => Ok(OwnerFilter { uid: Orphaned, gid: Ignore }), - orphan_gid: ":orphan" => Ok(OwnerFilter { uid: Ignore, gid: Orphaned }), - orphan_both:"orphan:orphan"=> Ok(OwnerFilter { uid: Orphaned, gid: Orphaned }), + orphan_uid: "-" => 
Ok(OwnerFilter { uid: Orphan(is_valid_uid), gid: Ignore }), + orphan_gid: ":-" => Ok(OwnerFilter { uid: Ignore, gid: Orphan(is_valid_gid) }), + orphan_both:"-:-" => Ok(OwnerFilter { uid: Orphan(is_valid_uid), gid: Orphan(is_valid_gid) }), more_colons:"3:5:" => Err(_), only_colons:"::" => Err(_), From cd6b72d6f86172c9233e9dca51544c9507aa588c Mon Sep 17 00:00:00 2001 From: Matteo Manuelli Date: Mon, 24 Nov 2025 21:49:23 +0100 Subject: [PATCH 3/4] Fix clippy warning: remove PartialEq from Check to avoid fn pointer comparison --- src/filter/owner.rs | 65 ++++++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 31 deletions(-) diff --git a/src/filter/owner.rs b/src/filter/owner.rs index 1cab3cd05..9af2314bf 100644 --- a/src/filter/owner.rs +++ b/src/filter/owner.rs @@ -31,13 +31,13 @@ fn is_valid_gid(gid: u32) -> bool { valid } -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug)] pub struct OwnerFilter { uid: Check, gid: Check, } -#[derive(Clone, Copy, Debug, PartialEq, Eq)] +#[derive(Clone, Copy, Debug)] enum Check { Equal(T), NotEq(T), @@ -46,11 +46,6 @@ enum Check { } impl OwnerFilter { - const IGNORE: Self = OwnerFilter { - uid: Check::Ignore, - gid: Check::Ignore, - }; - /// Parses an owner constraint /// Returns an error if the string is invalid /// Returns Ok(None) when string is acceptable but a noop (such as "" or ":") @@ -65,31 +60,39 @@ impl OwnerFilter { )); } - let uid = Check::parse(fst, |s| { - if let Ok(uid) = s.parse() { - Ok(uid) - } else { - User::from_name(s)? - .map(|user| user.uid.as_raw()) - .ok_or_else(|| anyhow!("'{}' is not a recognized user name", s)) - } - }, is_valid_uid)?; - let gid = Check::parse(snd, |s| { - if let Ok(gid) = s.parse() { - Ok(gid) - } else { - Group::from_name(s)? 
- .map(|group| group.gid.as_raw()) - .ok_or_else(|| anyhow!("'{}' is not a recognized group name", s)) - } - }, is_valid_gid)?; + let uid = Check::parse( + fst, + |s| { + if let Ok(uid) = s.parse() { + Ok(uid) + } else { + User::from_name(s)? + .map(|user| user.uid.as_raw()) + .ok_or_else(|| anyhow!("'{}' is not a recognized user name", s)) + } + }, + is_valid_uid, + )?; + let gid = Check::parse( + snd, + |s| { + if let Ok(gid) = s.parse() { + Ok(gid) + } else { + Group::from_name(s)? + .map(|group| group.gid.as_raw()) + .ok_or_else(|| anyhow!("'{}' is not a recognized group name", s)) + } + }, + is_valid_gid, + )?; Ok(OwnerFilter { uid, gid }) } /// If self is a no-op (ignore both uid and gid) then return `None`, otherwise wrap in a `Some` pub fn filter_ignore(self) -> Option { - if self == Self::IGNORE { + if matches!(self.uid, Check::Ignore) && matches!(self.gid, Check::Ignore) { None } else { Some(self) @@ -155,20 +158,20 @@ mod owner_parsing { use super::Check::*; owner_tests! { - empty: "" => Ok(OwnerFilter::IGNORE), + empty: "" => Ok(OwnerFilter { uid: Ignore, gid: Ignore }), uid_only: "5" => Ok(OwnerFilter { uid: Equal(5), gid: Ignore }), uid_gid: "9:3" => Ok(OwnerFilter { uid: Equal(9), gid: Equal(3) }), gid_only: ":8" => Ok(OwnerFilter { uid: Ignore, gid: Equal(8) }), - colon_only: ":" => Ok(OwnerFilter::IGNORE), + colon_only: ":" => Ok(OwnerFilter { uid: Ignore, gid: Ignore }), trailing: "5:" => Ok(OwnerFilter { uid: Equal(5), gid: Ignore }), uid_negate: "!5" => Ok(OwnerFilter { uid: NotEq(5), gid: Ignore }), both_negate:"!4:!3" => Ok(OwnerFilter { uid: NotEq(4), gid: NotEq(3) }), uid_not_gid:"6:!8" => Ok(OwnerFilter { uid: Equal(6), gid: NotEq(8) }), - orphan_uid: "-" => Ok(OwnerFilter { uid: Orphan(is_valid_uid), gid: Ignore }), - orphan_gid: ":-" => Ok(OwnerFilter { uid: Ignore, gid: Orphan(is_valid_gid) }), - orphan_both:"-:-" => Ok(OwnerFilter { uid: Orphan(is_valid_uid), gid: Orphan(is_valid_gid) }), + orphan_uid: "-" => Ok(OwnerFilter { uid: 
Orphan(_), gid: Ignore }), + orphan_gid: ":-" => Ok(OwnerFilter { uid: Ignore, gid: Orphan(_) }), + orphan_both:"-:-" => Ok(OwnerFilter { uid: Orphan(_), gid: Orphan(_) }), more_colons:"3:5:" => Err(_), only_colons:"::" => Err(_), From 763e88576aafb5e4d4b65d32ecc4fba246b413c5 Mon Sep 17 00:00:00 2001 From: Matteo Manuelli Date: Thu, 27 Nov 2025 13:27:17 +0100 Subject: [PATCH 4/4] Use thread-local cache with HashMap for uid/gid lookups - Replace LazyLock<Mutex<HashSet>> with thread_local RefCell<HashMap> - Cache both positive and negative lookup results - Avoids mutex contention in parallel traversal --- src/filter/owner.rs | 40 +++++++++++++++++----------------------- 1 file changed, 17 insertions(+), 23 deletions(-) diff --git a/src/filter/owner.rs b/src/filter/owner.rs index 9af2314bf..43df5ef74 100644 --- a/src/filter/owner.rs +++ b/src/filter/owner.rs @@ -1,34 +1,28 @@ use anyhow::{Result, anyhow}; use nix::unistd::{Group, User}; -use std::collections::HashSet; -use std::fs; -use std::sync::{LazyLock, Mutex}; +use std::{cell::RefCell, collections::HashMap, fs}; -static VALID_UIDS: LazyLock>> = LazyLock::new(|| Mutex::new(HashSet::new())); -static VALID_GIDS: LazyLock>> = LazyLock::new(|| Mutex::new(HashSet::new())); +thread_local! 
{ + static UID_CACHE: RefCell> = RefCell::new(HashMap::new()); + static GID_CACHE: RefCell> = RefCell::new(HashMap::new()); +} fn is_valid_uid(uid: u32) -> bool { - let mut cache = VALID_UIDS.lock().unwrap(); - if cache.contains(&uid) { - return true; - } - let valid = matches!(User::from_uid(uid.into()), Ok(Some(_))); - if valid { - cache.insert(uid); - } - valid + UID_CACHE.with(|cache| { + *cache + .borrow_mut() + .entry(uid) + .or_insert_with(|| matches!(User::from_uid(uid.into()), Ok(Some(_)))) + }) } fn is_valid_gid(gid: u32) -> bool { - let mut cache = VALID_GIDS.lock().unwrap(); - if cache.contains(&gid) { - return true; - } - let valid = matches!(Group::from_gid(gid.into()), Ok(Some(_))); - if valid { - cache.insert(gid); - } - valid + GID_CACHE.with(|cache| { + *cache + .borrow_mut() + .entry(gid) + .or_insert_with(|| matches!(Group::from_gid(gid.into()), Ok(Some(_)))) + }) } #[derive(Clone, Copy, Debug)]