diff --git a/docs/content/docs/_index.md b/docs/content/docs/_index.md index 28147f4e..51951053 100644 --- a/docs/content/docs/_index.md +++ b/docs/content/docs/_index.md @@ -1280,6 +1280,26 @@ Returns a string representation of the given value. Example: `{{ value | as_str }}` +#### substr + +Returns a substring of the given string, starting at the specified `begin` index and +extending for `length` graphemes. If `length` is omitted, or the range exceeds the string's length, +it returns everything up to the end of the string. + +Example: `{{ value | substr(begin=0, length=300) }}` + +#### find + +Returns the byte position of the first occurrence of `needle` in the given string, or -1 if it is not found. + +Example: `{{ value | find(needle="Hello") }}` + +#### rfind + +Returns the byte position of the last occurrence of `needle` in the given string, or the string's length in bytes if it is not found. + +Example: `{{ value | rfind(needle="World") }}` + #### default Returns the default value given only if the variable evaluated is not present in the context and is therefore meant to be at the beginning of a filter chain if there are several filters. @@ -1388,6 +1408,7 @@ Example: A comprehensive syntax description can be found in the [regex crate documentation](https://docs.rs/regex/). + ### Built-in functions Tera comes with some built-in global functions. diff --git a/src/builtins/filters/string.rs b/src/builtins/filters/string.rs index d874bd6d..b5dae863 100644 --- a/src/builtins/filters/string.rs +++ b/src/builtins/filters/string.rs @@ -1,3 +1,4 @@ +#[allow(non_snake_case)] /// Filters operating on string use std::collections::HashMap; @@ -441,6 +442,74 @@ pub fn float(value: &Value, args: &HashMap) -> Result { Ok(to_value(v).unwrap()) } +/// Returns a substring starting at `begin` with optional `length` (in graphemes). +/// If `length` is not provided, returns the rest of the string from `begin`. 
+///
+/// Both `begin` and `length` count graphemes, not bytes. A `begin` at or past the end of the
+/// string yields an empty string, and a `length` that runs past the end is clamped to it.
+///
+/// Errors if `value` is not a string, if `begin` is missing, or if `begin` or `length` is not a
+/// non-negative integer.
+pub fn substr(value: &Value, args: &HashMap<String, Value>) -> Result<Value> {
+    let s = try_get_value!("substr", "value", String, value);
+
+    let begin = match args.get("begin") {
+        Some(b) => try_get_value!("substr", "begin", usize, b),
+        None => return Err(Error::msg("Filter `substr` expected an arg called `begin`")),
+    };
+
+    // Validate `length` the same way as `begin`: a value that is not a non-negative integer is
+    // reported as an error instead of being silently treated as "no length given".
+    let length = match args.get("length") {
+        Some(l) => Some(try_get_value!("substr", "length", usize, l)),
+        None => None,
+    };
+
+    // Byte offsets at which each grapheme starts. The iterator is consumed lazily, so no
+    // intermediate vector is built and no index arithmetic (which could overflow) is needed.
+    let mut starts = GraphemeIndices::new(&s).map(|(offset, _)| offset);
+
+    // `nth(begin)` skips `begin` graphemes and yields the offset of the next one, if any.
+    let start_idx = match starts.nth(begin) {
+        Some(offset) => offset,
+        None => return Ok(to_value("").unwrap()),
+    };
+
+    let end_idx = match length {
+        Some(0) => start_idx,
+        // `starts` now sits just past grapheme `begin`, so `nth(len - 1)` is the offset of
+        // grapheme `begin + len`; running out of graphemes means "up to the end of the string".
+        Some(len) => starts.nth(len - 1).unwrap_or(s.len()),
+        None => s.len(),
+    };
+
+    Ok(to_value(&s[start_idx..end_idx]).unwrap())
+}
+
+/// Finds the position of the first occurrence of the `needle` argument in the string.
+///
+/// The position is a byte offset into the string (not a grapheme index like the one `substr`
+/// takes), or -1 if `needle` does not occur.
+///
+/// Errors if `value` or `needle` is not a string, or if `needle` is missing.
+pub fn find(value: &Value, args: &HashMap<String, Value>) -> Result<Value> {
+    let s = try_get_value!("find", "value", String, value);
+
+    let needle = match args.get("needle") {
+        Some(needle) => try_get_value!("find", "needle", String, needle),
+        None => return Err(Error::msg("Filter `find` expected an arg called `needle`")),
+    };
+
+    // -1 is the "not found" marker exposed to templates.
+    let pos = s.find(needle.as_str()).map_or(-1, |idx| idx as i64);
+    Ok(to_value(pos).unwrap())
+}
+
+/// Finds the position of the last occurrence of a substring.
+/// Returns the length of the string if not found.
+///
+/// The returned position is a byte offset into the string. Note that the "not found" value (the
+/// string's byte length) is also what an empty `needle` yields, so callers cannot tell the two
+/// cases apart from the result alone.
+///
+/// Errors if `value` or `needle` is not a string, or if `needle` is missing.
+pub fn rfind(value: &Value, args: &HashMap<String, Value>) -> Result<Value> {
+    let s = try_get_value!("rfind", "value", String, value);
+
+    let needle = match args.get("needle") {
+        Some(needle) => try_get_value!("rfind", "needle", String, needle),
+        None => return Err(Error::msg("Filter `rfind` expected an arg called `needle`")),
+    };
+
+    // No match is reported as the byte length of the string, never as -1.
+    let pos = s.rfind(needle.as_str()).unwrap_or(s.len()) as i64;
+    Ok(to_value(pos).unwrap())
+}
+
+
 #[cfg(test)]
 mod tests {
     use std::collections::HashMap;
@@ -934,4 +1003,161 @@ mod tests {
             assert_eq!(result.unwrap(), to_value(expected).unwrap());
         }
     }
+
+
+    // Family emoji: man, woman, girl and boy joined by zero-width joiners (U+200D).
+    // It is 25 bytes long (4 + 3 + 4 + 3 + 4 + 3 + 4) and the woman starts at byte 7.
+    // Spelled with escapes so the invisible joiners cannot be lost or mangled.
+    const FAMILY: &str = "\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}";
+    // The woman emoji on its own (4 bytes).
+    const WOMAN: &str = "\u{1F469}";
+
+    #[test]
+    fn test_substr_basic() {
+        let mut args = HashMap::new();
+        args.insert("begin".to_string(), to_value(2).unwrap());
+        args.insert("length".to_string(), to_value(3).unwrap());
+        let result = substr(&to_value("abcdef").unwrap(), &args);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), to_value("cde").unwrap());
+    }
+
+    #[test]
+    fn test_substr_no_length() {
+        let mut args = HashMap::new();
+        args.insert("begin".to_string(), to_value(3).unwrap());
+        let result = substr(&to_value("abcdef").unwrap(), &args);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), to_value("def").unwrap());
+    }
+
+    #[test]
+    fn test_substr_unicode_graphemes() {
+        let mut args = HashMap::new();
+        args.insert("begin".to_string(), to_value(1).unwrap());
+        args.insert("length".to_string(), to_value(2).unwrap());
+        let result = substr(&to_value(format!("a{}b", FAMILY)).unwrap(), &args);
+        assert!(result.is_ok());
+        // The family emoji is a single grapheme, so two graphemes from index 1 are the emoji
+        // followed by "b".
+        assert_eq!(result.unwrap(), to_value(format!("{}b", FAMILY)).unwrap());
+    }
+
+    #[test]
+    fn test_substr_begin_out_of_bounds() {
+        let mut args = HashMap::new();
+        args.insert("begin".to_string(), to_value(10).unwrap());
+        let result = substr(&to_value("abc").unwrap(), &args);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), to_value("").unwrap());
+    }
+
+    #[test]
+    fn test_substr_missing_begin() {
+        let args = HashMap::new();
+        let result = substr(&to_value("abc").unwrap(), &args);
+        assert!(result.is_err());
+        assert_eq!(
+            result.err().unwrap().to_string(),
+            "Filter `substr` expected an arg called `begin`"
+        );
+    }
+
+    #[test]
+    fn test_find() {
+        let mut args = HashMap::new();
+
+        // Basic find
+        args.insert("needle".to_string(), to_value("bar").unwrap());
+        let result = find(&to_value("foobar").unwrap(), &args);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), to_value(3).unwrap());
+
+        // Pattern at start
+        args.insert("needle".to_string(), to_value("foo").unwrap());
+        let result = find(&to_value("foobar").unwrap(), &args);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), to_value(0).unwrap());
+
+        // Pattern is the whole string
+        args.insert("needle".to_string(), to_value("bar").unwrap());
+        let result = find(&to_value("bar").unwrap(), &args);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), to_value(0).unwrap());
+
+        // Pattern not found
+        args.insert("needle".to_string(), to_value("baz").unwrap());
+        let result = find(&to_value("foobar").unwrap(), &args);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), to_value(-1).unwrap());
+
+        // Empty pattern
+        args.insert("needle".to_string(), to_value("").unwrap());
+        let result = find(&to_value("foobar").unwrap(), &args);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), to_value(0).unwrap());
+
+        // Empty string
+        args.insert("needle".to_string(), to_value("foo").unwrap());
+        let result = find(&to_value("").unwrap(), &args);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), to_value(-1).unwrap());
+
+        // Unicode pattern
+        args.insert("needle".to_string(), to_value(WOMAN).unwrap());
+        let result = find(&to_value(format!("{} family", FAMILY)).unwrap(), &args);
+        assert!(result.is_ok());
+        // The woman follows the man (4 bytes) and one joiner (3 bytes), so she starts at byte 7.
+        assert_eq!(result.unwrap(), to_value(7).unwrap());
+    }
+
+
+    #[test]
+    fn test_rfind() {
+        let mut args = HashMap::new();
+
+        // Basic rfind
+        args.insert("needle".to_string(), to_value("bar").unwrap());
+        let result = rfind(&to_value("foobarbar").unwrap(), &args);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), to_value(6).unwrap());
+
+        // Pattern at start
+        args.insert("needle".to_string(), to_value("foo").unwrap());
+        let result = rfind(&to_value("foobar").unwrap(), &args);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), to_value(0).unwrap());
+
+        // Pattern is the whole string
+        args.insert("needle".to_string(), to_value("bar").unwrap());
+        let result = rfind(&to_value("bar").unwrap(), &args);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), to_value(0).unwrap());
+
+        // Pattern not found: `rfind` reports the byte length of the string ("foobar" is 6 bytes)
+        args.insert("needle".to_string(), to_value("baz").unwrap());
+        let result = rfind(&to_value("foobar").unwrap(), &args);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), to_value(6).unwrap());
+
+        // Empty pattern: matches at the very end of the string
+        args.insert("needle".to_string(), to_value("").unwrap());
+        let result = rfind(&to_value("foobar").unwrap(), &args);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), to_value(6).unwrap());
+
+        // Empty string: not found, and the byte length of "" is 0
+        args.insert("needle".to_string(), to_value("foo").unwrap());
+        let result = rfind(&to_value("").unwrap(), &args);
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), to_value(0).unwrap());
+
+        // Unicode pattern, single occurrence
+        args.insert("needle".to_string(), to_value(WOMAN).unwrap());
+        let result = rfind(&to_value(format!("{} family", FAMILY)).unwrap(), &args);
+        assert!(result.is_ok());
+        // The only woman is the one inside the family emoji, which starts at byte 7.
+        assert_eq!(result.unwrap(), to_value(7).unwrap());
+
+        // Unicode pattern, two occurrences
+        args.insert("needle".to_string(), to_value(WOMAN).unwrap());
+        let result = rfind(&to_value(format!("{} family {}", FAMILY, WOMAN)).unwrap(), &args);
+        assert!(result.is_ok());
+        // The last woman follows the family emoji (25 bytes) and " family " (8 bytes): byte 33.
+        assert_eq!(result.unwrap(), to_value(33).unwrap());
+    }
+
 }
diff --git a/src/tera.rs b/src/tera.rs
index 0c694f5c..e0797ce9 100644
--- a/src/tera.rs
+++ b/src/tera.rs
@@ -726,6 +726,10 @@ impl Tera {
         self.register_filter("as_str", common::as_str);
         self.register_filter("get", object::get);
+
+        self.register_filter("substr", string::substr);
+        self.register_filter("find", string::find);
+        self.register_filter("rfind", string::rfind);
     }
     fn register_tera_testers(&mut self) {