diff --git a/bindings/python/py_src/tokenizers/pre_tokenizers.pyi b/bindings/python/py_src/tokenizers/pre_tokenizers.pyi index 158af993c..0bc404061 100644 --- a/bindings/python/py_src/tokenizers/pre_tokenizers.pyi +++ b/bindings/python/py_src/tokenizers/pre_tokenizers.pyi @@ -353,9 +353,9 @@ class UnicodeScripts(PreTokenizer): @final class Whitespace(PreTokenizer): """ - This pre-tokenizer splits on word boundaries according to the ``\w+|[^\w\s]+`` - regex pattern. It splits on word characters or characters that aren't words or - whitespaces (punctuation such as hyphens, apostrophes, commas, etc.). + This pre-tokenizer splits on word boundaries. It splits on word characters + or characters that are neither word characters nor whitespace (punctuation + such as hyphens, apostrophes, commas, etc.). Example:: diff --git a/bindings/python/src/pre_tokenizers.rs b/bindings/python/src/pre_tokenizers.rs index 8400324db..97491adb7 100644 --- a/bindings/python/src/pre_tokenizers.rs +++ b/bindings/python/src/pre_tokenizers.rs @@ -335,9 +335,9 @@ impl PyByteLevel { } } -/// This pre-tokenizer splits on word boundaries according to the ``\w+|[^\w\s]+`` -/// regex pattern. It splits on word characters or characters that aren't words or -/// whitespaces (punctuation such as hyphens, apostrophes, commas, etc.). +/// This pre-tokenizer splits on word boundaries. It splits on word characters +/// or characters that are neither word characters nor whitespace (punctuation +/// such as hyphens, apostrophes, commas, etc.). /// /// Example:: ///