huggingface · eyupcanakman · May 10, 2026
diff --git a/bindings/python/py_src/tokenizers/pre_tokenizers.pyi b/bindings/python/py_src/tokenizers/pre_tokenizers.pyi
@@ -353,9 +353,9 @@ class UnicodeScripts(PreTokenizer):
 @final
 class Whitespace(PreTokenizer):
     """
-    This pre-tokenizer splits on word boundaries according to the ``\w+|[^\w\s]+``
-    regex pattern. It splits on word characters or characters that aren't words or
-    whitespaces (punctuation such as hyphens, apostrophes, commas, etc.).
+    This pre-tokenizer splits on word boundaries. It splits the input into runs
+    of word characters and runs of characters that are neither word characters
+    nor whitespace (punctuation such as hyphens, apostrophes, commas, etc.).
 
     Example::
 

diff --git a/bindings/python/src/pre_tokenizers.rs b/bindings/python/src/pre_tokenizers.rs
@@ -335,9 +335,9 @@ impl PyByteLevel {
     }
 }
 
-/// This pre-tokenizer splits on word boundaries according to the ``\w+|[^\w\s]+``
-/// regex pattern. It splits on word characters or characters that aren't words or
-/// whitespaces (punctuation such as hyphens, apostrophes, commas, etc.).
+/// This pre-tokenizer splits on word boundaries. It splits the input into runs
+/// of word characters and runs of characters that are neither word characters
+/// nor whitespace (punctuation such as hyphens, apostrophes, commas, etc.).
 ///
 /// Example::
 ///