diff --git a/doc/pages/changelog.asciidoc b/doc/pages/changelog.asciidoc index e0149fdf31..3e6c02bbaa 100644 --- a/doc/pages/changelog.asciidoc +++ b/doc/pages/changelog.asciidoc @@ -16,6 +16,8 @@ released versions. * `number-lines -full-relative` switch to keep a smaller line number gutter. +* Support the `\N` escape sequence in regex (like in PCRE, matches `[^\n]`). + == Kakoune 2025.06.03 * Expose env vars that are mentioned in the arguments passed to shell expansions diff --git a/doc/pages/faq.asciidoc b/doc/pages/faq.asciidoc index 9ad201337f..52bf92a496 100644 --- a/doc/pages/faq.asciidoc +++ b/doc/pages/faq.asciidoc @@ -292,8 +292,8 @@ In most distributions, `/bin/sh` will end up being used. Data in buffers is a stream of characters, and newlines do not receive special treatment compared to other characters, with regards to regex matching. In -order to select data in a line without any trailing newline characters, one could -use the `[^\n]+` pattern, which is arguably a good compromise when +order to select data in a line without any trailing newline characters, one +could use the `\N+` pattern, which is arguably a good compromise when balanced against the ability to select data over several lines. You can instruct the regex engine to stop matching newline characters with diff --git a/doc/pages/highlighters.asciidoc b/doc/pages/highlighters.asciidoc index d0191af76b..6ed5d334d3 100644 --- a/doc/pages/highlighters.asciidoc +++ b/doc/pages/highlighters.asciidoc @@ -131,9 +131,9 @@ highlighter is replaced with the new one. 
This highlights C++ style comments in cyan, with an eventual 'TODO:' in yellow on red background: --------------------------------------------------------------------- -add-highlighter window/ regex //\h*(TODO:)[^\n]* 0:cyan 1:yellow,red --------------------------------------------------------------------- +----------------------------------------------------------------- +add-highlighter window/ regex //\h*(TODO:)\N* 0:cyan 1:yellow,red +----------------------------------------------------------------- capture_id can be either the capture number, or its name if a named capture is used in the regex (See diff --git a/doc/pages/regex.asciidoc b/doc/pages/regex.asciidoc index 416efd1f04..6b2c011562 100644 --- a/doc/pages/regex.asciidoc +++ b/doc/pages/regex.asciidoc @@ -74,6 +74,8 @@ is equivalent to `[\d]`. `.` matches any character, including newlines, by default. (see <<regex#modifiers,:doc regex modifiers>> on how to change it) +`\N` matches any character but newlines (not affected by modifiers). + == Groups Regex atoms can be grouped using `(` and `)` or `(?:` and `)`. If `(` is @@ -184,7 +186,7 @@ by ; some divergence exists for ease of use, or performance reasons: * Lookarounds are not arbitrary, but lookbehind is supported. -* `\K`, `\Q..\E`, `\A`, `\h` and `\z` are added. +* `\K`, `\N`, `\Q..\E`, `\A`, `\h` and `\z` are added. 
* Stricter handling of escaping, as we introduce additional escapes; identity escapes like `\X` with `X` being a non-special character are not accepted, to avoid confusions between `\h` meaning literal diff --git a/src/main.cc b/src/main.cc index 1c33c41afc..6a6d1d4e8b 100644 --- a/src/main.cc +++ b/src/main.cc @@ -55,6 +55,7 @@ struct { "» {+b}%val\\{buffile}{} is now empty for scratch buffers\n" "» {+b}FocusIn{}/{+b}FocusOut{} events on suspend\n" "» {+u}number-lines -full-relative{} switch to keep a smaller line number gutter\n" + "» support the {+b}\\N{} escape sequence in regex (matches {+b}[^\\n]{})\n" }, { 20250603, "» kak_* appearing in shell arguments will be added to the environment\n" diff --git a/src/regex_vm.cc b/src/regex_vm.cc index 2e4c3ab840..ebf9db8cda 100644 --- a/src/regex_vm.cc +++ b/src/regex_vm.cc @@ -357,6 +357,9 @@ struct RegexParser { const Codepoint cp = *m_pos++; + if (cp == 'N') + return add_node(ParsedRegex::AnyCharExceptNewLine); + if (cp == 'Q') { auto escaped_sequence = add_node(ParsedRegex::Sequence); @@ -1562,6 +1565,12 @@ auto test_regex = UnitTest{[]{ kak_assert(*vm.captures()[0] == 'f'); } + { + TestVM<> vm{R"(a(?<=\N)\N+(?=.\N)\s(?S)d.+(?!.)\s(? vm{R"([d-ea-dcf-k]+)"}; kak_assert(vm.exec("abcde"));