From 7631db709cd98e422a8cc390c47a8e57f08a6a24 Mon Sep 17 00:00:00 2001 From: Adam Alston Date: Tue, 31 Mar 2026 21:05:16 -0400 Subject: [PATCH 1/2] fix: stop greedy processing instruction matching --- packages/parser/lib/lexer.js | 2 +- packages/parser/test/parser-smoke-spec.js | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/packages/parser/lib/lexer.js b/packages/parser/lib/lexer.js index e7e0686e..b792c770 100644 --- a/packages/parser/lib/lexer.js +++ b/packages/parser/lib/lexer.js @@ -103,7 +103,7 @@ const INVALID_SLASH_OPEN = createToken({ const PROCESSING_INSTRUCTION = createToken({ name: "PROCESSING_INSTRUCTION", - pattern: makePattern`<\\?${f.Name}.*\\?>`, + pattern: makePattern`<\\?${f.Name}.*?\\?>`, }); const OPEN = createToken({ name: "OPEN", pattern: / { const lexAndParseResult = parse(inputText); expect(lexAndParseResult.parseErrors).to.be.empty; }); + + it("should tokenize processing instructions in sibling elements separately", () => { + const inputText = + ''; + const { lexErrors, parseErrors, tokenVector } = parse(inputText); + const processingInstructionImages = tokenVector.reduce( + (images, token) => + token.tokenType.name === "PROCESSING_INSTRUCTION" + ? [...images, token.image] + : images, + [] + ); + + expect(lexErrors).to.be.empty; + expect(parseErrors).to.be.empty; + expect(processingInstructionImages).to.deep.equal([ + "", + "", + "", + "", + ]); + }); }); From 3ed7be250c41c82771349a38f8d564aa916295a0 Mon Sep 17 00:00:00 2001 From: Adam Alston Date: Tue, 31 Mar 2026 21:21:00 -0400 Subject: [PATCH 2/2] fix: support multiline processing instructions --- packages/parser/lib/lexer.js | 3 ++- packages/parser/test/parser-smoke-spec.js | 27 +++++++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/packages/parser/lib/lexer.js b/packages/parser/lib/lexer.js index b792c770..42b9d3cc 100644 --- a/packages/parser/lib/lexer.js +++ b/packages/parser/lib/lexer.js @@ -103,7 +103,8 @@ const INVALID_SLASH_OPEN = createToken({ const PROCESSING_INSTRUCTION = createToken({ name: "PROCESSING_INSTRUCTION", - pattern: makePattern`<\\?${f.Name}.*?\\?>`, + pattern: makePattern`<\\?${f.Name}(?:.|\\r?\\n)*?\\?>`, + line_breaks: true, }); const OPEN = createToken({ name: "OPEN", pattern: / { "", ]); }); + + it("should tokenize multiline processing instructions with LF and CRLF", () => { + [ + { lineEnding: "\n", expected: "" }, + { lineEnding: "\r\n", expected: "" }, + ].forEach(({ lineEnding, expected }) => { + const inputText = ` + + + + +`.replace(/\n/g, lineEnding); + const { lexErrors, parseErrors, tokenVector } = parse(inputText); + const processingInstructionImages = tokenVector.reduce( + (images, token) => + token.tokenType.name === "PROCESSING_INSTRUCTION" + ? [...images, token.image] + : images, + [] + ); + + expect(lexErrors).to.be.empty; + expect(parseErrors).to.be.empty; + expect(processingInstructionImages).to.deep.equal([expected]); + }); + }); });