diff --git a/packages/parser/lib/lexer.js b/packages/parser/lib/lexer.js
index e7e0686e..42b9d3cc 100644
--- a/packages/parser/lib/lexer.js
+++ b/packages/parser/lib/lexer.js
@@ -103,7 +103,11 @@ const INVALID_SLASH_OPEN = createToken({

 const PROCESSING_INSTRUCTION = createToken({
   name: "PROCESSING_INSTRUCTION",
-  pattern: makePattern`<\\?${f.Name}.*\\?>`,
+  // PI content may span lines. `.` never matches a line terminator, so use
+  // [\s\S] to accept LF, CRLF, a lone CR and U+2028/U+2029 alike; the lazy
+  // quantifier stops at the first `?>` so adjacent PIs stay separate tokens.
+  pattern: makePattern`<\\?${f.Name}[\\s\\S]*?\\?>`,
+  line_breaks: true,
 });

 const OPEN = createToken({ name: "OPEN", pattern: / {
     const lexAndParseResult = parse(inputText);
     expect(lexAndParseResult.parseErrors).to.be.empty;
   });
+
+  it("should tokenize processing instructions in sibling elements separately", () => {
+    const inputText =
+      '';
+    const { lexErrors, parseErrors, tokenVector } = parse(inputText);
+    const processingInstructionImages = tokenVector
+      .filter((token) => token.tokenType.name === "PROCESSING_INSTRUCTION")
+      .map((token) => token.image);
+
+    expect(lexErrors).to.be.empty;
+    expect(parseErrors).to.be.empty;
+    expect(processingInstructionImages).to.deep.equal([
+      "",
+      "",
+      "",
+      "",
+    ]);
+  });
+
+  it("should tokenize multiline processing instructions with LF and CRLF", () => {
+    [
+      { lineEnding: "\n", expected: "" },
+      { lineEnding: "\r\n", expected: "" },
+    ].forEach(({ lineEnding, expected }) => {
+      const inputText = `
+
+
+
+
+`.replace(/\n/g, lineEnding);
+      const { lexErrors, parseErrors, tokenVector } = parse(inputText);
+      const processingInstructionImages = tokenVector
+        .filter((token) => token.tokenType.name === "PROCESSING_INSTRUCTION")
+        .map((token) => token.image);
+
+      expect(lexErrors).to.be.empty;
+      expect(parseErrors).to.be.empty;
+      expect(processingInstructionImages).to.deep.equal([expected]);
+    });
+  });
 });