diff --git a/pkl-parser/src/main/java/org/pkl/parser/GenericParser.java b/pkl-parser/src/main/java/org/pkl/parser/GenericParser.java index cf76a8b5f..3cb01dc64 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/GenericParser.java +++ b/pkl-parser/src/main/java/org/pkl/parser/GenericParser.java @@ -1,5 +1,5 @@ /* - * Copyright © 2025-2026 Apple Inc. and the Pkl project authors. All rights reserved. + * Copyright © 2026 Apple Inc. and the Pkl project authors. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,1570 +15,10 @@ */ package org.pkl.parser; -import java.util.ArrayList; -import java.util.List; -import java.util.function.Supplier; -import org.pkl.parser.syntax.Operator; -import org.pkl.parser.syntax.generic.FullSpan; import org.pkl.parser.syntax.generic.Node; -import org.pkl.parser.syntax.generic.NodeType; -import org.pkl.parser.util.ErrorMessages; -import org.pkl.parser.util.Nullable; - -@SuppressWarnings("DuplicatedCode") -public class GenericParser { - - private Lexer lexer; - private Token lookahead; - private FullSpan spanLookahead; - private FullToken _lookahead; - private int cursor = 0; - private final List tokens = new ArrayList<>(); - - private void init(String source) { - this.lexer = new Lexer(source); - cursor = 0; - while (true) { - var ft = new FullToken(lexer.next(), lexer.fullSpan(), lexer.newLinesBetween); - tokens.add(ft); - if (ft.token == Token.EOF) break; - } - _lookahead = tokens.get(cursor); - lookahead = _lookahead.token; - spanLookahead = _lookahead.span; - } +public final class GenericParser { public Node parseModule(String source) { - init(source); - if (lookahead == Token.EOF) { - return new Node(NodeType.MODULE, new FullSpan(0, 0, 1, 1, 1, 1), List.of()); - } - var children = new ArrayList(); - var nodes = new ArrayList(); - if (lookahead == Token.SHEBANG) { - nodes.add(makeAffix(next())); - } - ff(nodes); - - var res = parseMemberHeader(children); - - if (isModuleDecl()) { - nodes.add(parseModuleDecl(children)); - children.clear(); - res = new HeaderResult(false, false, false); - ff(nodes); - } - - // imports - var imports = new ArrayList(); - while (lookahead == Token.IMPORT || lookahead == Token.IMPORT_STAR) { - if (res.hasDocComment || res.hasAnnotations || res.hasModifiers) { - throw parserError("wrongHeaders", "Imports"); - } - var lastImport = parseImportDecl(); - imports.add(lastImport); - // keep trailing affixes as part of the import - while (lookahead.isAffix() && lastImport.span.isSameLine(spanLookahead)) { - imports.add(makeAffix(next())); - } - if (!isImport()) break; - ff(imports); - } - if (!imports.isEmpty()) { - nodes.add(new Node(NodeType.IMPORT_LIST, imports)); - ff(nodes); - } - - // entries - if (res.hasDocComment || res.hasAnnotations || res.hasModifiers) { - nodes.add(parseModuleMember(children)); - ff(nodes); - } - - while (lookahead != Token.EOF) { - children.clear(); - parseMemberHeader(children); - nodes.add(parseModuleMember(children)); - ff(nodes); - } - return new Node(NodeType.MODULE, nodes); - } - - private Node parseModuleDecl(List preChildren) { - var headerParts = getHeaderParts(preChildren); - var children = new ArrayList<>(headerParts.preffixes); - var headers = new ArrayList(); - if (headerParts.modifierList != null) { - headers.add(headerParts.modifierList); - } - if (lookahead == Token.MODULE) { - var subChildren = new ArrayList<>(headers); - subChildren.add(makeTerminal(next())); - ff(subChildren); - subChildren.add(parseQualifiedIdentifier()); - children.add(new Node(NodeType.MODULE_DEFINITION, subChildren)); - } else { - children.addAll(headers); - if (headerParts.modifierList != null) { - throw parserError("wrongHeaders", "Amends or extends declaration"); - } - } - var looka = lookahead(); - if (looka == Token.AMENDS || looka == Token.EXTENDS) { - var type = looka == Token.AMENDS ? NodeType.AMENDS_CLAUSE : NodeType.EXTENDS_CLAUSE; - ff(children); - var subChildren = new ArrayList(); - subChildren.add(makeTerminal(next())); - ff(subChildren); - subChildren.add(parseStringConstant()); - children.add(new Node(type, subChildren)); - } - return new Node(NodeType.MODULE_DECLARATION, children); - } - - private Node parseQualifiedIdentifier() { - var children = new ArrayList(); - children.add(parseIdentifier()); - while (lookahead() == Token.DOT) { - ff(children); - children.add(new Node(NodeType.TERMINAL, next().span)); - ff(children); - children.add(parseIdentifier()); - } - return new Node(NodeType.QUALIFIED_IDENTIFIER, children); - } - - private Node parseImportDecl() { - var children = new ArrayList(); - children.add(makeTerminal(next())); - ff(children); - children.add(parseStringConstant()); - if (lookahead() == Token.AS) { - ff(children); - var alias = new ArrayList(); - alias.add(makeTerminal(next())); - ff(alias); - alias.add(parseIdentifier()); - children.add(new Node(NodeType.IMPORT_ALIAS, alias)); - } - return new Node(NodeType.IMPORT, children); - } - - private HeaderResult parseMemberHeader(List children) { - var hasDocComment = false; - var hasAnnotation = false; - var hasModifier = false; - var docs = new ArrayList(); - ff(children); - while (lookahead() == Token.DOC_COMMENT) { - ff(docs); - docs.add(new Node(NodeType.DOC_COMMENT_LINE, next().span)); - hasDocComment = true; - } - if (hasDocComment) { - children.add(new Node(NodeType.DOC_COMMENT, docs)); - } - ff(children); - while (lookahead == Token.AT) { - children.add(parseAnnotation()); - hasAnnotation = true; - ff(children); - } - var modifiers = new ArrayList(); - while (lookahead.isModifier()) { - modifiers.add(make(NodeType.MODIFIER, next().span)); - hasModifier = true; - ff(children); - } - if (hasModifier) children.add(new Node(NodeType.MODIFIER_LIST, modifiers)); - return new HeaderResult(hasDocComment, hasAnnotation, hasModifier); - } - - private Node parseModuleMember(List preChildren) { - return switch (lookahead) { - case IDENTIFIER -> parseClassProperty(preChildren); - case TYPE_ALIAS -> parseTypeAlias(preChildren); - case CLASS -> parseClass(preChildren); - case FUNCTION -> parseClassMethod(preChildren); - case EOF -> throw parserError("unexpectedEndOfFile"); - default -> { - if (lookahead.isKeyword()) { - throw parserError("keywordNotAllowedHere", lookahead.text()); - } - if (lookahead == Token.DOC_COMMENT) { - throw parserError("danglingDocComment"); - } - throw parserError("invalidTopLevelToken"); - } - }; - } - - private Node parseTypeAlias(List preChildren) { - var headerParts = getHeaderParts(preChildren); - var children = new ArrayList<>(headerParts.preffixes); - var headers = new ArrayList(); - if (headerParts.modifierList != null) { - headers.add(headerParts.modifierList); - } - // typealias keyword - headers.add(makeTerminal(next())); - ff(headers); - headers.add(parseIdentifier()); - ff(headers); - if (lookahead == Token.LT) { - headers.add(parseTypeParameterList()); - ff(headers); - } - expect(Token.ASSIGN, headers, "unexpectedToken", "="); - children.add(new Node(NodeType.TYPEALIAS_HEADER, headers)); - var body = new ArrayList(); - ff(body); - body.add(parseType()); - children.add(new Node(NodeType.TYPEALIAS_BODY, body)); - return new Node(NodeType.TYPEALIAS, children); - } - - private Node parseClass(List preChildren) { - var headerParts = getHeaderParts(preChildren); - var children = new ArrayList<>(headerParts.preffixes); - var headers = new ArrayList(); - if (headerParts.modifierList != null) { - headers.add(headerParts.modifierList); - } - // class keyword - headers.add(makeTerminal(next())); - ff(headers); - headers.add(parseIdentifier()); - if (lookahead() == Token.LT) { - ff(headers); - headers.add(parseTypeParameterList()); - } - if (lookahead() == Token.EXTENDS) { - var extend = new ArrayList(); - ff(extend); - extend.add(makeTerminal(next())); - ff(extend); - extend.add(parseType()); - headers.add(new Node(NodeType.CLASS_HEADER_EXTENDS, extend)); - } - children.add(new Node(NodeType.CLASS_HEADER, headers)); - if (lookahead() == Token.LBRACE) { - ff(children); - children.add(parseClassBody()); - } - return new Node(NodeType.CLASS, children); - } - - private Node parseClassBody() { - var children = new ArrayList(); - children.add(makeTerminal(next())); - var elements = new ArrayList(); - var hasElements = false; - ff(elements); - while (lookahead != Token.RBRACE && lookahead != Token.EOF) { - hasElements = true; - var preChildren = new ArrayList(); - parseMemberHeader(preChildren); - if (lookahead == Token.FUNCTION) { - elements.add(parseClassMethod(preChildren)); - } else { - elements.add(parseClassProperty(preChildren)); - } - ff(elements); - } - if (lookahead == Token.EOF) { - throw parserError(ErrorMessages.create("missingDelimiter", "}"), prev().span.stopSpan()); - } - if (hasElements) { - children.add(new Node(NodeType.CLASS_BODY_ELEMENTS, elements)); - } else if (!elements.isEmpty()) { - // add affixes - children.addAll(elements); - } - expect(Token.RBRACE, children, "missingDelimiter", "}"); - return new Node(NodeType.CLASS_BODY, children); - } - - private Node parseClassProperty(List preChildren) { - var headerParts = getHeaderParts(preChildren); - var children = new ArrayList<>(headerParts.preffixes); - var header = new ArrayList(); - var headerBegin = new ArrayList(); - if (headerParts.modifierList != null) { - headerBegin.add(headerParts.modifierList); - } - headerBegin.add(parseIdentifier()); - header.add(new Node(NodeType.CLASS_PROPERTY_HEADER_BEGIN, headerBegin)); - var hasTypeAnnotation = false; - if (lookahead() == Token.COLON) { - ff(header); - header.add(parseTypeAnnotation()); - hasTypeAnnotation = true; - } - children.add(new Node(NodeType.CLASS_PROPERTY_HEADER, header)); - if (lookahead() == Token.ASSIGN) { - ff(children); - children.add(makeTerminal(next())); - var body = new ArrayList(); - ff(body); - body.add(parseExpr()); - children.add(new Node(NodeType.CLASS_PROPERTY_BODY, body)); - } else if (lookahead() == Token.LBRACE) { - if (hasTypeAnnotation) { - throw parserError("typeAnnotationInAmends"); - } - while (lookahead() == Token.LBRACE) { - ff(children); - children.add(parseObjectBody()); - } - } - return new Node(NodeType.CLASS_PROPERTY, children); - } - - private Node parseClassMethod(List preChildren) { - var headerParts = getHeaderParts(preChildren); - var children = new ArrayList<>(headerParts.preffixes); - var headers = new ArrayList(); - if (headerParts.modifierList != null) { - headers.add(headerParts.modifierList); - } - expect(Token.FUNCTION, headers, "unexpectedToken", "function"); - ff(headers); - headers.add(parseIdentifier()); - children.add(new Node(NodeType.CLASS_METHOD_HEADER, headers)); - ff(children); - if (lookahead == Token.LT) { - children.add(parseTypeParameterList()); - ff(children); - } - children.add(parseParameterList()); - if (lookahead() == Token.COLON) { - ff(children); - children.add(parseTypeAnnotation()); - } - if (lookahead() == Token.ASSIGN) { - ff(children); - children.add(makeTerminal(next())); - var body = new ArrayList(); - ff(body); - body.add(parseExpr()); - children.add(new Node(NodeType.CLASS_METHOD_BODY, body)); - } - return new Node(NodeType.CLASS_METHOD, children); - } - - private Node parseObjectBody() { - var children = new ArrayList(); - expect(Token.LBRACE, children, "unexpectedToken", "{"); - if (lookahead() == Token.RBRACE) { - ff(children); - children.add(makeTerminal(next())); - return new Node(NodeType.OBJECT_BODY, children); - } - if (isParameter()) { - var params = new ArrayList(); - ff(params); - parseListOf(Token.ARROW, params, this::parseParameter); - expect(Token.ARROW, params, "unexpectedToken2", ",", "->"); - children.add(new Node(NodeType.OBJECT_PARAMETER_LIST, params)); - ff(children); - } - var members = new ArrayList(); - ff(members); - while (lookahead != Token.RBRACE) { - if (lookahead == Token.EOF) { - throw parserError(ErrorMessages.create("missingDelimiter", "}"), prev().span.stopSpan()); - } - members.add(parseObjectMember()); - ff(members); - } - if (!members.isEmpty()) { - children.add(new Node(NodeType.OBJECT_MEMBER_LIST, members)); - } - children.add(makeTerminal(next())); // RBRACE - return new Node(NodeType.OBJECT_BODY, children); - } - - /** Returns true if the lookahead is a parameter, false if it's a member. May have to backtrack */ - private boolean isParameter() { - if (lookahead == Token.UNDERSCORE) return true; - if (lookahead != Token.IDENTIFIER) return false; - // have to backtrack - var originalCursor = cursor; - var result = false; - next(); // identifier - ff(); - if (lookahead == Token.ARROW || lookahead == Token.COMMA) { - result = true; - } else if (lookahead == Token.COLON) { - next(); // colon - ff(); - parseType(); - ff(); - result = lookahead == Token.COMMA || lookahead == Token.ARROW; - } - backtrackTo(originalCursor); - return result; - } - - private Node parseObjectMember() { - return switch (lookahead) { - case IDENTIFIER -> { - var originalCursor = cursor; - next(); - ff(new ArrayList<>()); - if (lookahead == Token.LBRACE || lookahead == Token.COLON || lookahead == Token.ASSIGN) { - // it's an objectProperty - backtrackTo(originalCursor); - yield parseObjectProperty(null); - } else { - backtrackTo(originalCursor); - // it's an expression - yield parseObjectElement(); - } - } - case FUNCTION -> parseObjectMethod(List.of()); - case LPRED -> parseMemberPredicate(); - case LBRACK -> parseObjectEntry(); - case SPREAD, QSPREAD -> parseObjectSpread(); - case WHEN -> parseWhenGenerator(); - case FOR -> parseForGenerator(); - case TYPE_ALIAS, CLASS -> - throw parserError(ErrorMessages.create("missingDelimiter", "}"), prev().span.stopSpan()); - default -> { - var preChildren = new ArrayList(); - while (lookahead.isModifier()) { - preChildren.add(make(NodeType.MODIFIER, next().span)); - ff(preChildren); - } - if (!preChildren.isEmpty()) { - if (lookahead == Token.FUNCTION) { - yield parseObjectMethod(List.of(new Node(NodeType.MODIFIER_LIST, preChildren))); - } else { - yield parseObjectProperty(List.of(new Node(NodeType.MODIFIER_LIST, preChildren))); - } - } else { - yield parseObjectElement(); - } - } - }; - } - - private Node parseObjectElement() { - return new Node(NodeType.OBJECT_ELEMENT, List.of(parseExpr())); - } - - private Node parseObjectProperty(@Nullable List preChildren) { - var children = new ArrayList(); - var header = new ArrayList(); - var headerBegin = new ArrayList(); - if (preChildren != null) { - headerBegin.addAll(preChildren); - } - ff(headerBegin); - var modifierList = new ArrayList(); - while (lookahead.isModifier()) { - modifierList.add(make(NodeType.MODIFIER, next().span)); - ff(modifierList); - } - if (!modifierList.isEmpty()) { - headerBegin.add(new Node(NodeType.MODIFIER_LIST, modifierList)); - } - headerBegin.add(parseIdentifier()); - header.add(new Node(NodeType.OBJECT_PROPERTY_HEADER_BEGIN, headerBegin)); - var hasTypeAnnotation = false; - if (lookahead() == Token.COLON) { - ff(header); - header.add(parseTypeAnnotation()); - hasTypeAnnotation = true; - } - children.add(new Node(NodeType.OBJECT_PROPERTY_HEADER, header)); - if (hasTypeAnnotation || lookahead() == Token.ASSIGN) { - ff(children); - expect(Token.ASSIGN, children, "unexpectedToken", "="); - var body = new ArrayList(); - ff(body); - body.add(parseExpr("}")); - children.add(new Node(NodeType.OBJECT_PROPERTY_BODY, body)); - return new Node(NodeType.OBJECT_PROPERTY, children); - } - ff(children); - children.addAll(parseBodyList()); - return new Node(NodeType.OBJECT_PROPERTY, children); - } - - private Node parseObjectMethod(List preChildren) { - var headerParts = getHeaderParts(preChildren); - var children = new ArrayList<>(headerParts.preffixes); - var headers = new ArrayList(); - if (headerParts.modifierList != null) { - headers.add(headerParts.modifierList); - } - expect(Token.FUNCTION, headers, "unexpectedToken", "function"); - ff(headers); - headers.add(parseIdentifier()); - children.add(new Node(NodeType.CLASS_METHOD_HEADER, headers)); - ff(children); - if (lookahead == Token.LT) { - children.add(parseTypeParameterList()); - ff(children); - } - children.add(parseParameterList()); - ff(children); - if (lookahead == Token.COLON) { - children.add(parseTypeAnnotation()); - ff(children); - } - expect(Token.ASSIGN, children, "unexpectedToken", "="); - var body = new ArrayList(); - ff(body); - body.add(parseExpr()); - children.add(new Node(NodeType.CLASS_METHOD_BODY, body)); - return new Node(NodeType.OBJECT_METHOD, children); - } - - private Node parseMemberPredicate() { - var children = new ArrayList(); - children.add(makeTerminal(next())); - ff(children); - children.add(parseExpr()); - ff(children); - var firstBrack = expect(Token.RBRACK, "unexpectedToken", "]]"); - children.add(makeTerminal(firstBrack)); - var secondbrack = expect(Token.RBRACK, "unexpectedToken", "]]"); - children.add(makeTerminal(secondbrack)); - if (firstBrack.span.charIndex() != secondbrack.span.charIndex() - 1) { - // There shouldn't be any whitespace between the first and second ']'. - var span = firstBrack.span.endWith(secondbrack.span); - var text = lexer.textFor(span.charIndex(), span.length()); - throw parserError(ErrorMessages.create("unexpectedToken", text, "]]"), firstBrack.span); - } - ff(children); - if (lookahead == Token.ASSIGN) { - children.add(makeTerminal(next())); - ff(children); - children.add(parseExpr("}")); - return new Node(NodeType.MEMBER_PREDICATE, children); - } - children.addAll(parseBodyList()); - return new Node(NodeType.MEMBER_PREDICATE, children); - } - - private Node parseObjectEntry() { - var children = new ArrayList(); - var header = new ArrayList(); - expect(Token.LBRACK, header, "unexpectedToken", "["); - ff(header); - header.add(parseExpr()); - expect(Token.RBRACK, header, "unexpectedToken", "]"); - if (lookahead() == Token.ASSIGN) { - ff(header); - header.add(makeTerminal(next())); - children.add(new Node(NodeType.OBJECT_ENTRY_HEADER, header)); - ff(children); - children.add(parseExpr()); - return new Node(NodeType.OBJECT_ENTRY, children); - } - children.add(new Node(NodeType.OBJECT_ENTRY_HEADER, header)); - ff(children); - children.addAll(parseBodyList()); - return new Node(NodeType.OBJECT_ENTRY, children); - } - - private Node parseObjectSpread() { - var children = new ArrayList(); - children.add(makeTerminal(next())); - ff(children); - children.add(parseExpr()); - return new Node(NodeType.OBJECT_SPREAD, children); - } - - private Node parseWhenGenerator() { - var children = new ArrayList(); - var header = new ArrayList(); - children.add(makeTerminal(next())); - ff(children); - expect(Token.LPAREN, header, "unexpectedToken", "("); - ff(header); - header.add(parseExpr()); - ff(header); - expect(Token.RPAREN, header, "unexpectedToken", ")"); - children.add(new Node(NodeType.WHEN_GENERATOR_HEADER, header)); - ff(children); - children.add(parseObjectBody()); - if (lookahead() == Token.ELSE) { - ff(children); - children.add(makeTerminal(next())); - ff(children); - children.add(parseObjectBody()); - } - return new Node(NodeType.WHEN_GENERATOR, children); - } - - private Node parseForGenerator() { - var children = new ArrayList(); - children.add(makeTerminal(next())); - ff(children); - var header = new ArrayList(); - expect(Token.LPAREN, header, "unexpectedToken", "("); - var headerDefinition = new ArrayList(); - var headerDefinitionHeader = new ArrayList(); - ff(headerDefinitionHeader); - headerDefinitionHeader.add(parseParameter()); - ff(headerDefinitionHeader); - if (lookahead == Token.COMMA) { - headerDefinitionHeader.add(makeTerminal(next())); - ff(headerDefinitionHeader); - headerDefinitionHeader.add(parseParameter()); - ff(headerDefinitionHeader); - } - expect(Token.IN, headerDefinitionHeader, "unexpectedToken", "in"); - headerDefinition.add( - new Node(NodeType.FOR_GENERATOR_HEADER_DEFINITION_HEADER, headerDefinitionHeader)); - ff(headerDefinition); - headerDefinition.add(parseExpr()); - ff(headerDefinition); - header.add(new Node(NodeType.FOR_GENERATOR_HEADER_DEFINITION, headerDefinition)); - expect(Token.RPAREN, header, "unexpectedToken", ")"); - children.add(new Node(NodeType.FOR_GENERATOR_HEADER, header)); - ff(children); - children.add(parseObjectBody()); - return new Node(NodeType.FOR_GENERATOR, children); - } - - private Node parseExpr() { - return parseExpr(null, 1); - } - - private Node parseExpr(@Nullable String expectation) { - return parseExpr(expectation, 1); - } - - private Node parseExpr(@Nullable String expectation, int minPrecedence) { - var expr = parseExprAtom(expectation); - var fullOpToken = fullLookahead(); - var operator = getOperator(fullOpToken.tk); - while (operator != null) { - if (operator.getPrec() < minPrecedence) break; - // `-` and `[]` must be in the same line as the left operand and have no semicolons inbetween - if ((operator == Operator.MINUS || operator == Operator.SUBSCRIPT) - && (fullOpToken.hasSemicolon || !expr.span.isSameLine(fullOpToken.tk.span))) break; - var children = new ArrayList(); - children.add(expr); - ff(children); - var op = next(); - children.add(make(NodeType.OPERATOR, op.span)); - ff(children); - var nodeType = NodeType.BINARY_OP_EXPR; - var nextMinPrec = operator.isLeftAssoc() ? operator.getPrec() + 1 : operator.getPrec(); - switch (op.token) { - case IS, AS -> children.add(parseType()); - case LBRACK -> { - nodeType = NodeType.SUBSCRIPT_EXPR; - children.add(parseExpr("]")); - ff(children); - expect(Token.RBRACK, children, "unexpectedToken", "]"); - } - case DOT, QDOT -> { - nodeType = NodeType.QUALIFIED_ACCESS_EXPR; - children.add(parseUnqualifiedAccessExpr()); - } - case NON_NULL -> nodeType = NodeType.NON_NULL_EXPR; - default -> children.add(parseExpr(expectation, nextMinPrec)); - } - - expr = new Node(nodeType, children); - fullOpToken = fullLookahead(); - operator = getOperator(fullOpToken.tk); - } - return expr; - } - - private @Nullable Operator getOperator(FullToken tk) { - return switch (tk.token) { - case POW -> Operator.POW; - case STAR -> Operator.MULT; - case DIV -> Operator.DIV; - case INT_DIV -> Operator.INT_DIV; - case MOD -> Operator.MOD; - case PLUS -> Operator.PLUS; - case MINUS -> Operator.MINUS; - case GT -> Operator.GT; - case GTE -> Operator.GTE; - case LT -> Operator.LT; - case LTE -> Operator.LTE; - case IS -> Operator.IS; - case AS -> Operator.AS; - case EQUAL -> Operator.EQ_EQ; - case NOT_EQUAL -> Operator.NOT_EQ; - case AND -> Operator.AND; - case OR -> Operator.OR; - case PIPE -> Operator.PIPE; - case COALESCE -> Operator.NULL_COALESCE; - case DOT -> Operator.DOT; - case QDOT -> Operator.QDOT; - case LBRACK -> Operator.SUBSCRIPT; - case NON_NULL -> Operator.NON_NULL; - default -> null; - }; - } - - private Node parseUnqualifiedAccessExpr() { - var children = new ArrayList(); - children.add(parseIdentifier()); - if (lookahead() == Token.LPAREN && noSemicolonInbetween() && _lookahead.newLinesBetween == 0) { - ff(children); - children.add(parseArgumentList()); - } - return new Node(NodeType.UNQUALIFIED_ACCESS_EXPR, children); - } - - private Node parseExprAtom(@Nullable String expectation) { - var expr = - switch (lookahead) { - case THIS -> new Node(NodeType.THIS_EXPR, next().span); - case OUTER -> new Node(NodeType.OUTER_EXPR, next().span); - case MODULE -> new Node(NodeType.MODULE_EXPR, next().span); - case NULL -> new Node(NodeType.NULL_EXPR, next().span); - case THROW -> { - var children = new ArrayList(); - children.add(makeTerminal(next())); - ff(children); - expect(Token.LPAREN, children, "unexpectedToken", "("); - ff(children); - children.add(parseExpr(")")); - ff(children); - expect(Token.RPAREN, children, "unexpectedToken", ")"); - yield new Node(NodeType.THROW_EXPR, children); - } - case TRACE -> { - var children = new ArrayList(); - children.add(makeTerminal(next())); - ff(children); - expect(Token.LPAREN, children, "unexpectedToken", "("); - ff(children); - children.add(parseExpr(")")); - ff(children); - expect(Token.RPAREN, children, "unexpectedToken", ")"); - yield new Node(NodeType.TRACE_EXPR, children); - } - case IMPORT, IMPORT_STAR -> { - var children = new ArrayList(); - children.add(makeTerminal(next())); - ff(children); - expect(Token.LPAREN, children, "unexpectedToken", "("); - ff(children); - children.add(parseStringConstant()); - ff(children); - expect(Token.RPAREN, children, "unexpectedToken", ")"); - yield new Node(NodeType.IMPORT_EXPR, children); - } - case READ, READ_STAR, READ_QUESTION -> { - var children = new ArrayList(); - children.add(makeTerminal(next())); - ff(children); - expect(Token.LPAREN, children, "unexpectedToken", "("); - ff(children); - children.add(parseExpr(")")); - ff(children); - expect(Token.RPAREN, children, "unexpectedToken", ")"); - yield new Node(NodeType.READ_EXPR, children); - } - case NEW -> { - var children = new ArrayList(); - var header = new ArrayList(); - header.add(makeTerminal(next())); - ff(header); - if (lookahead != Token.LBRACE) { - header.add(parseType("{")); - children.add(new Node(NodeType.NEW_HEADER, header)); - ff(children); - } else { - children.add(new Node(NodeType.NEW_HEADER, header)); - } - children.add(parseObjectBody()); - yield new Node(NodeType.NEW_EXPR, children); - } - case MINUS -> { - var children = new ArrayList(); - children.add(makeTerminal(next())); - ff(children); - // unary minus has higher precendence than most binary operators - children.add(parseExpr(expectation, 12)); - yield new Node(NodeType.UNARY_MINUS_EXPR, children); - } - case NOT -> { - var children = new ArrayList(); - children.add(makeTerminal(next())); - ff(children); - // logical not has higher precendence than most binary operators - children.add(parseExpr(expectation, 11)); - yield new Node(NodeType.LOGICAL_NOT_EXPR, children); - } - case LPAREN -> { - // can be function literal or parenthesized expression - if (isFunctionLiteral()) { - yield parseFunctionLiteral(); - } else { - yield parseParenthesizedExpr(); - } - } - case SUPER -> { - var children = new ArrayList(); - children.add(makeTerminal(next())); - ff(children); - if (lookahead == Token.DOT) { - children.add(makeTerminal(next())); - ff(children); - children.add(parseIdentifier()); - if (lookahead() == Token.LPAREN) { - ff(children); - children.add(parseArgumentList()); - } - yield new Node(NodeType.SUPER_ACCESS_EXPR, children); - } else { - expect(Token.LBRACK, children, "unexpectedToken", "["); - ff(children); - children.add(parseExpr()); - ff(children); - expect(Token.RBRACK, children, "unexpectedToken", "]"); - yield new Node(NodeType.SUPER_SUBSCRIPT_EXPR, children); - } - } - case IF -> { - var children = new ArrayList(); - var header = new ArrayList(); - header.add(makeTerminal(next())); - ff(header); - var condition = new ArrayList(); - var conditionExpr = new ArrayList(); - expect(Token.LPAREN, condition, "unexpectedToken", "("); - ff(conditionExpr); - conditionExpr.add(parseExpr(")")); - ff(conditionExpr); - condition.add(new Node(NodeType.IF_CONDITION_EXPR, conditionExpr)); - expect(Token.RPAREN, condition, "unexpectedToken", ")"); - header.add(new Node(NodeType.IF_CONDITION, condition)); - children.add(new Node(NodeType.IF_HEADER, header)); - var thenExpr = new ArrayList(); - ff(thenExpr); - thenExpr.add(parseExpr("else")); - ff(thenExpr); - children.add(new Node(NodeType.IF_THEN_EXPR, thenExpr)); - expect(Token.ELSE, children, "unexpectedToken", "else"); - var elseExpr = new ArrayList(); - ff(elseExpr); - elseExpr.add(parseExpr(expectation)); - children.add(new Node(NodeType.IF_ELSE_EXPR, elseExpr)); - yield new Node(NodeType.IF_EXPR, children); - } - case LET -> { - var children = new ArrayList(); - children.add(makeTerminal(next())); - ff(children); - var paramDef = new ArrayList(); - expect(Token.LPAREN, paramDef, "unexpectedToken", "("); - var param = new ArrayList(); - ff(param); - param.add(parseParameter()); - ff(param); - expect(Token.ASSIGN, param, "unexpectedToken", "="); - ff(param); - param.add(parseExpr(")")); - paramDef.add(new Node(NodeType.LET_PARAMETER, param)); - ff(paramDef); - expect(Token.RPAREN, paramDef, "unexpectedToken", ")"); - children.add(new Node(NodeType.LET_PARAMETER_DEFINITION, paramDef)); - ff(children); - children.add(parseExpr(expectation)); - yield new Node(NodeType.LET_EXPR, children); - } - case TRUE, FALSE -> new Node(NodeType.BOOL_LITERAL_EXPR, next().span); - case INT, HEX, BIN, OCT -> new Node(NodeType.INT_LITERAL_EXPR, next().span); - case FLOAT -> new Node(NodeType.FLOAT_LITERAL_EXPR, next().span); - case STRING_START -> parseSingleLineStringLiteralExpr(); - case STRING_MULTI_START -> parseMultiLineStringLiteralExpr(); - case IDENTIFIER -> parseUnqualifiedAccessExpr(); - case EOF -> - throw parserError( - ErrorMessages.create("unexpectedEndOfFile"), prev().span.stopSpan()); - default -> { - var text = _lookahead.text(lexer); - if (expectation != null) { - throw parserError("unexpectedToken", text, expectation); - } - throw parserError("unexpectedTokenForExpression", text); - } - }; - return parseExprRest(expr); - } - - @SuppressWarnings("DuplicatedCode") - private Node parseExprRest(Node expr) { - // amends - if (lookahead() == Token.LBRACE) { - var children = new ArrayList(); - children.add(expr); - ff(children); - if (expr.type == NodeType.PARENTHESIZED_EXPR - || expr.type == NodeType.AMENDS_EXPR - || expr.type == NodeType.NEW_EXPR) { - children.add(parseObjectBody()); - return parseExprRest(new Node(NodeType.AMENDS_EXPR, children)); - } - throw parserError("unexpectedCurlyProbablyAmendsExpression", expr.text(lexer.getSource())); - } - return expr; - } - - private boolean isFunctionLiteral() { - var originalCursor = cursor; - try { - next(); // open ( - ff(); - var token = next().token; - ff(); - if (token == Token.RPAREN) { - return lookahead == Token.ARROW; - } - if (token == Token.UNDERSCORE) { - return true; - } - if (token != Token.IDENTIFIER) { - return false; - } - if (lookahead == Token.COMMA || lookahead == Token.COLON) { - return true; - } - if (lookahead == Token.RPAREN) { - next(); - ff(); - return lookahead == Token.ARROW; - } - return false; - } finally { - backtrackTo(originalCursor); - } - } - - private Node parseSingleLineStringLiteralExpr() { - var children = new ArrayList(); - var start = next(); - children.add(makeTerminal(start)); // string start - while (lookahead != Token.STRING_END) { - switch (lookahead) { - case STRING_PART -> { - var tk = next(); - if (!tk.text(lexer).isEmpty()) { - children.add(make(NodeType.STRING_CHARS, tk.span)); - } - } - case STRING_ESCAPE_NEWLINE, - STRING_ESCAPE_TAB, - STRING_ESCAPE_QUOTE, - STRING_ESCAPE_BACKSLASH, - STRING_ESCAPE_RETURN, - STRING_ESCAPE_UNICODE -> - children.add(make(NodeType.STRING_ESCAPE, next().span)); - case INTERPOLATION_START -> { - children.add(makeTerminal(next())); - ff(children); - children.add(parseExpr(")")); - ff(children); - expect(Token.RPAREN, children, "unexpectedToken", ")"); - } - case EOF -> { - var delimiter = new StringBuilder(start.text(lexer)).reverse().toString(); - throw parserError("missingDelimiter", delimiter); - } - } - } - children.add(makeTerminal(next())); // string end - return new Node(NodeType.SINGLE_LINE_STRING_LITERAL_EXPR, children); - } - - private Node parseMultiLineStringLiteralExpr() { - var children = new ArrayList(); - var start = next(); - children.add(makeTerminal(start)); // string start - if (lookahead != Token.STRING_NEWLINE) { - throw parserError(ErrorMessages.create("stringContentMustBeginOnNewLine"), spanLookahead); - } - while (lookahead != Token.STRING_END) { - switch (lookahead) { - case STRING_PART -> { - var tk = next(); - if (!tk.text(lexer).isEmpty()) { - children.add(make(NodeType.STRING_CHARS, tk.span)); - } - } - case STRING_NEWLINE -> children.add(make(NodeType.STRING_NEWLINE, next().span)); - case STRING_ESCAPE_NEWLINE, - STRING_ESCAPE_TAB, - STRING_ESCAPE_QUOTE, - STRING_ESCAPE_BACKSLASH, - STRING_ESCAPE_RETURN, - STRING_ESCAPE_UNICODE -> - children.add(make(NodeType.STRING_ESCAPE, next().span)); - case INTERPOLATION_START -> { - children.add(makeTerminal(next())); - ff(children); - children.add(parseExpr(")")); - ff(children); - expect(Token.RPAREN, children, "unexpectedToken", ")"); - } - case EOF -> { - var delimiter = new StringBuilder(start.text(lexer)).reverse().toString(); - throw parserError("missingDelimiter", delimiter); - } - } - } - children.add(makeTerminal(next())); // string end - validateStringEndDelimiter(children); - validateStringIndentation(children); - return new Node(NodeType.MULTI_LINE_STRING_LITERAL_EXPR, children); - } - - private void validateStringEndDelimiter(List nodes) { - var beforeLast = nodes.get(nodes.size() - 2); - if (beforeLast.type == NodeType.STRING_NEWLINE) return; - var text = beforeLast.text(lexer.getSource()); - if (!text.isBlank()) { - throw parserError( - ErrorMessages.create("closingStringDelimiterMustBeginOnNewLine"), beforeLast.span); - } - } - - private void validateStringIndentation(List nodes) { - var indentNode = nodes.get(nodes.size() - 2); - if (indentNode.type == NodeType.STRING_NEWLINE) return; - var indent = indentNode.text(lexer.getSource()); - var previousNewline = false; - for (var i = 1; i < nodes.size() - 2; i++) { - var child = nodes.get(i); - if (child.type != NodeType.STRING_NEWLINE && previousNewline) { - var text = child.text(lexer.getSource()); - if (!text.startsWith(indent)) { - throw parserError(ErrorMessages.create("stringIndentationMustMatchLastLine"), child.span); - } - } - previousNewline = child.type == NodeType.STRING_NEWLINE; - } - } - - private Node parseParenthesizedExpr() { - var children = new ArrayList(); - expect(Token.LPAREN, children, "unexpectedToken", "("); - if (lookahead() == Token.RPAREN) { - ff(children); - children.add(makeTerminal(next())); - return new Node(NodeType.PARENTHESIZED_EXPR, children); - } - var elements = new ArrayList(); - ff(elements); - elements.add(parseExpr(")")); - ff(elements); - children.add(new Node(NodeType.PARENTHESIZED_EXPR_ELEMENTS, elements)); - expect(Token.RPAREN, children, "unexpectedToken", ")"); - return new Node(NodeType.PARENTHESIZED_EXPR, children); - } - - private Node parseFunctionLiteral() { - var paramListChildren = new ArrayList(); - expect(Token.LPAREN, paramListChildren, "unexpectedToken", "("); - if (lookahead() == Token.RPAREN) { - ff(paramListChildren); - paramListChildren.add(makeTerminal(next())); - } else { - var elements = new ArrayList(); - ff(elements); - parseListOf(Token.RPAREN, elements, this::parseParameter); - paramListChildren.add(new Node(NodeType.PARAMETER_LIST_ELEMENTS, elements)); - expect(Token.RPAREN, paramListChildren, "unexpectedToken2", ",", ")"); - } - var children = new ArrayList(); - children.add(new Node(NodeType.PARAMETER_LIST, paramListChildren)); - ff(children); - expect(Token.ARROW, children, "unexpectedToken", "->"); - var body = new ArrayList(); - ff(body); - body.add(parseExpr()); - children.add(new Node(NodeType.FUNCTION_LITERAL_BODY, body)); - return new Node(NodeType.FUNCTION_LITERAL_EXPR, children); - } - - private Node parseType() { - return parseType(null); - } - - private Node parseType(@Nullable String expectation) { - var children = new ArrayList(); - var hasDefault = false; - FullSpan start = null; - if (lookahead == Token.STAR) { - var tk = next(); - start = tk.span; - children.add(makeTerminal(tk)); - ff(children); - hasDefault = true; - } - var first = parseTypeAtom(expectation); - children.add(first); - - if (lookahead() != Token.UNION) { - if (hasDefault) { - throw parserError(ErrorMessages.create("notAUnion"), start.endWith(first.span)); - } - return first; - } - - while (lookahead() == Token.UNION) { - ff(children); - children.add(makeTerminal(next())); - ff(children); - if (lookahead == Token.STAR) { - if (hasDefault) { - throw parserError("multipleUnionDefaults"); - } - children.add(makeTerminal(next())); - ff(children); - hasDefault = true; - } - var type = parseTypeAtom(expectation); - children.add(type); - } - return new Node(NodeType.UNION_TYPE, children); + return new GenericParserImpl(source).parseModule(); } - - private Node parseTypeAtom(@Nullable String expectation) { - var typ = - switch (lookahead) { - case UNKNOWN -> make(NodeType.UNKNOWN_TYPE, next().span); - case NOTHING -> make(NodeType.NOTHING_TYPE, next().span); - case MODULE -> make(NodeType.MODULE_TYPE, next().span); - case LPAREN -> { - var children = new ArrayList(); - children.add(makeTerminal(next())); - var totalTypes = 0; - if (lookahead() == Token.RPAREN) { - ff(children); - children.add(makeTerminal(next())); - } else { - var elements = new ArrayList(); - ff(elements); - elements.add(parseType(")")); - ff(elements); - while (lookahead == Token.COMMA) { - var comma = next(); - if (lookahead() == Token.RPAREN) { - ff(elements); - break; - } - elements.add(makeTerminal(comma)); - ff(elements); - elements.add(parseType(")")); - totalTypes++; - ff(elements); - } - children.add(new Node(NodeType.PARENTHESIZED_TYPE_ELEMENTS, elements)); - expect(Token.RPAREN, children, "unexpectedToken2", ",", ")"); - } - if (totalTypes > 1 || lookahead() == Token.ARROW) { - var actualChildren = new ArrayList(); - actualChildren.add(new Node(NodeType.FUNCTION_TYPE_PARAMETERS, children)); - ff(actualChildren); - expect(Token.ARROW, actualChildren, "unexpectedToken", "->"); - ff(actualChildren); - actualChildren.add(parseType(expectation)); - yield new Node(NodeType.FUNCTION_TYPE, actualChildren); - } else { - yield new Node(NodeType.PARENTHESIZED_TYPE, children); - } - } - case IDENTIFIER -> { - var children = new ArrayList(); - children.add(parseQualifiedIdentifier()); - if (lookahead() == Token.LT) { - ff(children); - children.add(parseTypeArgumentList()); - } - yield new Node(NodeType.DECLARED_TYPE, children); - } - case STRING_START -> - new Node(NodeType.STRING_CONSTANT_TYPE, List.of(parseStringConstant())); - default -> { - var text = _lookahead.text(lexer); - if (expectation != null) { - throw parserError("unexpectedTokenForType2", text, expectation); - } - throw parserError("unexpectedTokenForType", text); - } - }; - - if (typ.type == NodeType.FUNCTION_TYPE) return typ; - return parseTypeEnd(typ); - } - - private Node parseTypeEnd(Node type) { - var children = new ArrayList(); - children.add(type); - // nullable types - if (lookahead() == Token.QUESTION) { - ff(children); - children.add(makeTerminal(next())); - var res = new Node(NodeType.NULLABLE_TYPE, children); - return parseTypeEnd(res); - } - // constrained types: have to start in the same line as the type - var fla = fullLookahead(); - if (fla.tk.token == Token.LPAREN && noSemicolonInbetween() && fla.tk.newLinesBetween == 0) { - ff(children); - var constraint = new ArrayList(); - constraint.add(makeTerminal(next())); - var elements = new ArrayList(); - ff(elements); - parseListOf(Token.RPAREN, elements, () -> parseExpr(")")); - constraint.add(new Node(NodeType.CONSTRAINED_TYPE_ELEMENTS, elements)); - expect(Token.RPAREN, constraint, "unexpectedToken2", ",", ")"); - children.add(new Node(NodeType.CONSTRAINED_TYPE_CONSTRAINT, constraint)); - var res = new Node(NodeType.CONSTRAINED_TYPE, children); - return parseTypeEnd(res); - } - return type; - } - - private Node parseAnnotation() { - var children = new ArrayList(); - children.add(makeTerminal(next())); - children.add(parseType()); - if (lookahead() == Token.LBRACE) { - ff(children); - children.add(parseObjectBody()); - } - return new Node(NodeType.ANNOTATION, children); - } - - private Node parseParameter() { - if (lookahead == Token.UNDERSCORE) { - return new Node(NodeType.PARAMETER, List.of(makeTerminal(next()))); - } - return parseTypedIdentifier(); - } - - private Node parseTypedIdentifier() { - var children = new ArrayList(); - children.add(parseIdentifier()); - if (lookahead() == Token.COLON) { - ff(children); - children.add(parseTypeAnnotation()); - } - return new Node(NodeType.PARAMETER, children); - } - - private Node parseParameterList() { - var children = new ArrayList(); - expect(Token.LPAREN, children, "unexpectedToken", "("); - ff(children); - if (lookahead == Token.RPAREN) { - children.add(makeTerminal(next())); - } else { - var elements = new ArrayList(); - parseListOf(Token.RPAREN, elements, this::parseParameter); - children.add(new Node(NodeType.PARAMETER_LIST_ELEMENTS, elements)); - expect(Token.RPAREN, children, "unexpectedToken2", ",", ")"); - } - return new Node(NodeType.PARAMETER_LIST, children); - } - - private List parseBodyList() { - if (lookahead != Token.LBRACE) { - throw parserError("unexpectedToken2", _lookahead.text(lexer), "{", "="); - } - var bodies = new ArrayList(); - do { - bodies.add(parseObjectBody()); - } while (lookahead() == Token.LBRACE); - return bodies; - } - - private Node parseTypeParameterList() { - var children = new ArrayList(); - expect(Token.LT, children, "unexpectedToken", "<"); - ff(children); - var elements = new ArrayList(); - parseListOf(Token.GT, elements, this::parseTypeParameter); - children.add(new Node(NodeType.TYPE_PARAMETER_LIST_ELEMENTS, elements)); - expect(Token.GT, children, "unexpectedToken2", ",", ">"); - return new Node(NodeType.TYPE_PARAMETER_LIST, children); - } - - private Node parseTypeArgumentList() { - var children = new ArrayList(); - expect(Token.LT, children, "unexpectedToken", "<"); - ff(children); - var elements = new ArrayList(); - parseListOf(Token.GT, elements, () -> parseType(">")); - children.add(new Node(NodeType.TYPE_ARGUMENT_LIST_ELEMENTS, elements)); - expect(Token.GT, children, "unexpectedToken2", ",", ">"); - return new Node(NodeType.TYPE_ARGUMENT_LIST, children); - } - - private Node parseArgumentList() { - var children = new ArrayList(); - expect(Token.LPAREN, children, "unexpectedToken", "("); - if (lookahead() == Token.RPAREN) { - ff(children); - children.add(makeTerminal(next())); - return new Node(NodeType.ARGUMENT_LIST, children); - } - var elements = new ArrayList(); - ff(elements); - parseListOf(Token.RPAREN, elements, () -> parseExpr(")")); - ff(elements); - children.add(new Node(NodeType.ARGUMENT_LIST_ELEMENTS, elements)); - expect(Token.RPAREN, children, "unexpectedToken2", ",", ")"); - return new Node(NodeType.ARGUMENT_LIST, children); - } - - private Node parseTypeParameter() { - var children = new ArrayList(); - if (lookahead == Token.IN) { - children.add(makeTerminal(next())); - } else if (lookahead == Token.OUT) { - children.add(makeTerminal(next())); - } - children.add(parseIdentifier()); - return new Node(NodeType.TYPE_PARAMETER, children); - } - - private Node parseTypeAnnotation() { - var children = new ArrayList(); - expect(Token.COLON, children, "unexpectedToken", ":"); - ff(children); - children.add(parseType()); - return new Node(NodeType.TYPE_ANNOTATION, children); - } - - private Node parseIdentifier() { - if (lookahead != Token.IDENTIFIER) { - if (lookahead.isKeyword()) { - throw parserError("keywordNotAllowedHere", lookahead.text()); - } - throw parserError("unexpectedToken", _lookahead.text(lexer), "identifier"); - } - return new Node(NodeType.IDENTIFIER, next().span); - } - - private Node parseStringConstant() { - var children = new ArrayList(); - var startTk = expect(Token.STRING_START, "unexpectedToken", "\""); - children.add(makeTerminal(startTk)); - while (lookahead != Token.STRING_END) { - switch (lookahead) { - case STRING_PART, - STRING_ESCAPE_NEWLINE, - STRING_ESCAPE_TAB, - STRING_ESCAPE_QUOTE, - STRING_ESCAPE_BACKSLASH, - STRING_ESCAPE_RETURN, - STRING_ESCAPE_UNICODE -> - children.add(makeTerminal(next())); - case EOF -> { - var delimiter = new StringBuilder(startTk.text(lexer)).reverse().toString(); - throw parserError("missingDelimiter", delimiter); - } - case INTERPOLATION_START -> throw parserError("interpolationInConstant"); - // the lexer makes sure we only get the above tokens inside a string - default -> throw new RuntimeException("Unreacheable code"); - } - } - children.add(makeTerminal(next())); // string end - return new Node(NodeType.STRING_CHARS, children); - } - - private FullToken expect(Token type, String errorKey, Object... messageArgs) { - if (lookahead != type) { - var span = spanLookahead; - if (lookahead == Token.EOF || _lookahead.newLinesBetween > 0) { - // don't point at the EOF or the next line, but at the end of the last token - span = prev().span.stopSpan(); - } - var args = messageArgs; - if (errorKey.startsWith("unexpectedToken")) { - args = new Object[messageArgs.length + 1]; - args[0] = lookahead == Token.EOF ? "EOF" : _lookahead.text(lexer); - System.arraycopy(messageArgs, 0, args, 1, messageArgs.length); - } - throw parserError(ErrorMessages.create(errorKey, args), span); - } - return next(); - } - - private void expect(Token type, List children, String errorKey, Object... messageArgs) { - var tk = expect(type, errorKey, messageArgs); - children.add(makeTerminal(tk)); - } - - private void parseListOf(Token terminator, List children, Supplier parser) { - children.add(parser.get()); - ff(children); - while (lookahead == Token.COMMA) { - // don't store the last comma - var comma = makeTerminal(next()); - if (lookahead() == terminator) break; - children.add(comma); - ff(children); - children.add(parser.get()); - ff(children); - } - } - - private GenericParserError parserError(String messageKey, Object... args) { - return new GenericParserError(ErrorMessages.create(messageKey, args), spanLookahead); - } - - private GenericParserError parserError(String message, FullSpan span) { - return new GenericParserError(message, span); - } - - private boolean isModuleDecl() { - var _cursor = cursor; - var ftk = tokens.get(_cursor); - while (ftk.token.isAffix() || ftk.token.isModifier()) { - ftk = tokens.get(++_cursor); - } - var tk = ftk.token; - return tk == Token.MODULE || tk == Token.EXTENDS || tk == Token.AMENDS; - } - - private boolean isImport() { - var _cursor = cursor; - var ftk = tokens.get(_cursor); - while (ftk.token.isAffix()) { - ftk = tokens.get(++_cursor); - } - var tk = ftk.token; - return tk == Token.IMPORT || tk == Token.IMPORT_STAR; - } - - private FullToken next() { - var tmp = tokens.get(cursor++); - _lookahead = tokens.get(cursor); - lookahead = _lookahead.token; - spanLookahead = _lookahead.span; - return tmp; - } - - private boolean noSemicolonInbetween() { - return tokens.get(cursor - 1).token != Token.SEMICOLON; - } - - private void backtrack() { - var tmp = tokens.get(--cursor); - lookahead = tmp.token; - spanLookahead = tmp.span; - } - - private void backtrackTo(int point) { - cursor = point; - var tmp = tokens.get(cursor); - lookahead = tmp.token; - spanLookahead = tmp.span; - } - - private FullToken prev() { - return tokens.get(cursor - 1); - } - - // Jump over affixes and find the next token - private Token lookahead() { - var i = cursor; - var tmp = tokens.get(i); - while (tmp.token.isAffix() && tmp.token != Token.EOF) { - tmp = tokens.get(++i); - } - return tmp.token; - } - - // Jump over affixes and find the next token - private LookaheadSearch fullLookahead() { - var i = cursor; - var hasSemicolon = false; - var tmp = tokens.get(i); - while (tmp.token.isAffix() && tmp.token != Token.EOF) { - if (tmp.token == Token.SEMICOLON) { - hasSemicolon = true; - } - tmp = tokens.get(++i); - } - return new LookaheadSearch(tmp, hasSemicolon); - } - - private record LookaheadSearch(FullToken tk, boolean hasSemicolon) {} - - private record HeaderParts(List preffixes, @Nullable Node modifierList) {} - - private HeaderParts getHeaderParts(List nodes) { - if (nodes.isEmpty()) return new HeaderParts(nodes, null); - var last = nodes.get(nodes.size() - 1); - if (last.type == NodeType.MODIFIER_LIST) { - return new HeaderParts(nodes.subList(0, nodes.size() - 1), last); - } - return new HeaderParts(nodes, null); - } - - private Node make(NodeType type, FullSpan span) { - return new Node(type, span); - } - - private Node makeAffix(FullToken tk) { - return new Node(nodeTypeForAffix(tk.token), tk.span); - } - - private Node makeTerminal(FullToken tk) { - return new Node(NodeType.TERMINAL, tk.span); - } - - // fast-forward over affix tokens - // store children - private void ff(List children) { - var tmp = tokens.get(cursor); - while (tmp.token.isAffix()) { - children.add(makeAffix(tmp)); - tmp = tokens.get(++cursor); - } - _lookahead = tmp; - lookahead = _lookahead.token; - spanLookahead = _lookahead.span; - } - - // fast-forward over affix tokens - private void ff() { - var tmp = tokens.get(cursor); - while (tmp.token.isAffix()) { - tmp = tokens.get(++cursor); - } - _lookahead = tmp; - lookahead = _lookahead.token; - spanLookahead = _lookahead.span; - } - - private NodeType nodeTypeForAffix(Token token) { - return switch (token) { - case LINE_COMMENT -> NodeType.LINE_COMMENT; - case BLOCK_COMMENT -> NodeType.BLOCK_COMMENT; - case SHEBANG -> NodeType.SHEBANG; - case SEMICOLON -> NodeType.SEMICOLON; - default -> throw new RuntimeException("Unreacheable code"); - }; - } - - private record FullToken(Token token, FullSpan span, int newLinesBetween) { - String text(Lexer lexer) { - return lexer.textFor(span.charIndex(), span.length()); - } - } - - private record HeaderResult( - boolean hasDocComment, boolean hasAnnotations, boolean hasModifiers) {} } diff --git a/pkl-parser/src/main/java/org/pkl/parser/GenericParserError.java b/pkl-parser/src/main/java/org/pkl/parser/GenericParserError.java index 33fa40376..ff783e147 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/GenericParserError.java +++ b/pkl-parser/src/main/java/org/pkl/parser/GenericParserError.java @@ -1,5 +1,5 @@ /* - * Copyright © 2025 Apple Inc. and the Pkl project authors. All rights reserved. + * Copyright © 2025-2026 Apple Inc. and the Pkl project authors. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ import org.pkl.parser.syntax.generic.FullSpan; -public class GenericParserError extends RuntimeException { +public final class GenericParserError extends RuntimeException { private final FullSpan span; public GenericParserError(String msg, FullSpan span) { diff --git a/pkl-parser/src/main/java/org/pkl/parser/GenericParserImpl.java b/pkl-parser/src/main/java/org/pkl/parser/GenericParserImpl.java new file mode 100644 index 000000000..8e2248e40 --- /dev/null +++ b/pkl-parser/src/main/java/org/pkl/parser/GenericParserImpl.java @@ -0,0 +1,1582 @@ +/* + * Copyright © 2025-2026 Apple Inc. and the Pkl project authors. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.pkl.parser; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; +import org.pkl.parser.syntax.Operator; +import org.pkl.parser.syntax.generic.FullSpan; +import org.pkl.parser.syntax.generic.Node; +import org.pkl.parser.syntax.generic.NodeType; +import org.pkl.parser.util.ErrorMessages; +import org.pkl.parser.util.Nullable; + +@SuppressWarnings("DuplicatedCode") +class GenericParserImpl { + + private final Lexer lexer; + private Token lookahead; + private FullSpan spanLookahead; + private FullToken _lookahead; + private int cursor = 0; + private final List tokens = new ArrayList<>(); + + GenericParserImpl(String source) { + this.lexer = new Lexer(source); + while (true) { + var ft = new FullToken(lexer.next(), lexer.fullSpan(), lexer.getNewLinesBetween()); + tokens.add(ft); + if (ft.token == Token.EOF) break; + } + _lookahead = tokens.get(cursor); + lookahead = _lookahead.token; + spanLookahead = _lookahead.span; + } + + Node parseModule() { + if (lookahead == Token.EOF) { + return new Node(NodeType.MODULE, new FullSpan(0, 0, 1, 1, 1, 1), List.of()); + } + var children = new ArrayList(); + var nodes = new ArrayList(); + if (lookahead == Token.SHEBANG) { + nodes.add(makeAffix(next())); + } + ff(nodes); + + var res = parseMemberHeader(children); + + if (isModuleDecl()) { + nodes.add(parseModuleDecl(children)); + children.clear(); + res = new HeaderResult(false, false, false); + ff(nodes); + } + + // imports + var imports = new ArrayList(); + while (lookahead == Token.IMPORT || lookahead == Token.IMPORT_STAR) { + if (res.hasDocComment || res.hasAnnotations || res.hasModifiers) { + throw parserError("wrongHeaders", "Imports"); + } + var lastImport = parseImportDecl(); + imports.add(lastImport); + // keep trailing affixes as part of the import + while (lookahead.isAffix() && lastImport.span.isSameLine(spanLookahead)) { + imports.add(makeAffix(next())); + } + if (!isImport()) break; + ff(imports); + } + if (!imports.isEmpty()) { + nodes.add(new Node(NodeType.IMPORT_LIST, imports)); + ff(nodes); + } + + // entries + if (res.hasDocComment || res.hasAnnotations || res.hasModifiers) { + nodes.add(parseModuleMember(children)); + ff(nodes); + } + + while (lookahead != Token.EOF) { + children.clear(); + parseMemberHeader(children); + nodes.add(parseModuleMember(children)); + ff(nodes); + } + return new Node(NodeType.MODULE, nodes); + } + + private Node parseModuleDecl(List preChildren) { + var headerParts = getHeaderParts(preChildren); + var children = new ArrayList<>(headerParts.preffixes); + var headers = new ArrayList(); + if (headerParts.modifierList != null) { + headers.add(headerParts.modifierList); + } + if (lookahead == Token.MODULE) { + var subChildren = new ArrayList<>(headers); + subChildren.add(makeTerminal(next())); + ff(subChildren); + subChildren.add(parseQualifiedIdentifier()); + children.add(new Node(NodeType.MODULE_DEFINITION, subChildren)); + } else { + children.addAll(headers); + if (headerParts.modifierList != null) { + throw parserError("wrongHeaders", "Amends or extends declaration"); + } + } + var looka = lookahead(); + if (looka == Token.AMENDS || looka == Token.EXTENDS) { + var type = looka == Token.AMENDS ? NodeType.AMENDS_CLAUSE : NodeType.EXTENDS_CLAUSE; + ff(children); + var subChildren = new ArrayList(); + subChildren.add(makeTerminal(next())); + ff(subChildren); + subChildren.add(parseStringConstant()); + children.add(new Node(type, subChildren)); + } + return new Node(NodeType.MODULE_DECLARATION, children); + } + + private Node parseQualifiedIdentifier() { + var children = new ArrayList(); + children.add(parseIdentifier()); + while (lookahead() == Token.DOT) { + ff(children); + children.add(new Node(NodeType.TERMINAL, next().span)); + ff(children); + children.add(parseIdentifier()); + } + return new Node(NodeType.QUALIFIED_IDENTIFIER, children); + } + + private Node parseImportDecl() { + var children = new ArrayList(); + children.add(makeTerminal(next())); + ff(children); + children.add(parseStringConstant()); + if (lookahead() == Token.AS) { + ff(children); + var alias = new ArrayList(); + alias.add(makeTerminal(next())); + ff(alias); + alias.add(parseIdentifier()); + children.add(new Node(NodeType.IMPORT_ALIAS, alias)); + } + return new Node(NodeType.IMPORT, children); + } + + private HeaderResult parseMemberHeader(List children) { + var hasDocComment = false; + var hasAnnotation = false; + var hasModifier = false; + var docs = new ArrayList(); + ff(children); + while (lookahead() == Token.DOC_COMMENT) { + ff(docs); + docs.add(new Node(NodeType.DOC_COMMENT_LINE, next().span)); + hasDocComment = true; + } + if (hasDocComment) { + children.add(new Node(NodeType.DOC_COMMENT, docs)); + } + ff(children); + while (lookahead == Token.AT) { + children.add(parseAnnotation()); + hasAnnotation = true; + ff(children); + } + var modifiers = new ArrayList(); + while (lookahead.isModifier()) { + modifiers.add(make(NodeType.MODIFIER, next().span)); + hasModifier = true; + ff(children); + } + if (hasModifier) children.add(new Node(NodeType.MODIFIER_LIST, modifiers)); + return new HeaderResult(hasDocComment, hasAnnotation, hasModifier); + } + + private Node parseModuleMember(List preChildren) { + return switch (lookahead) { + case IDENTIFIER -> parseClassProperty(preChildren); + case TYPE_ALIAS -> parseTypeAlias(preChildren); + case CLASS -> parseClass(preChildren); + case FUNCTION -> parseClassMethod(preChildren); + case EOF -> throw parserError("unexpectedEndOfFile"); + default -> { + if (lookahead.isKeyword()) { + throw parserError("keywordNotAllowedHere", lookahead.text()); + } + if (lookahead == Token.DOC_COMMENT) { + throw parserError("danglingDocComment"); + } + throw parserError("invalidTopLevelToken"); + } + }; + } + + private Node parseTypeAlias(List preChildren) { + var headerParts = getHeaderParts(preChildren); + var children = new ArrayList<>(headerParts.preffixes); + var headers = new ArrayList(); + if (headerParts.modifierList != null) { + headers.add(headerParts.modifierList); + } + // typealias keyword + headers.add(makeTerminal(next())); + ff(headers); + headers.add(parseIdentifier()); + ff(headers); + if (lookahead == Token.LT) { + headers.add(parseTypeParameterList()); + ff(headers); + } + expect(Token.ASSIGN, headers, "unexpectedToken", "="); + children.add(new Node(NodeType.TYPEALIAS_HEADER, headers)); + var body = new ArrayList(); + ff(body); + body.add(parseType()); + children.add(new Node(NodeType.TYPEALIAS_BODY, body)); + return new Node(NodeType.TYPEALIAS, children); + } + + private Node parseClass(List preChildren) { + var headerParts = getHeaderParts(preChildren); + var children = new ArrayList<>(headerParts.preffixes); + var headers = new ArrayList(); + if (headerParts.modifierList != null) { + headers.add(headerParts.modifierList); + } + // class keyword + headers.add(makeTerminal(next())); + ff(headers); + headers.add(parseIdentifier()); + if (lookahead() == Token.LT) { + ff(headers); + headers.add(parseTypeParameterList()); + } + if (lookahead() == Token.EXTENDS) { + var extend = new ArrayList(); + ff(extend); + extend.add(makeTerminal(next())); + ff(extend); + extend.add(parseType()); + headers.add(new Node(NodeType.CLASS_HEADER_EXTENDS, extend)); + } + children.add(new Node(NodeType.CLASS_HEADER, headers)); + if (lookahead() == Token.LBRACE) { + ff(children); + children.add(parseClassBody()); + } + return new Node(NodeType.CLASS, children); + } + + private Node parseClassBody() { + var children = new ArrayList(); + children.add(makeTerminal(next())); + var elements = new ArrayList(); + var hasElements = false; + ff(elements); + while (lookahead != Token.RBRACE && lookahead != Token.EOF) { + hasElements = true; + var preChildren = new ArrayList(); + parseMemberHeader(preChildren); + if (lookahead == Token.FUNCTION) { + elements.add(parseClassMethod(preChildren)); + } else { + elements.add(parseClassProperty(preChildren)); + } + ff(elements); + } + if (lookahead == Token.EOF) { + throw parserError(ErrorMessages.create("missingDelimiter", "}"), prev().span.stopSpan()); + } + if (hasElements) { + children.add(new Node(NodeType.CLASS_BODY_ELEMENTS, elements)); + } else if (!elements.isEmpty()) { + // add affixes + children.addAll(elements); + } + expect(Token.RBRACE, children, "missingDelimiter", "}"); + return new Node(NodeType.CLASS_BODY, children); + } + + private Node parseClassProperty(List preChildren) { + var headerParts = getHeaderParts(preChildren); + var children = new ArrayList<>(headerParts.preffixes); + var header = new ArrayList(); + var headerBegin = new ArrayList(); + if (headerParts.modifierList != null) { + headerBegin.add(headerParts.modifierList); + } + headerBegin.add(parseIdentifier()); + header.add(new Node(NodeType.CLASS_PROPERTY_HEADER_BEGIN, headerBegin)); + var hasTypeAnnotation = false; + if (lookahead() == Token.COLON) { + ff(header); + header.add(parseTypeAnnotation()); + hasTypeAnnotation = true; + } + children.add(new Node(NodeType.CLASS_PROPERTY_HEADER, header)); + if (lookahead() == Token.ASSIGN) { + ff(children); + children.add(makeTerminal(next())); + var body = new ArrayList(); + ff(body); + body.add(parseExpr()); + children.add(new Node(NodeType.CLASS_PROPERTY_BODY, body)); + } else if (lookahead() == Token.LBRACE) { + if (hasTypeAnnotation) { + throw parserError("typeAnnotationInAmends"); + } + while (lookahead() == Token.LBRACE) { + ff(children); + children.add(parseObjectBody()); + } + } + return new Node(NodeType.CLASS_PROPERTY, children); + } + + private Node parseClassMethod(List preChildren) { + var headerParts = getHeaderParts(preChildren); + var children = new ArrayList<>(headerParts.preffixes); + var headers = new ArrayList(); + if (headerParts.modifierList != null) { + headers.add(headerParts.modifierList); + } + expect(Token.FUNCTION, headers, "unexpectedToken", "function"); + ff(headers); + headers.add(parseIdentifier()); + children.add(new Node(NodeType.CLASS_METHOD_HEADER, headers)); + ff(children); + if (lookahead == Token.LT) { + children.add(parseTypeParameterList()); + ff(children); + } + children.add(parseParameterList()); + if (lookahead() == Token.COLON) { + ff(children); + children.add(parseTypeAnnotation()); + } + if (lookahead() == Token.ASSIGN) { + ff(children); + children.add(makeTerminal(next())); + var body = new ArrayList(); + ff(body); + body.add(parseExpr()); + children.add(new Node(NodeType.CLASS_METHOD_BODY, body)); + } + return new Node(NodeType.CLASS_METHOD, children); + } + + private Node parseObjectBody() { + var children = new ArrayList(); + expect(Token.LBRACE, children, "unexpectedToken", "{"); + if (lookahead() == Token.RBRACE) { + ff(children); + children.add(makeTerminal(next())); + return new Node(NodeType.OBJECT_BODY, children); + } + if (isParameter()) { + var params = new ArrayList(); + ff(params); + parseListOf(Token.ARROW, params, this::parseParameter); + expect(Token.ARROW, params, "unexpectedToken2", ",", "->"); + children.add(new Node(NodeType.OBJECT_PARAMETER_LIST, params)); + ff(children); + } + var members = new ArrayList(); + ff(members); + while (lookahead != Token.RBRACE) { + if (lookahead == Token.EOF) { + throw parserError(ErrorMessages.create("missingDelimiter", "}"), prev().span.stopSpan()); + } + members.add(parseObjectMember()); + ff(members); + } + if (!members.isEmpty()) { + children.add(new Node(NodeType.OBJECT_MEMBER_LIST, members)); + } + children.add(makeTerminal(next())); // RBRACE + return new Node(NodeType.OBJECT_BODY, children); + } + + /** Returns true if the lookahead is a parameter, false if it's a member. May have to backtrack */ + private boolean isParameter() { + if (lookahead == Token.UNDERSCORE) return true; + if (lookahead != Token.IDENTIFIER) return false; + // have to backtrack + var originalCursor = cursor; + var result = false; + next(); // identifier + ff(); + if (lookahead == Token.ARROW || lookahead == Token.COMMA) { + result = true; + } else if (lookahead == Token.COLON) { + next(); // colon + ff(); + parseType(); + ff(); + result = lookahead == Token.COMMA || lookahead == Token.ARROW; + } + backtrackTo(originalCursor); + return result; + } + + private Node parseObjectMember() { + return switch (lookahead) { + case IDENTIFIER -> { + var originalCursor = cursor; + next(); + ff(new ArrayList<>()); + if (lookahead == Token.LBRACE || lookahead == Token.COLON || lookahead == Token.ASSIGN) { + // it's an objectProperty + backtrackTo(originalCursor); + yield parseObjectProperty(null); + } else { + backtrackTo(originalCursor); + // it's an expression + yield parseObjectElement(); + } + } + case FUNCTION -> parseObjectMethod(List.of()); + case LPRED -> parseMemberPredicate(); + case LBRACK -> parseObjectEntry(); + case SPREAD, QSPREAD -> parseObjectSpread(); + case WHEN -> parseWhenGenerator(); + case FOR -> parseForGenerator(); + case TYPE_ALIAS, CLASS -> + throw parserError(ErrorMessages.create("missingDelimiter", "}"), prev().span.stopSpan()); + default -> { + var preChildren = new ArrayList(); + while (lookahead.isModifier()) { + preChildren.add(make(NodeType.MODIFIER, next().span)); + ff(preChildren); + } + if (!preChildren.isEmpty()) { + if (lookahead == Token.FUNCTION) { + yield parseObjectMethod(List.of(new Node(NodeType.MODIFIER_LIST, preChildren))); + } else { + yield parseObjectProperty(List.of(new Node(NodeType.MODIFIER_LIST, preChildren))); + } + } else { + yield parseObjectElement(); + } + } + }; + } + + private Node parseObjectElement() { + return new Node(NodeType.OBJECT_ELEMENT, List.of(parseExpr())); + } + + private Node parseObjectProperty(@Nullable List preChildren) { + var children = new ArrayList(); + var header = new ArrayList(); + var headerBegin = new ArrayList(); + if (preChildren != null) { + headerBegin.addAll(preChildren); + } + ff(headerBegin); + var modifierList = new ArrayList(); + while (lookahead.isModifier()) { + modifierList.add(make(NodeType.MODIFIER, next().span)); + ff(modifierList); + } + if (!modifierList.isEmpty()) { + headerBegin.add(new Node(NodeType.MODIFIER_LIST, modifierList)); + } + headerBegin.add(parseIdentifier()); + header.add(new Node(NodeType.OBJECT_PROPERTY_HEADER_BEGIN, headerBegin)); + var hasTypeAnnotation = false; + if (lookahead() == Token.COLON) { + ff(header); + header.add(parseTypeAnnotation()); + hasTypeAnnotation = true; + } + children.add(new Node(NodeType.OBJECT_PROPERTY_HEADER, header)); + if (hasTypeAnnotation || lookahead() == Token.ASSIGN) { + ff(children); + expect(Token.ASSIGN, children, "unexpectedToken", "="); + var body = new ArrayList(); + ff(body); + body.add(parseExpr("}")); + children.add(new Node(NodeType.OBJECT_PROPERTY_BODY, body)); + return new Node(NodeType.OBJECT_PROPERTY, children); + } + ff(children); + children.addAll(parseBodyList()); + return new Node(NodeType.OBJECT_PROPERTY, children); + } + + private Node parseObjectMethod(List preChildren) { + var headerParts = getHeaderParts(preChildren); + var children = new ArrayList<>(headerParts.preffixes); + var headers = new ArrayList(); + if (headerParts.modifierList != null) { + headers.add(headerParts.modifierList); + } + expect(Token.FUNCTION, headers, "unexpectedToken", "function"); + ff(headers); + headers.add(parseIdentifier()); + children.add(new Node(NodeType.CLASS_METHOD_HEADER, headers)); + ff(children); + if (lookahead == Token.LT) { + children.add(parseTypeParameterList()); + ff(children); + } + children.add(parseParameterList()); + ff(children); + if (lookahead == Token.COLON) { + children.add(parseTypeAnnotation()); + ff(children); + } + expect(Token.ASSIGN, children, "unexpectedToken", "="); + var body = new ArrayList(); + ff(body); + body.add(parseExpr()); + children.add(new Node(NodeType.CLASS_METHOD_BODY, body)); + return new Node(NodeType.OBJECT_METHOD, children); + } + + private Node parseMemberPredicate() { + var children = new ArrayList(); + children.add(makeTerminal(next())); + ff(children); + children.add(parseExpr()); + ff(children); + var firstBrack = expect(Token.RBRACK, "unexpectedToken", "]]"); + children.add(makeTerminal(firstBrack)); + var secondbrack = expect(Token.RBRACK, "unexpectedToken", "]]"); + children.add(makeTerminal(secondbrack)); + if (firstBrack.span.charIndex() != secondbrack.span.charIndex() - 1) { + // There shouldn't be any whitespace between the first and second ']'. + var span = firstBrack.span.endWith(secondbrack.span); + var text = lexer.textFor(span.charIndex(), span.length()); + throw parserError(ErrorMessages.create("unexpectedToken", text, "]]"), firstBrack.span); + } + ff(children); + if (lookahead == Token.ASSIGN) { + children.add(makeTerminal(next())); + ff(children); + children.add(parseExpr("}")); + return new Node(NodeType.MEMBER_PREDICATE, children); + } + children.addAll(parseBodyList()); + return new Node(NodeType.MEMBER_PREDICATE, children); + } + + private Node parseObjectEntry() { + var children = new ArrayList(); + var header = new ArrayList(); + expect(Token.LBRACK, header, "unexpectedToken", "["); + ff(header); + header.add(parseExpr()); + expect(Token.RBRACK, header, "unexpectedToken", "]"); + if (lookahead() == Token.ASSIGN) { + ff(header); + header.add(makeTerminal(next())); + children.add(new Node(NodeType.OBJECT_ENTRY_HEADER, header)); + ff(children); + children.add(parseExpr()); + return new Node(NodeType.OBJECT_ENTRY, children); + } + children.add(new Node(NodeType.OBJECT_ENTRY_HEADER, header)); + ff(children); + children.addAll(parseBodyList()); + return new Node(NodeType.OBJECT_ENTRY, children); + } + + private Node parseObjectSpread() { + var children = new ArrayList(); + children.add(makeTerminal(next())); + ff(children); + children.add(parseExpr()); + return new Node(NodeType.OBJECT_SPREAD, children); + } + + private Node parseWhenGenerator() { + var children = new ArrayList(); + var header = new ArrayList(); + children.add(makeTerminal(next())); + ff(children); + expect(Token.LPAREN, header, "unexpectedToken", "("); + ff(header); + header.add(parseExpr()); + ff(header); + expect(Token.RPAREN, header, "unexpectedToken", ")"); + children.add(new Node(NodeType.WHEN_GENERATOR_HEADER, header)); + ff(children); + children.add(parseObjectBody()); + if (lookahead() == Token.ELSE) { + ff(children); + children.add(makeTerminal(next())); + ff(children); + children.add(parseObjectBody()); + } + return new Node(NodeType.WHEN_GENERATOR, children); + } + + private Node parseForGenerator() { + var children = new ArrayList(); + children.add(makeTerminal(next())); + ff(children); + var header = new ArrayList(); + expect(Token.LPAREN, header, "unexpectedToken", "("); + var headerDefinition = new ArrayList(); + var headerDefinitionHeader = new ArrayList(); + ff(headerDefinitionHeader); + headerDefinitionHeader.add(parseParameter()); + ff(headerDefinitionHeader); + if (lookahead == Token.COMMA) { + headerDefinitionHeader.add(makeTerminal(next())); + ff(headerDefinitionHeader); + headerDefinitionHeader.add(parseParameter()); + ff(headerDefinitionHeader); + } + expect(Token.IN, headerDefinitionHeader, "unexpectedToken", "in"); + headerDefinition.add( + new Node(NodeType.FOR_GENERATOR_HEADER_DEFINITION_HEADER, headerDefinitionHeader)); + ff(headerDefinition); + headerDefinition.add(parseExpr()); + ff(headerDefinition); + header.add(new Node(NodeType.FOR_GENERATOR_HEADER_DEFINITION, headerDefinition)); + expect(Token.RPAREN, header, "unexpectedToken", ")"); + children.add(new Node(NodeType.FOR_GENERATOR_HEADER, header)); + ff(children); + children.add(parseObjectBody()); + return new Node(NodeType.FOR_GENERATOR, children); + } + + private Node parseExpr() { + return parseExpr(null, 1); + } + + private Node parseExpr(@Nullable String expectation) { + return parseExpr(expectation, 1); + } + + private Node parseExpr(@Nullable String expectation, int minPrecedence) { + var expr = parseExprAtom(expectation); + var fullOpToken = fullLookahead(); + var operator = getOperator(fullOpToken.tk); + while (operator != null) { + if (operator.getPrec() < minPrecedence) break; + // `-` and `[]` must be in the same line as the left operand and have no semicolons inbetween + if ((operator == Operator.MINUS || operator == Operator.SUBSCRIPT) + && (fullOpToken.hasSemicolon || !expr.span.isSameLine(fullOpToken.tk.span))) break; + var children = new ArrayList(); + children.add(expr); + ff(children); + var op = next(); + children.add(make(NodeType.OPERATOR, op.span)); + ff(children); + var nodeType = NodeType.BINARY_OP_EXPR; + var nextMinPrec = operator.isLeftAssoc() ? operator.getPrec() + 1 : operator.getPrec(); + switch (op.token) { + case IS, AS -> children.add(parseType()); + case LBRACK -> { + nodeType = NodeType.SUBSCRIPT_EXPR; + children.add(parseExpr("]")); + ff(children); + expect(Token.RBRACK, children, "unexpectedToken", "]"); + } + case DOT, QDOT -> { + nodeType = NodeType.QUALIFIED_ACCESS_EXPR; + children.add(parseUnqualifiedAccessExpr()); + } + case NON_NULL -> nodeType = NodeType.NON_NULL_EXPR; + default -> children.add(parseExpr(expectation, nextMinPrec)); + } + + expr = new Node(nodeType, children); + fullOpToken = fullLookahead(); + operator = getOperator(fullOpToken.tk); + } + return expr; + } + + private @Nullable Operator getOperator(FullToken tk) { + return switch (tk.token) { + case POW -> Operator.POW; + case STAR -> Operator.MULT; + case DIV -> Operator.DIV; + case INT_DIV -> Operator.INT_DIV; + case MOD -> Operator.MOD; + case PLUS -> Operator.PLUS; + case MINUS -> Operator.MINUS; + case GT -> Operator.GT; + case GTE -> Operator.GTE; + case LT -> Operator.LT; + case LTE -> Operator.LTE; + case IS -> Operator.IS; + case AS -> Operator.AS; + case EQUAL -> Operator.EQ_EQ; + case NOT_EQUAL -> Operator.NOT_EQ; + case AND -> Operator.AND; + case OR -> Operator.OR; + case PIPE -> Operator.PIPE; + case COALESCE -> Operator.NULL_COALESCE; + case DOT -> Operator.DOT; + case QDOT -> Operator.QDOT; + case LBRACK -> Operator.SUBSCRIPT; + case NON_NULL -> Operator.NON_NULL; + default -> null; + }; + } + + private Node parseUnqualifiedAccessExpr() { + var children = new ArrayList(); + children.add(parseIdentifier()); + if (lookahead() == Token.LPAREN && noSemicolonInbetween() && _lookahead.newLinesBetween == 0) { + ff(children); + children.add(parseArgumentList()); + } + return new Node(NodeType.UNQUALIFIED_ACCESS_EXPR, children); + } + + private Node parseExprAtom(@Nullable String expectation) { + var expr = + switch (lookahead) { + case THIS -> new Node(NodeType.THIS_EXPR, next().span); + case OUTER -> new Node(NodeType.OUTER_EXPR, next().span); + case MODULE -> new Node(NodeType.MODULE_EXPR, next().span); + case NULL -> new Node(NodeType.NULL_EXPR, next().span); + case THROW -> { + var children = new ArrayList(); + children.add(makeTerminal(next())); + ff(children); + expect(Token.LPAREN, children, "unexpectedToken", "("); + ff(children); + children.add(parseExpr(")")); + ff(children); + expect(Token.RPAREN, children, "unexpectedToken", ")"); + yield new Node(NodeType.THROW_EXPR, children); + } + case TRACE -> { + var children = new ArrayList(); + children.add(makeTerminal(next())); + ff(children); + expect(Token.LPAREN, children, "unexpectedToken", "("); + ff(children); + children.add(parseExpr(")")); + ff(children); + expect(Token.RPAREN, children, "unexpectedToken", ")"); + yield new Node(NodeType.TRACE_EXPR, children); + } + case IMPORT, IMPORT_STAR -> { + var children = new ArrayList(); + children.add(makeTerminal(next())); + ff(children); + expect(Token.LPAREN, children, "unexpectedToken", "("); + ff(children); + children.add(parseStringConstant()); + ff(children); + expect(Token.RPAREN, children, "unexpectedToken", ")"); + yield new Node(NodeType.IMPORT_EXPR, children); + } + case READ, READ_STAR, READ_QUESTION -> { + var children = new ArrayList(); + children.add(makeTerminal(next())); + ff(children); + expect(Token.LPAREN, children, "unexpectedToken", "("); + ff(children); + children.add(parseExpr(")")); + ff(children); + expect(Token.RPAREN, children, "unexpectedToken", ")"); + yield new Node(NodeType.READ_EXPR, children); + } + case NEW -> { + var children = new ArrayList(); + var header = new ArrayList(); + header.add(makeTerminal(next())); + ff(header); + if (lookahead != Token.LBRACE) { + header.add(parseType("{")); + children.add(new Node(NodeType.NEW_HEADER, header)); + ff(children); + } else { + children.add(new Node(NodeType.NEW_HEADER, header)); + } + children.add(parseObjectBody()); + yield new Node(NodeType.NEW_EXPR, children); + } + case MINUS -> { + var children = new ArrayList(); + children.add(makeTerminal(next())); + ff(children); + // unary minus has higher precendence than most binary operators + children.add(parseExpr(expectation, 12)); + yield new Node(NodeType.UNARY_MINUS_EXPR, children); + } + case NOT -> { + var children = new ArrayList(); + children.add(makeTerminal(next())); + ff(children); + // logical not has higher precendence than most binary operators + children.add(parseExpr(expectation, 11)); + yield new Node(NodeType.LOGICAL_NOT_EXPR, children); + } + case LPAREN -> { + // can be function literal or parenthesized expression + if (isFunctionLiteral()) { + yield parseFunctionLiteral(); + } else { + yield parseParenthesizedExpr(); + } + } + case SUPER -> { + var children = new ArrayList(); + children.add(makeTerminal(next())); + ff(children); + if (lookahead == Token.DOT) { + children.add(makeTerminal(next())); + ff(children); + children.add(parseIdentifier()); + if (lookahead() == Token.LPAREN) { + ff(children); + children.add(parseArgumentList()); + } + yield new Node(NodeType.SUPER_ACCESS_EXPR, children); + } else { + expect(Token.LBRACK, children, "unexpectedToken", "["); + ff(children); + children.add(parseExpr()); + ff(children); + expect(Token.RBRACK, children, "unexpectedToken", "]"); + yield new Node(NodeType.SUPER_SUBSCRIPT_EXPR, children); + } + } + case IF -> { + var children = new ArrayList(); + var header = new ArrayList(); + header.add(makeTerminal(next())); + ff(header); + var condition = new ArrayList(); + var conditionExpr = new ArrayList(); + expect(Token.LPAREN, condition, "unexpectedToken", "("); + ff(conditionExpr); + conditionExpr.add(parseExpr(")")); + ff(conditionExpr); + condition.add(new Node(NodeType.IF_CONDITION_EXPR, conditionExpr)); + expect(Token.RPAREN, condition, "unexpectedToken", ")"); + header.add(new Node(NodeType.IF_CONDITION, condition)); + children.add(new Node(NodeType.IF_HEADER, header)); + var thenExpr = new ArrayList(); + ff(thenExpr); + thenExpr.add(parseExpr("else")); + ff(thenExpr); + children.add(new Node(NodeType.IF_THEN_EXPR, thenExpr)); + expect(Token.ELSE, children, "unexpectedToken", "else"); + var elseExpr = new ArrayList(); + ff(elseExpr); + elseExpr.add(parseExpr(expectation)); + children.add(new Node(NodeType.IF_ELSE_EXPR, elseExpr)); + yield new Node(NodeType.IF_EXPR, children); + } + case LET -> { + var children = new ArrayList(); + children.add(makeTerminal(next())); + ff(children); + var paramDef = new ArrayList(); + expect(Token.LPAREN, paramDef, "unexpectedToken", "("); + var param = new ArrayList(); + ff(param); + param.add(parseParameter()); + ff(param); + expect(Token.ASSIGN, param, "unexpectedToken", "="); + ff(param); + param.add(parseExpr(")")); + paramDef.add(new Node(NodeType.LET_PARAMETER, param)); + ff(paramDef); + expect(Token.RPAREN, paramDef, "unexpectedToken", ")"); + children.add(new Node(NodeType.LET_PARAMETER_DEFINITION, paramDef)); + ff(children); + children.add(parseExpr(expectation)); + yield new Node(NodeType.LET_EXPR, children); + } + case TRUE, FALSE -> new Node(NodeType.BOOL_LITERAL_EXPR, next().span); + case INT, HEX, BIN, OCT -> new Node(NodeType.INT_LITERAL_EXPR, next().span); + case FLOAT -> new Node(NodeType.FLOAT_LITERAL_EXPR, next().span); + case STRING_START -> parseSingleLineStringLiteralExpr(); + case STRING_MULTI_START -> parseMultiLineStringLiteralExpr(); + case IDENTIFIER -> parseUnqualifiedAccessExpr(); + case EOF -> + throw parserError( + ErrorMessages.create("unexpectedEndOfFile"), prev().span.stopSpan()); + default -> { + var text = _lookahead.text(lexer); + if (expectation != null) { + throw parserError("unexpectedToken", text, expectation); + } + throw parserError("unexpectedTokenForExpression", text); + } + }; + return parseExprRest(expr); + } + + @SuppressWarnings("DuplicatedCode") + private Node parseExprRest(Node expr) { + // amends + if (lookahead() == Token.LBRACE) { + var children = new ArrayList(); + children.add(expr); + ff(children); + if (expr.type == NodeType.PARENTHESIZED_EXPR + || expr.type == NodeType.AMENDS_EXPR + || expr.type == NodeType.NEW_EXPR) { + children.add(parseObjectBody()); + return parseExprRest(new Node(NodeType.AMENDS_EXPR, children)); + } + throw parserError("unexpectedCurlyProbablyAmendsExpression", expr.text(lexer.getSource())); + } + return expr; + } + + private boolean isFunctionLiteral() { + var originalCursor = cursor; + try { + next(); // open ( + ff(); + var token = next().token; + ff(); + if (token == Token.RPAREN) { + return lookahead == Token.ARROW; + } + if (token == Token.UNDERSCORE) { + return true; + } + if (token != Token.IDENTIFIER) { + return false; + } + if (lookahead == Token.COMMA || lookahead == Token.COLON) { + return true; + } + if (lookahead == Token.RPAREN) { + next(); + ff(); + return lookahead == Token.ARROW; + } + return false; + } finally { + backtrackTo(originalCursor); + } + } + + private Node parseSingleLineStringLiteralExpr() { + var children = new ArrayList(); + var start = next(); + children.add(makeTerminal(start)); // string start + while (lookahead != Token.STRING_END) { + switch (lookahead) { + case STRING_PART -> { + var tk = next(); + if (!tk.text(lexer).isEmpty()) { + children.add(make(NodeType.STRING_CHARS, tk.span)); + } + } + case STRING_ESCAPE_NEWLINE, + STRING_ESCAPE_TAB, + STRING_ESCAPE_QUOTE, + STRING_ESCAPE_BACKSLASH, + STRING_ESCAPE_RETURN, + STRING_ESCAPE_UNICODE -> + children.add(make(NodeType.STRING_ESCAPE, next().span)); + case INTERPOLATION_START -> { + children.add(makeTerminal(next())); + ff(children); + children.add(parseExpr(")")); + ff(children); + expect(Token.RPAREN, children, "unexpectedToken", ")"); + } + case EOF -> { + var delimiter = new StringBuilder(start.text(lexer)).reverse().toString(); + throw parserError("missingDelimiter", delimiter); + } + } + } + children.add(makeTerminal(next())); // string end + return new Node(NodeType.SINGLE_LINE_STRING_LITERAL_EXPR, children); + } + + private Node parseMultiLineStringLiteralExpr() { + var children = new ArrayList(); + var start = next(); + children.add(makeTerminal(start)); // string start + if (lookahead != Token.STRING_NEWLINE) { + throw parserError(ErrorMessages.create("stringContentMustBeginOnNewLine"), spanLookahead); + } + while (lookahead != Token.STRING_END) { + switch (lookahead) { + case STRING_PART -> { + var tk = next(); + if (!tk.text(lexer).isEmpty()) { + children.add(make(NodeType.STRING_CHARS, tk.span)); + } + } + case STRING_NEWLINE -> children.add(make(NodeType.STRING_NEWLINE, next().span)); + case STRING_ESCAPE_NEWLINE, + STRING_ESCAPE_TAB, + STRING_ESCAPE_QUOTE, + STRING_ESCAPE_BACKSLASH, + STRING_ESCAPE_RETURN, + STRING_ESCAPE_UNICODE -> + children.add(make(NodeType.STRING_ESCAPE, next().span)); + case INTERPOLATION_START -> { + children.add(makeTerminal(next())); + ff(children); + children.add(parseExpr(")")); + ff(children); + expect(Token.RPAREN, children, "unexpectedToken", ")"); + } + case EOF -> { + var delimiter = new StringBuilder(start.text(lexer)).reverse().toString(); + throw parserError("missingDelimiter", delimiter); + } + } + } + children.add(makeTerminal(next())); // string end + validateStringEndDelimiter(children); + validateStringIndentation(children); + return new Node(NodeType.MULTI_LINE_STRING_LITERAL_EXPR, children); + } + + private void validateStringEndDelimiter(List nodes) { + var beforeLast = nodes.get(nodes.size() - 2); + if (beforeLast.type == NodeType.STRING_NEWLINE) return; + var text = beforeLast.text(lexer.getSource()); + if (!text.isBlank()) { + throw parserError( + ErrorMessages.create("closingStringDelimiterMustBeginOnNewLine"), beforeLast.span); + } + } + + private void validateStringIndentation(List nodes) { + var indentNode = nodes.get(nodes.size() - 2); + if (indentNode.type == NodeType.STRING_NEWLINE) return; + var indent = indentNode.text(lexer.getSource()); + var previousNewline = false; + for (var i = 1; i < nodes.size() - 2; i++) { + var child = nodes.get(i); + if (child.type != NodeType.STRING_NEWLINE && previousNewline) { + var text = child.text(lexer.getSource()); + if (!text.startsWith(indent)) { + throw parserError(ErrorMessages.create("stringIndentationMustMatchLastLine"), child.span); + } + } + previousNewline = child.type == NodeType.STRING_NEWLINE; + } + } + + private Node parseParenthesizedExpr() { + var children = new ArrayList(); + expect(Token.LPAREN, children, "unexpectedToken", "("); + if (lookahead() == Token.RPAREN) { + ff(children); + children.add(makeTerminal(next())); + return new Node(NodeType.PARENTHESIZED_EXPR, children); + } + var elements = new ArrayList(); + ff(elements); + elements.add(parseExpr(")")); + ff(elements); + children.add(new Node(NodeType.PARENTHESIZED_EXPR_ELEMENTS, elements)); + expect(Token.RPAREN, children, "unexpectedToken", ")"); + return new Node(NodeType.PARENTHESIZED_EXPR, children); + } + + private Node parseFunctionLiteral() { + var paramListChildren = new ArrayList(); + expect(Token.LPAREN, paramListChildren, "unexpectedToken", "("); + if (lookahead() == Token.RPAREN) { + ff(paramListChildren); + paramListChildren.add(makeTerminal(next())); + } else { + var elements = new ArrayList(); + ff(elements); + parseListOf(Token.RPAREN, elements, this::parseParameter); + paramListChildren.add(new Node(NodeType.PARAMETER_LIST_ELEMENTS, elements)); + expect(Token.RPAREN, paramListChildren, "unexpectedToken2", ",", ")"); + } + var children = new ArrayList(); + children.add(new Node(NodeType.PARAMETER_LIST, paramListChildren)); + ff(children); + expect(Token.ARROW, children, "unexpectedToken", "->"); + var body = new ArrayList(); + ff(body); + body.add(parseExpr()); + children.add(new Node(NodeType.FUNCTION_LITERAL_BODY, body)); + return new Node(NodeType.FUNCTION_LITERAL_EXPR, children); + } + + private Node parseType() { + return parseType(null); + } + + private Node parseType(@Nullable String expectation) { + var children = new ArrayList(); + var hasDefault = false; + FullSpan start = null; + if (lookahead == Token.STAR) { + var tk = next(); + start = tk.span; + children.add(makeTerminal(tk)); + ff(children); + hasDefault = true; + } + var first = parseTypeAtom(expectation); + children.add(first); + + if (lookahead() != Token.UNION) { + if (hasDefault) { + throw parserError(ErrorMessages.create("notAUnion"), start.endWith(first.span)); + } + return first; + } + + while (lookahead() == Token.UNION) { + ff(children); + children.add(makeTerminal(next())); + ff(children); + if (lookahead == Token.STAR) { + if (hasDefault) { + throw parserError("multipleUnionDefaults"); + } + children.add(makeTerminal(next())); + ff(children); + hasDefault = true; + } + var type = parseTypeAtom(expectation); + children.add(type); + } + return new Node(NodeType.UNION_TYPE, children); + } + + private Node parseTypeAtom(@Nullable String expectation) { + var typ = + switch (lookahead) { + case UNKNOWN -> make(NodeType.UNKNOWN_TYPE, next().span); + case NOTHING -> make(NodeType.NOTHING_TYPE, next().span); + case MODULE -> make(NodeType.MODULE_TYPE, next().span); + case LPAREN -> { + var children = new ArrayList(); + children.add(makeTerminal(next())); + var totalTypes = 0; + if (lookahead() == Token.RPAREN) { + ff(children); + children.add(makeTerminal(next())); + } else { + var elements = new ArrayList(); + ff(elements); + elements.add(parseType(")")); + ff(elements); + while (lookahead == Token.COMMA) { + var comma = next(); + if (lookahead() == Token.RPAREN) { + ff(elements); + break; + } + elements.add(makeTerminal(comma)); + ff(elements); + elements.add(parseType(")")); + totalTypes++; + ff(elements); + } + children.add(new Node(NodeType.PARENTHESIZED_TYPE_ELEMENTS, elements)); + expect(Token.RPAREN, children, "unexpectedToken2", ",", ")"); + } + if (totalTypes > 1 || lookahead() == Token.ARROW) { + var actualChildren = new ArrayList(); + actualChildren.add(new Node(NodeType.FUNCTION_TYPE_PARAMETERS, children)); + ff(actualChildren); + expect(Token.ARROW, actualChildren, "unexpectedToken", "->"); + ff(actualChildren); + actualChildren.add(parseType(expectation)); + yield new Node(NodeType.FUNCTION_TYPE, actualChildren); + } else { + yield new Node(NodeType.PARENTHESIZED_TYPE, children); + } + } + case IDENTIFIER -> { + var children = new ArrayList(); + children.add(parseQualifiedIdentifier()); + if (lookahead() == Token.LT) { + ff(children); + children.add(parseTypeArgumentList()); + } + yield new Node(NodeType.DECLARED_TYPE, children); + } + case STRING_START -> + new Node(NodeType.STRING_CONSTANT_TYPE, List.of(parseStringConstant())); + default -> { + var text = _lookahead.text(lexer); + if (expectation != null) { + throw parserError("unexpectedTokenForType2", text, expectation); + } + throw parserError("unexpectedTokenForType", text); + } + }; + + if (typ.type == NodeType.FUNCTION_TYPE) return typ; + return parseTypeEnd(typ); + } + + private Node parseTypeEnd(Node type) { + var children = new ArrayList(); + children.add(type); + // nullable types + if (lookahead() == Token.QUESTION) { + ff(children); + children.add(makeTerminal(next())); + var res = new Node(NodeType.NULLABLE_TYPE, children); + return parseTypeEnd(res); + } + // constrained types: have to start in the same line as the type + var fla = fullLookahead(); + if (fla.tk.token == Token.LPAREN && noSemicolonInbetween() && fla.tk.newLinesBetween == 0) { + ff(children); + var constraint = new ArrayList(); + constraint.add(makeTerminal(next())); + var elements = new ArrayList(); + ff(elements); + parseListOf(Token.RPAREN, elements, () -> parseExpr(")")); + constraint.add(new Node(NodeType.CONSTRAINED_TYPE_ELEMENTS, elements)); + expect(Token.RPAREN, constraint, "unexpectedToken2", ",", ")"); + children.add(new Node(NodeType.CONSTRAINED_TYPE_CONSTRAINT, constraint)); + var res = new Node(NodeType.CONSTRAINED_TYPE, children); + return parseTypeEnd(res); + } + return type; + } + + private Node parseAnnotation() { + var children = new ArrayList(); + children.add(makeTerminal(next())); + children.add(parseType()); + if (lookahead() == Token.LBRACE) { + ff(children); + children.add(parseObjectBody()); + } + return new Node(NodeType.ANNOTATION, children); + } + + private Node parseParameter() { + if (lookahead == Token.UNDERSCORE) { + return new Node(NodeType.PARAMETER, List.of(makeTerminal(next()))); + } + return parseTypedIdentifier(); + } + + private Node parseTypedIdentifier() { + var children = new ArrayList(); + children.add(parseIdentifier()); + if (lookahead() == Token.COLON) { + ff(children); + children.add(parseTypeAnnotation()); + } + return new Node(NodeType.PARAMETER, children); + } + + private Node parseParameterList() { + var children = new ArrayList(); + expect(Token.LPAREN, children, "unexpectedToken", "("); + ff(children); + if (lookahead == Token.RPAREN) { + children.add(makeTerminal(next())); + } else { + var elements = new ArrayList(); + parseListOf(Token.RPAREN, elements, this::parseParameter); + children.add(new Node(NodeType.PARAMETER_LIST_ELEMENTS, elements)); + expect(Token.RPAREN, children, "unexpectedToken2", ",", ")"); + } + return new Node(NodeType.PARAMETER_LIST, children); + } + + private List parseBodyList() { + if (lookahead != Token.LBRACE) { + throw parserError("unexpectedToken2", _lookahead.text(lexer), "{", "="); + } + var bodies = new ArrayList(); + do { + bodies.add(parseObjectBody()); + } while (lookahead() == Token.LBRACE); + return bodies; + } + + private Node parseTypeParameterList() { + var children = new ArrayList(); + expect(Token.LT, children, "unexpectedToken", "<"); + ff(children); + var elements = new ArrayList(); + parseListOf(Token.GT, elements, this::parseTypeParameter); + children.add(new Node(NodeType.TYPE_PARAMETER_LIST_ELEMENTS, elements)); + expect(Token.GT, children, "unexpectedToken2", ",", ">"); + return new Node(NodeType.TYPE_PARAMETER_LIST, children); + } + + private Node parseTypeArgumentList() { + var children = new ArrayList(); + expect(Token.LT, children, "unexpectedToken", "<"); + ff(children); + var elements = new ArrayList(); + parseListOf(Token.GT, elements, () -> parseType(">")); + children.add(new Node(NodeType.TYPE_ARGUMENT_LIST_ELEMENTS, elements)); + expect(Token.GT, children, "unexpectedToken2", ",", ">"); + return new Node(NodeType.TYPE_ARGUMENT_LIST, children); + } + + private Node parseArgumentList() { + var children = new ArrayList(); + expect(Token.LPAREN, children, "unexpectedToken", "("); + if (lookahead() == Token.RPAREN) { + ff(children); + children.add(makeTerminal(next())); + return new Node(NodeType.ARGUMENT_LIST, children); + } + var elements = new ArrayList(); + ff(elements); + parseListOf(Token.RPAREN, elements, () -> parseExpr(")")); + ff(elements); + children.add(new Node(NodeType.ARGUMENT_LIST_ELEMENTS, elements)); + expect(Token.RPAREN, children, "unexpectedToken2", ",", ")"); + return new Node(NodeType.ARGUMENT_LIST, children); + } + + private Node parseTypeParameter() { + var children = new ArrayList(); + if (lookahead == Token.IN) { + children.add(makeTerminal(next())); + } else if (lookahead == Token.OUT) { + children.add(makeTerminal(next())); + } + children.add(parseIdentifier()); + return new Node(NodeType.TYPE_PARAMETER, children); + } + + private Node parseTypeAnnotation() { + var children = new ArrayList(); + expect(Token.COLON, children, "unexpectedToken", ":"); + ff(children); + children.add(parseType()); + return new Node(NodeType.TYPE_ANNOTATION, children); + } + + private Node parseIdentifier() { + if (lookahead != Token.IDENTIFIER) { + if (lookahead.isKeyword()) { + throw parserError("keywordNotAllowedHere", lookahead.text()); + } + throw parserError("unexpectedToken", _lookahead.text(lexer), "identifier"); + } + return new Node(NodeType.IDENTIFIER, next().span); + } + + private Node parseStringConstant() { + var children = new ArrayList(); + var startTk = expect(Token.STRING_START, "unexpectedToken", "\""); + children.add(makeTerminal(startTk)); + while (lookahead != Token.STRING_END) { + switch (lookahead) { + case STRING_PART, + STRING_ESCAPE_NEWLINE, + STRING_ESCAPE_TAB, + STRING_ESCAPE_QUOTE, + STRING_ESCAPE_BACKSLASH, + STRING_ESCAPE_RETURN, + STRING_ESCAPE_UNICODE -> + children.add(makeTerminal(next())); + case EOF -> { + var delimiter = new StringBuilder(startTk.text(lexer)).reverse().toString(); + throw parserError("missingDelimiter", delimiter); + } + case INTERPOLATION_START -> throw parserError("interpolationInConstant"); + // the lexer makes sure we only get the above tokens inside a string + default -> throw new RuntimeException("Unreacheable code"); + } + } + children.add(makeTerminal(next())); // string end + return new Node(NodeType.STRING_CHARS, children); + } + + private FullToken expect(Token type, String errorKey, Object... messageArgs) { + if (lookahead != type) { + var span = spanLookahead; + if (lookahead == Token.EOF || _lookahead.newLinesBetween > 0) { + // don't point at the EOF or the next line, but at the end of the last token + span = prev().span.stopSpan(); + } + var args = messageArgs; + if (errorKey.startsWith("unexpectedToken")) { + args = new Object[messageArgs.length + 1]; + args[0] = lookahead == Token.EOF ? "EOF" : _lookahead.text(lexer); + System.arraycopy(messageArgs, 0, args, 1, messageArgs.length); + } + throw parserError(ErrorMessages.create(errorKey, args), span); + } + return next(); + } + + private void expect(Token type, List children, String errorKey, Object... messageArgs) { + var tk = expect(type, errorKey, messageArgs); + children.add(makeTerminal(tk)); + } + + private void parseListOf(Token terminator, List children, Supplier parser) { + children.add(parser.get()); + ff(children); + while (lookahead == Token.COMMA) { + // don't store the last comma + var comma = makeTerminal(next()); + if (lookahead() == terminator) break; + children.add(comma); + ff(children); + children.add(parser.get()); + ff(children); + } + } + + private GenericParserError parserError(String messageKey, Object... args) { + return new GenericParserError(ErrorMessages.create(messageKey, args), spanLookahead); + } + + private GenericParserError parserError(String message, FullSpan span) { + return new GenericParserError(message, span); + } + + private boolean isModuleDecl() { + var _cursor = cursor; + var ftk = tokens.get(_cursor); + while (ftk.token.isAffix() || ftk.token.isModifier()) { + ftk = tokens.get(++_cursor); + } + var tk = ftk.token; + return tk == Token.MODULE || tk == Token.EXTENDS || tk == Token.AMENDS; + } + + private boolean isImport() { + var _cursor = cursor; + var ftk = tokens.get(_cursor); + while (ftk.token.isAffix()) { + ftk = tokens.get(++_cursor); + } + var tk = ftk.token; + return tk == Token.IMPORT || tk == Token.IMPORT_STAR; + } + + private FullToken next() { + var tmp = tokens.get(cursor++); + _lookahead = tokens.get(cursor); + lookahead = _lookahead.token; + spanLookahead = _lookahead.span; + return tmp; + } + + private boolean noSemicolonInbetween() { + return tokens.get(cursor - 1).token != Token.SEMICOLON; + } + + private void backtrack() { + var tmp = tokens.get(--cursor); + lookahead = tmp.token; + spanLookahead = tmp.span; + } + + private void backtrackTo(int point) { + cursor = point; + var tmp = tokens.get(cursor); + lookahead = tmp.token; + spanLookahead = tmp.span; + } + + private FullToken prev() { + return tokens.get(cursor - 1); + } + + // Jump over affixes and find the next token + private Token lookahead() { + var i = cursor; + var tmp = tokens.get(i); + while (tmp.token.isAffix() && tmp.token != Token.EOF) { + tmp = tokens.get(++i); + } + return tmp.token; + } + + // Jump over affixes and find the next token + private LookaheadSearch fullLookahead() { + var i = cursor; + var hasSemicolon = false; + var tmp = tokens.get(i); + while (tmp.token.isAffix() && tmp.token != Token.EOF) { + if (tmp.token == Token.SEMICOLON) { + hasSemicolon = true; + } + tmp = tokens.get(++i); + } + return new LookaheadSearch(tmp, hasSemicolon); + } + + private record LookaheadSearch(FullToken tk, boolean hasSemicolon) {} + + private record HeaderParts(List preffixes, @Nullable Node modifierList) {} + + private HeaderParts getHeaderParts(List nodes) { + if (nodes.isEmpty()) return new HeaderParts(nodes, null); + var last = nodes.get(nodes.size() - 1); + if (last.type == NodeType.MODIFIER_LIST) { + return new HeaderParts(nodes.subList(0, nodes.size() - 1), last); + } + return new HeaderParts(nodes, null); + } + + private Node make(NodeType type, FullSpan span) { + return new Node(type, span); + } + + private Node makeAffix(FullToken tk) { + return new Node(nodeTypeForAffix(tk.token), tk.span); + } + + private Node makeTerminal(FullToken tk) { + return new Node(NodeType.TERMINAL, tk.span); + } + + // fast-forward over affix tokens + // store children + private void ff(List children) { + var tmp = tokens.get(cursor); + while (tmp.token.isAffix()) { + children.add(makeAffix(tmp)); + tmp = tokens.get(++cursor); + } + _lookahead = tmp; + lookahead = _lookahead.token; + spanLookahead = _lookahead.span; + } + + // fast-forward over affix tokens + private void ff() { + var tmp = tokens.get(cursor); + while (tmp.token.isAffix()) { + tmp = tokens.get(++cursor); + } + _lookahead = tmp; + lookahead = _lookahead.token; + spanLookahead = _lookahead.span; + } + + private NodeType nodeTypeForAffix(Token token) { + return switch (token) { + case LINE_COMMENT -> NodeType.LINE_COMMENT; + case BLOCK_COMMENT -> NodeType.BLOCK_COMMENT; + case SHEBANG -> NodeType.SHEBANG; + case SEMICOLON -> NodeType.SEMICOLON; + default -> throw new RuntimeException("Unreacheable code"); + }; + } + + private record FullToken(Token token, FullSpan span, int newLinesBetween) { + String text(Lexer lexer) { + return lexer.textFor(span.charIndex(), span.length()); + } + } + + private record HeaderResult( + boolean hasDocComment, boolean hasAnnotations, boolean hasModifiers) {} +} diff --git a/pkl-parser/src/main/java/org/pkl/parser/Lexer.java b/pkl-parser/src/main/java/org/pkl/parser/Lexer.java index 87c3ad531..089094710 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/Lexer.java +++ b/pkl-parser/src/main/java/org/pkl/parser/Lexer.java @@ -20,12 +20,12 @@ import org.pkl.parser.syntax.generic.FullSpan; import org.pkl.parser.util.ErrorMessages; -public class Lexer { +public final class Lexer { private final char[] source; private final int size; - protected int cursor = 0; - protected int sCursor = 0; + private int cursor = 0; + private int sCursor = 0; private int line = 1; private int sLine = 1; private int col = 1; @@ -36,7 +36,7 @@ public class Lexer { private boolean stringEnded = false; private boolean isEscape = false; // how many newlines exist between two subsequent tokens - protected int newLinesBetween = 0; + private int newLinesBetween = 0; private static final int EOF = -1; @@ -73,6 +73,10 @@ public int getCursor() { return cursor; } + public int getNewLinesBetween() { + return newLinesBetween; + } + public char[] getSource() { return source; } diff --git a/pkl-parser/src/main/java/org/pkl/parser/Parser.java b/pkl-parser/src/main/java/org/pkl/parser/Parser.java index 688c90823..ff12f537b 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/Parser.java +++ b/pkl-parser/src/main/java/org/pkl/parser/Parser.java @@ -1,5 +1,5 @@ /* - * Copyright © 2024-2025 Apple Inc. and the Pkl project authors. All rights reserved. + * Copyright © 2026 Apple Inc. and the Pkl project authors. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,1854 +15,22 @@ */ package org.pkl.parser; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.function.Supplier; -import org.pkl.parser.syntax.Annotation; -import org.pkl.parser.syntax.ArgumentList; -import org.pkl.parser.syntax.Class; -import org.pkl.parser.syntax.ClassBody; -import org.pkl.parser.syntax.ClassMethod; -import org.pkl.parser.syntax.ClassProperty; -import org.pkl.parser.syntax.DocComment; import org.pkl.parser.syntax.Expr; -import org.pkl.parser.syntax.Expr.AmendsExpr; -import org.pkl.parser.syntax.Expr.BinaryOperatorExpr; -import org.pkl.parser.syntax.Expr.BoolLiteralExpr; -import org.pkl.parser.syntax.Expr.FloatLiteralExpr; -import org.pkl.parser.syntax.Expr.FunctionLiteralExpr; -import org.pkl.parser.syntax.Expr.IfExpr; -import org.pkl.parser.syntax.Expr.IntLiteralExpr; -import org.pkl.parser.syntax.Expr.LetExpr; -import org.pkl.parser.syntax.Expr.LogicalNotExpr; -import org.pkl.parser.syntax.Expr.ModuleExpr; -import org.pkl.parser.syntax.Expr.MultiLineStringLiteralExpr; -import org.pkl.parser.syntax.Expr.NewExpr; -import org.pkl.parser.syntax.Expr.NonNullExpr; -import org.pkl.parser.syntax.Expr.NullLiteralExpr; -import org.pkl.parser.syntax.Expr.OuterExpr; -import org.pkl.parser.syntax.Expr.ParenthesizedExpr; -import org.pkl.parser.syntax.Expr.QualifiedAccessExpr; -import org.pkl.parser.syntax.Expr.ReadExpr; -import org.pkl.parser.syntax.Expr.ReadType; -import org.pkl.parser.syntax.Expr.SingleLineStringLiteralExpr; -import org.pkl.parser.syntax.Expr.SubscriptExpr; -import org.pkl.parser.syntax.Expr.SuperAccessExpr; -import org.pkl.parser.syntax.Expr.SuperSubscriptExpr; -import org.pkl.parser.syntax.Expr.ThisExpr; -import org.pkl.parser.syntax.Expr.ThrowExpr; -import org.pkl.parser.syntax.Expr.TraceExpr; -import org.pkl.parser.syntax.Expr.UnaryMinusExpr; -import org.pkl.parser.syntax.Expr.UnqualifiedAccessExpr; -import org.pkl.parser.syntax.ExtendsOrAmendsClause; -import org.pkl.parser.syntax.Identifier; -import org.pkl.parser.syntax.ImportClause; -import org.pkl.parser.syntax.Keyword; -import org.pkl.parser.syntax.Modifier; import org.pkl.parser.syntax.Module; -import org.pkl.parser.syntax.ModuleDecl; -import org.pkl.parser.syntax.Node; -import org.pkl.parser.syntax.ObjectBody; -import org.pkl.parser.syntax.ObjectMember; -import org.pkl.parser.syntax.Operator; -import org.pkl.parser.syntax.Parameter; -import org.pkl.parser.syntax.Parameter.TypedIdentifier; -import org.pkl.parser.syntax.ParameterList; -import org.pkl.parser.syntax.QualifiedIdentifier; import org.pkl.parser.syntax.ReplInput; -import org.pkl.parser.syntax.StringConstant; -import org.pkl.parser.syntax.StringPart; -import org.pkl.parser.syntax.StringPart.StringChars; -import org.pkl.parser.syntax.Type; -import org.pkl.parser.syntax.Type.DeclaredType; -import org.pkl.parser.syntax.Type.ParenthesizedType; -import org.pkl.parser.syntax.Type.StringConstantType; -import org.pkl.parser.syntax.TypeAlias; -import org.pkl.parser.syntax.TypeAnnotation; -import org.pkl.parser.syntax.TypeArgumentList; -import org.pkl.parser.syntax.TypeParameter; -import org.pkl.parser.syntax.TypeParameterList; -import org.pkl.parser.util.ErrorMessages; -import org.pkl.parser.util.Nullable; - -@SuppressWarnings("DuplicatedCode") -public class Parser { - - private Lexer lexer; - private Token lookahead; - private Span spanLookahead; - private boolean backtracking = false; - private FullToken prev; - private FullToken _lookahead; - private boolean precededBySemicolon = false; +public final class Parser { public Parser() {} - private void init(String source) { - this.lexer = new Lexer(source); - _lookahead = forceNext(); - lookahead = _lookahead.token; - spanLookahead = _lookahead.span; - } - public Module parseModule(String source) { - init(source); - if (lookahead == Token.EOF) { - return new Module(Collections.singletonList(null), new Span(0, 0)); - } - if (lookahead == Token.SHEBANG) next(); - var start = spanLookahead; - Span end = null; - ModuleDecl moduleDecl; - var nodes = new ArrayList(); - try { - var header = parseMemberHeader(); - - moduleDecl = parseModuleDecl(header); - if (moduleDecl != null) { - end = moduleDecl.span(); - header = null; - } - nodes.add(moduleDecl); - // imports - while (lookahead == Token.IMPORT || lookahead == Token.IMPORT_STAR) { - if (header != null && header.isNotEmpty()) { - throw parserError("wrongHeaders", "Imports"); - } - var _import = parseImportDecl(); - nodes.add(_import); - end = _import.span(); - } - - // entries - if (header != null && header.isNotEmpty()) { - end = parseModuleMember(header, nodes); - } - - while (lookahead != Token.EOF) { - header = parseMemberHeader(); - end = parseModuleMember(header, nodes); - } - return new Module(nodes, start.endWith(spanLookahead)); - } catch (ParserError pe) { - var spanEnd = end != null ? end : start; - pe.setPartialParseResult(new Module(nodes, start.endWith(spanEnd))); - throw pe; - } + return new ParserImpl(source).parseModule(); } public Expr parseExpressionInput(String source) { - init(source); - var expr = parseExpr(); - expect(Token.EOF, "unexpectedToken", "end of file"); - return expr; + return new ParserImpl(source).parseExpressionInput(); } public ReplInput parseReplInput(String source) { - init(source); - var nodes = new ArrayList(); - while (lookahead != Token.EOF) { - var header = parseMemberHeader(); - switch (lookahead) { - case IMPORT, IMPORT_STAR -> { - ensureEmptyHeaders(header, "Imports"); - nodes.add(parseImportDecl()); - } - case MODULE, AMENDS, EXTENDS -> nodes.add(parseModuleDecl(header)); - case CLASS -> nodes.add(parseClass(header)); - case TYPE_ALIAS -> nodes.add(parseTypeAlias(header)); - case FUNCTION -> nodes.add(parseClassMethod(header)); - case IDENTIFIER -> { - next(); - switch (lookahead) { - case COLON, ASSIGN, LBRACE -> { - backtrack(); - nodes.add(parseClassProperty(header)); - } - default -> { - backtrack(); - ensureEmptyHeaders(header, "Expressions"); - nodes.add(parseExpr()); - } - } - } - default -> { - ensureEmptyHeaders(header, "Expressions"); - nodes.add(parseExpr()); - } - } - } - Span span; - if (nodes.isEmpty()) { - span = new Span(0, 0); - } else { - span = nodes.get(0).span().endWith(nodes.get(nodes.size() - 1).span()); - } - return new ReplInput(nodes, span); - } - - private @Nullable ModuleDecl parseModuleDecl(MemberHeader header) { - QualifiedIdentifier moduleName = null; - Keyword moduleKeyword = null; - var start = header.span(); - Span end = null; - if (lookahead == Token.MODULE) { - var module = expect(Token.MODULE, "unexpectedToken", "module"); - moduleKeyword = new Keyword(module.span); - if (start == null) { - start = module.span; - } - moduleName = parseQualifiedIdentifier(); - end = moduleName.span(); - } - var extendsOrAmendsDecl = parseExtendsAmendsDecl(); - if (extendsOrAmendsDecl != null) { - if (start == null) { - start = extendsOrAmendsDecl.span(); - } - end = extendsOrAmendsDecl.span(); - } - if (moduleName != null || extendsOrAmendsDecl != null) { - var children = new ArrayList(); - children.add(header.docComment); - children.addAll(header.annotations); - var modifiersOffset = children.size(); - children.addAll(header.modifiers); - var nameOffset = children.size(); - children.add(moduleKeyword); - children.add(moduleName); - children.add(extendsOrAmendsDecl); - return new ModuleDecl(children, modifiersOffset, nameOffset, start.endWith(end)); - } - return null; - } - - private QualifiedIdentifier parseQualifiedIdentifier() { - var idents = parseListOf(Token.DOT, this::parseIdentifier); - return new QualifiedIdentifier(idents); - } - - private @Nullable ExtendsOrAmendsClause parseExtendsAmendsDecl() { - if (lookahead == Token.EXTENDS) { - var tk = next().span; - var url = parseStringConstant(); - return new ExtendsOrAmendsClause( - url, ExtendsOrAmendsClause.Type.EXTENDS, tk.endWith(url.span())); - } - if (lookahead == Token.AMENDS) { - var tk = next().span; - var url = parseStringConstant(); - return new ExtendsOrAmendsClause( - url, ExtendsOrAmendsClause.Type.AMENDS, tk.endWith(url.span())); - } - return null; - } - - private ImportClause parseImportDecl() { - Span start; - boolean isGlob = false; - if (lookahead == Token.IMPORT_STAR) { - start = next().span; - isGlob = true; - } else { - start = expect(Token.IMPORT, "unexpectedToken2", "import", "import*").span; - } - var str = parseStringConstant(); - var end = str.span(); - Identifier alias = null; - if (lookahead == Token.AS) { - next(); - alias = parseIdentifier(); - end = alias.span(); - } - return new ImportClause(str, isGlob, alias, start.endWith(end)); - } - - private MemberHeader parseMemberHeader() { - DocComment docComment = null; - var annotations = new ArrayList(); - var modifiers = new ArrayList(); - if (lookahead == Token.DOC_COMMENT) { - docComment = parseDocComment(); - } - while (lookahead == Token.AT) { - annotations.add(parseAnnotation()); - } - while (lookahead.isModifier()) { - modifiers.add(parseModifier()); - } - return new MemberHeader(docComment, annotations, modifiers); - } - - private DocComment parseDocComment() { - var spans = new ArrayList(); - spans.add(nextComment().span); - while (lookahead == Token.DOC_COMMENT - || lookahead == Token.LINE_COMMENT - || lookahead == Token.BLOCK_COMMENT) { - var next = nextComment(); - // newlines are not allowed in doc comments - if (next.newLinesBetween > 1) { - if (next.token == Token.DOC_COMMENT) { - backtrack(); - } - break; - } - if (next.token == Token.DOC_COMMENT) { - spans.add(next.span); - } - } - while (lookahead == Token.LINE_COMMENT || lookahead == Token.BLOCK_COMMENT) { - nextComment(); - } - return new DocComment(spans); - } - - private Span parseModuleMember(MemberHeader header, List nodes) { - switch (lookahead) { - case IDENTIFIER -> { - var node = parseClassProperty(header); - nodes.add(node); - return node.span(); - } - case TYPE_ALIAS -> { - var node = parseTypeAlias(header); - nodes.add(node); - return node.span(); - } - case CLASS -> { - var node = parseClass(header); - nodes.add(node); - return node.span(); - } - case FUNCTION -> { - var node = parseClassMethod(header); - nodes.add(node); - return node.span(); - } - case EOF -> throw parserError("unexpectedEndOfFile"); - default -> { - if (lookahead.isKeyword()) { - throw parserError("keywordNotAllowedHere", lookahead.text()); - } - if (lookahead == Token.DOC_COMMENT) { - throw parserError("danglingDocComment"); - } - throw parserError("invalidTopLevelToken"); - } - } - } - - private TypeAlias parseTypeAlias(MemberHeader header) { - var typeAlias = next().span; - var startSpan = header.span(typeAlias); - - var identifier = parseIdentifier(); - TypeParameterList typePars = null; - if (lookahead == Token.LT) { - typePars = parseTypeParameterList(); - } - expect(Token.ASSIGN, "unexpectedToken", "="); - var type = parseType(); - var children = new ArrayList(header.annotations.size() + header.modifiers.size() + 5); - children.add(header.docComment); - children.addAll(header.annotations); - var modifiersOffset = header.annotations.size() + 1; - children.addAll(header.modifiers); - var nameOffset = modifiersOffset + header.modifiers.size(); - children.add(new Keyword(typeAlias)); - children.add(identifier); - children.add(typePars); - children.add(type); - return new TypeAlias(children, modifiersOffset, nameOffset, startSpan.endWith(type.span())); - } - - private Class parseClass(MemberHeader header) { - var classKeyword = next(); - var startSpan = header.span(classKeyword.span); - var children = new ArrayList(); - children.add(header.docComment); - children.addAll(header.annotations); - var modifiersOffset = header.annotations.size() + 1; - children.addAll(header.modifiers); - var nameOffset = modifiersOffset + header.modifiers.size(); - children.add(new Keyword(classKeyword.span)); - var name = parseIdentifier(); - children.add(name); - TypeParameterList typePars = null; - var end = name.span(); - if (lookahead == Token.LT) { - typePars = parseTypeParameterList(); - end = typePars.span(); - } - children.add(typePars); - Type superClass = null; - if (lookahead == Token.EXTENDS) { - next(); - superClass = parseType(); - end = superClass.span(); - } - children.add(superClass); - - ClassBody body = null; - if (lookahead == Token.LBRACE) { - body = parseClassBody(); - end = body.span(); - } - children.add(body); - - return new Class(children, modifiersOffset, nameOffset, startSpan.endWith(end)); - } - - private ClassBody parseClassBody() { - var start = expect(Token.LBRACE, "missingDelimiter", "{").span; - var children = new ArrayList(); - while (lookahead != Token.RBRACE && lookahead != Token.EOF) { - var entryHeader = parseMemberHeader(); - if (lookahead == Token.FUNCTION) { - children.add(parseClassMethod(entryHeader)); - } else { - children.add(parseClassProperty(entryHeader)); - } - } - if (lookahead == Token.EOF) { - throw new ParserError( - ErrorMessages.create("missingDelimiter", "}"), prev.span.stopSpan().move(1)); - } - var end = expect(Token.RBRACE, "missingDelimiter", "}").span; - return new ClassBody(children, start.endWith(end)); - } - - private ClassProperty parseClassProperty(MemberHeader header) { - var name = parseIdentifier(); - var start = header.span(name.span()); - var children = new ArrayList(); - children.add(header.docComment); - children.addAll(header.annotations); - var modifiersOffset = header.annotations.size() + 1; - children.addAll(header.modifiers); - var nameOffset = modifiersOffset + header.modifiers.size(); - TypeAnnotation typeAnnotation = null; - Expr expr = null; - var bodies = new ArrayList(); - if (lookahead == Token.COLON) { - typeAnnotation = parseTypeAnnotation(); - } - if (lookahead == Token.ASSIGN) { - next(); - expr = parseExpr(); - } else if (lookahead == Token.LBRACE) { - if (typeAnnotation != null) { - throw parserError("typeAnnotationInAmends"); - } - while (lookahead == Token.LBRACE) { - bodies.add(parseObjectBody()); - } - } - children.add(name); - children.add(typeAnnotation); - children.add(expr); - children.addAll(bodies); - if (expr != null) { - return new ClassProperty(children, modifiersOffset, nameOffset, start.endWith(expr.span())); - } - if (!bodies.isEmpty()) { - return new ClassProperty( - children, - modifiersOffset, - nameOffset, - start.endWith(bodies.get(bodies.size() - 1).span())); - } - if (typeAnnotation == null) { - throw new ParserError(ErrorMessages.create("invalidProperty"), name.span()); - } - return new ClassProperty( - children, modifiersOffset, nameOffset, start.endWith(typeAnnotation.span())); - } - - private ClassMethod parseClassMethod(MemberHeader header) { - var func = expect(Token.FUNCTION, "unexpectedToken", "function").span; - var start = header.span(func); - var headerSpanStart = header.modifierSpan(func); - var children = new ArrayList(); - children.add(header.docComment); - children.addAll(header.annotations); - var modifiersOffset = header.annotations.size() + 1; - children.addAll(header.modifiers); - var nameOffset = modifiersOffset + header.modifiers.size(); - var name = parseIdentifier(); - children.add(name); - TypeParameterList typePars = null; - if (lookahead == Token.LT) { - typePars = parseTypeParameterList(); - } - children.add(typePars); - var parameterList = parseParameterList(); - children.add(parameterList); - var end = parameterList.span(); - var endHeader = end; - TypeAnnotation typeAnnotation = null; - if (lookahead == Token.COLON) { - typeAnnotation = parseTypeAnnotation(); - end = typeAnnotation.span(); - endHeader = end; - } - children.add(typeAnnotation); - Expr expr = null; - if (lookahead == Token.ASSIGN) { - next(); - expr = parseExpr(); - end = expr.span(); - } - children.add(expr); - return new ClassMethod( - children, - modifiersOffset, - nameOffset, - headerSpanStart.endWith(endHeader), - start.endWith(end)); - } - - private ObjectBody parseObjectBody() { - var start = expect(Token.LBRACE, "unexpectedToken", "{").span; - List nodes = new ArrayList<>(); - var membersOffset = -1; - if (lookahead == Token.RBRACE) { - return new ObjectBody(List.of(), 0, start.endWith(next().span)); - } else if (lookahead == Token.UNDERSCORE) { - // it's a parameter - nodes.addAll(parseListOfParameter(Token.COMMA, Token.ARROW)); - expect(Token.ARROW, "unexpectedToken2", ",", "->"); - } else if (lookahead == Token.IDENTIFIER) { - // not sure what it is yet - var identifier = parseIdentifier(); - if (lookahead == Token.ARROW) { - // it's a parameter - next(); - nodes.add(new TypedIdentifier(identifier, null, identifier.span())); - } else if (lookahead == Token.COMMA) { - // it's a parameter - backtrack(); - nodes.addAll(parseListOfParameter(Token.COMMA, Token.ARROW)); - expect(Token.ARROW, "unexpectedToken2", ",", "->"); - } else if (lookahead == Token.COLON) { - // still not sure - var colon = next().span; - var type = parseType(); - var typeAnnotation = new TypeAnnotation(type, colon.endWith(type.span())); - if (lookahead == Token.COMMA) { - // it's a parameter - next(); - nodes.add( - new TypedIdentifier( - identifier, typeAnnotation, identifier.span().endWith(type.span()))); - nodes.addAll(parseListOfParameter(Token.COMMA, Token.ARROW)); - expect(Token.ARROW, "unexpectedToken2", ",", "->"); - } else if (lookahead == Token.ARROW) { - // it's a parameter - next(); - nodes.add( - new TypedIdentifier( - identifier, typeAnnotation, identifier.span().endWith(type.span()))); - } else { - // it's a member - expect(Token.ASSIGN, "unexpectedToken", "="); - var expr = parseExpr(); - membersOffset = 0; - nodes.add( - new ObjectMember.ObjectProperty( - Arrays.asList(identifier, typeAnnotation, expr), - 0, - identifier.span().endWith(expr.span()))); - } - } else { - // member - backtrack(); - } - } - - if (membersOffset < 0) { - membersOffset = nodes.size(); - } - // members - while (lookahead != Token.RBRACE) { - if (lookahead == Token.EOF) { - throw new ParserError( - ErrorMessages.create("missingDelimiter", "}"), prev.span.stopSpan().move(1)); - } - nodes.add(parseObjectMember()); - } - var end = next().span; - return new ObjectBody(nodes, membersOffset, start.endWith(end)); - } - - private ObjectMember parseObjectMember() { - return switch (lookahead) { - case IDENTIFIER -> { - next(); - if (lookahead == Token.LBRACE || lookahead == Token.COLON || lookahead == Token.ASSIGN) { - // it's an objectProperty - backtrack(); - yield parseObjectProperty(null); - } else { - backtrack(); - // it's an expression - yield parseObjectElement(); - } - } - case FUNCTION -> parseObjectMethod(List.of()); - case LPRED -> parseMemberPredicate(); - case LBRACK -> parseObjectEntry(); - case SPREAD, QSPREAD -> parseObjectSpread(); - case WHEN -> parseWhenGenerator(); - case FOR -> parseForGenerator(); - case TYPE_ALIAS, CLASS -> - throw new ParserError( - ErrorMessages.create("missingDelimiter", "}"), prev.span.stopSpan().move(1)); - default -> { - var modifiers = new ArrayList(); - while (lookahead.isModifier()) { - modifiers.add(parseModifier()); - } - if (!modifiers.isEmpty()) { - if (lookahead == Token.FUNCTION) { - yield parseObjectMethod(modifiers); - } else { - yield parseObjectProperty(modifiers); - } - } else { - yield parseObjectElement(); - } - } - }; - } - - private ObjectMember.ObjectElement parseObjectElement() { - var expr = parseExpr("}"); - return new ObjectMember.ObjectElement(expr, expr.span()); - } - - private ObjectMember parseObjectProperty(@Nullable List modifiers) { - var start = spanLookahead; - if (modifiers != null && !modifiers.isEmpty()) { - start = modifiers.get(0).span(); - } - var allModifiers = modifiers; - if (allModifiers == null) { - allModifiers = parseModifierList(); - } - var identifier = parseIdentifier(); - TypeAnnotation typeAnnotation = null; - if (lookahead == Token.COLON) { - typeAnnotation = parseTypeAnnotation(); - } - if (typeAnnotation != null || lookahead == Token.ASSIGN) { - expect(Token.ASSIGN, "unexpectedToken", "="); - var expr = parseExpr("}"); - var nodes = new ArrayList(allModifiers.size() + 4); - nodes.addAll(allModifiers); - nodes.add(identifier); - nodes.add(typeAnnotation); - nodes.add(expr); - return new ObjectMember.ObjectProperty( - nodes, allModifiers.size(), start.endWith(expr.span())); - } - var bodies = parseBodyList(); - var end = bodies.get(bodies.size() - 1).span(); - var nodes = new ArrayList(allModifiers.size() + 4); - nodes.addAll(allModifiers); - nodes.add(identifier); - nodes.add(null); - nodes.add(null); - nodes.addAll(bodies); - return new ObjectMember.ObjectProperty(nodes, allModifiers.size(), start.endWith(end)); - } - - private ObjectMember.ObjectMethod parseObjectMethod(List modifiers) { - var start = spanLookahead; - if (!modifiers.isEmpty()) { - start = modifiers.get(0).span(); - } - var function = expect(Token.FUNCTION, "unexpectedToken", "function").span; - var identifier = parseIdentifier(); - TypeParameterList params = null; - if (lookahead == Token.LT) { - params = parseTypeParameterList(); - } - var args = parseParameterList(); - TypeAnnotation typeAnnotation = null; - if (lookahead == Token.COLON) { - typeAnnotation = parseTypeAnnotation(); - } - expect(Token.ASSIGN, "unexpectedToken", "="); - var expr = parseExpr("}"); - var nodes = new ArrayList(modifiers.size() + 6); - nodes.addAll(modifiers); - nodes.add(new Keyword(function)); - nodes.add(identifier); - nodes.add(params); - nodes.add(args); - nodes.add(typeAnnotation); - nodes.add(expr); - return new ObjectMember.ObjectMethod(nodes, modifiers.size(), start.endWith(expr.span())); - } - - private ObjectMember parseMemberPredicate() { - var start = next().span; - var pred = parseExpr("]]"); - var firstBrack = expect(Token.RBRACK, "unexpectedToken", "]]").span; - Span secondbrack; - if (lookahead != Token.RBRACK) { - var text = _lookahead.text(lexer); - throw new ParserError(ErrorMessages.create("unexpectedToken", text, "]]"), firstBrack); - } else { - secondbrack = next().span; - } - if (firstBrack.charIndex() != secondbrack.charIndex() - 1) { - // There shouldn't be any whitespace between the first and second ']'. - var span = firstBrack.endWith(secondbrack); - var text = lexer.textFor(span.charIndex(), span.length()); - throw new ParserError(ErrorMessages.create("unexpectedToken", text, "]]"), firstBrack); - } - if (lookahead == Token.ASSIGN) { - next(); - var expr = parseExpr("}"); - return new ObjectMember.MemberPredicate(List.of(pred, expr), start.endWith(expr.span())); - } - var bodies = parseBodyList(); - var end = bodies.get(bodies.size() - 1).span(); - var nodes = new ArrayList(bodies.size() + 2); - nodes.add(pred); - nodes.add(null); - nodes.addAll(bodies); - return new ObjectMember.MemberPredicate(nodes, start.endWith(end)); - } - - private ObjectMember parseObjectEntry() { - var start = expect(Token.LBRACK, "unexpectedToken", "[").span; - var key = parseExpr("]"); - expect(Token.RBRACK, "unexpectedToken", "]"); - if (lookahead == Token.ASSIGN) { - next(); - var expr = parseExpr("}"); - return new ObjectMember.ObjectEntry(List.of(key, expr), start.endWith(expr.span())); - } - var bodies = parseBodyList(); - var end = bodies.get(bodies.size() - 1).span(); - var nodes = new ArrayList(bodies.size() + 2); - nodes.add(key); - nodes.add(null); - nodes.addAll(bodies); - return new ObjectMember.ObjectEntry(nodes, start.endWith(end)); - } - - private ObjectMember.ObjectSpread parseObjectSpread() { - var start = next(); - boolean isNullable = start.token == Token.QSPREAD; - var expr = parseExpr("}"); - return new ObjectMember.ObjectSpread(expr, isNullable, start.span.endWith(expr.span())); - } - - private ObjectMember.WhenGenerator parseWhenGenerator() { - var start = next().span; - expect(Token.LPAREN, "unexpectedToken", "("); - var pred = parseExpr(")"); - expect(Token.RPAREN, "unexpectedToken", ")"); - var body = parseObjectBody(); - var end = body.span(); - ObjectBody elseBody = null; - if (lookahead == Token.ELSE) { - next(); - elseBody = parseObjectBody(); - end = elseBody.span(); - } - return new ObjectMember.WhenGenerator(pred, body, elseBody, start.endWith(end)); - } - - private ObjectMember.ForGenerator parseForGenerator() { - var start = next().span; - expect(Token.LPAREN, "unexpectedToken", "("); - var par1 = parseParameter(); - Parameter par2 = null; - if (lookahead == Token.COMMA) { - next(); - par2 = parseParameter(); - } - expect(Token.IN, "unexpectedToken", "in"); - var expr = parseExpr(")"); - expect(Token.RPAREN, "unexpectedToken", ")"); - var body = parseObjectBody(); - return new ObjectMember.ForGenerator(par1, par2, expr, body, start.endWith(body.span())); - } - - private Expr parseExpr() { - return parseExpr(null); - } - - @SuppressWarnings("DuplicatedCode") - private Expr parseExpr(@Nullable String expectation) { - return parseExpr(expectation, 1); - } - - private Expr parseExpr(@Nullable String expectation, int minPrecedence) { - var expr = parseExprAtom(expectation); - var op = getOperator(); - while (op != null) { - if (op.getPrec() < minPrecedence) break; - // `-` must be in the same line as the left operand and have no semicolons inbetween - if (op == Operator.MINUS && (precededBySemicolon || _lookahead.newLinesBetween > 0)) break; - - next(); // operator - switch (op) { - case IS -> { - var type = parseType(); - expr = new Expr.TypeCheckExpr(expr, type, expr.span().endWith(type.span())); - } - case AS -> { - var type = parseType(); - expr = new Expr.TypeCastExpr(expr, type, expr.span().endWith(type.span())); - } - case DOT, QDOT -> { - var rhs = parseIdentifier(); - var isNullable = op == Operator.QDOT; - ArgumentList argumentList = null; - if (lookahead == Token.LPAREN - && !precededBySemicolon - && _lookahead.newLinesBetween == 0) { - argumentList = parseArgumentList(); - } - var lastSpan = argumentList != null ? argumentList.span() : rhs.span(); - expr = - new QualifiedAccessExpr( - expr, rhs, isNullable, argumentList, expr.span().endWith(lastSpan)); - } - default -> { - var nextMinPrec = op.isLeftAssoc() ? op.getPrec() + 1 : op.getPrec(); - var rhs = parseExpr(expectation, nextMinPrec); - expr = new BinaryOperatorExpr(expr, rhs, op, expr.span().endWith(rhs.span())); - } - } - op = getOperator(); - } - return expr; - } - - private @Nullable Operator getOperator() { - return switch (lookahead) { - case POW -> Operator.POW; - case STAR -> Operator.MULT; - case DIV -> Operator.DIV; - case INT_DIV -> Operator.INT_DIV; - case MOD -> Operator.MOD; - case PLUS -> Operator.PLUS; - case MINUS -> Operator.MINUS; - case GT -> Operator.GT; - case GTE -> Operator.GTE; - case LT -> Operator.LT; - case LTE -> Operator.LTE; - case IS -> Operator.IS; - case AS -> Operator.AS; - case EQUAL -> Operator.EQ_EQ; - case NOT_EQUAL -> Operator.NOT_EQ; - case AND -> Operator.AND; - case OR -> Operator.OR; - case PIPE -> Operator.PIPE; - case COALESCE -> Operator.NULL_COALESCE; - case DOT -> Operator.DOT; - case QDOT -> Operator.QDOT; - default -> null; - }; - } - - private Expr parseExprAtom(@Nullable String expectation) { - var expr = - switch (lookahead) { - case THIS -> new ThisExpr(next().span); - case OUTER -> new OuterExpr(next().span); - case MODULE -> new ModuleExpr(next().span); - case NULL -> new NullLiteralExpr(next().span); - case THROW -> { - var start = next().span; - expect(Token.LPAREN, "unexpectedToken", "("); - var exp = parseExpr(")"); - var end = expect(Token.RPAREN, "unexpectedToken", ")").span; - yield new ThrowExpr(exp, start.endWith(end)); - } - case TRACE -> { - var start = next().span; - expect(Token.LPAREN, "unexpectedToken", "("); - var exp = parseExpr(")"); - var end = expect(Token.RPAREN, "unexpectedToken", ")").span; - yield new TraceExpr(exp, start.endWith(end)); - } - case IMPORT -> { - var start = next().span; - expect(Token.LPAREN, "unexpectedToken", "("); - var strConst = parseStringConstant(); - var end = expect(Token.RPAREN, "unexpectedToken", ")").span; - yield new Expr.ImportExpr(strConst, false, start.endWith(end)); - } - case IMPORT_STAR -> { - var start = next().span; - expect(Token.LPAREN, "unexpectedToken", "("); - var strConst = parseStringConstant(); - var end = expect(Token.RPAREN, "unexpectedToken", ")").span; - yield new Expr.ImportExpr(strConst, true, start.endWith(end)); - } - case READ, READ_STAR, READ_QUESTION -> { - var readType = - switch (lookahead) { - case READ_QUESTION -> ReadType.NULL; - case READ_STAR -> ReadType.GLOB; - default -> ReadType.READ; - }; - var start = next().span; - expect(Token.LPAREN, "unexpectedToken", "("); - var exp = parseExpr(")"); - var end = expect(Token.RPAREN, "unexpectedToken", ")").span; - yield new ReadExpr(exp, readType, start.endWith(end)); - } - case NEW -> { - var start = next().span; - Type type = null; - if (lookahead != Token.LBRACE) { - type = parseType("{"); - } - var body = parseObjectBody(); - yield new NewExpr(type, body, start.endWith(body.span())); - } - case MINUS -> { - var start = next().span; - // calling `parseExprAtom` here and not `parseExpr` because - // unary minus has higher precendence than binary operators - var exp = parseExprAtom(expectation); - yield new UnaryMinusExpr(exp, start.endWith(exp.span())); - } - case NOT -> { - var start = next().span; - // calling `parseExprAtom` here and not `parseExpr` because - // logical not has higher precendence than binary operators - var exp = parseExprAtom(expectation); - yield new LogicalNotExpr(exp, start.endWith(exp.span())); - } - case LPAREN -> { - // can be function literal or parenthesized expression - var start = next().span; - yield switch (lookahead) { - case UNDERSCORE -> parseFunctionLiteral(start); - case IDENTIFIER -> parseFunctionLiteralOrParenthesized(start); - case RPAREN -> { - var endParen = next().span; - var paramList = new ParameterList(List.of(), start.endWith(endParen)); - expect(Token.ARROW, "unexpectedToken", "->"); - var exp = parseExpr(expectation); - yield new FunctionLiteralExpr(paramList, exp, start.endWith(exp.span())); - } - default -> { - // expression - var exp = parseExpr(")"); - var end = expect(Token.RPAREN, "unexpectedToken", ")").span; - yield new ParenthesizedExpr(exp, start.endWith(end)); - } - }; - } - case SUPER -> { - var start = next().span; - if (lookahead == Token.DOT) { - next(); - var identifier = parseIdentifier(); - if (lookahead == Token.LPAREN - && !precededBySemicolon - && _lookahead.newLinesBetween == 0) { - var args = parseArgumentList(); - yield new SuperAccessExpr(identifier, args, start.endWith(args.span())); - } else { - yield new SuperAccessExpr(identifier, null, start.endWith(identifier.span())); - } - } else { - expect(Token.LBRACK, "unexpectedToken", "["); - var exp = parseExpr("]"); - var end = expect(Token.RBRACK, "unexpectedToken", "]").span; - yield new SuperSubscriptExpr(exp, start.endWith(end)); - } - } - case IF -> { - var start = next().span; - expect(Token.LPAREN, "unexpectedToken", "("); - var pred = parseExpr(")"); - expect(Token.RPAREN, "unexpectedToken", ")"); - var then = parseExpr("else"); - expect(Token.ELSE, "unexpectedToken", "else"); - var elseCase = parseExpr(expectation); - yield new IfExpr(pred, then, elseCase, start.endWith(elseCase.span())); - } - case LET -> { - var start = next().span(); - expect(Token.LPAREN, "unexpectedToken", "("); - var param = parseParameter(); - expect(Token.ASSIGN, "unexpectedToken", "="); - var bindExpr = parseExpr(")"); - expect(Token.RPAREN, "unexpectedToken", ")"); - var exp = parseExpr(expectation); - yield new LetExpr(param, bindExpr, exp, start.endWith(exp.span())); - } - case TRUE -> new BoolLiteralExpr(true, next().span); - case FALSE -> new BoolLiteralExpr(false, next().span); - case INT, HEX, BIN, OCT -> { - var tk = next(); - yield new IntLiteralExpr(tk.text(lexer), tk.span); - } - case FLOAT -> { - var tk = next(); - yield new FloatLiteralExpr(tk.text(lexer), tk.span); - } - case STRING_START -> parseSingleLineStringLiteralExpr(); - case STRING_MULTI_START -> parseMultiLineStringLiteralExpr(); - case IDENTIFIER -> { - var identifier = parseIdentifier(); - if (lookahead == Token.LPAREN - && !precededBySemicolon - && _lookahead.newLinesBetween == 0) { - var args = parseArgumentList(); - yield new UnqualifiedAccessExpr( - identifier, args, identifier.span().endWith(args.span())); - } else { - yield new UnqualifiedAccessExpr(identifier, null, identifier.span()); - } - } - case EOF -> - throw new ParserError( - ErrorMessages.create("unexpectedEndOfFile"), prev.span.stopSpan().move(1)); - default -> { - var text = _lookahead.text(lexer); - if (expectation != null) { - throw parserError("unexpectedToken", text, expectation); - } - throw parserError("unexpectedTokenForExpression", text); - } - }; - return parseExprRest(expr); - } - - @SuppressWarnings("DuplicatedCode") - private Expr parseExprRest(Expr expr) { - // non null - if (lookahead == Token.NON_NULL) { - var end = next().span; - var res = new NonNullExpr(expr, expr.span().endWith(end)); - return parseExprRest(res); - } - // amends - if (lookahead == Token.LBRACE) { - if (expr instanceof ParenthesizedExpr - || expr instanceof AmendsExpr - || expr instanceof NewExpr) { - var body = parseObjectBody(); - return parseExprRest(new AmendsExpr(expr, body, expr.span().endWith(body.span()))); - } - throw parserError("unexpectedCurlyProbablyAmendsExpression", expr.text(lexer.getSource())); - } - // qualified access - if (lookahead == Token.DOT || lookahead == Token.QDOT) { - var isNullable = next().token == Token.QDOT; - var identifier = parseIdentifier(); - ArgumentList argumentList = null; - if (lookahead == Token.LPAREN && !precededBySemicolon && _lookahead.newLinesBetween == 0) { - argumentList = parseArgumentList(); - } - var lastSpan = argumentList != null ? argumentList.span() : identifier.span(); - var res = - new QualifiedAccessExpr( - expr, identifier, isNullable, argumentList, expr.span().endWith(lastSpan)); - return parseExprRest(res); - } - // subscript (needs to be in the same line as the expression) - if (lookahead == Token.LBRACK && !precededBySemicolon && _lookahead.newLinesBetween == 0) { - next(); - var exp = parseExpr("]"); - var end = expect(Token.RBRACK, "unexpectedToken", "]").span; - var res = new SubscriptExpr(expr, exp, expr.span().endWith(end)); - return parseExprRest(res); - } - return expr; - } - - private Expr parseSingleLineStringLiteralExpr() { - var start = next(); - var parts = new ArrayList(); - var builder = new StringBuilder(); - var startSpan = spanLookahead; - var end = spanLookahead; - while (lookahead != Token.STRING_END) { - switch (lookahead) { - case STRING_PART -> { - var tk = next(); - end = tk.span; - builder.append(tk.text(lexer)); - } - case STRING_ESCAPE_NEWLINE -> { - end = next().span; - builder.append('\n'); - } - case STRING_ESCAPE_TAB -> { - end = next().span; - builder.append('\t'); - } - case STRING_ESCAPE_QUOTE -> { - end = next().span; - builder.append('"'); - } - case STRING_ESCAPE_BACKSLASH -> { - end = next().span; - builder.append('\\'); - } - case STRING_ESCAPE_RETURN -> { - end = next().span; - builder.append('\r'); - } - case STRING_ESCAPE_UNICODE -> { - var tk = next(); - end = tk.span; - builder.append(parseUnicodeEscape(tk)); - } - case INTERPOLATION_START -> { - var istart = next().span; - if (!builder.isEmpty()) { - assert startSpan != null; - parts.add(new StringChars(builder.toString(), startSpan.endWith(end))); - builder = new StringBuilder(); - } - var exp = parseExpr(")"); - end = expect(Token.RPAREN, "unexpectedToken", ")").span; - parts.add(new StringPart.StringInterpolation(exp, istart.endWith(end))); - startSpan = spanLookahead; - } - case EOF -> { - var delimiter = new StringBuilder(start.text(lexer)).reverse().toString(); - throw parserError("missingDelimiter", delimiter); - } - } - } - if (!builder.isEmpty()) { - parts.add(new StringChars(builder.toString(), startSpan.endWith(end))); - } - end = next().span; - return new SingleLineStringLiteralExpr(parts, start.span, end, start.span.endWith(end)); - } - - private Expr parseMultiLineStringLiteralExpr() { - var start = next(); - var stringTokens = new ArrayList(); - while (lookahead != Token.STRING_END) { - switch (lookahead) { - case STRING_PART, - STRING_NEWLINE, - STRING_ESCAPE_NEWLINE, - STRING_ESCAPE_TAB, - STRING_ESCAPE_QUOTE, - STRING_ESCAPE_BACKSLASH, - STRING_ESCAPE_RETURN, - STRING_ESCAPE_UNICODE -> - stringTokens.add(new TempNode(next(), null)); - case INTERPOLATION_START -> { - var istart = next(); - var exp = parseExpr(")"); - var end = expect(Token.RPAREN, "unexpectedToken", ")").span; - var interpolation = new StringPart.StringInterpolation(exp, istart.span.endWith(end)); - stringTokens.add(new TempNode(null, interpolation)); - } - case EOF -> { - var delimiter = new StringBuilder(start.text(lexer)).reverse().toString(); - throw parserError("missingDelimiter", delimiter); - } - } - } - var end = next().span; - var fullSpan = start.span.endWith(end); - var parts = validateMultiLineString(stringTokens, fullSpan); - return new MultiLineStringLiteralExpr(parts, start.span, end, fullSpan); - } - - private List validateMultiLineString(List nodes, Span span) { - var firstNode = nodes.isEmpty() ? null : nodes.get(0); - if (firstNode == null - || firstNode.token == null - || firstNode.token.token != Token.STRING_NEWLINE) { - var errorSpan = firstNode == null ? span : firstNode.span(); - throw new ParserError(ErrorMessages.create("stringContentMustBeginOnNewLine"), errorSpan); - } - // only contains a newline - if (nodes.size() == 1) { - return List.of(new StringChars("", firstNode.span())); - } - var indent = getCommonIndent(nodes, span); - return renderString(nodes, indent); - } - - @SuppressWarnings("DataFlowIssue") - private List renderString(List nodes, String commonIndent) { - var parts = new ArrayList(); - var builder = new StringBuilder(); - var endOffset = nodes.get(nodes.size() - 1).token.token == Token.STRING_NEWLINE ? 1 : 2; - var isNewLine = true; - Span start = null; - Span end = null; - for (var i = 1; i < nodes.size() - endOffset; i++) { - var node = nodes.get(i); - if (node.node != null) { - if (!builder.isEmpty()) { - parts.add(new StringChars(builder.toString(), start.endWith(end))); - builder = new StringBuilder(); - start = null; - } - parts.add(node.node); - } else { - var token = node.token; - assert token != null; - if (start == null) { - start = token.span; - } - end = token.span; - switch (token.token) { - case STRING_NEWLINE -> { - builder.append('\n'); - isNewLine = true; - } - case STRING_PART -> { - var text = token.text(lexer); - if (isNewLine) { - if (text.startsWith(commonIndent)) { - builder.append(text, commonIndent.length(), text.length()); - } else { - var actualIndent = getLeadingIndentCount(text); - var textSpan = token.span.move(actualIndent).grow(-actualIndent); - throw new ParserError( - ErrorMessages.create("stringIndentationMustMatchLastLine"), textSpan); - } - } else { - builder.append(text); - } - isNewLine = false; - } - default -> { - if (isNewLine && !commonIndent.isEmpty()) { - throw new ParserError( - ErrorMessages.create("stringIndentationMustMatchLastLine"), token.span); - } - builder.append(getEscapeText(token)); - isNewLine = false; - } - } - } - } - if (!builder.isEmpty()) { - parts.add(new StringChars(builder.toString(), start.endWith(end))); - } - return parts; - } - - @SuppressWarnings("DuplicatedCode") - private Expr parseFunctionLiteralOrParenthesized(Span start) { - var identifier = parseIdentifier(); - return switch (lookahead) { - case COMMA -> { - next(); - var params = new ArrayList(); - params.add(new TypedIdentifier(identifier, null, identifier.span())); - params.addAll(parseListOfParameter(Token.COMMA, Token.RPAREN)); - var endParen = expect(Token.RPAREN, "unexpectedToken2", ",", ")").span; - var paramList = new ParameterList(params, start.endWith(endParen)); - expect(Token.ARROW, "unexpectedToken", "->"); - var expr = parseExpr(); - yield new FunctionLiteralExpr(paramList, expr, start.endWith(expr.span())); - } - case COLON -> { - var typeAnnotation = parseTypeAnnotation(); - var params = new ArrayList(); - params.add( - new TypedIdentifier( - identifier, typeAnnotation, identifier.span().endWith(typeAnnotation.span()))); - if (lookahead == Token.COMMA) { - next(); - params.addAll(parseListOfParameter(Token.COMMA, Token.RPAREN)); - } - var endParen = expect(Token.RPAREN, "unexpectedToken2", ",", ")").span; - var paramList = new ParameterList(params, start.endWith(endParen)); - expect(Token.ARROW, "unexpectedToken", "->"); - var expr = parseExpr(")"); - yield new FunctionLiteralExpr(paramList, expr, start.endWith(expr.span())); - } - case RPAREN -> { - // still not sure - var end = next().span; - if (lookahead == Token.ARROW) { - next(); - var expr = parseExpr(); - var params = new ArrayList(); - params.add(new TypedIdentifier(identifier, null, identifier.span())); - var paramList = new ParameterList(params, start.endWith(end)); - yield new FunctionLiteralExpr(paramList, expr, start.endWith(expr.span())); - } else { - var exp = new UnqualifiedAccessExpr(identifier, null, identifier.span()); - yield new ParenthesizedExpr(exp, start.endWith(end)); - } - } - default -> { - // this is an expression - backtrack(); - var expr = parseExpr(")"); - var end = expect(Token.RPAREN, "unexpectedToken", ")").span; - yield new ParenthesizedExpr(expr, start.endWith(end)); - } - }; - } - - private FunctionLiteralExpr parseFunctionLiteral(Span start) { - // the open parens is already parsed - var params = parseListOfParameter(Token.COMMA, Token.RPAREN); - var endParen = expect(Token.RPAREN, "unexpectedToken2", ",", ")").span; - var paramList = new ParameterList(params, start.endWith(endParen)); - expect(Token.ARROW, "unexpectedToken", "->"); - var expr = parseExpr(); - return new FunctionLiteralExpr(paramList, expr, start.endWith(expr.span())); - } - - private Type parseType() { - return parseType(null); - } - - private Type parseType(@Nullable String expectation) { - var defaultIndex = -1; - Span start = null; - if (lookahead == Token.STAR) { - defaultIndex = 0; - start = next().span; - } - var first = parseTypeAtom(expectation); - if (start == null) { - start = first.span(); - } - - if (lookahead != Token.UNION) { - if (defaultIndex == 0) { - throw new ParserError(ErrorMessages.create("notAUnion"), start.endWith(first.span())); - } - return first; - } - - var types = new ArrayList(); - types.add(first); - var end = start; - var i = 1; - while (lookahead == Token.UNION) { - next(); - if (lookahead == Token.STAR) { - if (defaultIndex != -1) { - throw parserError("multipleUnionDefaults"); - } - defaultIndex = i; - next(); - } - var type = parseTypeAtom(expectation); - types.add(type); - end = type.span(); - i++; - } - return new Type.UnionType(types, defaultIndex, start.endWith(end)); - } - - private Type parseTypeAtom(@Nullable String expectation) { - Type typ; - switch (lookahead) { - case UNKNOWN -> typ = new Type.UnknownType(next().span); - case NOTHING -> typ = new Type.NothingType(next().span); - case MODULE -> typ = new Type.ModuleType(next().span); - case LPAREN -> { - var tk = next(); - var children = new ArrayList(); - Span end; - if (lookahead == Token.RPAREN) { - end = next().span; - } else { - children.addAll(parseListOf(Token.COMMA, Token.RPAREN, () -> parseType(")"))); - end = expect(Token.RPAREN, "unexpectedToken2", ",", ")").span; - } - if (lookahead == Token.ARROW || children.size() > 1) { - expect(Token.ARROW, "unexpectedToken", "->"); - var ret = parseType(expectation); - children.add(ret); - typ = new Type.FunctionType(children, tk.span.endWith(ret.span())); - } else { - if (children.isEmpty()) { - throw new ParserError(ErrorMessages.create("unexpectedTokenForType", ")"), end); - } - typ = new ParenthesizedType((Type) children.get(0), tk.span.endWith(end)); - } - } - case IDENTIFIER -> { - var start = spanLookahead; - var name = parseQualifiedIdentifier(); - var end = name.span(); - TypeArgumentList typeArgumentList = null; - if (lookahead == Token.LT) { - typeArgumentList = parseTypeArgumentList(); - end = typeArgumentList.span(); - } - typ = new DeclaredType(name, typeArgumentList, start.endWith(end)); - } - case STRING_START -> { - var str = parseStringConstant(); - typ = new StringConstantType(str, str.span()); - } - default -> { - var text = _lookahead.text(lexer); - if (expectation != null) { - throw parserError("unexpectedTokenForType2", text, expectation); - } - throw parserError("unexpectedTokenForType", text); - } - } - - if (typ instanceof Type.FunctionType) return typ; - return parseTypeEnd(typ); - } - - private Type parseTypeEnd(Type type) { - // nullable types - if (lookahead == Token.QUESTION) { - var end = spanLookahead; - next(); - var res = new Type.NullableType(type, type.span().endWith(end)); - return parseTypeEnd(res); - } - // constrained types: have to start in the same line as the type - if (lookahead == Token.LPAREN && !precededBySemicolon && _lookahead.newLinesBetween == 0) { - next(); - var constraints = parseListOf(Token.COMMA, Token.RPAREN, () -> parseExpr(")")); - var end = expect(Token.RPAREN, "unexpectedToken2", ",", ")").span; - var children = new ArrayList(constraints.size() + 1); - children.add(type); - children.addAll(constraints); - var res = new Type.ConstrainedType(children, type.span().endWith(end)); - return parseTypeEnd(res); - } - return type; - } - - private Annotation parseAnnotation() { - var start = next().span; - var children = new ArrayList(2); - var type = parseType(); - children.add(type); - ObjectBody body = null; - var end = type.span(); - if (lookahead == Token.LBRACE) { - body = parseObjectBody(); - end = body.span(); - } - children.add(body); - return new Annotation(children, start.endWith(end)); - } - - private Parameter parseParameter() { - if (lookahead == Token.UNDERSCORE) { - var span = next().span; - return new Parameter.Underscore(span); - } - return parseTypedIdentifier(); - } - - private Modifier parseModifier() { - return switch (lookahead) { - case EXTERNAL -> new Modifier(Modifier.ModifierValue.EXTERNAL, next().span); - case ABSTRACT -> new Modifier(Modifier.ModifierValue.ABSTRACT, next().span); - case OPEN -> new Modifier(Modifier.ModifierValue.OPEN, next().span); - case LOCAL -> new Modifier(Modifier.ModifierValue.LOCAL, next().span); - case HIDDEN -> new Modifier(Modifier.ModifierValue.HIDDEN, next().span); - case FIXED -> new Modifier(Modifier.ModifierValue.FIXED, next().span); - case CONST -> new Modifier(Modifier.ModifierValue.CONST, next().span); - default -> throw new RuntimeException("Unreacheable code"); - }; - } - - private List parseModifierList() { - var modifiers = new ArrayList(); - while (lookahead.isModifier()) { - modifiers.add(parseModifier()); - } - return modifiers; - } - - private ParameterList parseParameterList() { - var start = expect(Token.LPAREN, "unexpectedToken", "(").span; - Span end; - List args = new ArrayList<>(); - if (lookahead == Token.RPAREN) { - end = next().span; - } else { - args = parseListOfParameter(Token.COMMA, Token.RPAREN); - end = expect(Token.RPAREN, "unexpectedToken2", ",", ")").span; - } - return new ParameterList(args, start.endWith(end)); - } - - private List parseBodyList() { - if (lookahead != Token.LBRACE) { - throw parserError("unexpectedToken2", _lookahead.text(lexer), "{", "="); - } - var bodies = new ArrayList(); - do { - bodies.add(parseObjectBody()); - } while (lookahead == Token.LBRACE); - return bodies; - } - - private TypeParameterList parseTypeParameterList() { - var start = expect(Token.LT, "unexpectedToken", "<").span; - var pars = parseListOf(Token.COMMA, Token.GT, this::parseTypeParameter); - var end = expect(Token.GT, "unexpectedToken2", ",", ">").span; - return new TypeParameterList(pars, start.endWith(end)); - } - - private TypeArgumentList parseTypeArgumentList() { - var start = expect(Token.LT, "unexpectedToken", "<").span; - var pars = parseListOf(Token.COMMA, Token.GT, this::parseType); - var end = expect(Token.GT, "unexpectedToken2", ",", ">").span; - return new TypeArgumentList(pars, start.endWith(end)); - } - - private ArgumentList parseArgumentList() { - var start = expect(Token.LPAREN, "unexpectedToken", "(").span; - if (lookahead == Token.RPAREN) { - return new ArgumentList(new ArrayList<>(), start.endWith(next().span)); - } - var exprs = parseListOf(Token.COMMA, Token.RPAREN, this::parseExpr); - var end = expect(Token.RPAREN, "unexpectedToken2", ",", ")").span; - return new ArgumentList(exprs, start.endWith(end)); - } - - private TypeParameter parseTypeParameter() { - TypeParameter.Variance variance = null; - var start = spanLookahead; - if (lookahead == Token.IN) { - next(); - variance = TypeParameter.Variance.IN; - } else if (lookahead == Token.OUT) { - next(); - variance = TypeParameter.Variance.OUT; - } - var identifier = parseIdentifier(); - return new TypeParameter(variance, identifier, start.endWith(identifier.span())); - } - - private TypedIdentifier parseTypedIdentifier() { - var identifier = parseIdentifier(); - TypeAnnotation typeAnnotation = null; - var end = identifier.span(); - if (lookahead == Token.COLON) { - typeAnnotation = parseTypeAnnotation(); - end = typeAnnotation.span(); - } - return new TypedIdentifier(identifier, typeAnnotation, identifier.span().endWith(end)); - } - - private TypeAnnotation parseTypeAnnotation() { - var start = expect(Token.COLON, "unexpectedToken", ":").span; - var type = parseType(); - return new TypeAnnotation(type, start.endWith(type.span())); - } - - private Identifier parseIdentifier() { - if (lookahead != Token.IDENTIFIER) { - if (lookahead.isKeyword()) { - throw parserError("keywordNotAllowedHere", lookahead.text()); - } - throw parserError("unexpectedToken", _lookahead.text(lexer), "identifier"); - } - var tk = next(); - var text = tk.text(lexer); - return new Identifier(text, tk.span); - } - - private StringConstant parseStringConstant() { - var start = spanLookahead; - var startTk = expect(Token.STRING_START, "unexpectedToken", "\""); - var builder = new StringBuilder(); - while (lookahead != Token.STRING_END) { - switch (lookahead) { - case STRING_PART -> builder.append(next().text(lexer)); - case STRING_ESCAPE_NEWLINE -> { - next(); - builder.append('\n'); - } - case STRING_ESCAPE_TAB -> { - next(); - builder.append('\t'); - } - case STRING_ESCAPE_QUOTE -> { - next(); - builder.append('"'); - } - case STRING_ESCAPE_BACKSLASH -> { - next(); - builder.append('\\'); - } - case STRING_ESCAPE_RETURN -> { - next(); - builder.append('\r'); - } - case STRING_ESCAPE_UNICODE -> builder.append(parseUnicodeEscape(next())); - case EOF -> { - var delimiter = new StringBuilder(startTk.text(lexer)).reverse().toString(); - throw parserError("missingDelimiter", delimiter); - } - case INTERPOLATION_START -> throw parserError("interpolationInConstant"); - // the lexer makes sure we only get the above tokens inside a string - default -> throw new RuntimeException("Unreacheable code"); - } - } - var end = next().span; - return new StringConstant(builder.toString(), start.endWith(end)); - } - - private String getEscapeText(FullToken tk) { - return switch (tk.token) { - case STRING_ESCAPE_NEWLINE -> "\n"; - case STRING_ESCAPE_QUOTE -> "\""; - case STRING_ESCAPE_BACKSLASH -> "\\"; - case STRING_ESCAPE_TAB -> "\t"; - case STRING_ESCAPE_RETURN -> "\r"; - case STRING_ESCAPE_UNICODE -> parseUnicodeEscape(tk); - default -> throw new RuntimeException("Unreacheable code"); - }; - } - - private String parseUnicodeEscape(FullToken tk) { - var text = tk.text(lexer); - var lastIndex = text.length() - 1; - var startIndex = text.indexOf('{', 2); - try { - var codepoint = Integer.parseInt(text.substring(startIndex + 1, lastIndex), 16); - return Character.toString(codepoint); - } catch (NumberFormatException e) { - throw new ParserError( - ErrorMessages.create("invalidUnicodeEscapeSequence", text, text.substring(0, startIndex)), - tk.span); - } - } - - private String getCommonIndent(List nodes, Span span) { - var lastNode = nodes.get(nodes.size() - 1); - if (lastNode.token == null) { - throw new ParserError( - ErrorMessages.create("closingStringDelimiterMustBeginOnNewLine"), lastNode.span()); - } - if (lastNode.token.token == Token.STRING_NEWLINE) return ""; - var beforeLast = nodes.get(nodes.size() - 2); - if (beforeLast.token != null && beforeLast.token.token == Token.STRING_NEWLINE) { - var indent = getTrailingIndent(lastNode); - if (indent != null) { - return indent; - } - } - throw new ParserError(ErrorMessages.create("closingStringDelimiterMustBeginOnNewLine"), span); - } - - private @Nullable String getTrailingIndent(TempNode node) { - var token = node.token; - if (token == null || token.token != Token.STRING_PART) return null; - var text = token.text(lexer); - for (var i = 0; i < text.length(); i++) { - var ch = text.charAt(i); - if (ch != ' ' && ch != '\t') return null; - } - return text; - } - - private int getLeadingIndentCount(String text) { - if (text.isEmpty()) return 0; - for (var i = 0; i < text.length(); i++) { - var ch = text.charAt(i); - if (ch != ' ' && ch != '\t') { - return i; - } - } - return text.length(); - } - - private record TempNode( - @Nullable FullToken token, @Nullable StringPart.StringInterpolation node) { - Span span() { - if (token != null) return token.span; - assert node != null; - return node.span(); - } - } - - private FullToken expect(Token type, String errorKey, Object... messageArgs) { - if (lookahead != type) { - var span = spanLookahead; - if (lookahead == Token.EOF || _lookahead.newLinesBetween > 0) { - // don't point at the EOF or the next line, but at the end of the last token - span = prev.span.stopSpan().move(1); - } - var args = messageArgs; - if (errorKey.startsWith("unexpectedToken")) { - args = new Object[messageArgs.length + 1]; - args[0] = lookahead == Token.EOF ? "EOF" : _lookahead.text(lexer); - System.arraycopy(messageArgs, 0, args, 1, messageArgs.length); - } - throw new ParserError(ErrorMessages.create(errorKey, args), span); - } - return next(); - } - - private List parseListOf(Token separator, Supplier parser) { - var res = new ArrayList(); - res.add(parser.get()); - while (lookahead == separator) { - next(); - res.add(parser.get()); - } - return res; - } - - private List parseListOf(Token separator, Token terminator, Supplier parser) { - var res = new ArrayList(); - res.add(parser.get()); - while (lookahead == separator) { - next(); - if (lookahead == terminator) { - break; - } - res.add(parser.get()); - } - return res; - } - - private List parseListOfParameter(Token separator, Token terminator) { - var res = new ArrayList(); - if (lookahead == terminator) { - return res; - } - - res.add(parseParameter()); - while (lookahead == separator) { - next(); - if (lookahead == terminator) { - break; - } - res.add(parseParameter()); - } - return res; - } - - private ParserError parserError(String messageKey, Object... args) { - return new ParserError(ErrorMessages.create(messageKey, args), spanLookahead); - } - - private record MemberHeader( - @Nullable DocComment docComment, List annotations, List modifiers) { - boolean isNotEmpty() { - return !(docComment == null && annotations.isEmpty() && modifiers.isEmpty()); - } - - @SuppressWarnings("DataFlowIssue") - @Nullable - Span span() { - return span(null); - } - - Span span(Span or) { - if (docComment != null) { - return docComment.span(); - } - if (!annotations().isEmpty()) { - return annotations.get(0).span(); - } - if (!modifiers().isEmpty()) { - return modifiers.get(0).span(); - } - return or; - } - - Span modifierSpan(Span or) { - if (!modifiers.isEmpty()) { - return modifiers.get(0).span(); - } - return or; - } - } - - private FullToken next() { - if (backtracking) { - backtracking = false; - lookahead = _lookahead.token; - spanLookahead = _lookahead.span; - return prev; - } - prev = _lookahead; - _lookahead = forceNext(); - lookahead = _lookahead.token; - spanLookahead = _lookahead.span; - return prev; - } - - private FullToken forceNext() { - var tk = lexer.next(); - precededBySemicolon = false; - var newLines = lexer.newLinesBetween; - while (tk.isAffix()) { - precededBySemicolon = precededBySemicolon || tk == Token.SEMICOLON; - tk = lexer.next(); - newLines += lexer.newLinesBetween; - } - return new FullToken(tk, lexer.span(), newLines); - } - - // Like next, but don't ignore comments - private FullToken nextComment() { - prev = _lookahead; - _lookahead = forceNextComment(); - lookahead = _lookahead.token; - spanLookahead = _lookahead.span; - return prev; - } - - private FullToken forceNextComment() { - var tk = lexer.next(); - precededBySemicolon = false; - while (tk == Token.SEMICOLON) { - precededBySemicolon = true; - tk = lexer.next(); - } - return new FullToken(tk, lexer.span(), lexer.newLinesBetween); - } - - /** - * Backtrack to the previous token. - * - *

Can only backtrack one token. - */ - private void backtrack() { - assert !backtracking; - lookahead = prev.token; - spanLookahead = prev.span; - backtracking = true; - } - - private void ensureEmptyHeaders(MemberHeader header, String messageArg) { - if (header.isNotEmpty()) { - throw new ParserError( - ErrorMessages.create("wrongHeaders", messageArg), header.span(spanLookahead)); - } - } - - private record FullToken(Token token, Span span, int newLinesBetween) { - String text(Lexer lexer) { - return lexer.textFor(span.charIndex(), span.length()); - } + return new ParserImpl(source).parseReplInput(); } } diff --git a/pkl-parser/src/main/java/org/pkl/parser/ParserError.java b/pkl-parser/src/main/java/org/pkl/parser/ParserError.java index 027575d8e..8b15fa5e3 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/ParserError.java +++ b/pkl-parser/src/main/java/org/pkl/parser/ParserError.java @@ -1,5 +1,5 @@ /* - * Copyright © 2025 Apple Inc. and the Pkl project authors. All rights reserved. + * Copyright © 2025-2026 Apple Inc. and the Pkl project authors. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ import org.pkl.parser.syntax.Module; import org.pkl.parser.util.Nullable; -public class ParserError extends RuntimeException { +public final class ParserError extends RuntimeException { private final Span span; private @Nullable Module partialParseResult; diff --git a/pkl-parser/src/main/java/org/pkl/parser/ParserImpl.java b/pkl-parser/src/main/java/org/pkl/parser/ParserImpl.java new file mode 100644 index 000000000..e42877705 --- /dev/null +++ b/pkl-parser/src/main/java/org/pkl/parser/ParserImpl.java @@ -0,0 +1,1863 @@ +/* + * Copyright © 2024-2026 Apple Inc. and the Pkl project authors. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.pkl.parser; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.function.Supplier; +import org.pkl.parser.syntax.Annotation; +import org.pkl.parser.syntax.ArgumentList; +import org.pkl.parser.syntax.Class; +import org.pkl.parser.syntax.ClassBody; +import org.pkl.parser.syntax.ClassMethod; +import org.pkl.parser.syntax.ClassProperty; +import org.pkl.parser.syntax.DocComment; +import org.pkl.parser.syntax.Expr; +import org.pkl.parser.syntax.Expr.AmendsExpr; +import org.pkl.parser.syntax.Expr.BinaryOperatorExpr; +import org.pkl.parser.syntax.Expr.BoolLiteralExpr; +import org.pkl.parser.syntax.Expr.FloatLiteralExpr; +import org.pkl.parser.syntax.Expr.FunctionLiteralExpr; +import org.pkl.parser.syntax.Expr.IfExpr; +import org.pkl.parser.syntax.Expr.IntLiteralExpr; +import org.pkl.parser.syntax.Expr.LetExpr; +import org.pkl.parser.syntax.Expr.LogicalNotExpr; +import org.pkl.parser.syntax.Expr.ModuleExpr; +import org.pkl.parser.syntax.Expr.MultiLineStringLiteralExpr; +import org.pkl.parser.syntax.Expr.NewExpr; +import org.pkl.parser.syntax.Expr.NonNullExpr; +import org.pkl.parser.syntax.Expr.NullLiteralExpr; +import org.pkl.parser.syntax.Expr.OuterExpr; +import org.pkl.parser.syntax.Expr.ParenthesizedExpr; +import org.pkl.parser.syntax.Expr.QualifiedAccessExpr; +import org.pkl.parser.syntax.Expr.ReadExpr; +import org.pkl.parser.syntax.Expr.ReadType; +import org.pkl.parser.syntax.Expr.SingleLineStringLiteralExpr; +import org.pkl.parser.syntax.Expr.SubscriptExpr; +import org.pkl.parser.syntax.Expr.SuperAccessExpr; +import org.pkl.parser.syntax.Expr.SuperSubscriptExpr; +import org.pkl.parser.syntax.Expr.ThisExpr; +import org.pkl.parser.syntax.Expr.ThrowExpr; +import org.pkl.parser.syntax.Expr.TraceExpr; +import org.pkl.parser.syntax.Expr.UnaryMinusExpr; +import org.pkl.parser.syntax.Expr.UnqualifiedAccessExpr; +import org.pkl.parser.syntax.ExtendsOrAmendsClause; +import org.pkl.parser.syntax.Identifier; +import org.pkl.parser.syntax.ImportClause; +import org.pkl.parser.syntax.Keyword; +import org.pkl.parser.syntax.Modifier; +import org.pkl.parser.syntax.Module; +import org.pkl.parser.syntax.ModuleDecl; +import org.pkl.parser.syntax.Node; +import org.pkl.parser.syntax.ObjectBody; +import org.pkl.parser.syntax.ObjectMember; +import org.pkl.parser.syntax.Operator; +import org.pkl.parser.syntax.Parameter; +import org.pkl.parser.syntax.Parameter.TypedIdentifier; +import org.pkl.parser.syntax.ParameterList; +import org.pkl.parser.syntax.QualifiedIdentifier; +import org.pkl.parser.syntax.ReplInput; +import org.pkl.parser.syntax.StringConstant; +import org.pkl.parser.syntax.StringPart; +import org.pkl.parser.syntax.StringPart.StringChars; +import org.pkl.parser.syntax.Type; +import org.pkl.parser.syntax.Type.DeclaredType; +import org.pkl.parser.syntax.Type.ParenthesizedType; +import org.pkl.parser.syntax.Type.StringConstantType; +import org.pkl.parser.syntax.TypeAlias; +import org.pkl.parser.syntax.TypeAnnotation; +import org.pkl.parser.syntax.TypeArgumentList; +import org.pkl.parser.syntax.TypeParameter; +import org.pkl.parser.syntax.TypeParameterList; +import org.pkl.parser.util.ErrorMessages; +import org.pkl.parser.util.Nullable; + +@SuppressWarnings("DuplicatedCode") +final class ParserImpl { + + private final Lexer lexer; + private Token lookahead; + private Span spanLookahead; + private boolean backtracking = false; + private FullToken prev; + private FullToken _lookahead; + private boolean precededBySemicolon = false; + + ParserImpl(String source) { + this.lexer = new Lexer(source); + _lookahead = forceNext(); + lookahead = _lookahead.token; + spanLookahead = _lookahead.span; + } + + Module parseModule() { + if (lookahead == Token.EOF) { + return new Module(Collections.singletonList(null), new Span(0, 0)); + } + if (lookahead == Token.SHEBANG) next(); + var start = spanLookahead; + Span end = null; + ModuleDecl moduleDecl; + var nodes = new ArrayList(); + try { + var header = parseMemberHeader(); + + moduleDecl = parseModuleDecl(header); + if (moduleDecl != null) { + end = moduleDecl.span(); + header = null; + } + nodes.add(moduleDecl); + // imports + while (lookahead == Token.IMPORT || lookahead == Token.IMPORT_STAR) { + if (header != null && header.isNotEmpty()) { + throw parserError("wrongHeaders", "Imports"); + } + var _import = parseImportDecl(); + nodes.add(_import); + end = _import.span(); + } + + // entries + if (header != null && header.isNotEmpty()) { + end = parseModuleMember(header, nodes); + } + + while (lookahead != Token.EOF) { + header = parseMemberHeader(); + end = parseModuleMember(header, nodes); + } + return new Module(nodes, start.endWith(spanLookahead)); + } catch (ParserError pe) { + var spanEnd = end != null ? end : start; + pe.setPartialParseResult(new Module(nodes, start.endWith(spanEnd))); + throw pe; + } + } + + Expr parseExpressionInput() { + var expr = parseExpr(); + expect(Token.EOF, "unexpectedToken", "end of file"); + return expr; + } + + ReplInput parseReplInput() { + var nodes = new ArrayList(); + while (lookahead != Token.EOF) { + var header = parseMemberHeader(); + switch (lookahead) { + case IMPORT, IMPORT_STAR -> { + ensureEmptyHeaders(header, "Imports"); + nodes.add(parseImportDecl()); + } + case MODULE, AMENDS, EXTENDS -> nodes.add(parseModuleDecl(header)); + case CLASS -> nodes.add(parseClass(header)); + case TYPE_ALIAS -> nodes.add(parseTypeAlias(header)); + case FUNCTION -> nodes.add(parseClassMethod(header)); + case IDENTIFIER -> { + next(); + switch (lookahead) { + case COLON, ASSIGN, LBRACE -> { + backtrack(); + nodes.add(parseClassProperty(header)); + } + default -> { + backtrack(); + ensureEmptyHeaders(header, "Expressions"); + nodes.add(parseExpr()); + } + } + } + default -> { + ensureEmptyHeaders(header, "Expressions"); + nodes.add(parseExpr()); + } + } + } + Span span; + if (nodes.isEmpty()) { + span = new Span(0, 0); + } else { + span = nodes.get(0).span().endWith(nodes.get(nodes.size() - 1).span()); + } + return new ReplInput(nodes, span); + } + + private @Nullable ModuleDecl parseModuleDecl(MemberHeader header) { + QualifiedIdentifier moduleName = null; + Keyword moduleKeyword = null; + var start = header.span(); + Span end = null; + if (lookahead == Token.MODULE) { + var module = expect(Token.MODULE, "unexpectedToken", "module"); + moduleKeyword = new Keyword(module.span); + if (start == null) { + start = module.span; + } + moduleName = parseQualifiedIdentifier(); + end = moduleName.span(); + } + var extendsOrAmendsDecl = parseExtendsAmendsDecl(); + if (extendsOrAmendsDecl != null) { + if (start == null) { + start = extendsOrAmendsDecl.span(); + } + end = extendsOrAmendsDecl.span(); + } + if (moduleName != null || extendsOrAmendsDecl != null) { + var children = new ArrayList(); + children.add(header.docComment); + children.addAll(header.annotations); + var modifiersOffset = children.size(); + children.addAll(header.modifiers); + var nameOffset = children.size(); + children.add(moduleKeyword); + children.add(moduleName); + children.add(extendsOrAmendsDecl); + return new ModuleDecl(children, modifiersOffset, nameOffset, start.endWith(end)); + } + return null; + } + + private QualifiedIdentifier parseQualifiedIdentifier() { + var idents = parseListOf(Token.DOT, this::parseIdentifier); + return new QualifiedIdentifier(idents); + } + + private @Nullable ExtendsOrAmendsClause parseExtendsAmendsDecl() { + if (lookahead == Token.EXTENDS) { + var tk = next().span; + var url = parseStringConstant(); + return new ExtendsOrAmendsClause( + url, ExtendsOrAmendsClause.Type.EXTENDS, tk.endWith(url.span())); + } + if (lookahead == Token.AMENDS) { + var tk = next().span; + var url = parseStringConstant(); + return new ExtendsOrAmendsClause( + url, ExtendsOrAmendsClause.Type.AMENDS, tk.endWith(url.span())); + } + return null; + } + + private ImportClause parseImportDecl() { + Span start; + boolean isGlob = false; + if (lookahead == Token.IMPORT_STAR) { + start = next().span; + isGlob = true; + } else { + start = expect(Token.IMPORT, "unexpectedToken2", "import", "import*").span; + } + var str = parseStringConstant(); + var end = str.span(); + Identifier alias = null; + if (lookahead == Token.AS) { + next(); + alias = parseIdentifier(); + end = alias.span(); + } + return new ImportClause(str, isGlob, alias, start.endWith(end)); + } + + private MemberHeader parseMemberHeader() { + DocComment docComment = null; + var annotations = new ArrayList(); + var modifiers = new ArrayList(); + if (lookahead == Token.DOC_COMMENT) { + docComment = parseDocComment(); + } + while (lookahead == Token.AT) { + annotations.add(parseAnnotation()); + } + while (lookahead.isModifier()) { + modifiers.add(parseModifier()); + } + return new MemberHeader(docComment, annotations, modifiers); + } + + private DocComment parseDocComment() { + var spans = new ArrayList(); + spans.add(nextComment().span); + while (lookahead == Token.DOC_COMMENT + || lookahead == Token.LINE_COMMENT + || lookahead == Token.BLOCK_COMMENT) { + var next = nextComment(); + // newlines are not allowed in doc comments + if (next.newLinesBetween > 1) { + if (next.token == Token.DOC_COMMENT) { + backtrack(); + } + break; + } + if (next.token == Token.DOC_COMMENT) { + spans.add(next.span); + } + } + while (lookahead == Token.LINE_COMMENT || lookahead == Token.BLOCK_COMMENT) { + nextComment(); + } + return new DocComment(spans); + } + + private Span parseModuleMember(MemberHeader header, List nodes) { + switch (lookahead) { + case IDENTIFIER -> { + var node = parseClassProperty(header); + nodes.add(node); + return node.span(); + } + case TYPE_ALIAS -> { + var node = parseTypeAlias(header); + nodes.add(node); + return node.span(); + } + case CLASS -> { + var node = parseClass(header); + nodes.add(node); + return node.span(); + } + case FUNCTION -> { + var node = parseClassMethod(header); + nodes.add(node); + return node.span(); + } + case EOF -> throw parserError("unexpectedEndOfFile"); + default -> { + if (lookahead.isKeyword()) { + throw parserError("keywordNotAllowedHere", lookahead.text()); + } + if (lookahead == Token.DOC_COMMENT) { + throw parserError("danglingDocComment"); + } + throw parserError("invalidTopLevelToken"); + } + } + } + + private TypeAlias parseTypeAlias(MemberHeader header) { + var typeAlias = next().span; + var startSpan = header.span(typeAlias); + + var identifier = parseIdentifier(); + TypeParameterList typePars = null; + if (lookahead == Token.LT) { + typePars = parseTypeParameterList(); + } + expect(Token.ASSIGN, "unexpectedToken", "="); + var type = parseType(); + var children = new ArrayList(header.annotations.size() + header.modifiers.size() + 5); + children.add(header.docComment); + children.addAll(header.annotations); + var modifiersOffset = header.annotations.size() + 1; + children.addAll(header.modifiers); + var nameOffset = modifiersOffset + header.modifiers.size(); + children.add(new Keyword(typeAlias)); + children.add(identifier); + children.add(typePars); + children.add(type); + return new TypeAlias(children, modifiersOffset, nameOffset, startSpan.endWith(type.span())); + } + + private Class parseClass(MemberHeader header) { + var classKeyword = next(); + var startSpan = header.span(classKeyword.span); + var children = new ArrayList(); + children.add(header.docComment); + children.addAll(header.annotations); + var modifiersOffset = header.annotations.size() + 1; + children.addAll(header.modifiers); + var nameOffset = modifiersOffset + header.modifiers.size(); + children.add(new Keyword(classKeyword.span)); + var name = parseIdentifier(); + children.add(name); + TypeParameterList typePars = null; + var end = name.span(); + if (lookahead == Token.LT) { + typePars = parseTypeParameterList(); + end = typePars.span(); + } + children.add(typePars); + Type superClass = null; + if (lookahead == Token.EXTENDS) { + next(); + superClass = parseType(); + end = superClass.span(); + } + children.add(superClass); + + ClassBody body = null; + if (lookahead == Token.LBRACE) { + body = parseClassBody(); + end = body.span(); + } + children.add(body); + + return new Class(children, modifiersOffset, nameOffset, startSpan.endWith(end)); + } + + private ClassBody parseClassBody() { + var start = expect(Token.LBRACE, "missingDelimiter", "{").span; + var children = new ArrayList(); + while (lookahead != Token.RBRACE && lookahead != Token.EOF) { + var entryHeader = parseMemberHeader(); + if (lookahead == Token.FUNCTION) { + children.add(parseClassMethod(entryHeader)); + } else { + children.add(parseClassProperty(entryHeader)); + } + } + if (lookahead == Token.EOF) { + throw new ParserError( + ErrorMessages.create("missingDelimiter", "}"), prev.span.stopSpan().move(1)); + } + var end = expect(Token.RBRACE, "missingDelimiter", "}").span; + return new ClassBody(children, start.endWith(end)); + } + + private ClassProperty parseClassProperty(MemberHeader header) { + var name = parseIdentifier(); + var start = header.span(name.span()); + var children = new ArrayList(); + children.add(header.docComment); + children.addAll(header.annotations); + var modifiersOffset = header.annotations.size() + 1; + children.addAll(header.modifiers); + var nameOffset = modifiersOffset + header.modifiers.size(); + TypeAnnotation typeAnnotation = null; + Expr expr = null; + var bodies = new ArrayList(); + if (lookahead == Token.COLON) { + typeAnnotation = parseTypeAnnotation(); + } + if (lookahead == Token.ASSIGN) { + next(); + expr = parseExpr(); + } else if (lookahead == Token.LBRACE) { + if (typeAnnotation != null) { + throw parserError("typeAnnotationInAmends"); + } + while (lookahead == Token.LBRACE) { + bodies.add(parseObjectBody()); + } + } + children.add(name); + children.add(typeAnnotation); + children.add(expr); + children.addAll(bodies); + if (expr != null) { + return new ClassProperty(children, modifiersOffset, nameOffset, start.endWith(expr.span())); + } + if (!bodies.isEmpty()) { + return new ClassProperty( + children, + modifiersOffset, + nameOffset, + start.endWith(bodies.get(bodies.size() - 1).span())); + } + if (typeAnnotation == null) { + throw new ParserError(ErrorMessages.create("invalidProperty"), name.span()); + } + return new ClassProperty( + children, modifiersOffset, nameOffset, start.endWith(typeAnnotation.span())); + } + + private ClassMethod parseClassMethod(MemberHeader header) { + var func = expect(Token.FUNCTION, "unexpectedToken", "function").span; + var start = header.span(func); + var headerSpanStart = header.modifierSpan(func); + var children = new ArrayList(); + children.add(header.docComment); + children.addAll(header.annotations); + var modifiersOffset = header.annotations.size() + 1; + children.addAll(header.modifiers); + var nameOffset = modifiersOffset + header.modifiers.size(); + var name = parseIdentifier(); + children.add(name); + TypeParameterList typePars = null; + if (lookahead == Token.LT) { + typePars = parseTypeParameterList(); + } + children.add(typePars); + var parameterList = parseParameterList(); + children.add(parameterList); + var end = parameterList.span(); + var endHeader = end; + TypeAnnotation typeAnnotation = null; + if (lookahead == Token.COLON) { + typeAnnotation = parseTypeAnnotation(); + end = typeAnnotation.span(); + endHeader = end; + } + children.add(typeAnnotation); + Expr expr = null; + if (lookahead == Token.ASSIGN) { + next(); + expr = parseExpr(); + end = expr.span(); + } + children.add(expr); + return new ClassMethod( + children, + modifiersOffset, + nameOffset, + headerSpanStart.endWith(endHeader), + start.endWith(end)); + } + + private ObjectBody parseObjectBody() { + var start = expect(Token.LBRACE, "unexpectedToken", "{").span; + List nodes = new ArrayList<>(); + var membersOffset = -1; + if (lookahead == Token.RBRACE) { + return new ObjectBody(List.of(), 0, start.endWith(next().span)); + } else if (lookahead == Token.UNDERSCORE) { + // it's a parameter + nodes.addAll(parseListOfParameter(Token.COMMA, Token.ARROW)); + expect(Token.ARROW, "unexpectedToken2", ",", "->"); + } else if (lookahead == Token.IDENTIFIER) { + // not sure what it is yet + var identifier = parseIdentifier(); + if (lookahead == Token.ARROW) { + // it's a parameter + next(); + nodes.add(new TypedIdentifier(identifier, null, identifier.span())); + } else if (lookahead == Token.COMMA) { + // it's a parameter + backtrack(); + nodes.addAll(parseListOfParameter(Token.COMMA, Token.ARROW)); + expect(Token.ARROW, "unexpectedToken2", ",", "->"); + } else if (lookahead == Token.COLON) { + // still not sure + var colon = next().span; + var type = parseType(); + var typeAnnotation = new TypeAnnotation(type, colon.endWith(type.span())); + if (lookahead == Token.COMMA) { + // it's a parameter + next(); + nodes.add( + new TypedIdentifier( + identifier, typeAnnotation, identifier.span().endWith(type.span()))); + nodes.addAll(parseListOfParameter(Token.COMMA, Token.ARROW)); + expect(Token.ARROW, "unexpectedToken2", ",", "->"); + } else if (lookahead == Token.ARROW) { + // it's a parameter + next(); + nodes.add( + new TypedIdentifier( + identifier, typeAnnotation, identifier.span().endWith(type.span()))); + } else { + // it's a member + expect(Token.ASSIGN, "unexpectedToken", "="); + var expr = parseExpr(); + membersOffset = 0; + nodes.add( + new ObjectMember.ObjectProperty( + Arrays.asList(identifier, typeAnnotation, expr), + 0, + identifier.span().endWith(expr.span()))); + } + } else { + // member + backtrack(); + } + } + + if (membersOffset < 0) { + membersOffset = nodes.size(); + } + // members + while (lookahead != Token.RBRACE) { + if (lookahead == Token.EOF) { + throw new ParserError( + ErrorMessages.create("missingDelimiter", "}"), prev.span.stopSpan().move(1)); + } + nodes.add(parseObjectMember()); + } + var end = next().span; + return new ObjectBody(nodes, membersOffset, start.endWith(end)); + } + + private ObjectMember parseObjectMember() { + return switch (lookahead) { + case IDENTIFIER -> { + next(); + if (lookahead == Token.LBRACE || lookahead == Token.COLON || lookahead == Token.ASSIGN) { + // it's an objectProperty + backtrack(); + yield parseObjectProperty(null); + } else { + backtrack(); + // it's an expression + yield parseObjectElement(); + } + } + case FUNCTION -> parseObjectMethod(List.of()); + case LPRED -> parseMemberPredicate(); + case LBRACK -> parseObjectEntry(); + case SPREAD, QSPREAD -> parseObjectSpread(); + case WHEN -> parseWhenGenerator(); + case FOR -> parseForGenerator(); + case TYPE_ALIAS, CLASS -> + throw new ParserError( + ErrorMessages.create("missingDelimiter", "}"), prev.span.stopSpan().move(1)); + default -> { + var modifiers = new ArrayList(); + while (lookahead.isModifier()) { + modifiers.add(parseModifier()); + } + if (!modifiers.isEmpty()) { + if (lookahead == Token.FUNCTION) { + yield parseObjectMethod(modifiers); + } else { + yield parseObjectProperty(modifiers); + } + } else { + yield parseObjectElement(); + } + } + }; + } + + private ObjectMember.ObjectElement parseObjectElement() { + var expr = parseExpr("}"); + return new ObjectMember.ObjectElement(expr, expr.span()); + } + + private ObjectMember parseObjectProperty(@Nullable List modifiers) { + var start = spanLookahead; + if (modifiers != null && !modifiers.isEmpty()) { + start = modifiers.get(0).span(); + } + var allModifiers = modifiers; + if (allModifiers == null) { + allModifiers = parseModifierList(); + } + var identifier = parseIdentifier(); + TypeAnnotation typeAnnotation = null; + if (lookahead == Token.COLON) { + typeAnnotation = parseTypeAnnotation(); + } + if (typeAnnotation != null || lookahead == Token.ASSIGN) { + expect(Token.ASSIGN, "unexpectedToken", "="); + var expr = parseExpr("}"); + var nodes = new ArrayList(allModifiers.size() + 4); + nodes.addAll(allModifiers); + nodes.add(identifier); + nodes.add(typeAnnotation); + nodes.add(expr); + return new ObjectMember.ObjectProperty( + nodes, allModifiers.size(), start.endWith(expr.span())); + } + var bodies = parseBodyList(); + var end = bodies.get(bodies.size() - 1).span(); + var nodes = new ArrayList(allModifiers.size() + 4); + nodes.addAll(allModifiers); + nodes.add(identifier); + nodes.add(null); + nodes.add(null); + nodes.addAll(bodies); + return new ObjectMember.ObjectProperty(nodes, allModifiers.size(), start.endWith(end)); + } + + private ObjectMember.ObjectMethod parseObjectMethod(List modifiers) { + var start = spanLookahead; + if (!modifiers.isEmpty()) { + start = modifiers.get(0).span(); + } + var function = expect(Token.FUNCTION, "unexpectedToken", "function").span; + var identifier = parseIdentifier(); + TypeParameterList params = null; + if (lookahead == Token.LT) { + params = parseTypeParameterList(); + } + var args = parseParameterList(); + TypeAnnotation typeAnnotation = null; + if (lookahead == Token.COLON) { + typeAnnotation = parseTypeAnnotation(); + } + expect(Token.ASSIGN, "unexpectedToken", "="); + var expr = parseExpr("}"); + var nodes = new ArrayList(modifiers.size() + 6); + nodes.addAll(modifiers); + nodes.add(new Keyword(function)); + nodes.add(identifier); + nodes.add(params); + nodes.add(args); + nodes.add(typeAnnotation); + nodes.add(expr); + return new ObjectMember.ObjectMethod(nodes, modifiers.size(), start.endWith(expr.span())); + } + + private ObjectMember parseMemberPredicate() { + var start = next().span; + var pred = parseExpr("]]"); + var firstBrack = expect(Token.RBRACK, "unexpectedToken", "]]").span; + Span secondbrack; + if (lookahead != Token.RBRACK) { + var text = _lookahead.text(lexer); + throw new ParserError(ErrorMessages.create("unexpectedToken", text, "]]"), firstBrack); + } else { + secondbrack = next().span; + } + if (firstBrack.charIndex() != secondbrack.charIndex() - 1) { + // There shouldn't be any whitespace between the first and second ']'. + var span = firstBrack.endWith(secondbrack); + var text = lexer.textFor(span.charIndex(), span.length()); + throw new ParserError(ErrorMessages.create("unexpectedToken", text, "]]"), firstBrack); + } + if (lookahead == Token.ASSIGN) { + next(); + var expr = parseExpr("}"); + return new ObjectMember.MemberPredicate(List.of(pred, expr), start.endWith(expr.span())); + } + var bodies = parseBodyList(); + var end = bodies.get(bodies.size() - 1).span(); + var nodes = new ArrayList(bodies.size() + 2); + nodes.add(pred); + nodes.add(null); + nodes.addAll(bodies); + return new ObjectMember.MemberPredicate(nodes, start.endWith(end)); + } + + private ObjectMember parseObjectEntry() { + var start = expect(Token.LBRACK, "unexpectedToken", "[").span; + var key = parseExpr("]"); + expect(Token.RBRACK, "unexpectedToken", "]"); + if (lookahead == Token.ASSIGN) { + next(); + var expr = parseExpr("}"); + return new ObjectMember.ObjectEntry(List.of(key, expr), start.endWith(expr.span())); + } + var bodies = parseBodyList(); + var end = bodies.get(bodies.size() - 1).span(); + var nodes = new ArrayList(bodies.size() + 2); + nodes.add(key); + nodes.add(null); + nodes.addAll(bodies); + return new ObjectMember.ObjectEntry(nodes, start.endWith(end)); + } + + private ObjectMember.ObjectSpread parseObjectSpread() { + var start = next(); + boolean isNullable = start.token == Token.QSPREAD; + var expr = parseExpr("}"); + return new ObjectMember.ObjectSpread(expr, isNullable, start.span.endWith(expr.span())); + } + + private ObjectMember.WhenGenerator parseWhenGenerator() { + var start = next().span; + expect(Token.LPAREN, "unexpectedToken", "("); + var pred = parseExpr(")"); + expect(Token.RPAREN, "unexpectedToken", ")"); + var body = parseObjectBody(); + var end = body.span(); + ObjectBody elseBody = null; + if (lookahead == Token.ELSE) { + next(); + elseBody = parseObjectBody(); + end = elseBody.span(); + } + return new ObjectMember.WhenGenerator(pred, body, elseBody, start.endWith(end)); + } + + private ObjectMember.ForGenerator parseForGenerator() { + var start = next().span; + expect(Token.LPAREN, "unexpectedToken", "("); + var par1 = parseParameter(); + Parameter par2 = null; + if (lookahead == Token.COMMA) { + next(); + par2 = parseParameter(); + } + expect(Token.IN, "unexpectedToken", "in"); + var expr = parseExpr(")"); + expect(Token.RPAREN, "unexpectedToken", ")"); + var body = parseObjectBody(); + return new ObjectMember.ForGenerator(par1, par2, expr, body, start.endWith(body.span())); + } + + private Expr parseExpr() { + return parseExpr(null); + } + + @SuppressWarnings("DuplicatedCode") + private Expr parseExpr(@Nullable String expectation) { + return parseExpr(expectation, 1); + } + + private Expr parseExpr(@Nullable String expectation, int minPrecedence) { + var expr = parseExprAtom(expectation); + var op = getOperator(); + while (op != null) { + if (op.getPrec() < minPrecedence) break; + // `-` must be in the same line as the left operand and have no semicolons inbetween + if (op == Operator.MINUS && (precededBySemicolon || _lookahead.newLinesBetween > 0)) break; + + next(); // operator + switch (op) { + case IS -> { + var type = parseType(); + expr = new Expr.TypeCheckExpr(expr, type, expr.span().endWith(type.span())); + } + case AS -> { + var type = parseType(); + expr = new Expr.TypeCastExpr(expr, type, expr.span().endWith(type.span())); + } + case DOT, QDOT -> { + var rhs = parseIdentifier(); + var isNullable = op == Operator.QDOT; + ArgumentList argumentList = null; + if (lookahead == Token.LPAREN + && !precededBySemicolon + && _lookahead.newLinesBetween == 0) { + argumentList = parseArgumentList(); + } + var lastSpan = argumentList != null ? argumentList.span() : rhs.span(); + expr = + new QualifiedAccessExpr( + expr, rhs, isNullable, argumentList, expr.span().endWith(lastSpan)); + } + default -> { + var nextMinPrec = op.isLeftAssoc() ? op.getPrec() + 1 : op.getPrec(); + var rhs = parseExpr(expectation, nextMinPrec); + expr = new BinaryOperatorExpr(expr, rhs, op, expr.span().endWith(rhs.span())); + } + } + op = getOperator(); + } + return expr; + } + + private @Nullable Operator getOperator() { + return switch (lookahead) { + case POW -> Operator.POW; + case STAR -> Operator.MULT; + case DIV -> Operator.DIV; + case INT_DIV -> Operator.INT_DIV; + case MOD -> Operator.MOD; + case PLUS -> Operator.PLUS; + case MINUS -> Operator.MINUS; + case GT -> Operator.GT; + case GTE -> Operator.GTE; + case LT -> Operator.LT; + case LTE -> Operator.LTE; + case IS -> Operator.IS; + case AS -> Operator.AS; + case EQUAL -> Operator.EQ_EQ; + case NOT_EQUAL -> Operator.NOT_EQ; + case AND -> Operator.AND; + case OR -> Operator.OR; + case PIPE -> Operator.PIPE; + case COALESCE -> Operator.NULL_COALESCE; + case DOT -> Operator.DOT; + case QDOT -> Operator.QDOT; + default -> null; + }; + } + + private Expr parseExprAtom(@Nullable String expectation) { + var expr = + switch (lookahead) { + case THIS -> new ThisExpr(next().span); + case OUTER -> new OuterExpr(next().span); + case MODULE -> new ModuleExpr(next().span); + case NULL -> new NullLiteralExpr(next().span); + case THROW -> { + var start = next().span; + expect(Token.LPAREN, "unexpectedToken", "("); + var exp = parseExpr(")"); + var end = expect(Token.RPAREN, "unexpectedToken", ")").span; + yield new ThrowExpr(exp, start.endWith(end)); + } + case TRACE -> { + var start = next().span; + expect(Token.LPAREN, "unexpectedToken", "("); + var exp = parseExpr(")"); + var end = expect(Token.RPAREN, "unexpectedToken", ")").span; + yield new TraceExpr(exp, start.endWith(end)); + } + case IMPORT -> { + var start = next().span; + expect(Token.LPAREN, "unexpectedToken", "("); + var strConst = parseStringConstant(); + var end = expect(Token.RPAREN, "unexpectedToken", ")").span; + yield new Expr.ImportExpr(strConst, false, start.endWith(end)); + } + case IMPORT_STAR -> { + var start = next().span; + expect(Token.LPAREN, "unexpectedToken", "("); + var strConst = parseStringConstant(); + var end = expect(Token.RPAREN, "unexpectedToken", ")").span; + yield new Expr.ImportExpr(strConst, true, start.endWith(end)); + } + case READ, READ_STAR, READ_QUESTION -> { + var readType = + switch (lookahead) { + case READ_QUESTION -> ReadType.NULL; + case READ_STAR -> ReadType.GLOB; + default -> ReadType.READ; + }; + var start = next().span; + expect(Token.LPAREN, "unexpectedToken", "("); + var exp = parseExpr(")"); + var end = expect(Token.RPAREN, "unexpectedToken", ")").span; + yield new ReadExpr(exp, readType, start.endWith(end)); + } + case NEW -> { + var start = next().span; + Type type = null; + if (lookahead != Token.LBRACE) { + type = parseType("{"); + } + var body = parseObjectBody(); + yield new NewExpr(type, body, start.endWith(body.span())); + } + case MINUS -> { + var start = next().span; + // calling `parseExprAtom` here and not `parseExpr` because + // unary minus has higher precendence than binary operators + var exp = parseExprAtom(expectation); + yield new UnaryMinusExpr(exp, start.endWith(exp.span())); + } + case NOT -> { + var start = next().span; + // calling `parseExprAtom` here and not `parseExpr` because + // logical not has higher precendence than binary operators + var exp = parseExprAtom(expectation); + yield new LogicalNotExpr(exp, start.endWith(exp.span())); + } + case LPAREN -> { + // can be function literal or parenthesized expression + var start = next().span; + yield switch (lookahead) { + case UNDERSCORE -> parseFunctionLiteral(start); + case IDENTIFIER -> parseFunctionLiteralOrParenthesized(start); + case RPAREN -> { + var endParen = next().span; + var paramList = new ParameterList(List.of(), start.endWith(endParen)); + expect(Token.ARROW, "unexpectedToken", "->"); + var exp = parseExpr(expectation); + yield new FunctionLiteralExpr(paramList, exp, start.endWith(exp.span())); + } + default -> { + // expression + var exp = parseExpr(")"); + var end = expect(Token.RPAREN, "unexpectedToken", ")").span; + yield new ParenthesizedExpr(exp, start.endWith(end)); + } + }; + } + case SUPER -> { + var start = next().span; + if (lookahead == Token.DOT) { + next(); + var identifier = parseIdentifier(); + if (lookahead == Token.LPAREN + && !precededBySemicolon + && _lookahead.newLinesBetween == 0) { + var args = parseArgumentList(); + yield new SuperAccessExpr(identifier, args, start.endWith(args.span())); + } else { + yield new SuperAccessExpr(identifier, null, start.endWith(identifier.span())); + } + } else { + expect(Token.LBRACK, "unexpectedToken", "["); + var exp = parseExpr("]"); + var end = expect(Token.RBRACK, "unexpectedToken", "]").span; + yield new SuperSubscriptExpr(exp, start.endWith(end)); + } + } + case IF -> { + var start = next().span; + expect(Token.LPAREN, "unexpectedToken", "("); + var pred = parseExpr(")"); + expect(Token.RPAREN, "unexpectedToken", ")"); + var then = parseExpr("else"); + expect(Token.ELSE, "unexpectedToken", "else"); + var elseCase = parseExpr(expectation); + yield new IfExpr(pred, then, elseCase, start.endWith(elseCase.span())); + } + case LET -> { + var start = next().span(); + expect(Token.LPAREN, "unexpectedToken", "("); + var param = parseParameter(); + expect(Token.ASSIGN, "unexpectedToken", "="); + var bindExpr = parseExpr(")"); + expect(Token.RPAREN, "unexpectedToken", ")"); + var exp = parseExpr(expectation); + yield new LetExpr(param, bindExpr, exp, start.endWith(exp.span())); + } + case TRUE -> new BoolLiteralExpr(true, next().span); + case FALSE -> new BoolLiteralExpr(false, next().span); + case INT, HEX, BIN, OCT -> { + var tk = next(); + yield new IntLiteralExpr(tk.text(lexer), tk.span); + } + case FLOAT -> { + var tk = next(); + yield new FloatLiteralExpr(tk.text(lexer), tk.span); + } + case STRING_START -> parseSingleLineStringLiteralExpr(); + case STRING_MULTI_START -> parseMultiLineStringLiteralExpr(); + case IDENTIFIER -> { + var identifier = parseIdentifier(); + if (lookahead == Token.LPAREN + && !precededBySemicolon + && _lookahead.newLinesBetween == 0) { + var args = parseArgumentList(); + yield new UnqualifiedAccessExpr( + identifier, args, identifier.span().endWith(args.span())); + } else { + yield new UnqualifiedAccessExpr(identifier, null, identifier.span()); + } + } + case EOF -> + throw new ParserError( + ErrorMessages.create("unexpectedEndOfFile"), prev.span.stopSpan().move(1)); + default -> { + var text = _lookahead.text(lexer); + if (expectation != null) { + throw parserError("unexpectedToken", text, expectation); + } + throw parserError("unexpectedTokenForExpression", text); + } + }; + return parseExprRest(expr); + } + + @SuppressWarnings("DuplicatedCode") + private Expr parseExprRest(Expr expr) { + // non null + if (lookahead == Token.NON_NULL) { + var end = next().span; + var res = new NonNullExpr(expr, expr.span().endWith(end)); + return parseExprRest(res); + } + // amends + if (lookahead == Token.LBRACE) { + if (expr instanceof ParenthesizedExpr + || expr instanceof AmendsExpr + || expr instanceof NewExpr) { + var body = parseObjectBody(); + return parseExprRest(new AmendsExpr(expr, body, expr.span().endWith(body.span()))); + } + throw parserError("unexpectedCurlyProbablyAmendsExpression", expr.text(lexer.getSource())); + } + // qualified access + if (lookahead == Token.DOT || lookahead == Token.QDOT) { + var isNullable = next().token == Token.QDOT; + var identifier = parseIdentifier(); + ArgumentList argumentList = null; + if (lookahead == Token.LPAREN && !precededBySemicolon && _lookahead.newLinesBetween == 0) { + argumentList = parseArgumentList(); + } + var lastSpan = argumentList != null ? argumentList.span() : identifier.span(); + var res = + new QualifiedAccessExpr( + expr, identifier, isNullable, argumentList, expr.span().endWith(lastSpan)); + return parseExprRest(res); + } + // subscript (needs to be in the same line as the expression) + if (lookahead == Token.LBRACK && !precededBySemicolon && _lookahead.newLinesBetween == 0) { + next(); + var exp = parseExpr("]"); + var end = expect(Token.RBRACK, "unexpectedToken", "]").span; + var res = new SubscriptExpr(expr, exp, expr.span().endWith(end)); + return parseExprRest(res); + } + return expr; + } + + private Expr parseSingleLineStringLiteralExpr() { + var start = next(); + var parts = new ArrayList(); + var builder = new StringBuilder(); + var startSpan = spanLookahead; + var end = spanLookahead; + while (lookahead != Token.STRING_END) { + switch (lookahead) { + case STRING_PART -> { + var tk = next(); + end = tk.span; + builder.append(tk.text(lexer)); + } + case STRING_ESCAPE_NEWLINE -> { + end = next().span; + builder.append('\n'); + } + case STRING_ESCAPE_TAB -> { + end = next().span; + builder.append('\t'); + } + case STRING_ESCAPE_QUOTE -> { + end = next().span; + builder.append('"'); + } + case STRING_ESCAPE_BACKSLASH -> { + end = next().span; + builder.append('\\'); + } + case STRING_ESCAPE_RETURN -> { + end = next().span; + builder.append('\r'); + } + case STRING_ESCAPE_UNICODE -> { + var tk = next(); + end = tk.span; + builder.append(parseUnicodeEscape(tk)); + } + case INTERPOLATION_START -> { + var istart = next().span; + if (!builder.isEmpty()) { + assert startSpan != null; + parts.add(new StringChars(builder.toString(), startSpan.endWith(end))); + builder = new StringBuilder(); + } + var exp = parseExpr(")"); + end = expect(Token.RPAREN, "unexpectedToken", ")").span; + parts.add(new StringPart.StringInterpolation(exp, istart.endWith(end))); + startSpan = spanLookahead; + } + case EOF -> { + var delimiter = new StringBuilder(start.text(lexer)).reverse().toString(); + throw parserError("missingDelimiter", delimiter); + } + } + } + if (!builder.isEmpty()) { + parts.add(new StringChars(builder.toString(), startSpan.endWith(end))); + } + end = next().span; + return new SingleLineStringLiteralExpr(parts, start.span, end, start.span.endWith(end)); + } + + private Expr parseMultiLineStringLiteralExpr() { + var start = next(); + var stringTokens = new ArrayList(); + while (lookahead != Token.STRING_END) { + switch (lookahead) { + case STRING_PART, + STRING_NEWLINE, + STRING_ESCAPE_NEWLINE, + STRING_ESCAPE_TAB, + STRING_ESCAPE_QUOTE, + STRING_ESCAPE_BACKSLASH, + STRING_ESCAPE_RETURN, + STRING_ESCAPE_UNICODE -> + stringTokens.add(new TempNode(next(), null)); + case INTERPOLATION_START -> { + var istart = next(); + var exp = parseExpr(")"); + var end = expect(Token.RPAREN, "unexpectedToken", ")").span; + var interpolation = new StringPart.StringInterpolation(exp, istart.span.endWith(end)); + stringTokens.add(new TempNode(null, interpolation)); + } + case EOF -> { + var delimiter = new StringBuilder(start.text(lexer)).reverse().toString(); + throw parserError("missingDelimiter", delimiter); + } + } + } + var end = next().span; + var fullSpan = start.span.endWith(end); + var parts = validateMultiLineString(stringTokens, fullSpan); + return new MultiLineStringLiteralExpr(parts, start.span, end, fullSpan); + } + + private List validateMultiLineString(List nodes, Span span) { + var firstNode = nodes.isEmpty() ? null : nodes.get(0); + if (firstNode == null + || firstNode.token == null + || firstNode.token.token != Token.STRING_NEWLINE) { + var errorSpan = firstNode == null ? span : firstNode.span(); + throw new ParserError(ErrorMessages.create("stringContentMustBeginOnNewLine"), errorSpan); + } + // only contains a newline + if (nodes.size() == 1) { + return List.of(new StringChars("", firstNode.span())); + } + var indent = getCommonIndent(nodes, span); + return renderString(nodes, indent); + } + + @SuppressWarnings("DataFlowIssue") + private List renderString(List nodes, String commonIndent) { + var parts = new ArrayList(); + var builder = new StringBuilder(); + var endOffset = nodes.get(nodes.size() - 1).token.token == Token.STRING_NEWLINE ? 1 : 2; + var isNewLine = true; + Span start = null; + Span end = null; + for (var i = 1; i < nodes.size() - endOffset; i++) { + var node = nodes.get(i); + if (node.node != null) { + if (!builder.isEmpty()) { + parts.add(new StringChars(builder.toString(), start.endWith(end))); + builder = new StringBuilder(); + start = null; + } + parts.add(node.node); + } else { + var token = node.token; + assert token != null; + if (start == null) { + start = token.span; + } + end = token.span; + switch (token.token) { + case STRING_NEWLINE -> { + builder.append('\n'); + isNewLine = true; + } + case STRING_PART -> { + var text = token.text(lexer); + if (isNewLine) { + if (text.startsWith(commonIndent)) { + builder.append(text, commonIndent.length(), text.length()); + } else { + var actualIndent = getLeadingIndentCount(text); + var textSpan = token.span.move(actualIndent).grow(-actualIndent); + throw new ParserError( + ErrorMessages.create("stringIndentationMustMatchLastLine"), textSpan); + } + } else { + builder.append(text); + } + isNewLine = false; + } + default -> { + if (isNewLine && !commonIndent.isEmpty()) { + throw new ParserError( + ErrorMessages.create("stringIndentationMustMatchLastLine"), token.span); + } + builder.append(getEscapeText(token)); + isNewLine = false; + } + } + } + } + if (!builder.isEmpty()) { + parts.add(new StringChars(builder.toString(), start.endWith(end))); + } + return parts; + } + + @SuppressWarnings("DuplicatedCode") + private Expr parseFunctionLiteralOrParenthesized(Span start) { + var identifier = parseIdentifier(); + return switch (lookahead) { + case COMMA -> { + next(); + var params = new ArrayList(); + params.add(new TypedIdentifier(identifier, null, identifier.span())); + params.addAll(parseListOfParameter(Token.COMMA, Token.RPAREN)); + var endParen = expect(Token.RPAREN, "unexpectedToken2", ",", ")").span; + var paramList = new ParameterList(params, start.endWith(endParen)); + expect(Token.ARROW, "unexpectedToken", "->"); + var expr = parseExpr(); + yield new FunctionLiteralExpr(paramList, expr, start.endWith(expr.span())); + } + case COLON -> { + var typeAnnotation = parseTypeAnnotation(); + var params = new ArrayList(); + params.add( + new TypedIdentifier( + identifier, typeAnnotation, identifier.span().endWith(typeAnnotation.span()))); + if (lookahead == Token.COMMA) { + next(); + params.addAll(parseListOfParameter(Token.COMMA, Token.RPAREN)); + } + var endParen = expect(Token.RPAREN, "unexpectedToken2", ",", ")").span; + var paramList = new ParameterList(params, start.endWith(endParen)); + expect(Token.ARROW, "unexpectedToken", "->"); + var expr = parseExpr(")"); + yield new FunctionLiteralExpr(paramList, expr, start.endWith(expr.span())); + } + case RPAREN -> { + // still not sure + var end = next().span; + if (lookahead == Token.ARROW) { + next(); + var expr = parseExpr(); + var params = new ArrayList(); + params.add(new TypedIdentifier(identifier, null, identifier.span())); + var paramList = new ParameterList(params, start.endWith(end)); + yield new FunctionLiteralExpr(paramList, expr, start.endWith(expr.span())); + } else { + var exp = new UnqualifiedAccessExpr(identifier, null, identifier.span()); + yield new ParenthesizedExpr(exp, start.endWith(end)); + } + } + default -> { + // this is an expression + backtrack(); + var expr = parseExpr(")"); + var end = expect(Token.RPAREN, "unexpectedToken", ")").span; + yield new ParenthesizedExpr(expr, start.endWith(end)); + } + }; + } + + private FunctionLiteralExpr parseFunctionLiteral(Span start) { + // the open parens is already parsed + var params = parseListOfParameter(Token.COMMA, Token.RPAREN); + var endParen = expect(Token.RPAREN, "unexpectedToken2", ",", ")").span; + var paramList = new ParameterList(params, start.endWith(endParen)); + expect(Token.ARROW, "unexpectedToken", "->"); + var expr = parseExpr(); + return new FunctionLiteralExpr(paramList, expr, start.endWith(expr.span())); + } + + private Type parseType() { + return parseType(null); + } + + private Type parseType(@Nullable String expectation) { + var defaultIndex = -1; + Span start = null; + if (lookahead == Token.STAR) { + defaultIndex = 0; + start = next().span; + } + var first = parseTypeAtom(expectation); + if (start == null) { + start = first.span(); + } + + if (lookahead != Token.UNION) { + if (defaultIndex == 0) { + throw new ParserError(ErrorMessages.create("notAUnion"), start.endWith(first.span())); + } + return first; + } + + var types = new ArrayList(); + types.add(first); + var end = start; + var i = 1; + while (lookahead == Token.UNION) { + next(); + if (lookahead == Token.STAR) { + if (defaultIndex != -1) { + throw parserError("multipleUnionDefaults"); + } + defaultIndex = i; + next(); + } + var type = parseTypeAtom(expectation); + types.add(type); + end = type.span(); + i++; + } + return new Type.UnionType(types, defaultIndex, start.endWith(end)); + } + + private Type parseTypeAtom(@Nullable String expectation) { + Type typ; + switch (lookahead) { + case UNKNOWN -> typ = new Type.UnknownType(next().span); + case NOTHING -> typ = new Type.NothingType(next().span); + case MODULE -> typ = new Type.ModuleType(next().span); + case LPAREN -> { + var tk = next(); + var children = new ArrayList(); + Span end; + if (lookahead == Token.RPAREN) { + end = next().span; + } else { + children.addAll(parseListOf(Token.COMMA, Token.RPAREN, () -> parseType(")"))); + end = expect(Token.RPAREN, "unexpectedToken2", ",", ")").span; + } + if (lookahead == Token.ARROW || children.size() > 1) { + expect(Token.ARROW, "unexpectedToken", "->"); + var ret = parseType(expectation); + children.add(ret); + typ = new Type.FunctionType(children, tk.span.endWith(ret.span())); + } else { + if (children.isEmpty()) { + throw new ParserError(ErrorMessages.create("unexpectedTokenForType", ")"), end); + } + typ = new ParenthesizedType((Type) children.get(0), tk.span.endWith(end)); + } + } + case IDENTIFIER -> { + var start = spanLookahead; + var name = parseQualifiedIdentifier(); + var end = name.span(); + TypeArgumentList typeArgumentList = null; + if (lookahead == Token.LT) { + typeArgumentList = parseTypeArgumentList(); + end = typeArgumentList.span(); + } + typ = new DeclaredType(name, typeArgumentList, start.endWith(end)); + } + case STRING_START -> { + var str = parseStringConstant(); + typ = new StringConstantType(str, str.span()); + } + default -> { + var text = _lookahead.text(lexer); + if (expectation != null) { + throw parserError("unexpectedTokenForType2", text, expectation); + } + throw parserError("unexpectedTokenForType", text); + } + } + + if (typ instanceof Type.FunctionType) return typ; + return parseTypeEnd(typ); + } + + private Type parseTypeEnd(Type type) { + // nullable types + if (lookahead == Token.QUESTION) { + var end = spanLookahead; + next(); + var res = new Type.NullableType(type, type.span().endWith(end)); + return parseTypeEnd(res); + } + // constrained types: have to start in the same line as the type + if (lookahead == Token.LPAREN && !precededBySemicolon && _lookahead.newLinesBetween == 0) { + next(); + var constraints = parseListOf(Token.COMMA, Token.RPAREN, () -> parseExpr(")")); + var end = expect(Token.RPAREN, "unexpectedToken2", ",", ")").span; + var children = new ArrayList(constraints.size() + 1); + children.add(type); + children.addAll(constraints); + var res = new Type.ConstrainedType(children, type.span().endWith(end)); + return parseTypeEnd(res); + } + return type; + } + + private Annotation parseAnnotation() { + var start = next().span; + var children = new ArrayList(2); + var type = parseType(); + children.add(type); + ObjectBody body = null; + var end = type.span(); + if (lookahead == Token.LBRACE) { + body = parseObjectBody(); + end = body.span(); + } + children.add(body); + return new Annotation(children, start.endWith(end)); + } + + private Parameter parseParameter() { + if (lookahead == Token.UNDERSCORE) { + var span = next().span; + return new Parameter.Underscore(span); + } + return parseTypedIdentifier(); + } + + private Modifier parseModifier() { + return switch (lookahead) { + case EXTERNAL -> new Modifier(Modifier.ModifierValue.EXTERNAL, next().span); + case ABSTRACT -> new Modifier(Modifier.ModifierValue.ABSTRACT, next().span); + case OPEN -> new Modifier(Modifier.ModifierValue.OPEN, next().span); + case LOCAL -> new Modifier(Modifier.ModifierValue.LOCAL, next().span); + case HIDDEN -> new Modifier(Modifier.ModifierValue.HIDDEN, next().span); + case FIXED -> new Modifier(Modifier.ModifierValue.FIXED, next().span); + case CONST -> new Modifier(Modifier.ModifierValue.CONST, next().span); + default -> throw new RuntimeException("Unreacheable code"); + }; + } + + private List parseModifierList() { + var modifiers = new ArrayList(); + while (lookahead.isModifier()) { + modifiers.add(parseModifier()); + } + return modifiers; + } + + private ParameterList parseParameterList() { + var start = expect(Token.LPAREN, "unexpectedToken", "(").span; + Span end; + List args = new ArrayList<>(); + if (lookahead == Token.RPAREN) { + end = next().span; + } else { + args = parseListOfParameter(Token.COMMA, Token.RPAREN); + end = expect(Token.RPAREN, "unexpectedToken2", ",", ")").span; + } + return new ParameterList(args, start.endWith(end)); + } + + private List parseBodyList() { + if (lookahead != Token.LBRACE) { + throw parserError("unexpectedToken2", _lookahead.text(lexer), "{", "="); + } + var bodies = new ArrayList(); + do { + bodies.add(parseObjectBody()); + } while (lookahead == Token.LBRACE); + return bodies; + } + + private TypeParameterList parseTypeParameterList() { + var start = expect(Token.LT, "unexpectedToken", "<").span; + var pars = parseListOf(Token.COMMA, Token.GT, this::parseTypeParameter); + var end = expect(Token.GT, "unexpectedToken2", ",", ">").span; + return new TypeParameterList(pars, start.endWith(end)); + } + + private TypeArgumentList parseTypeArgumentList() { + var start = expect(Token.LT, "unexpectedToken", "<").span; + var pars = parseListOf(Token.COMMA, Token.GT, this::parseType); + var end = expect(Token.GT, "unexpectedToken2", ",", ">").span; + return new TypeArgumentList(pars, start.endWith(end)); + } + + private ArgumentList parseArgumentList() { + var start = expect(Token.LPAREN, "unexpectedToken", "(").span; + if (lookahead == Token.RPAREN) { + return new ArgumentList(new ArrayList<>(), start.endWith(next().span)); + } + var exprs = parseListOf(Token.COMMA, Token.RPAREN, this::parseExpr); + var end = expect(Token.RPAREN, "unexpectedToken2", ",", ")").span; + return new ArgumentList(exprs, start.endWith(end)); + } + + private TypeParameter parseTypeParameter() { + TypeParameter.Variance variance = null; + var start = spanLookahead; + if (lookahead == Token.IN) { + next(); + variance = TypeParameter.Variance.IN; + } else if (lookahead == Token.OUT) { + next(); + variance = TypeParameter.Variance.OUT; + } + var identifier = parseIdentifier(); + return new TypeParameter(variance, identifier, start.endWith(identifier.span())); + } + + private TypedIdentifier parseTypedIdentifier() { + var identifier = parseIdentifier(); + TypeAnnotation typeAnnotation = null; + var end = identifier.span(); + if (lookahead == Token.COLON) { + typeAnnotation = parseTypeAnnotation(); + end = typeAnnotation.span(); + } + return new TypedIdentifier(identifier, typeAnnotation, identifier.span().endWith(end)); + } + + private TypeAnnotation parseTypeAnnotation() { + var start = expect(Token.COLON, "unexpectedToken", ":").span; + var type = parseType(); + return new TypeAnnotation(type, start.endWith(type.span())); + } + + private Identifier parseIdentifier() { + if (lookahead != Token.IDENTIFIER) { + if (lookahead.isKeyword()) { + throw parserError("keywordNotAllowedHere", lookahead.text()); + } + throw parserError("unexpectedToken", _lookahead.text(lexer), "identifier"); + } + var tk = next(); + var text = tk.text(lexer); + return new Identifier(text, tk.span); + } + + private StringConstant parseStringConstant() { + var start = spanLookahead; + var startTk = expect(Token.STRING_START, "unexpectedToken", "\""); + var builder = new StringBuilder(); + while (lookahead != Token.STRING_END) { + switch (lookahead) { + case STRING_PART -> builder.append(next().text(lexer)); + case STRING_ESCAPE_NEWLINE -> { + next(); + builder.append('\n'); + } + case STRING_ESCAPE_TAB -> { + next(); + builder.append('\t'); + } + case STRING_ESCAPE_QUOTE -> { + next(); + builder.append('"'); + } + case STRING_ESCAPE_BACKSLASH -> { + next(); + builder.append('\\'); + } + case STRING_ESCAPE_RETURN -> { + next(); + builder.append('\r'); + } + case STRING_ESCAPE_UNICODE -> builder.append(parseUnicodeEscape(next())); + case EOF -> { + var delimiter = new StringBuilder(startTk.text(lexer)).reverse().toString(); + throw parserError("missingDelimiter", delimiter); + } + case INTERPOLATION_START -> throw parserError("interpolationInConstant"); + // the lexer makes sure we only get the above tokens inside a string + default -> throw new RuntimeException("Unreacheable code"); + } + } + var end = next().span; + return new StringConstant(builder.toString(), start.endWith(end)); + } + + private String getEscapeText(FullToken tk) { + return switch (tk.token) { + case STRING_ESCAPE_NEWLINE -> "\n"; + case STRING_ESCAPE_QUOTE -> "\""; + case STRING_ESCAPE_BACKSLASH -> "\\"; + case STRING_ESCAPE_TAB -> "\t"; + case STRING_ESCAPE_RETURN -> "\r"; + case STRING_ESCAPE_UNICODE -> parseUnicodeEscape(tk); + default -> throw new RuntimeException("Unreacheable code"); + }; + } + + private String parseUnicodeEscape(FullToken tk) { + var text = tk.text(lexer); + var lastIndex = text.length() - 1; + var startIndex = text.indexOf('{', 2); + try { + var codepoint = Integer.parseInt(text.substring(startIndex + 1, lastIndex), 16); + return Character.toString(codepoint); + } catch (NumberFormatException e) { + throw new ParserError( + ErrorMessages.create("invalidUnicodeEscapeSequence", text, text.substring(0, startIndex)), + tk.span); + } + } + + private String getCommonIndent(List nodes, Span span) { + var lastNode = nodes.get(nodes.size() - 1); + if (lastNode.token == null) { + throw new ParserError( + ErrorMessages.create("closingStringDelimiterMustBeginOnNewLine"), lastNode.span()); + } + if (lastNode.token.token == Token.STRING_NEWLINE) return ""; + var beforeLast = nodes.get(nodes.size() - 2); + if (beforeLast.token != null && beforeLast.token.token == Token.STRING_NEWLINE) { + var indent = getTrailingIndent(lastNode); + if (indent != null) { + return indent; + } + } + throw new ParserError(ErrorMessages.create("closingStringDelimiterMustBeginOnNewLine"), span); + } + + private @Nullable String getTrailingIndent(TempNode node) { + var token = node.token; + if (token == null || token.token != Token.STRING_PART) return null; + var text = token.text(lexer); + for (var i = 0; i < text.length(); i++) { + var ch = text.charAt(i); + if (ch != ' ' && ch != '\t') return null; + } + return text; + } + + private int getLeadingIndentCount(String text) { + if (text.isEmpty()) return 0; + for (var i = 0; i < text.length(); i++) { + var ch = text.charAt(i); + if (ch != ' ' && ch != '\t') { + return i; + } + } + return text.length(); + } + + private record TempNode( + @Nullable FullToken token, @Nullable StringPart.StringInterpolation node) { + Span span() { + if (token != null) return token.span; + assert node != null; + return node.span(); + } + } + + private FullToken expect(Token type, String errorKey, Object... messageArgs) { + if (lookahead != type) { + var span = spanLookahead; + if (lookahead == Token.EOF || _lookahead.newLinesBetween > 0) { + // don't point at the EOF or the next line, but at the end of the last token + span = prev.span.stopSpan().move(1); + } + var args = messageArgs; + if (errorKey.startsWith("unexpectedToken")) { + args = new Object[messageArgs.length + 1]; + args[0] = lookahead == Token.EOF ? "EOF" : _lookahead.text(lexer); + System.arraycopy(messageArgs, 0, args, 1, messageArgs.length); + } + throw new ParserError(ErrorMessages.create(errorKey, args), span); + } + return next(); + } + + private List parseListOf(Token separator, Supplier parser) { + var res = new ArrayList(); + res.add(parser.get()); + while (lookahead == separator) { + next(); + res.add(parser.get()); + } + return res; + } + + private List parseListOf(Token separator, Token terminator, Supplier parser) { + var res = new ArrayList(); + res.add(parser.get()); + while (lookahead == separator) { + next(); + if (lookahead == terminator) { + break; + } + res.add(parser.get()); + } + return res; + } + + private List parseListOfParameter(Token separator, Token terminator) { + var res = new ArrayList(); + if (lookahead == terminator) { + return res; + } + + res.add(parseParameter()); + while (lookahead == separator) { + next(); + if (lookahead == terminator) { + break; + } + res.add(parseParameter()); + } + return res; + } + + private ParserError parserError(String messageKey, Object... args) { + return new ParserError(ErrorMessages.create(messageKey, args), spanLookahead); + } + + private record MemberHeader( + @Nullable DocComment docComment, List annotations, List modifiers) { + boolean isNotEmpty() { + return !(docComment == null && annotations.isEmpty() && modifiers.isEmpty()); + } + + @SuppressWarnings("DataFlowIssue") + @Nullable + Span span() { + return span(null); + } + + Span span(Span or) { + if (docComment != null) { + return docComment.span(); + } + if (!annotations().isEmpty()) { + return annotations.get(0).span(); + } + if (!modifiers().isEmpty()) { + return modifiers.get(0).span(); + } + return or; + } + + Span modifierSpan(Span or) { + if (!modifiers.isEmpty()) { + return modifiers.get(0).span(); + } + return or; + } + } + + private FullToken next() { + if (backtracking) { + backtracking = false; + lookahead = _lookahead.token; + spanLookahead = _lookahead.span; + return prev; + } + prev = _lookahead; + _lookahead = forceNext(); + lookahead = _lookahead.token; + spanLookahead = _lookahead.span; + return prev; + } + + private FullToken forceNext() { + var tk = lexer.next(); + precededBySemicolon = false; + var newLines = lexer.getNewLinesBetween(); + while (tk.isAffix()) { + precededBySemicolon = precededBySemicolon || tk == Token.SEMICOLON; + tk = lexer.next(); + newLines += lexer.getNewLinesBetween(); + } + return new FullToken(tk, lexer.span(), newLines); + } + + // Like next, but don't ignore comments + private FullToken nextComment() { + prev = _lookahead; + _lookahead = forceNextComment(); + lookahead = _lookahead.token; + spanLookahead = _lookahead.span; + return prev; + } + + private FullToken forceNextComment() { + var tk = lexer.next(); + precededBySemicolon = false; + while (tk == Token.SEMICOLON) { + precededBySemicolon = true; + tk = lexer.next(); + } + return new FullToken(tk, lexer.span(), lexer.getNewLinesBetween()); + } + + /** + * Backtrack to the previous token. + * + *

Can only backtrack one token. + */ + private void backtrack() { + assert !backtracking; + lookahead = prev.token; + spanLookahead = prev.span; + backtracking = true; + } + + private void ensureEmptyHeaders(MemberHeader header, String messageArg) { + if (header.isNotEmpty()) { + throw new ParserError( + ErrorMessages.create("wrongHeaders", messageArg), header.span(spanLookahead)); + } + } + + private record FullToken(Token token, Span span, int newLinesBetween) { + String text(Lexer lexer) { + return lexer.textFor(span.charIndex(), span.length()); + } + } +} diff --git a/pkl-parser/src/main/java/org/pkl/parser/syntax/Annotation.java b/pkl-parser/src/main/java/org/pkl/parser/syntax/Annotation.java index cb4bee488..5212767f8 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/syntax/Annotation.java +++ b/pkl-parser/src/main/java/org/pkl/parser/syntax/Annotation.java @@ -20,7 +20,7 @@ import org.pkl.parser.Span; import org.pkl.parser.util.Nullable; -public class Annotation extends AbstractNode { +public final class Annotation extends AbstractNode { public Annotation(List nodes, Span span) { super(span, nodes); } diff --git a/pkl-parser/src/main/java/org/pkl/parser/syntax/ArgumentList.java b/pkl-parser/src/main/java/org/pkl/parser/syntax/ArgumentList.java index a597b0f05..52cf7b01e 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/syntax/ArgumentList.java +++ b/pkl-parser/src/main/java/org/pkl/parser/syntax/ArgumentList.java @@ -1,5 +1,5 @@ /* - * Copyright © 2025 Apple Inc. and the Pkl project authors. All rights reserved. + * Copyright © 2025-2026 Apple Inc. and the Pkl project authors. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ import org.pkl.parser.Span; import org.pkl.parser.util.Nullable; -public class ArgumentList extends AbstractNode { +public final class ArgumentList extends AbstractNode { public ArgumentList(List arguments, Span span) { super(span, arguments); diff --git a/pkl-parser/src/main/java/org/pkl/parser/syntax/ClassBody.java b/pkl-parser/src/main/java/org/pkl/parser/syntax/ClassBody.java index 23654988d..4aeecb82f 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/syntax/ClassBody.java +++ b/pkl-parser/src/main/java/org/pkl/parser/syntax/ClassBody.java @@ -1,5 +1,5 @@ /* - * Copyright © 2025 Apple Inc. and the Pkl project authors. All rights reserved. + * Copyright © 2025-2026 Apple Inc. and the Pkl project authors. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ import org.pkl.parser.Span; import org.pkl.parser.util.Nullable; -public class ClassBody extends AbstractNode { +public final class ClassBody extends AbstractNode { public ClassBody(List nodes, Span span) { super(span, nodes); diff --git a/pkl-parser/src/main/java/org/pkl/parser/syntax/ClassMethod.java b/pkl-parser/src/main/java/org/pkl/parser/syntax/ClassMethod.java index edb4188e8..d830578e7 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/syntax/ClassMethod.java +++ b/pkl-parser/src/main/java/org/pkl/parser/syntax/ClassMethod.java @@ -20,7 +20,7 @@ import org.pkl.parser.Span; import org.pkl.parser.util.Nullable; -public class ClassMethod extends AbstractNode { +public final class ClassMethod extends AbstractNode { private final int modifiersOffset; private final int nameOffset; private final Span headerSpan; diff --git a/pkl-parser/src/main/java/org/pkl/parser/syntax/ExtendsOrAmendsClause.java b/pkl-parser/src/main/java/org/pkl/parser/syntax/ExtendsOrAmendsClause.java index 663fae469..699e7f418 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/syntax/ExtendsOrAmendsClause.java +++ b/pkl-parser/src/main/java/org/pkl/parser/syntax/ExtendsOrAmendsClause.java @@ -21,7 +21,7 @@ import org.pkl.parser.Span; import org.pkl.parser.util.Nullable; -public class ExtendsOrAmendsClause extends AbstractNode { +public final class ExtendsOrAmendsClause extends AbstractNode { private final Type type; public ExtendsOrAmendsClause(StringConstant url, Type type, Span span) { diff --git a/pkl-parser/src/main/java/org/pkl/parser/syntax/Keyword.java b/pkl-parser/src/main/java/org/pkl/parser/syntax/Keyword.java index 49b89513f..1fc5a4432 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/syntax/Keyword.java +++ b/pkl-parser/src/main/java/org/pkl/parser/syntax/Keyword.java @@ -1,5 +1,5 @@ /* - * Copyright © 2025 Apple Inc. and the Pkl project authors. All rights reserved. + * Copyright © 2025-2026 Apple Inc. and the Pkl project authors. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ import org.pkl.parser.ParserVisitor; import org.pkl.parser.Span; -public class Keyword extends AbstractNode { +public final class Keyword extends AbstractNode { public Keyword(Span span) { super(span, null); diff --git a/pkl-parser/src/main/java/org/pkl/parser/syntax/ParameterList.java b/pkl-parser/src/main/java/org/pkl/parser/syntax/ParameterList.java index a706c2418..b72f387b8 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/syntax/ParameterList.java +++ b/pkl-parser/src/main/java/org/pkl/parser/syntax/ParameterList.java @@ -1,5 +1,5 @@ /* - * Copyright © 2025 Apple Inc. and the Pkl project authors. All rights reserved. + * Copyright © 2025-2026 Apple Inc. and the Pkl project authors. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ import org.pkl.parser.Span; import org.pkl.parser.util.Nullable; -public class ParameterList extends AbstractNode { +public final class ParameterList extends AbstractNode { public ParameterList(List parameters, Span span) { super(span, parameters); } diff --git a/pkl-parser/src/main/java/org/pkl/parser/syntax/ReplInput.java b/pkl-parser/src/main/java/org/pkl/parser/syntax/ReplInput.java index c773e5c05..7d2e6ca9a 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/syntax/ReplInput.java +++ b/pkl-parser/src/main/java/org/pkl/parser/syntax/ReplInput.java @@ -1,5 +1,5 @@ /* - * Copyright © 2025 Apple Inc. and the Pkl project authors. All rights reserved. + * Copyright © 2025-2026 Apple Inc. and the Pkl project authors. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ import org.pkl.parser.Span; import org.pkl.parser.util.Nullable; -public class ReplInput extends AbstractNode { +public final class ReplInput extends AbstractNode { public ReplInput(List nodes, Span span) { super(span, nodes); } diff --git a/pkl-parser/src/main/java/org/pkl/parser/syntax/StringConstant.java b/pkl-parser/src/main/java/org/pkl/parser/syntax/StringConstant.java index bc65a391f..689f1dd6e 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/syntax/StringConstant.java +++ b/pkl-parser/src/main/java/org/pkl/parser/syntax/StringConstant.java @@ -20,7 +20,7 @@ import org.pkl.parser.Span; import org.pkl.parser.util.Nullable; -public class StringConstant extends AbstractNode { +public final class StringConstant extends AbstractNode { private final String string; public StringConstant(String string, Span span) { diff --git a/pkl-parser/src/main/java/org/pkl/parser/syntax/TypeAnnotation.java b/pkl-parser/src/main/java/org/pkl/parser/syntax/TypeAnnotation.java index 4a73756af..5991ce3d2 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/syntax/TypeAnnotation.java +++ b/pkl-parser/src/main/java/org/pkl/parser/syntax/TypeAnnotation.java @@ -20,7 +20,7 @@ import org.pkl.parser.Span; import org.pkl.parser.util.Nullable; -public class TypeAnnotation extends AbstractNode { +public final class TypeAnnotation extends AbstractNode { public TypeAnnotation(Type type, Span span) { super(span, List.of(type)); } diff --git a/pkl-parser/src/main/java/org/pkl/parser/syntax/TypeArgumentList.java b/pkl-parser/src/main/java/org/pkl/parser/syntax/TypeArgumentList.java index b01e99c2c..930d1ff00 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/syntax/TypeArgumentList.java +++ b/pkl-parser/src/main/java/org/pkl/parser/syntax/TypeArgumentList.java @@ -1,5 +1,5 @@ /* - * Copyright © 2025 Apple Inc. and the Pkl project authors. All rights reserved. + * Copyright © 2025-2026 Apple Inc. and the Pkl project authors. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ import org.pkl.parser.ParserVisitor; import org.pkl.parser.Span; -public class TypeArgumentList extends AbstractNode { +public final class TypeArgumentList extends AbstractNode { public TypeArgumentList(List children, Span span) { super(span, children); diff --git a/pkl-parser/src/main/java/org/pkl/parser/syntax/TypeParameterList.java b/pkl-parser/src/main/java/org/pkl/parser/syntax/TypeParameterList.java index f4297a287..b175b1e1b 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/syntax/TypeParameterList.java +++ b/pkl-parser/src/main/java/org/pkl/parser/syntax/TypeParameterList.java @@ -1,5 +1,5 @@ /* - * Copyright © 2025 Apple Inc. and the Pkl project authors. All rights reserved. + * Copyright © 2025-2026 Apple Inc. and the Pkl project authors. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ import org.pkl.parser.Span; import org.pkl.parser.util.Nullable; -public class TypeParameterList extends AbstractNode { +public final class TypeParameterList extends AbstractNode { public TypeParameterList(List parameters, Span span) { super(span, parameters); } diff --git a/pkl-parser/src/main/java/org/pkl/parser/syntax/generic/Node.java b/pkl-parser/src/main/java/org/pkl/parser/syntax/generic/Node.java index 98ff85a1d..810380596 100644 --- a/pkl-parser/src/main/java/org/pkl/parser/syntax/generic/Node.java +++ b/pkl-parser/src/main/java/org/pkl/parser/syntax/generic/Node.java @@ -1,5 +1,5 @@ /* - * Copyright © 2025 Apple Inc. and the Pkl project authors. All rights reserved. + * Copyright © 2025-2026 Apple Inc. and the Pkl project authors. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,7 +20,7 @@ import java.util.Objects; import org.pkl.parser.util.Nullable; -public class Node { +public final class Node { public final List children; public final FullSpan span; public final NodeType type;