const lex = require('./lex.js'); const utils = require('./utils.js'); /* How powerfully infix operators bind to their left and right arguments * If a value is bound more powerfully from one side, it becomes part of the operator * on that side first * * The relative binding power of operators determines their precedence, * and the relative binding power of the left and right side of a specific operator * determines its associativity: * If the *right* side binds more powerfully, the operator becomes *left* associative, * as any instance of that operator that appear first in the token stream are able * to grab their right arguments first, before further instances of the operator * (and vice versa) */ const InfixBindingPower = Object.freeze({ "+": [10, 11], "-": [10, 11], "*": [20, 21], "/": [20, 21], ".": [100, 101], "?": [6, 5], "=": [2, 1], }); /* How powerfully a prefix operator binds the value to its right * Stacked prefix operators are unambiguous, so this value is only * useful in comparison to an infix or postfix operator, and determines * which operators this prefix will get "inside of" */ const PrefixBindingPower = Object.freeze({ "-": 40, "+": 40, }); /* How powerfully a prefix operator binds the value to its right * (works exactly the same as prefix binding power, but for operators on the right) */ const PostfixBindingPower = Object.freeze({ "[": 50, "(": 60, }); // all reserved keywords const Keywords = Object.freeze([ "if" ]); // the types of AST nodes const ASTNode = Object.freeze({ Literal:1, // value literal, could be string, boolean, number, etc. PrefixOp:2, // prefix operator applied to sub-expression, i.e. -3 (3 is the sub-expression) InfixOp:3, // two sub-expressions with an operator between them Identifier:4, // some unresolved identifier, i.e variable_name_here Index:5, // some sub-expression being indexed by another expression, i.e. sub_expr[3] Call:6, // some sub-expression representing a function being called with a list of expressions Ternary:7, // a ternary expression, with a condition expr, true expr, and false expr ExprStatement:8, // a full statement that is just an expression Statements:9, // a continuous text block of statements Block:10, // a set of statements in a { } block }); // all reserved words that represent literals const Literals = Object.freeze({ "true": {node:ASTNode.Literal, value:true}, "false": {node:ASTNode.Literal, value:false}, }); class Parser { constructor(tokenStream, mustConsumeBeforeUsingError = 4) { this.tokenStream = tokenStream; this.tokenStream.index = 0; this.mustConsumeBeforeUsingError = mustConsumeBeforeUsingError; } block() { let { error:openError } = this.tokenStream.consume(lex.TokenType.Op, "{"); if (openError) return this.getError(openError); let { ast:statements, error, numMatched } = this.attemptSubtree(this.statements); if (error && numMatched > this.mustConsumeBeforeUsingError) return this.getError(this.error); let { error:closeError } = this.tokenStream.consume(lex.TokenType.Op, "}"); if (closeError) return this.getError(closeError); return { ast: { node: ASTNode.Block, statements: statements }, error: null }; } statements() { let statements = []; let error, mostErrorMatched = 0; while (!error) { let stmt, errorMatched; ({ ast:stmt, error, numMatched:errorMatched } = this.attemptSubtree(this.statement)); if (error) { if (errorMatched > mostErrorMatched) mostErrorMatched = errorMatched; } else { statements.push(stmt); } } if (mostErrorMatched > this.mustConsumeBeforeUsingError) return { ast: null, error: error }; return { ast: { node: ASTNode.Statements, statements: statements }, error: null }; } statement() { let { ast:stmt, error } = this.attemptSubtree(this.statementInternal); if (error) { return this.getError(error); } else { if (error = this.tokenStream.consume(lex.TokenType.Op, ';').error) { return this.getError(error); } } return { ast:stmt, error: null }; } statementInternal() { return this.choice( [ { parse: this.expression, handle: ast => ({ node: ASTNode.ExprStatement, expr: ast }) } ], "Expecting expression"); let error, bestError, numMatched, bestNumMatched = 0; } expression() { return this.expressionInternal(this.tokenStream, 0); } /* Parses an expression consisting of a set of values, all separated by infix operators, * as long none of the operators bind at least as powerfully as 'minBindingPower' * * This function will be called recursively to parse sub-expression, with the * minBindingPower set to whatever the last seen infix operator was. This ensures * that we never grab operators that can't be part of the subtree because their precedence * should have had them happen earlier * * The while loop in the middle of this function is the main feature that makes this a pratt parser, * and separates it from a standard recursive decent parser where you have to manually match on only * certain terms to maintain precedence * * Produces an error, or an AST with operator precedence guaranteed to be respected (sub-terms of operators are applications * of operators with higher precedence) * The nodes of the AST are marked with their type, and references to the subnodes, plus any relevent information * */ expressionInternal(minBindingPower) { // grab the type of the next token, and its string representation let { type, token, pos, error } = this.tokenStream.consume(); if (error) return this.getError(error); // start parsing the left-hand-side of the next operator at this AST node let lhs = null; if (type == lex.TokenType.Op) { // the binding power of the op we will read let power; // the value of this part of the expression starts with an operator // it will either be prefix operator, or a bracketed expression if (token == "(") { // parse a subexpression let { ast:expr, error } = this.attemptSubtree(this.expression); if (error) return this.getError(error); lhs = expr; // ensure the next token after the sub expression is a closing paren, and consume it if (error = this.tokenStream.consume(lex.TokenType.Op, ")").error) { return this.getError(error); } } else if ((power = PrefixBindingPower[token]) !== undefined) { // this is a prefix operator of the given power // get the expression it applies to... let { ast:rhs, error } = this.attemptSubtree(this.expressionInternal, power); if (error) return this.getError(error); // ... and generate an AST node for it // this becomes the new LHS of the operator we are parsing lhs = { node:ASTNode.PrefixOp, rhs:rhs, op:token }; } else { return this.getError({ msg: "Invalid Op '" + token + "'", ...this.tokenStream.stringPosToLinePos(pos) }); } } else if (type == lex.TokenType.Word) { if (Keywords.includes(token)) { // TODO: handle keywords that can go in expressions return this.getError({ msg: "Keyword '" + token + "' not expected", ...this.tokenStream.stringPosToLinePos(pos) }); } else if (Literals[token] !== undefined) { // this is a hardcoded keyword literal, we already defined how those nodes look lhs = Literals[token]; } else { // some unresolved identifier (this is the domain of the runtime/interpreter!) lhs = { node:ASTNode.Identifier, value:token }; } } else if (type == lex.TokenType.NumberLit) { lhs = {node:ASTNode.Literal, value:parseFloat(token)}; } else if (type == lex.TokenType.StringLit) { lhs = {node:ASTNode.Literal, value:token}; } else { return this.getError({ msg: "Unexpected " + lex.TokenType.keyName(type) + " of '" + token + "'", ...this.tokenStream.stringPosToLinePos(pos) }); } while (!this.tokenStream.eof()) { // start grabbing further operators and expression, that don't have higher precedence // (higher precedence operators will be parsed as sub-expressions) /* Expecting an operator now * Depending on its precedence, we may not actually get to parse it ourselves, * so don't actually consume the token yet, just peek it (just in case) */ let { type:opType, token:opToken, pos:opPos, error } = this.tokenStream.peek(lex.TokenType.Op); if (error) return this.getError(error); if (PostfixBindingPower[opToken] !== undefined) { // this is a post-fix operator, but it may apply to... if (PostfixBindingPower[opToken] < minBindingPower) { //... some outer expression (we were too low power to bind to lhs) break; } //... or our current lhs, in which case we get to consume the token this.tokenStream.consume(); if (opToken == "[") { /* Parsing an index on the current lhs * We need to parse the index expression. Note we reset the binding power, * this is because we will always end with ], so we don't need to worry about our current precedence */ let { ast:index, error } = this.attemptSubtree(this.expression); if (error) return this.getError(error); // parse the closing ] if (error = this.tokenStream.consume(lex.TokenType.Op, "]").error) { return this.getError(error); } // our new node is the old lhs indexed with the list of argument expressions lhs = { node:ASTNode.Index, lhs:lhs, index:index }; } else if (opToken == "(") { /* Parsing a function call * lhs represents the function we are calling * We need to parse a list of expression separated by , as the arguments */ let args = []; while (true) { // peek the next op let { token:nextToken, type:nextType, pos:nextPos, error } = this.tokenStream.peek(); if (error) return this.getError(error); if (nextType == lex.TokenType.Op && nextToken == ")") { // no more arguments, we should end the call (after consuming the bracket) this.tokenStream.consume(); break; } if (args.length > 0) { if (error = this.tokenStream.consume(lex.TokenType.Op, ",").error) { return this.getError(error); } } { // avoid re-declaring error... annoying let { ast:arg, error } = this.attemptSubtree(this.expression); if (error) return this.getError(error); // our new node is the old lhs as a function, called with the index expression args.push(arg); } } // generate the AST node lhs = {node:ASTNode.Call, func:lhs, args:args}; } else { return this.getError({ msg: "Unexpected postfix Op '" + opToken + "'", ...this.tokenStream.stringPosToLinePos(opPos) }); } // after parsing a postfix operator, we are expecting an operator again continue; } // if we didn't get an infix operator, we are done this expression if (InfixBindingPower[opToken] === undefined) { break; } let [ leftBindingPower, rightBindingPower ] = InfixBindingPower[opToken]; // if the binding power is too low, we are done parsing the current subexpression, leave // the current operator for our caller... if (leftBindingPower < minBindingPower) break; //... otherwise, this operator is for us, so consume it this.tokenStream.consume(); if (opToken == "?") { /* Parsing a ternary expression * We need to parse the 'true' condition, then a ':', then the 'false' condition * No expression has ':' naturally in it (except for us manually parsing it here) * so this expression will automatically end at the next ':', and we don't need * to worry about precedence */ let { ast:trueExpr, error } = this.attemptSubtree(this.expression); if (error) return this.getError(error); // parse the ':' if (error = this.tokenStream.consume(lex.TokenType.Op, ":").error) { return this.getError(error); } { let { ast:falseExpr, error } = this.attemptSubtree(this.expression); if (error) return this.getError(error); // our new node is the old lhs indexed with the list of argument expressions lhs = { node:ASTNode.Ternary, condition:lhs, trueCase:trueExpr, falseCase:falseExpr }; } } else { // the next expression is the right side of our current operator, but only as much as the // right-side binding power of our operator can grab let { ast:rhs, error } = this.attemptSubtree(this.expressionInternal, rightBindingPower); if (error) return this.getError(error); // our new lhs becomes our applied operator, // leaving us free to match more operators of an appropriate precedence lhs = {node:ASTNode.InfixOp, lhs:lhs, rhs:rhs, op:opToken}; } } return { ast: lhs, error: null }; } attemptSubtree(parseFunction, ...args) { this.tokenStream.startMatch(); const subtree = parseFunction.apply(this, args); let numMatched; if (subtree.error) { numMatched = this.tokenStream.failMatch(); } else { numMatched = this.tokenStream.finishMatch(); } return { numMatched: numMatched, ...subtree}; } getError(error) { return { ast: null, error: error }; } choice(choices, genericError) { let bestError, bestErrorNumMatched = 0; for (let choice of choices) { if (choice.parse && choice.handle) { let { ast, error, numMatched } = this.attemptSubtree(choice.parse); if (!error) return { ast: choice.handle(ast), error: null }; if (numMatched > bestErrorNumMatched) { bestError = error; bestErrorNumMatched = numMatched; } } } if (bestError && bestErrorNumMatched > this.mustConsumeBeforeUsingError) { return { ast: null, error: bestError }; } return { ast: null, error: { msg: genericError, ...this.tokenStream.currentLinePos() } }; } } function getPrettyAST(ast) { return ast.mapKey("node", v => ASTNode.keyName(v)); } module.exports = { Parser: Parser, getPrettyAST: getPrettyAST };