const lex = require('./lex.js'); const utils = require('./utils.js'); /* How powerfully infix operators bind to their left and right arguments * If a value is bound more powerfully from one side, it becomes part of the operator * on that side first * * The relative binding power of operators determines their precedence, * and the relative binding power of the left and right side of a specific operator * determines its associativity: * If the *right* side binds more powerfully, the operator becomes *left* associative, * as any instance of that operator that appear first in the token stream are able * to grab their right arguments first, before further instances of the operator * (and vice versa) */ const InfixBindingPower = Object.freeze({ "+": [10, 11], "-": [10, 11], "*": [20, 21], "/": [20, 21], ".": [100, 101], "?": [6, 5], "=": [2, 1], "..": [4, 3], }); /* How powerfully a prefix operator binds the value to its right * Stacked prefix operators are unambiguous, so this value is only * useful in comparison to an infix or postfix operator, and determines * which operators this prefix will get "inside of" */ const PrefixBindingPower = Object.freeze({ "-": 40, "+": 40, }); /* How powerfully a prefix operator binds the value to its right * (works exactly the same as prefix binding power, but for operators on the right) */ const PostfixBindingPower = Object.freeze({ "[": 50, "(": 60, }); let InfixOpTrie = {}; let PrefixOpTrie = {}; let PostfixOpTrie = {}; /* turns a flat list of variable character length operators * into a trie (tree of operators, where each node * is indexed by the next character in the string) */ function buildOpTrie(allOps, opTrie) { for (let op in allOps) { let currentOpTrie = opTrie; for (let i = 0; i < op.length; i++) { if (currentOpTrie[op[i]] === undefined) { currentOpTrie[op[i]] = { more: {} }; } if (i == op.length - 1) { currentOpTrie[op[i]].op = allOps[op]; } currentOpTrie = currentOpTrie[op[i]].more; } } } // all reserved keywords const Keywords = Object.freeze([ "if", "else", "on", "wait", "for", "map", "in", "while", ]); // the types of AST nodes const ASTNode = Object.freeze({ Literal:1, // value literal, could be string, boolean, number, etc. PrefixOp:2, // prefix operator applied to sub-expression, i.e. -3 (3 is the sub-expression) InfixOp:3, // two sub-expressions with an operator between them Identifier:4, // some unresolved identifier, i.e variable_name_here Index:5, // some sub-expression being indexed by another expression, i.e. sub_expr[3] Call:6, // some sub-expression representing a function being called with a list of expressions Ternary:7, // a ternary expression, with a condition expr, true expr, and false expr ExprStatement:8, // a full statement that is just an expression Statements:9, // a continuous text block of statements Block:10, // a set of statements in a { } block If:11, // if statement with condition, trueCase, and optional falseCase For:12, // a for statement with loop variable, range, and loop body Map:13, // like a for statement, but is actually an expression where you can 'return' in the body and generate an array On:14, // handle a named event on a given target with given params, returning given values While:15, // while loop with condition Wait:16, // statement that pauses exeuction until the async call is done }); const StatementsWithSemicolon = Object.freeze([ ASTNode.ExprStatement, ASTNode.Wait, ]); // all reserved words that represent literals const Literals = Object.freeze({ "true": {node:ASTNode.Literal, value:true}, "false": {node:ASTNode.Literal, value:false}, }); class Parser { constructor(tokenStream, mustMatchBeforeConsiderExpression = 4) { this.tokenStream = tokenStream; this.tokenStream.index = 0; this.mustMatchBeforeConsiderExpression = mustMatchBeforeConsiderExpression; } block() { let { error:openError } = this.tokenStream.consume(lex.TokenType.Op, "{"); if (openError) return this.getError(openError); let { ast:statements, error, numMatched } = this.withBacktrack(this.statements); if (error) return this.getError(error); let { error:closeError } = this.tokenStream.consume(lex.TokenType.Op, "}"); if (closeError) return this.getError(closeError); return { ast: { node: ASTNode.Block, statements: statements.statements } }; } statements() { let statements = []; let error, mostErrorMatched = 0; while (!error) { if (!this.tokenStream.peek(lex.TokenType.Op, "}").error || this.tokenStream.eof()) { break; } let stmt, errorMatched; ({ ast:stmt, error, numMatched:errorMatched } = this.withBacktrack(this.statement)); if (error) { if (error.expectingMatch) return { error: error }; break; } else { statements.push(stmt); } } if (error) return this.getError({ msg: "Expecting statement", ...this.tokenStream.currentLinePos() }); return { ast: { node: ASTNode.Statements, statements: statements } }; } statement() { let { ast:stmt, error } = this.withBacktrack(this.statementInternal); if (error) { return this.getError(error); } else if (StatementsWithSemicolon.includes(stmt.node)) { if (error = this.tokenStream.consume(lex.TokenType.Op, ';').error) { return this.getError(error, true); } } return { ast:stmt }; } exprInBrackets(expectingMatch = true) { let error; error = this.tokenStream.consume(lex.TokenType.Op, "(").error; if (error) return this.getError(error, expectingMatch); let expr; ({ ast:expr, error } = this.withBacktrack(this.expression)); if (error) return this.getError(error, expectingMatch); error = this.tokenStream.consume(lex.TokenType.Op, ")").error; if (error) return this.getError(error, expectingMatch); return { ast:expr }; } whileStmt() { let error; error = this.tokenStream.consume(lex.TokenType.Word, "while").error; if (error) return this.getError(error); let condition; ({ ast:condition, error } = this.exprInBrackets()); if (error) return this.getError(error, true); let loop; ({ ast:loop, error } = this.withBacktrack(this.block)); if (error) return this.getError(error, true); return { ast: { node: ASTNode.While, condition: condition, loop: loop } }; } whileStmt() { let error; error = this.tokenStream.consume(lex.TokenType.Word, "wait").error; if (error) return this.getError(error); let expr; ({ ast:expr, error } = this.expression()); if (error) return this.getError(error, true); return { ast: { node: ASTNode.Wait, expr: expr } }; } onStmt() { let error; error = this.tokenStream.consume(lex.TokenType.Word, "on").error; if (error) return this.getError(error); let target; ({ ast:target, error } = this.expression()); if (error) return this.getError(error, true); let eventName; ({ token:eventName, error } = this.tokenStream.consume(lex.TokenType.Word)); if (error) return this.getError(error, true); let eventParams; ({ args:eventParams, error } = this.funcParams(true, true)); if (error) { eventParams = []; if (error.expectingMatch) { return this.getError(error); } } let eventValues; ({ args:eventValues, error } = this.funcParams(false, false)); if (error) { eventValues = []; if (error.expectingMatch) { return this.getError(error); } } let body; ({ ast:body, error } = this.withBacktrack(this.block)); if (error) return this.getError(error, true); return { ast: { node: ASTNode.On, target: target, eventName: eventName, eventParams: eventParams, eventValues: eventValues, body: body } }; } forStmt() { let error = this.tokenStream.consume(lex.TokenType.Word, "for").error; if (error) return this.getError(error); let { ast:forBody, error:bodyError } = this.forBody(); if (bodyError) return this.getError(bodyError, true); return { ast: { node: ASTNode.For, variable: forBody.variable, range: forBody.range, loop: forBody.loop } }; } forBody() { let error; error = this.tokenStream.consume(lex.TokenType.Op, "(").error; if (error) return this.getError(error, true); let variable; ({ token:variable, error } = this.tokenStream.consume(lex.TokenType.Word)); if (error) return this.getError(error, true); error = this.tokenStream.consume(lex.TokenType.Word, "in").error; if (error) return this.getError(error, true); let range; ({ ast:range, error } = this.withBacktrack(this.expression)); if (error) return this.getError(error, true); error = this.tokenStream.consume(lex.TokenType.Op, ")").error; if (error) return this.getError(error, true); let loop; ({ ast:loop, error } = this.withBacktrack(this.block)); if (error) return this.getError(error, true); return { ast: { variable: variable, range: range, loop: loop } }; } ifStmt() { let error; error = this.tokenStream.consume(lex.TokenType.Word, "if").error; if (error) return this.getError(error); let condition; ({ ast:condition, error } = this.exprInBrackets()); if (error) return this.getError(error, true); let trueCase; ({ ast:trueCase, error } = this.withBacktrack(this.block)); if (error) return this.getError(error, true); let ret = { ast: { node: ASTNode.If, condition: condition, trueCase: trueCase } }; if (!this.tokenStream.consume(lex.TokenType.Word, "else").error) { let falseCase; ({ ast:falseCase, error } = this.withBacktrack(this.block)); if (error) return this.getError(error, true); ret.ast.falseCase = falseCase; } return ret; } statementInternal() { return this.choice( [ { parse: this.expression, handle: ast => ({ node: ASTNode.ExprStatement, expr: ast }) }, { parse: this.ifStmt, handle: ast => ast }, { parse: this.forStmt, handle: ast => ast }, { parse: this.onStmt, handle: ast => ast }, { parse: this.whileStmt, handle: ast => ast }, { parse: this.waitStmt, handle: ast => ast }, ], "Expecting statement"); let error, bestError, numMatched, bestNumMatched = 0; } funcParams(consumeOpeningBracket, hasClosingBracket) { let error; if (consumeOpeningBracket) { if (error = this.tokenStream.consume(lex.TokenType.Op, "(").error) { if (error) return this.getError(error); } } /* Parsing a function call * lhs represents the function we are calling * We need to parse a list of expression separated by , as the arguments */ let args = []; while (true) { // peek the next op let { token:nextToken, type:nextType, pos:nextPos, error } = this.tokenStream.peek(); if (error) return this.getError(error, true); if (hasClosingBracket && nextType == lex.TokenType.Op && nextToken == ")") { // no more arguments, we should end the call (after consuming the bracket) this.tokenStream.consume(); break; } if (args.length > 0) { if (error = this.tokenStream.consume(lex.TokenType.Op, ",").error) { if (!hasClosingBracket) break; return this.getError(error, true); } } let arg; // avoid re-declaring error... annoying ({ ast:arg, error } = this.withBacktrack(this.expression)); if (error) return this.getError(error); // our new node is the old lhs as a function, called with the index expression args.push(arg); } return { args: args }; } expression() { let result = this.withBacktrack(this.expressionInternal, 0); if (result.error) return this.getError(result.error, result.error.expectingMatch || result.numMatched > this.mustMatchBeforeConsiderExpression); return result; } /* Parses an expression consisting of a set of values, all separated by infix operators, * as long none of the operators bind at least as powerfully as 'minBindingPower' * * This function will be called recursively to parse sub-expression, with the * minBindingPower set to whatever the last seen infix operator was. This ensures * that we never grab operators that can't be part of the subtree because their precedence * should have had them happen earlier * * The while loop in the middle of this function is the main feature that makes this a pratt parser, * and separates it from a standard recursive decent parser where you have to manually match on only * certain terms to maintain precedence * * Produces an error, or an AST with operator precedence guaranteed to be respected (sub-terms of operators are applications * of operators with higher precedence) * The nodes of the AST are marked with their type, and references to the subnodes, plus any relevent information * */ expressionInternal(minBindingPower) { // grab the type of the next token, and its string representation let { type, token, pos, error } = this.tokenStream.consume(); if (error) return this.getError(error); // start parsing the left-hand-side of the next operator at this AST node let lhs = null; if (type == lex.TokenType.Op) { // the binding power of the op we will read let power; // the value of this part of the expression starts with an operator // it will either be prefix operator, or a bracketed expression if (token == "(") { // parse a subexpression let { ast:expr, error } = this.withBacktrack(this.expression); if (error) return this.getError(error); lhs = expr; // ensure the next token after the sub expression is a closing paren, and consume it if (error = this.tokenStream.consume(lex.TokenType.Op, ")").error) { return this.getError(error); } } else { const largestOp = this.matchLargestOp(PrefixOpTrie, token, true); if (largestOp) { token = largestOp.op; let power = largestOp.value; // this is a prefix operator of the given power // get the expression it applies to... let { ast:rhs, error } = this.withBacktrack(this.expressionInternal, power); if (error) return this.getError(error); // ... and generate an AST node for it // this becomes the new LHS of the operator we are parsing lhs = { node:ASTNode.PrefixOp, rhs:rhs, op:token }; } else { return this.getError({ msg: "Invalid Op '" + token + "'", ...this.tokenStream.stringPosToLinePos(pos) }); } } } else if (type == lex.TokenType.Word) { if (token == "map") { let { ast:mapBody, error } = this.forBody(); if (error) return this.getError(error, true); lhs = { node:ASTNode.Map, variable: mapBody.variable, range: mapBody.range, loop: mapBody.loop }; } else if (Keywords.includes(token)) { // TODO: handle keywords that can go in expressions return this.getError({ msg: "Keyword '" + token + "' not expected", ...this.tokenStream.stringPosToLinePos(pos) }); } else if (Literals[token] !== undefined) { // this is a hardcoded keyword literal, we already defined how those nodes look lhs = Literals[token]; } else { // some unresolved identifier (this is the domain of the runtime/interpreter!) lhs = { node:ASTNode.Identifier, value:token }; } } else if (type == lex.TokenType.NumberLit) { lhs = {node:ASTNode.Literal, value:parseFloat(token)}; } else if (type == lex.TokenType.StringLit) { lhs = {node:ASTNode.Literal, value:token}; } else { return this.getError({ msg: "Unexpected " + lex.TokenType.keyName(type) + " of '" + token + "'", ...this.tokenStream.stringPosToLinePos(pos) }); } while (!this.tokenStream.eof()) { // start grabbing further operators and expression, that don't have higher precedence // (higher precedence operators will be parsed as sub-expressions) /* Expecting an operator now * Depending on its precedence, we may not actually get to parse it ourselves, * so don't actually consume the token yet, just peek it (just in case) */ let { type:opType, token:opToken, pos:opPos, error } = this.tokenStream.peek(lex.TokenType.Op); if (error) return { ast: lhs }; let largestOp = this.matchLargestOp(PostfixOpTrie, opToken); if (largestOp) { opToken = largestOp.op; let power = largestOp.value // this is a post-fix operator, but it may apply to... if (power < minBindingPower) { //... some outer expression (we were too low power to bind to lhs) break; } //... or our current lhs, in which case we consume the bracket this.tokenStream.consumeCount(opToken.length); if (opToken == "[") { /* Parsing an index on the current lhs * We need to parse the index expression. Note we reset the binding power, * this is because we will always end with ], so we don't need to worry about our current precedence */ let { ast:index, error } = this.withBacktrack(this.expression); if (error) return this.getError(error); // parse the closing ] if (error = this.tokenStream.consume(lex.TokenType.Op, "]").error) { return this.getError(error); } // our new node is the old lhs indexed with the list of argument expressions lhs = { node:ASTNode.Index, lhs:lhs, index:index }; } else if (opToken == "(") { // funcParams consumes the first ( token for us // false = don't consume opening bracket (we already did) // true = need closing bracket let { args, error } = this.funcParams(false, true); if (error) return this.getError(error, true); // generate the AST node lhs = {node:ASTNode.Call, func:lhs, args:args}; } else { return this.getError({ msg: "Unexpected postfix Op '" + opToken + "'", ...this.tokenStream.stringPosToLinePos(opPos) }); } // after parsing a postfix operator, we are expecting an operator again continue; } largestOp = this.matchLargestOp(InfixOpTrie, opToken, false); // if we didn't get an infix operator, we are done this expression if (!largestOp) { break; } opToken = largestOp.op; let [ leftBindingPower, rightBindingPower ] = largestOp.value; // if the binding power is too low, we are done parsing the current subexpression, leave // the current operator for our caller... if (leftBindingPower < minBindingPower) break; //... otherwise, this operator is for us, so consume it this.tokenStream.consumeCount(opToken.length); if (opToken == "?") { /* Parsing a ternary expression * We need to parse the 'true' condition, then a ':', then the 'false' condition * No expression has ':' naturally in it (except for us manually parsing it here) * so this expression will automatically end at the next ':', and we don't need * to worry about precedence */ let { ast:trueExpr, error } = this.withBacktrack(this.expression); if (error) return this.getError(error); // parse the ':' if (error = this.tokenStream.consume(lex.TokenType.Op, ":").error) { return this.getError(error); } { let { ast:falseExpr, error } = this.withBacktrack(this.expression); if (error) return this.getError(error); // our new node is the old lhs indexed with the list of argument expressions lhs = { node:ASTNode.Ternary, condition:lhs, trueCase:trueExpr, falseCase:falseExpr }; } } else { // the next expression is the right side of our current operator, but only as much as the // right-side binding power of our operator can grab let { ast:rhs, error } = this.withBacktrack(this.expressionInternal, rightBindingPower); if (error) return this.getError(error); // our new lhs becomes our applied operator, // leaving us free to match more operators of an appropriate precedence lhs = {node:ASTNode.InfixOp, lhs:lhs, rhs:rhs, op:opToken}; } } return { ast: lhs }; } matchLargestOp(opTrie, op, opAlreadyConsumed) { const ret = this.matchLargestOpInternal(opTrie, op); if (ret && ret.op.length > 1 && opAlreadyConsumed) { this.tokenStream.consumeCount(ret.op.length - 1); } return ret; } matchLargestOpInternal(opTrie, op) { const lastOp = op[op.length - 1]; if (opTrie[lastOp] !== undefined) { let bestOp = null, bestOpLength = 0; for (let nextOp in opTrie[lastOp].more) { if (!this.tokenStream.peekAt(op.length, lex.TokenType.Op, nextOp).error) { let betterOp = this.matchLargestOpInternal(opTrie[op].more, op + nextOp); if (betterOp && betterOp.op.length > bestOpLength) { bestOp = betterOp; bestOpLength = bestOp.op.length; } } } if (bestOp) { return bestOp; } if (opTrie[lastOp].op) { return { op: op, value: opTrie[lastOp].op }; } } return null; } /* attempt to match the input strema using this.parseFunction, * but if it fails, rollback the input stream to where it is now * * the return value is either the matched ast or an error, decorated with * the most number of tokens we ever managed to match before erroring */ withBacktrack(parseFunction, ...args) { this.tokenStream.startMatch(); const subtree = parseFunction.apply(this, args); let numMatched; if (subtree.error) { numMatched = this.tokenStream.failMatch(); } else { numMatched = this.tokenStream.finishMatch(); } return { numMatched: numMatched, ...subtree}; } getError(error, expectingMatch = undefined) { if (expectingMatch === undefined) { if (error.expectingMatch !== undefined) { expectingMatch = error.expectingMatch; } else { expectingMatch = false; } } return { error: { ...error, expectingMatch: expectingMatch } }; } choice(choices, genericError) { for (let choice of choices) { if (choice.parse && choice.handle) { let { ast, error, numMatched } = this.withBacktrack(choice.parse); if (!error) return { ast: choice.handle(ast) }; if (error.expectingMatch) return this.getError(error); } } return this.getError({ msg: genericError, ...this.tokenStream.currentLinePos() }); } } function getPrettyAST(ast) { return ast.mapKey("node", v => ASTNode.keyName(v)); } (function() { buildOpTrie(InfixBindingPower, InfixOpTrie); buildOpTrie(PrefixBindingPower, PrefixOpTrie); buildOpTrie(PostfixBindingPower, PostfixOpTrie); })(); module.exports = { Parser: Parser, getPrettyAST: getPrettyAST };