diff --git a/parse.js b/parse.js index f1a45d5..bb6bfba 100644 --- a/parse.js +++ b/parse.js @@ -43,31 +43,35 @@ "(": 60, }); -let InfixOpTree = {}; -let PrefixOpTree = {}; -let PostfixOpTree = {}; -function buildOpTree(allOps, opTree) +let InfixOpTrie = {}; +let PrefixOpTrie = {}; +let PostfixOpTrie = {}; +/* turns a flat list of variable character length operators + * into a trie (tree of operators, where each node + * is indexed by the next character in the string) + */ +function buildOpTrie(allOps, opTrie) { for (let op in allOps) { - let currentOpTree = opTree; + let currentOpTrie = opTrie; for (let i = 0; i < op.length; i++) { - if (currentOpTree[op[i]] === undefined) + if (currentOpTrie[op[i]] === undefined) { - currentOpTree[op[i]] = { more: {} }; + currentOpTrie[op[i]] = { more: {} }; } if (i == op.length - 1) { - currentOpTree[op[i]].op = allOps[op]; + currentOpTrie[op[i]].op = allOps[op]; } - currentOpTree = currentOpTree[op[i]].more; + currentOpTrie = currentOpTrie[op[i]].more; } } } // all reserved keywords -const Keywords = Object.freeze([ "if", "on", "wait", "for", "map", "in" ]); +const Keywords = Object.freeze([ "if", "else", "on", "wait", "for", "map", "in", ]); // the types of AST nodes const ASTNode = Object.freeze({ @@ -111,13 +115,13 @@ let { error:openError } = this.tokenStream.consume(lex.TokenType.Op, "{"); if (openError) return this.getError(openError); - let { ast:statements, error, numMatched } = this.attemptSubtree(this.statements); + let { ast:statements, error, numMatched } = this.withBacktrack(this.statements); if (error) return this.getError(error); let { error:closeError } = this.tokenStream.consume(lex.TokenType.Op, "}"); if (closeError) return this.getError(closeError); - return { ast: { node: ASTNode.Block, statements: statements.statements }, error: null }; + return { ast: { node: ASTNode.Block, statements: statements.statements } }; } statements() @@ -134,11 +138,11 @@ let stmt, errorMatched; - ({ ast:stmt, error, numMatched:errorMatched } = this.attemptSubtree(this.statement)); + ({ ast:stmt, error, numMatched:errorMatched } = this.withBacktrack(this.statement)); if (error) { - if (error.expectingMatch) return { ast: null, error: error }; + if (error.expectingMatch) return { error: error }; break; } else @@ -149,12 +153,12 @@ if (error) return this.getError({ msg: "Expecting statement", ...this.tokenStream.currentLinePos() }); - return { ast: { node: ASTNode.Statements, statements: statements }, error: null }; + return { ast: { node: ASTNode.Statements, statements: statements } }; } statement() { - let { ast:stmt, error } = this.attemptSubtree(this.statementInternal); + let { ast:stmt, error } = this.withBacktrack(this.statementInternal); if (error) { return this.getError(error); @@ -167,7 +171,7 @@ } } - return { ast:stmt, error: null }; + return { ast:stmt }; } exprInBrackets(expectingMatch = true) @@ -178,13 +182,13 @@ if (error) return this.getError(error, expectingMatch); let expr; - ({ ast:expr, error } = this.attemptSubtree(this.expression)); + ({ ast:expr, error } = this.withBacktrack(this.expression)); if (error) return this.getError(error, expectingMatch); error = this.tokenStream.consume(lex.TokenType.Op, ")").error; if (error) return this.getError(error, expectingMatch); - return { ast:expr, error: null }; + return { ast:expr }; } onStmt() @@ -225,10 +229,10 @@ } let body; - ({ ast:body, error } = this.attemptSubtree(this.block)); + ({ ast:body, error } = this.withBacktrack(this.block)); if (error) return this.getError(error, true); - return { ast: { node: ASTNode.On, target: target, eventName: eventName, eventParams: eventParams, eventValues: eventValues }, error: null }; + return { ast: { node: ASTNode.On, target: target, eventName: eventName, eventParams: eventParams, eventValues: eventValues } }; } forStmt() @@ -239,7 +243,7 @@ let { ast:forBody, error:bodyError } = this.forBody(); if (bodyError) return this.getError(bodyError, true); - return { ast: { node: ASTNode.For, variable: forBody.variable, range: forBody.range, loop: forBody.loop }, error: null }; + return { ast: { node: ASTNode.For, variable: forBody.variable, range: forBody.range, loop: forBody.loop } }; } forBody() @@ -257,17 +261,17 @@ if (error) return this.getError(error, true); let range; - ({ ast:range, error } = this.attemptSubtree(this.expression)); + ({ ast:range, error } = this.withBacktrack(this.expression)); if (error) return this.getError(error, true); error = this.tokenStream.consume(lex.TokenType.Op, ")").error; if (error) return this.getError(error, true); let loop; - ({ ast:loop, error } = this.attemptSubtree(this.block)); + ({ ast:loop, error } = this.withBacktrack(this.block)); if (error) return this.getError(error, true); - return { ast: { variable: variable, range: range, loop: loop }, error: null }; + return { ast: { variable: variable, range: range, loop: loop } }; } ifStmt() @@ -282,15 +286,15 @@ if (error) return this.getError(error, true); let trueCase; - ({ ast:trueCase, error } = this.attemptSubtree(this.block)); + ({ ast:trueCase, error } = this.withBacktrack(this.block)); if (error) return this.getError(error, true); - let ret = { ast: { node: ASTNode.If, condition: condition, trueCase: trueCase }, error: null }; + let ret = { ast: { node: ASTNode.If, condition: condition, trueCase: trueCase } }; if (!this.tokenStream.consume(lex.TokenType.Word, "else").error) { let falseCase; - ({ ast:falseCase, error } = this.attemptSubtree(this.block)); + ({ ast:falseCase, error } = this.withBacktrack(this.block)); if (error) return this.getError(error, true); ret.ast.falseCase = falseCase; @@ -352,19 +356,19 @@ let arg; // avoid re-declaring error... annoying - ({ ast:arg, error } = this.attemptSubtree(this.expression)); + ({ ast:arg, error } = this.withBacktrack(this.expression)); if (error) return this.getError(error); // our new node is the old lhs as a function, called with the index expression args.push(arg); } - return { args: args, error: null }; + return { args: args }; } expression() { - let result = this.attemptSubtree(this.expressionInternal, 0); + let result = this.withBacktrack(this.expressionInternal, 0); if (result.error) return this.getError(result.error, result.error.expectingMatch || result.numMatched > this.mustMatchBeforeConsiderExpression); return result; } @@ -404,7 +408,7 @@ if (token == "(") { // parse a subexpression - let { ast:expr, error } = this.attemptSubtree(this.expression); + let { ast:expr, error } = this.withBacktrack(this.expression); if (error) return this.getError(error); lhs = expr; @@ -417,7 +421,7 @@ } else { - const largestOp = this.matchLargestOp(PrefixOpTree, token, true); + const largestOp = this.matchLargestOp(PrefixOpTrie, token, true); if (largestOp) { token = largestOp.op; @@ -425,7 +429,7 @@ // this is a prefix operator of the given power // get the expression it applies to... - let { ast:rhs, error } = this.attemptSubtree(this.expressionInternal, power); + let { ast:rhs, error } = this.withBacktrack(this.expressionInternal, power); if (error) return this.getError(error); // ... and generate an AST node for it @@ -486,9 +490,9 @@ * so don't actually consume the token yet, just peek it (just in case) */ let { type:opType, token:opToken, pos:opPos, error } = this.tokenStream.peek(lex.TokenType.Op); - if (error) return { ast: lhs, error: null }; + if (error) return { ast: lhs }; - let largestOp = this.matchLargestOp(PostfixOpTree, opToken); + let largestOp = this.matchLargestOp(PostfixOpTrie, opToken); if (largestOp) { opToken = largestOp.op; @@ -510,7 +514,7 @@ * We need to parse the index expression. Note we reset the binding power, * this is because we will always end with ], so we don't need to worry about our current precedence */ - let { ast:index, error } = this.attemptSubtree(this.expression); + let { ast:index, error } = this.withBacktrack(this.expression); if (error) return this.getError(error); // parse the closing ] @@ -542,7 +546,7 @@ continue; } - largestOp = this.matchLargestOp(InfixOpTree, opToken, false); + largestOp = this.matchLargestOp(InfixOpTrie, opToken, false); // if we didn't get an infix operator, we are done this expression if (!largestOp) { @@ -567,7 +571,7 @@ * so this expression will automatically end at the next ':', and we don't need * to worry about precedence */ - let { ast:trueExpr, error } = this.attemptSubtree(this.expression); + let { ast:trueExpr, error } = this.withBacktrack(this.expression); if (error) return this.getError(error); // parse the ':' @@ -577,7 +581,7 @@ } { - let { ast:falseExpr, error } = this.attemptSubtree(this.expression); + let { ast:falseExpr, error } = this.withBacktrack(this.expression); if (error) return this.getError(error); // our new node is the old lhs indexed with the list of argument expressions @@ -588,7 +592,7 @@ { // the next expression is the right side of our current operator, but only as much as the // right-side binding power of our operator can grab - let { ast:rhs, error } = this.attemptSubtree(this.expressionInternal, rightBindingPower); + let { ast:rhs, error } = this.withBacktrack(this.expressionInternal, rightBindingPower); if (error) return this.getError(error); // our new lhs becomes our applied operator, @@ -597,12 +601,12 @@ } } - return { ast: lhs, error: null }; + return { ast: lhs }; } - matchLargestOp(opTree, op, opAlreadyConsumed) + matchLargestOp(opTrie, op, opAlreadyConsumed) { - const ret = this.matchLargestOpInternal(opTree, op); + const ret = this.matchLargestOpInternal(opTrie, op); if (ret && ret.op.length > 1 && opAlreadyConsumed) { this.tokenStream.consumeCount(ret.op.length - 1); @@ -610,17 +614,17 @@ return ret; } - matchLargestOpInternal(opTree, op) + matchLargestOpInternal(opTrie, op) { const lastOp = op[op.length - 1]; - if (opTree[lastOp] !== undefined) + if (opTrie[lastOp] !== undefined) { let bestOp = null, bestOpLength = 0; - for (let nextOp in opTree[lastOp].more) + for (let nextOp in opTrie[lastOp].more) { if (!this.tokenStream.peekAt(op.length, lex.TokenType.Op, nextOp).error) { - let betterOp = this.matchLargestOpInternal(opTree[op].more, op + nextOp); + let betterOp = this.matchLargestOpInternal(opTrie[op].more, op + nextOp); if (betterOp && betterOp.op.length > bestOpLength) { bestOp = betterOp; @@ -632,15 +636,21 @@ { return bestOp; } - if (opTree[lastOp].op) + if (opTrie[lastOp].op) { - return { op: op, value: opTree[lastOp].op }; + return { op: op, value: opTrie[lastOp].op }; } } return null; } - attemptSubtree(parseFunction, ...args) + /* attempt to match the input strema using this.parseFunction, + * but if it fails, rollback the input stream to where it is now + * + * the return value is either the matched ast or an error, decorated with + * the most number of tokens we ever managed to match before erroring + */ + withBacktrack(parseFunction, ...args) { this.tokenStream.startMatch(); const subtree = parseFunction.apply(this, args); @@ -671,7 +681,7 @@ expectingMatch = false; } } - return { ast: null, error: { ...error, expectingMatch: expectingMatch } }; + return { error: { ...error, expectingMatch: expectingMatch } }; } choice(choices, genericError) @@ -680,8 +690,8 @@ { if (choice.parse && choice.handle) { - let { ast, error, numMatched } = this.attemptSubtree(choice.parse); - if (!error) return { ast: choice.handle(ast), error: null }; + let { ast, error, numMatched } = this.withBacktrack(choice.parse); + if (!error) return { ast: choice.handle(ast) }; if (error.expectingMatch) return this.getError(error); } } @@ -696,9 +706,9 @@ } (function() { - buildOpTree(InfixBindingPower, InfixOpTree); - buildOpTree(PrefixBindingPower, PrefixOpTree); - buildOpTree(PostfixBindingPower, PostfixOpTree); + buildOpTrie(InfixBindingPower, InfixOpTrie); + buildOpTrie(PrefixBindingPower, PrefixOpTrie); + buildOpTrie(PostfixBindingPower, PostfixOpTrie); })(); module.exports = { Parser: Parser, getPrettyAST: getPrettyAST };