const lex = require('./lex.js');
const utils = require('./utils.js');
/* How powerfully infix operators bind to their left and right arguments
* If a value is bound more powerfully from one side, it becomes part of the operator
* on that side first
*
* The relative binding power of operators determines their precedence,
* and the relative binding power of the left and right side of a specific operator
* determines its associativity:
* If the *right* side binds more powerfully, the operator becomes *left* associative,
* as any instance of that operator that appear first in the token stream are able
* to grab their right arguments first, before further instances of the operator
* (and vice versa)
*/
const InfixBindingPower = Object.freeze({
"+": [10, 11],
"-": [10, 11],
"*": [20, 21],
"/": [20, 21],
".": [100, 101],
"?": [6, 5],
"=": [2, 1],
"..": [4, 3],
});
/* How powerfully a prefix operator binds the value to its right
* Stacked prefix operators are unambiguous, so this value is only
* useful in comparison to an infix or postfix operator, and determines
* which operators this prefix will get "inside of"
*/
const PrefixBindingPower = Object.freeze({
"-": 40,
"+": 40,
});
/* How powerfully a prefix operator binds the value to its right
* (works exactly the same as prefix binding power, but for operators on the right)
*/
const PostfixBindingPower = Object.freeze({
"[": 50,
"(": 60,
});
let InfixOpTrie = {};
let PrefixOpTrie = {};
let PostfixOpTrie = {};
/* turns a flat list of variable character length operators
* into a trie (tree of operators, where each node
* is indexed by the next character in the string)
*/
function buildOpTrie(allOps, opTrie)
{
for (let op in allOps)
{
let currentOpTrie = opTrie;
for (let i = 0; i < op.length; i++)
{
if (currentOpTrie[op[i]] === undefined)
{
currentOpTrie[op[i]] = { more: {} };
}
if (i == op.length - 1)
{
currentOpTrie[op[i]].op = allOps[op];
}
currentOpTrie = currentOpTrie[op[i]].more;
}
}
}
// all reserved keywords
const Keywords = Object.freeze([ "if", "else", "on", "wait", "for", "map", "in", "while", ]);
// the types of AST nodes
const ASTNode = Object.freeze({
Literal:1, // value literal, could be string, boolean, number, etc.
PrefixOp:2, // prefix operator applied to sub-expression, i.e. -3 (3 is the sub-expression)
InfixOp:3, // two sub-expressions with an operator between them
Identifier:4, // some unresolved identifier, i.e variable_name_here
Index:5, // some sub-expression being indexed by another expression, i.e. sub_expr[3]
Call:6, // some sub-expression representing a function being called with a list of expressions
Ternary:7, // a ternary expression, with a condition expr, true expr, and false expr
ExprStatement:8, // a full statement that is just an expression
Statements:9, // a continuous text block of statements
Block:10, // a set of statements in a { } block
If:11, // if statement with condition, trueCase, and optional falseCase
For:12, // a for statement with loop variable, range, and loop body
Map:13, // like a for statement, but is actually an expression where you can 'return' in the body and generate an array
On:14, // handle a named event on a given target with given params, returning given values
While:15, // while loop with condition
Wait:16, // statement that pauses exeuction until the async call is done
});
const StatementsWithSemicolon = Object.freeze([
ASTNode.ExprStatement, ASTNode.Wait,
]);
// all reserved words that represent literals
const Literals = Object.freeze({
"true": {node:ASTNode.Literal, value:true},
"false": {node:ASTNode.Literal, value:false},
});
class Parser
{
constructor(tokenStream, mustMatchBeforeConsiderExpression = 4)
{
this.tokenStream = tokenStream;
this.tokenStream.index = 0;
this.mustMatchBeforeConsiderExpression = mustMatchBeforeConsiderExpression;
}
block()
{
let { error:openError } = this.tokenStream.consume(lex.TokenType.Op, "{");
if (openError) return this.getError(openError);
let { ast:statements, error, numMatched } = this.withBacktrack(this.statements);
if (error) return this.getError(error);
let { error:closeError } = this.tokenStream.consume(lex.TokenType.Op, "}");
if (closeError) return this.getError(closeError);
return { ast: { node: ASTNode.Block, statements: statements.statements } };
}
statements()
{
let statements = [];
let error, mostErrorMatched = 0;
while (!error)
{
if (!this.tokenStream.peek(lex.TokenType.Op, "}").error || this.tokenStream.eof())
{
break;
}
let stmt, errorMatched;
({ ast:stmt, error, numMatched:errorMatched } = this.withBacktrack(this.statement));
if (error)
{
if (error.expectingMatch) return { error: error };
break;
}
else
{
statements.push(stmt);
}
}
if (error) return this.getError({ msg: "Expecting statement", ...this.tokenStream.currentLinePos() });
return { ast: { node: ASTNode.Statements, statements: statements } };
}
statement()
{
let { ast:stmt, error } = this.withBacktrack(this.statementInternal);
if (error)
{
return this.getError(error);
}
else if (StatementsWithSemicolon.includes(stmt.node))
{
if (error = this.tokenStream.consume(lex.TokenType.Op, ';').error)
{
return this.getError(error, true);
}
}
return { ast:stmt };
}
exprInBrackets(expectingMatch = true)
{
let error;
error = this.tokenStream.consume(lex.TokenType.Op, "(").error;
if (error) return this.getError(error, expectingMatch);
let expr;
({ ast:expr, error } = this.withBacktrack(this.expression));
if (error) return this.getError(error, expectingMatch);
error = this.tokenStream.consume(lex.TokenType.Op, ")").error;
if (error) return this.getError(error, expectingMatch);
return { ast:expr };
}
whileStmt()
{
let error;
error = this.tokenStream.consume(lex.TokenType.Word, "while").error;
if (error) return this.getError(error);
let condition;
({ ast:condition, error } = this.exprInBrackets());
if (error) return this.getError(error, true);
let loop;
({ ast:loop, error } = this.withBacktrack(this.block));
if (error) return this.getError(error, true);
return { ast: { node: ASTNode.While, condition: condition, loop: loop } };
}
whileStmt()
{
let error;
error = this.tokenStream.consume(lex.TokenType.Word, "wait").error;
if (error) return this.getError(error);
let expr;
({ ast:expr, error } = this.expression());
if (error) return this.getError(error, true);
return { ast: { node: ASTNode.Wait, expr: expr } };
}
onStmt()
{
let error;
error = this.tokenStream.consume(lex.TokenType.Word, "on").error;
if (error) return this.getError(error);
let target;
({ ast:target, error } = this.expression());
if (error) return this.getError(error, true);
let eventName;
({ token:eventName, error } = this.tokenStream.consume(lex.TokenType.Word));
if (error) return this.getError(error, true);
let eventParams;
({ args:eventParams, error } = this.funcParams(true, true));
if (error)
{
eventParams = [];
if (error.expectingMatch)
{
return this.getError(error);
}
}
let eventValues;
({ args:eventValues, error } = this.funcParams(false, false));
if (error)
{
eventValues = [];
if (error.expectingMatch)
{
return this.getError(error);
}
}
let body;
({ ast:body, error } = this.withBacktrack(this.block));
if (error) return this.getError(error, true);
return { ast: { node: ASTNode.On, target: target, eventName: eventName, eventParams: eventParams, eventValues: eventValues, body: body } };
}
forStmt()
{
let error = this.tokenStream.consume(lex.TokenType.Word, "for").error;
if (error) return this.getError(error);
let { ast:forBody, error:bodyError } = this.forBody();
if (bodyError) return this.getError(bodyError, true);
return { ast: { node: ASTNode.For, variable: forBody.variable, range: forBody.range, loop: forBody.loop } };
}
forBody()
{
let error;
error = this.tokenStream.consume(lex.TokenType.Op, "(").error;
if (error) return this.getError(error, true);
let variable;
({ token:variable, error } = this.tokenStream.consume(lex.TokenType.Word));
if (error) return this.getError(error, true);
error = this.tokenStream.consume(lex.TokenType.Word, "in").error;
if (error) return this.getError(error, true);
let range;
({ ast:range, error } = this.withBacktrack(this.expression));
if (error) return this.getError(error, true);
error = this.tokenStream.consume(lex.TokenType.Op, ")").error;
if (error) return this.getError(error, true);
let loop;
({ ast:loop, error } = this.withBacktrack(this.block));
if (error) return this.getError(error, true);
return { ast: { variable: variable, range: range, loop: loop } };
}
ifStmt()
{
let error;
error = this.tokenStream.consume(lex.TokenType.Word, "if").error;
if (error) return this.getError(error);
let condition;
({ ast:condition, error } = this.exprInBrackets());
if (error) return this.getError(error, true);
let trueCase;
({ ast:trueCase, error } = this.withBacktrack(this.block));
if (error) return this.getError(error, true);
let ret = { ast: { node: ASTNode.If, condition: condition, trueCase: trueCase } };
if (!this.tokenStream.consume(lex.TokenType.Word, "else").error)
{
let falseCase;
({ ast:falseCase, error } = this.withBacktrack(this.block));
if (error) return this.getError(error, true);
ret.ast.falseCase = falseCase;
}
return ret;
}
statementInternal()
{
return this.choice(
[
{ parse: this.expression, handle: ast => ({ node: ASTNode.ExprStatement, expr: ast }) },
{ parse: this.ifStmt, handle: ast => ast },
{ parse: this.forStmt, handle: ast => ast },
{ parse: this.onStmt, handle: ast => ast },
{ parse: this.whileStmt, handle: ast => ast },
{ parse: this.waitStmt, handle: ast => ast },
],
"Expecting statement");
let error, bestError, numMatched, bestNumMatched = 0;
}
funcParams(consumeOpeningBracket, hasClosingBracket)
{
let error;
if (consumeOpeningBracket)
{
if (error = this.tokenStream.consume(lex.TokenType.Op, "(").error)
{
if (error) return this.getError(error);
}
}
/* Parsing a function call
* lhs represents the function we are calling
* We need to parse a list of expression separated by , as the arguments
*/
let args = [];
while (true)
{
// peek the next op
let { token:nextToken, type:nextType, pos:nextPos, error } = this.tokenStream.peek();
if (error) return this.getError(error, true);
if (hasClosingBracket && nextType == lex.TokenType.Op && nextToken == ")")
{
// no more arguments, we should end the call (after consuming the bracket)
this.tokenStream.consume();
break;
}
if (args.length > 0)
{
if (error = this.tokenStream.consume(lex.TokenType.Op, ",").error)
{
if (!hasClosingBracket) break;
return this.getError(error, true);
}
}
let arg;
// avoid re-declaring error... annoying
({ ast:arg, error } = this.withBacktrack(this.expression));
if (error) return this.getError(error);
// our new node is the old lhs as a function, called with the index expression
args.push(arg);
}
return { args: args };
}
expression()
{
let result = this.withBacktrack(this.expressionInternal, 0);
if (result.error) return this.getError(result.error, result.error.expectingMatch || result.numMatched > this.mustMatchBeforeConsiderExpression);
return result;
}
/* Parses an expression consisting of a set of values, all separated by infix operators,
* as long none of the operators bind at least as powerfully as 'minBindingPower'
*
* This function will be called recursively to parse sub-expression, with the
* minBindingPower set to whatever the last seen infix operator was. This ensures
* that we never grab operators that can't be part of the subtree because their precedence
* should have had them happen earlier
*
* The while loop in the middle of this function is the main feature that makes this a pratt parser,
* and separates it from a standard recursive decent parser where you have to manually match on only
* certain terms to maintain precedence
*
* Produces an error, or an AST with operator precedence guaranteed to be respected (sub-terms of operators are applications
* of operators with higher precedence)
* The nodes of the AST are marked with their type, and references to the subnodes, plus any relevent information
*
*/
expressionInternal(minBindingPower)
{
// grab the type of the next token, and its string representation
let { type, token, pos, error } = this.tokenStream.consume();
if (error) return this.getError(error);
// start parsing the left-hand-side of the next operator at this AST node
let lhs = null;
if (type == lex.TokenType.Op)
{
// the binding power of the op we will read
let power;
// the value of this part of the expression starts with an operator
// it will either be prefix operator, or a bracketed expression
if (token == "(")
{
// parse a subexpression
let { ast:expr, error } = this.withBacktrack(this.expression);
if (error) return this.getError(error);
lhs = expr;
// ensure the next token after the sub expression is a closing paren, and consume it
if (error = this.tokenStream.consume(lex.TokenType.Op, ")").error)
{
return this.getError(error);
}
}
else
{
const largestOp = this.matchLargestOp(PrefixOpTrie, token, true);
if (largestOp)
{
token = largestOp.op;
let power = largestOp.value;
// this is a prefix operator of the given power
// get the expression it applies to...
let { ast:rhs, error } = this.withBacktrack(this.expressionInternal, power);
if (error) return this.getError(error);
// ... and generate an AST node for it
// this becomes the new LHS of the operator we are parsing
lhs = { node:ASTNode.PrefixOp, rhs:rhs, op:token };
}
else
{
return this.getError({ msg: "Invalid Op '" + token + "'", ...this.tokenStream.stringPosToLinePos(pos) });
}
}
}
else if (type == lex.TokenType.Word)
{
if (token == "map")
{
let { ast:mapBody, error } = this.forBody();
if (error) return this.getError(error, true);
lhs = { node:ASTNode.Map, variable: mapBody.variable, range: mapBody.range, loop: mapBody.loop };
}
else if (Keywords.includes(token))
{
// TODO: handle keywords that can go in expressions
return this.getError({ msg: "Keyword '" + token + "' not expected", ...this.tokenStream.stringPosToLinePos(pos) });
}
else if (Literals[token] !== undefined)
{
// this is a hardcoded keyword literal, we already defined how those nodes look
lhs = Literals[token];
}
else
{
// some unresolved identifier (this is the domain of the runtime/interpreter!)
lhs = { node:ASTNode.Identifier, value:token };
}
}
else if (type == lex.TokenType.NumberLit)
{
lhs = {node:ASTNode.Literal, value:parseFloat(token)};
}
else if (type == lex.TokenType.StringLit)
{
lhs = {node:ASTNode.Literal, value:token};
}
else
{
return this.getError({ msg: "Unexpected " + lex.TokenType.keyName(type) + " of '" + token + "'", ...this.tokenStream.stringPosToLinePos(pos) });
}
while (!this.tokenStream.eof())
{
// start grabbing further operators and expression, that don't have higher precedence
// (higher precedence operators will be parsed as sub-expressions)
/* Expecting an operator now
* Depending on its precedence, we may not actually get to parse it ourselves,
* so don't actually consume the token yet, just peek it (just in case)
*/
let { type:opType, token:opToken, pos:opPos, error } = this.tokenStream.peek(lex.TokenType.Op);
if (error) return { ast: lhs };
let largestOp = this.matchLargestOp(PostfixOpTrie, opToken);
if (largestOp)
{
opToken = largestOp.op;
let power = largestOp.value
// this is a post-fix operator, but it may apply to...
if (power < minBindingPower)
{
//... some outer expression (we were too low power to bind to lhs)
break;
}
//... or our current lhs, in which case we consume the bracket
this.tokenStream.consumeCount(opToken.length);
if (opToken == "[")
{
/* Parsing an index on the current lhs
* We need to parse the index expression. Note we reset the binding power,
* this is because we will always end with ], so we don't need to worry about our current precedence
*/
let { ast:index, error } = this.withBacktrack(this.expression);
if (error) return this.getError(error);
// parse the closing ]
if (error = this.tokenStream.consume(lex.TokenType.Op, "]").error)
{
return this.getError(error);
}
// our new node is the old lhs indexed with the list of argument expressions
lhs = { node:ASTNode.Index, lhs:lhs, index:index };
}
else if (opToken == "(")
{
// funcParams consumes the first ( token for us
// false = don't consume opening bracket (we already did)
// true = need closing bracket
let { args, error } = this.funcParams(false, true);
if (error) return this.getError(error, true);
// generate the AST node
lhs = {node:ASTNode.Call, func:lhs, args:args};
}
else
{
return this.getError({ msg: "Unexpected postfix Op '" + opToken + "'", ...this.tokenStream.stringPosToLinePos(opPos) });
}
// after parsing a postfix operator, we are expecting an operator again
continue;
}
largestOp = this.matchLargestOp(InfixOpTrie, opToken, false);
// if we didn't get an infix operator, we are done this expression
if (!largestOp)
{
break;
}
opToken = largestOp.op;
let [ leftBindingPower, rightBindingPower ] = largestOp.value;
// if the binding power is too low, we are done parsing the current subexpression, leave
// the current operator for our caller...
if (leftBindingPower < minBindingPower) break;
//... otherwise, this operator is for us, so consume it
this.tokenStream.consumeCount(opToken.length);
if (opToken == "?")
{
/* Parsing a ternary expression
* We need to parse the 'true' condition, then a ':', then the 'false' condition
* No expression has ':' naturally in it (except for us manually parsing it here)
* so this expression will automatically end at the next ':', and we don't need
* to worry about precedence
*/
let { ast:trueExpr, error } = this.withBacktrack(this.expression);
if (error) return this.getError(error);
// parse the ':'
if (error = this.tokenStream.consume(lex.TokenType.Op, ":").error)
{
return this.getError(error);
}
{
let { ast:falseExpr, error } = this.withBacktrack(this.expression);
if (error) return this.getError(error);
// our new node is the old lhs indexed with the list of argument expressions
lhs = { node:ASTNode.Ternary, condition:lhs, trueCase:trueExpr, falseCase:falseExpr };
}
}
else
{
// the next expression is the right side of our current operator, but only as much as the
// right-side binding power of our operator can grab
let { ast:rhs, error } = this.withBacktrack(this.expressionInternal, rightBindingPower);
if (error) return this.getError(error);
// our new lhs becomes our applied operator,
// leaving us free to match more operators of an appropriate precedence
lhs = {node:ASTNode.InfixOp, lhs:lhs, rhs:rhs, op:opToken};
}
}
return { ast: lhs };
}
matchLargestOp(opTrie, op, opAlreadyConsumed)
{
const ret = this.matchLargestOpInternal(opTrie, op);
if (ret && ret.op.length > 1 && opAlreadyConsumed)
{
this.tokenStream.consumeCount(ret.op.length - 1);
}
return ret;
}
matchLargestOpInternal(opTrie, op)
{
const lastOp = op[op.length - 1];
if (opTrie[lastOp] !== undefined)
{
let bestOp = null, bestOpLength = 0;
for (let nextOp in opTrie[lastOp].more)
{
if (!this.tokenStream.peekAt(op.length, lex.TokenType.Op, nextOp).error)
{
let betterOp = this.matchLargestOpInternal(opTrie[op].more, op + nextOp);
if (betterOp && betterOp.op.length > bestOpLength)
{
bestOp = betterOp;
bestOpLength = bestOp.op.length;
}
}
}
if (bestOp)
{
return bestOp;
}
if (opTrie[lastOp].op)
{
return { op: op, value: opTrie[lastOp].op };
}
}
return null;
}
/* attempt to match the input strema using this.parseFunction,
* but if it fails, rollback the input stream to where it is now
*
* the return value is either the matched ast or an error, decorated with
* the most number of tokens we ever managed to match before erroring
*/
withBacktrack(parseFunction, ...args)
{
this.tokenStream.startMatch();
const subtree = parseFunction.apply(this, args);
let numMatched;
if (subtree.error)
{
numMatched = this.tokenStream.failMatch();
}
else
{
numMatched = this.tokenStream.finishMatch();
}
return { numMatched: numMatched, ...subtree};
}
getError(error, expectingMatch = undefined)
{
if (expectingMatch === undefined)
{
if (error.expectingMatch !== undefined)
{
expectingMatch = error.expectingMatch;
}
else
{
expectingMatch = false;
}
}
return { error: { ...error, expectingMatch: expectingMatch } };
}
choice(choices, genericError)
{
for (let choice of choices)
{
if (choice.parse && choice.handle)
{
let { ast, error, numMatched } = this.withBacktrack(choice.parse);
if (!error) return { ast: choice.handle(ast) };
if (error.expectingMatch) return this.getError(error);
}
}
return this.getError({ msg: genericError, ...this.tokenStream.currentLinePos() });
}
}
function getPrettyAST(ast)
{
return ast.mapKey("node", v => ASTNode.keyName(v));
}
(function() {
buildOpTrie(InfixBindingPower, InfixOpTrie);
buildOpTrie(PrefixBindingPower, PrefixOpTrie);
buildOpTrie(PostfixBindingPower, PostfixOpTrie);
})();
module.exports = { Parser: Parser, getPrettyAST: getPrettyAST };