const lex = require('./lex.js');
const utils = require('./utils.js');
/* How powerfully infix operators bind to their left and right arguments
* If a value is bound more powerfully from one side, it becomes part of the operator
* on that side first
*
* The relative binding power of operators determines their precedence,
* and the relative binding power of the left and right side of a specific operator
* determines its associativity:
* If the *right* side binds more powerfully, the operator becomes *left* associative,
* as any instance of that operator that appear first in the token stream are able
* to grab their right arguments first, before further instances of the operator
* (and vice versa)
*/
const InfixBindingPower = Object.freeze({
"+": [10, 11],
"-": [10, 11],
"*": [20, 21],
"/": [20, 21],
".": [100, 101],
"?": [6, 5],
"=": [2, 1],
});
/* How powerfully a prefix operator binds the value to its right
* Stacked prefix operators are unambiguous, so this value is only
* useful in comparison to an infix or postfix operator, and determines
* which operators this prefix will get "inside of"
*/
const PrefixBindingPower = Object.freeze({
"-": 40,
"+": 40,
});
/* How powerfully a prefix operator binds the value to its right
* (works exactly the same as prefix binding power, but for operators on the right)
*/
const PostfixBindingPower = Object.freeze({
"[": 50,
"(": 60,
});
// all reserved keywords
const Keywords = Object.freeze([ "if", "on", "wait", "for", "map", "in" ]);
// the types of AST nodes
const ASTNode = Object.freeze({
Literal:1, // value literal, could be string, boolean, number, etc.
PrefixOp:2, // prefix operator applied to sub-expression, i.e. -3 (3 is the sub-expression)
InfixOp:3, // two sub-expressions with an operator between them
Identifier:4, // some unresolved identifier, i.e variable_name_here
Index:5, // some sub-expression being indexed by another expression, i.e. sub_expr[3]
Call:6, // some sub-expression representing a function being called with a list of expressions
Ternary:7, // a ternary expression, with a condition expr, true expr, and false expr
ExprStatement:8, // a full statement that is just an expression
Statements:9, // a continuous text block of statements
Block:10, // a set of statements in a { } block
});
const StatementsWithSemicolon = Object.freeze([
ASTNode.ExprStatement,
]);
// all reserved words that represent literals
const Literals = Object.freeze({
"true": {node:ASTNode.Literal, value:true},
"false": {node:ASTNode.Literal, value:false},
});
class Parser
{
constructor(tokenStream, mustConsumeBeforeUsingError = 4)
{
this.tokenStream = tokenStream;
this.tokenStream.index = 0;
this.mustConsumeBeforeUsingError = mustConsumeBeforeUsingError;
}
block()
{
let { error:openError } = this.tokenStream.consume(lex.TokenType.Op, "{");
if (openError) return this.getError(openError);
let { ast:statements, error, numMatched } = this.attemptSubtree(this.statements);
if (error && numMatched > this.mustConsumeBeforeUsingError) return this.getError(error);
let { error:closeError } = this.tokenStream.consume(lex.TokenType.Op, "}");
if (closeError) return this.getError(closeError);
return { ast: { node: ASTNode.Block, statements: statements.statements }, error: null };
}
statements()
{
let statements = [];
let error, mostErrorMatched = 0;
while (!error)
{
let stmt, errorMatched;
({ ast:stmt, error, numMatched:errorMatched } = this.attemptSubtree(this.statement));
if (error)
{
if (errorMatched > mostErrorMatched) mostErrorMatched = errorMatched;
}
else
{
statements.push(stmt);
}
}
if (mostErrorMatched > this.mustConsumeBeforeUsingError) return { ast: null, error: error };
return { ast: { node: ASTNode.Statements, statements: statements }, error: null };
}
statement()
{
let { ast:stmt, error } = this.attemptSubtree(this.statementInternal);
if (error)
{
return this.getError(error);
}
else if (StatementsWithSemicolon.includes(stmt.node))
{
if (error = this.tokenStream.consume(lex.TokenType.Op, ';').error)
{
return this.getError(error);
}
}
return { ast:stmt, error: null };
}
statementInternal()
{
return this.choice(
[
{ parse: this.expression, handle: ast => ({ node: ASTNode.ExprStatement, expr: ast }) }
],
"Expecting expression");
let error, bestError, numMatched, bestNumMatched = 0;
}
expression()
{
return this.expressionInternal(this.tokenStream, 0);
}
/* Parses an expression consisting of a set of values, all separated by infix operators,
* as long none of the operators bind at least as powerfully as 'minBindingPower'
*
* This function will be called recursively to parse sub-expression, with the
* minBindingPower set to whatever the last seen infix operator was. This ensures
* that we never grab operators that can't be part of the subtree because their precedence
* should have had them happen earlier
*
* The while loop in the middle of this function is the main feature that makes this a pratt parser,
* and separates it from a standard recursive decent parser where you have to manually match on only
* certain terms to maintain precedence
*
* Produces an error, or an AST with operator precedence guaranteed to be respected (sub-terms of operators are applications
* of operators with higher precedence)
* The nodes of the AST are marked with their type, and references to the subnodes, plus any relevent information
*
*/
expressionInternal(minBindingPower)
{
// grab the type of the next token, and its string representation
let { type, token, pos, error } = this.tokenStream.consume();
if (error) return this.getError(error);
// start parsing the left-hand-side of the next operator at this AST node
let lhs = null;
if (type == lex.TokenType.Op)
{
// the binding power of the op we will read
let power;
// the value of this part of the expression starts with an operator
// it will either be prefix operator, or a bracketed expression
if (token == "(")
{
// parse a subexpression
let { ast:expr, error } = this.attemptSubtree(this.expression);
if (error) return this.getError(error);
lhs = expr;
// ensure the next token after the sub expression is a closing paren, and consume it
if (error = this.tokenStream.consume(lex.TokenType.Op, ")").error)
{
return this.getError(error);
}
}
else if ((power = PrefixBindingPower[token]) !== undefined)
{
// this is a prefix operator of the given power
// get the expression it applies to...
let { ast:rhs, error } = this.attemptSubtree(this.expressionInternal, power);
if (error) return this.getError(error);
// ... and generate an AST node for it
// this becomes the new LHS of the operator we are parsing
lhs = { node:ASTNode.PrefixOp, rhs:rhs, op:token };
}
else
{
return this.getError({ msg: "Invalid Op '" + token + "'", ...this.tokenStream.stringPosToLinePos(pos) });
}
}
else if (type == lex.TokenType.Word)
{
if (Keywords.includes(token))
{
// TODO: handle keywords that can go in expressions
return this.getError({ msg: "Keyword '" + token + "' not expected", ...this.tokenStream.stringPosToLinePos(pos) });
}
else if (Literals[token] !== undefined)
{
// this is a hardcoded keyword literal, we already defined how those nodes look
lhs = Literals[token];
}
else
{
// some unresolved identifier (this is the domain of the runtime/interpreter!)
lhs = { node:ASTNode.Identifier, value:token };
}
}
else if (type == lex.TokenType.NumberLit)
{
lhs = {node:ASTNode.Literal, value:parseFloat(token)};
}
else if (type == lex.TokenType.StringLit)
{
lhs = {node:ASTNode.Literal, value:token};
}
else
{
return this.getError({ msg: "Unexpected " + lex.TokenType.keyName(type) + " of '" + token + "'", ...this.tokenStream.stringPosToLinePos(pos) });
}
while (!this.tokenStream.eof())
{
// start grabbing further operators and expression, that don't have higher precedence
// (higher precedence operators will be parsed as sub-expressions)
/* Expecting an operator now
* Depending on its precedence, we may not actually get to parse it ourselves,
* so don't actually consume the token yet, just peek it (just in case)
*/
let { type:opType, token:opToken, pos:opPos, error } = this.tokenStream.peek(lex.TokenType.Op);
if (error) return this.getError(error);
if (PostfixBindingPower[opToken] !== undefined)
{
// this is a post-fix operator, but it may apply to...
if (PostfixBindingPower[opToken] < minBindingPower)
{
//... some outer expression (we were too low power to bind to lhs)
break;
}
//... or our current lhs, in which case we get to consume the token
this.tokenStream.consume();
if (opToken == "[")
{
/* Parsing an index on the current lhs
* We need to parse the index expression. Note we reset the binding power,
* this is because we will always end with ], so we don't need to worry about our current precedence
*/
let { ast:index, error } = this.attemptSubtree(this.expression);
if (error) return this.getError(error);
// parse the closing ]
if (error = this.tokenStream.consume(lex.TokenType.Op, "]").error)
{
return this.getError(error);
}
// our new node is the old lhs indexed with the list of argument expressions
lhs = { node:ASTNode.Index, lhs:lhs, index:index };
}
else if (opToken == "(")
{
/* Parsing a function call
* lhs represents the function we are calling
* We need to parse a list of expression separated by , as the arguments
*/
let args = [];
while (true)
{
// peek the next op
let { token:nextToken, type:nextType, pos:nextPos, error } = this.tokenStream.peek();
if (error) return this.getError(error);
if (nextType == lex.TokenType.Op && nextToken == ")")
{
// no more arguments, we should end the call (after consuming the bracket)
this.tokenStream.consume();
break;
}
if (args.length > 0)
{
if (error = this.tokenStream.consume(lex.TokenType.Op, ",").error)
{
return this.getError(error);
}
}
{
// avoid re-declaring error... annoying
let { ast:arg, error } = this.attemptSubtree(this.expression);
if (error) return this.getError(error);
// our new node is the old lhs as a function, called with the index expression
args.push(arg);
}
}
// generate the AST node
lhs = {node:ASTNode.Call, func:lhs, args:args};
}
else
{
return this.getError({ msg: "Unexpected postfix Op '" + opToken + "'", ...this.tokenStream.stringPosToLinePos(opPos) });
}
// after parsing a postfix operator, we are expecting an operator again
continue;
}
// if we didn't get an infix operator, we are done this expression
if (InfixBindingPower[opToken] === undefined)
{
break;
}
let [ leftBindingPower, rightBindingPower ] = InfixBindingPower[opToken];
// if the binding power is too low, we are done parsing the current subexpression, leave
// the current operator for our caller...
if (leftBindingPower < minBindingPower) break;
//... otherwise, this operator is for us, so consume it
this.tokenStream.consume();
if (opToken == "?")
{
/* Parsing a ternary expression
* We need to parse the 'true' condition, then a ':', then the 'false' condition
* No expression has ':' naturally in it (except for us manually parsing it here)
* so this expression will automatically end at the next ':', and we don't need
* to worry about precedence
*/
let { ast:trueExpr, error } = this.attemptSubtree(this.expression);
if (error) return this.getError(error);
// parse the ':'
if (error = this.tokenStream.consume(lex.TokenType.Op, ":").error)
{
return this.getError(error);
}
{
let { ast:falseExpr, error } = this.attemptSubtree(this.expression);
if (error) return this.getError(error);
// our new node is the old lhs indexed with the list of argument expressions
lhs = { node:ASTNode.Ternary, condition:lhs, trueCase:trueExpr, falseCase:falseExpr };
}
}
else
{
// the next expression is the right side of our current operator, but only as much as the
// right-side binding power of our operator can grab
let { ast:rhs, error } = this.attemptSubtree(this.expressionInternal, rightBindingPower);
if (error) return this.getError(error);
// our new lhs becomes our applied operator,
// leaving us free to match more operators of an appropriate precedence
lhs = {node:ASTNode.InfixOp, lhs:lhs, rhs:rhs, op:opToken};
}
}
return { ast: lhs, error: null };
}
attemptSubtree(parseFunction, ...args)
{
this.tokenStream.startMatch();
const subtree = parseFunction.apply(this, args);
let numMatched;
if (subtree.error)
{
numMatched = this.tokenStream.failMatch();
}
else
{
numMatched = this.tokenStream.finishMatch();
}
return { numMatched: numMatched, ...subtree};
}
getError(error)
{
return { ast: null, error: error };
}
choice(choices, genericError)
{
let bestError, bestErrorNumMatched = 0;
for (let choice of choices)
{
if (choice.parse && choice.handle)
{
let { ast, error, numMatched } = this.attemptSubtree(choice.parse);
if (!error) return { ast: choice.handle(ast), error: null };
if (numMatched > bestErrorNumMatched)
{
bestError = error;
bestErrorNumMatched = numMatched;
}
}
}
if (bestError && bestErrorNumMatched > this.mustConsumeBeforeUsingError)
{
return { ast: null, error: bestError };
}
return { ast: null, error: { msg: genericError, ...this.tokenStream.currentLinePos() } };
}
}
function getPrettyAST(ast)
{
return ast.mapKey("node", v => ASTNode.keyName(v));
}
module.exports = { Parser: Parser, getPrettyAST: getPrettyAST };