diff --git a/parse.js b/parse.js
index 893afd1..c81e300 100644
--- a/parse.js
+++ b/parse.js
@@ -1,6 +1,6 @@
 const TokenType = Object.freeze({ Word:1, NumberLit:2, StringLit:3, Op:4 });
 
-/* takes a string, and returns a flat array of tokens
+/* takes a string, and returns a flat array of tokens, and a list of the locations of newlines in the source
  *
  * recognized types:
  * Word: as a token type, reprsents any symbol/word: variable names, or actual keywords
@@ -10,6 +10,17 @@
  */
 function lex(input)
 {
+	let inputLength = input.length;
+
+	let newlines = [];
+
+	let i = 0;
+	while ((i = input.indexOf("\n", i)) != -1)
+	{
+		newlines.push(i);
+		i++;
+	}
+
 	// break the input down into "parts": either words, digit strings, single character operators, or whitespace
 	let parts = [];
 
@@ -123,7 +134,11 @@
 	}
 
 	// keywords and identifiers are case insensitive
-	return tokens.map(t => t.type == TokenType.Word ? { ...t, token:t.token.trim().toLowerCase() } : t);
+	return {
+		tokens: tokens.map(t => t.type == TokenType.Word ? { ...t, token:t.token.trim().toLowerCase() } : t),
+		newlines: newlines,
+		inputLength: inputLength,
+	};
 }
 
 /* How powerfully infix operators bind to their left and right arguments
@@ -199,13 +214,29 @@
 	"false": {node:ASTNode.Literal, value:false},
 });
 
+function stringIndexToLinePos(index, newlines)
+{
+	let line = newlines.length + 1;
+	let previousLinesLength = 0;
+	for (let i = 0; i < newlines.length; i++)
+	{
+		if (index <= newlines[i])
+		{
+			line = i + 1;
+			break;
+		}
+		previousLinesLength = newlines[i] + 1;
+	}
+	return {line:line, pos:index - previousLinesLength + 1};
+}
+
 /* The main entry point for parsing
  *
  * Takes an array of tokens from the lexer, and produces an AST of the program as raw objects
  */
-function parseExpression(tokens)
+function parseExpression(tokenInfo)
 {
-	return parseExpressionInternal(tokens, 0);
+	return parseExpressionInternal(tokenInfo, 0);
 }
 
 
@@ -225,10 +256,14 @@
  * of operators with higher precedence)
  * The nodes of the AST are marked with their type, and references to the subnodes, plus any relevent information
  */
-function parseExpressionInternal(tokens, minBindingPower)
+function parseExpressionInternal(tokenInfo, minBindingPower)
 {
+	let {tokens, newlines, inputLength} = tokenInfo;
+
+	let eofError = "Expected more input at " + JSON.stringify(stringIndexToLinePos(inputLength - 1, newlines));
+
 	// grab the type of the next token, and its string representation
-	const {type, token} = tokens.shiftThrow("No more input");
+	const {type, token, index:tokenIndex} = tokens.shiftThrow(eofError);
 
 	// start parsing the left-hand-side of the next operator at this AST node
 	let lhs = null;
@@ -242,24 +277,24 @@
 		lhs = parseExpressionsInternal(tokens, 0);
 		// ensure the next token after the sub expression is a closing paren, and consume it
-		const {token:closeToken, type:closeType} = tokens.shiftThrow("No more input");
+		const {token:closeToken, type:closeType, index:closeIndex} = tokens.shiftThrow("Unclosed parenthesis, expecting ')' at " + JSON.stringify(stringIndexToLinePos(inputLength - 1, newlines)));
 		if (closeType != TokenType.Op && closeToken != ")")
 		{
-			throw "Unclosed parenthesis";
+			throw "Expecting ')', not '" + closeToken + "' at " + JSON.stringify(stringIndexToLinePos(closeIndex, newlines));
 		}
 	}
 	else if ((power = PrefixBindingPower[token]) !== undefined)
 	{
 		// this is a prefix operator of the given power
 		// get the expression it applies to...
-		const rhs = parseExpressionInternal(tokens, power);
+		const rhs = parseExpressionInternal({...tokenInfo, tokens:tokens}, power);
 		// ... and generate an AST node for it
 		// this becomes the new LHS of the operator we are parsing
 		lhs = {node:ASTNode.PrefixOp, rhs:rhs, op:token};
 	}
 	else
 	{
-		throw "Bad operator " + token;
+		throw "Invalid operator '" + token + "' at " + JSON.stringify(stringIndexToLinePos(tokenIndex, newlines));
 	}
 }
 else if (type == TokenType.Word)
 {
@@ -267,7 +302,7 @@
 	if (Keywords.includes(token))
 	{
 		// TODO: handle keywords that can go in expressions
-		throw "Keyword " + token + " not expected here";
+		throw "Keyword '" + token + "' not expected at " + JSON.stringify(stringIndexToLinePos(tokenIndex, newlines));
 	}
 	else if (Literals[token] !== undefined)
 	{
@@ -290,7 +325,7 @@
 	}
 	else
 	{
-		throw "Unexpcted token " + TokenType.keyName(type) + " of " + token;
+		throw "Unexpected token " + TokenType.keyName(type) + " of '" + token + "' at " + JSON.stringify(stringIndexToLinePos(tokenIndex, newlines));
 	}
 
 	while (tokens.length > 0)
@@ -302,10 +337,10 @@
		 * Depending on its precedence, we may not actually get to parse it ourselves,
		 * so don't actually consume the token yet, just peek it (just in case)
		 */
-		const {type:opType, token:opToken} = tokens[0];
+		const {type:opType, token:opToken, index:opIndex} = tokens[0];
 		if (opType != TokenType.Op)
 		{
-			throw "Operator expected here, instead of " + TokenType.keyName(opType) + " of " + opToken;
+			throw "Instead of " + TokenType.keyName(opType) + " of '" + opToken + "', expected operator at " + JSON.stringify(stringIndexToLinePos(opIndex, newlines));
 		}
 		if (PostfixBindingPower[opToken] !== undefined)
 		{
@@ -326,13 +361,13 @@
				 * We need to parse the index expression. Note we reset the binding power,
				 * this is because we will always end with ], so we don't need to worry about our current precedence
				 */
-				let index = parseExpressionInternal(tokens, 0);
+				let index = parseExpressionInternal({...tokenInfo, tokens:tokens}, 0);
 
 				// parse the closing ]
-				const {token:closeToken, type:closeType} = tokens.shiftThrow("No more input");
+				const {token:closeToken, type:closeType, index:closeIndex} = tokens.shiftThrow("Unclosed indexing expression, expecting ']' at " + JSON.stringify(stringIndexToLinePos(inputLength - 1, newlines)));
 				if (closeType != TokenType.Op && closeToken != "]")
 				{
-					throw "Unclosed indexing expression";
+					throw "Unclosed indexing expression, expecting ']' at " + JSON.stringify(stringIndexToLinePos(closeIndex, newlines));
 				}
 
 				// our new node is the old lhs indexed with the list of argument expressions
@@ -347,10 +382,10 @@
				let args = [];
				while (true)
				{
-					if (tokens.length == 0) throw "No more input";
+					if (tokens.length == 0) throw "Unclosed argument list, expected ',' or ')' at " + JSON.stringify(stringIndexToLinePos(inputLength - 1, newlines));
 
					// peek the next op
-					const {token:nextToken, type:nextType} = tokens[0];
+					const {token:nextToken, type:nextType, index:nextIndex} = tokens[0];
					if (nextType == TokenType.Op && nextToken == ")")
					{
						// no more arguments, we should end the call (after consuming the bracket)
@@ -363,13 +398,13 @@
						// if we have already parsed args, we need to consume a comma
						if (nextType != TokenType.Op || nextToken != ",")
						{
-							throw "Expected , or )";
+							throw "Unclosed argument list, expected ',' or ')' at " + JSON.stringify(stringIndexToLinePos(nextIndex, newlines));
						}
						tokens.shift();
					}
 
					// our new node is the old lhs as a function, called with the index expression
-					args.push(parseExpressionInternal(tokens, 0));
+					args.push(parseExpressionInternal({...tokenInfo, tokens:tokens}, 0));
				}
 
				// generate the AST node
@@ -377,7 +412,7 @@
			}
			else
			{
-				throw "Unexpected postfix operator " + opToken;
+				throw "Unexpected postfix operator '" + opToken + "' at " + JSON.stringify(stringIndexToLinePos(opIndex, newlines));
			}
 
			// after parsing a postfix operator, we are expecting an operator again
@@ -401,7 +436,7 @@
		// the next expression is the right side of our current operator, but only as much as the
		// right-side binding power of our operator can grab
-		let rhs = parseExpressionInternal(tokens, rightBinding);
+		let rhs = parseExpressionInternal({...tokenInfo, tokens:tokens}, rightBinding);
 
		// our new lhs becomes our applied operator,
		// leaving us free to match more operators of an appropriate precedence
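A minimal sketch of how the reworked entry points might be called once this lands; the sample source string, the assumption that "+" is a recognized infix operator, and the reported positions are illustrative only, not code from this change:

	// lex() now returns {tokens, newlines, inputLength} rather than a bare token array
	const tokenInfo = lex("1 +\nfoo");
	// tokenInfo.newlines would be [3], the index of the single "\n" in the source
	const ast = parseExpression(tokenInfo);
	// a token starting at source index 5 (second character of line 2) would be reported as
	// stringIndexToLinePos(5, tokenInfo.newlines) == {line:2, pos:2} in the thrown error messages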