diff --git a/lex.js b/lex.js index e4afe2b..d24183c 100644 --- a/lex.js +++ b/lex.js @@ -16,7 +16,8 @@ this.tokens = lexed.tokens; this.newlines = lexed.newlines; - // when we attempt to match a sub-rule, save the location where we roll back to + // when we attempt to match a sub-rule, save the location where we roll back to, + // as well as the max number of tokens we ever matched this.savedLocations = []; } @@ -52,7 +53,11 @@ { if (token.type !== type) { - return { error: { msg: "Expecting " . TokenType.keyName(type) + ", not " + TokenType.keyName(token.type) + " '" + token.token + "'", ...this.tokenToLinePos(token) } }; + if (tokenValue !== undefined) + { + return { error: { msg: "Expecting " + TokenType.keyName(type) + " '" + tokenValue + "', not " + TokenType.keyName(token.type) + " '" + token.token + "'", ...this.tokenToLinePos(token) } }; + } + return { error: { msg: "Expecting " + TokenType.keyName(type) + ", not " + TokenType.keyName(token.type) + " '" + token.token + "'", ...this.tokenToLinePos(token) } }; } if (tokenValue !== undefined) @@ -72,25 +77,36 @@ const token = this.peek(type, tokenValue); if (token.error) return token; + for (let saved of this.savedLocations) + { + saved.matched++; + } + this.index++; return token; } startMatch() { - this.savedLocations.push(this.index); + this.savedLocations.push({ index: this.index, matched: 0 }); } + // returns the number of tokens in the match finishMatch() { console.assert(this.savedLocations.length > 0, "Unexpected finishMatch()"); - this.savedLocations.pop(); + return this.savedLocations.pop().matched; } + // returns the number of tokens we matched before we failed failMatch() { console.assert(this.savedLocations.length > 0, "Unexpected failMatch()"); - this.index = this.savedLocations.pop(); + + const saved = this.savedLocations.pop(); + this.index = saved.index; + + return saved.matched; } stringPosToLinePos(pos) @@ -122,6 +138,11 @@ { return this.stringPosToLinePos(token.pos); } + + currentLinePos() + { + return this.tokenIndexToLinePos(this.index); + } } /* takes a string, and returns a flat array of tokens, and a list of the locations of newlines in the source diff --git a/lex.js b/lex.js index e4afe2b..d24183c 100644 --- a/lex.js +++ b/lex.js @@ -16,7 +16,8 @@ this.tokens = lexed.tokens; this.newlines = lexed.newlines; - // when we attempt to match a sub-rule, save the location where we roll back to + // when we attempt to match a sub-rule, save the location where we roll back to, + // as well as the max number of tokens we ever matched this.savedLocations = []; } @@ -52,7 +53,11 @@ { if (token.type !== type) { - return { error: { msg: "Expecting " . TokenType.keyName(type) + ", not " + TokenType.keyName(token.type) + " '" + token.token + "'", ...this.tokenToLinePos(token) } }; + if (tokenValue !== undefined) + { + return { error: { msg: "Expecting " + TokenType.keyName(type) + " '" + tokenValue + "', not " + TokenType.keyName(token.type) + " '" + token.token + "'", ...this.tokenToLinePos(token) } }; + } + return { error: { msg: "Expecting " + TokenType.keyName(type) + ", not " + TokenType.keyName(token.type) + " '" + token.token + "'", ...this.tokenToLinePos(token) } }; } if (tokenValue !== undefined) @@ -72,25 +77,36 @@ const token = this.peek(type, tokenValue); if (token.error) return token; + for (let saved of this.savedLocations) + { + saved.matched++; + } + this.index++; return token; } startMatch() { - this.savedLocations.push(this.index); + this.savedLocations.push({ index: this.index, matched: 0 }); } + // returns the number of tokens in the match finishMatch() { console.assert(this.savedLocations.length > 0, "Unexpected finishMatch()"); - this.savedLocations.pop(); + return this.savedLocations.pop().matched; } + // returns the number of tokens we matched before we failed failMatch() { console.assert(this.savedLocations.length > 0, "Unexpected failMatch()"); - this.index = this.savedLocations.pop(); + + const saved = this.savedLocations.pop(); + this.index = saved.index; + + return saved.matched; } stringPosToLinePos(pos) @@ -122,6 +138,11 @@ { return this.stringPosToLinePos(token.pos); } + + currentLinePos() + { + return this.tokenIndexToLinePos(this.index); + } } /* takes a string, and returns a flat array of tokens, and a list of the locations of newlines in the source diff --git a/parse.js b/parse.js index 5ff99ad..3af758c 100644 --- a/parse.js +++ b/parse.js @@ -54,6 +54,9 @@ Index:5, // some sub-expression being indexed by another expression, i.e. sub_expr[3] Call:6, // some sub-expression representing a function being called with a list of expressions Ternary:7, // a ternary expression, with a condition expr, true expr, and false expr + ExprStatement:8, // a full statement that is just an expression + Statements:9, // a continuous text block of statements + Block:10, // a set of statements in a { } block }); // all reserved words that represent literals @@ -64,16 +67,84 @@ class Parser { - constructor(tokenStream) + constructor(tokenStream, mustConsumeBeforeUsingError = 4) { this.tokenStream = tokenStream; this.tokenStream.index = 0; + this.mustConsumeBeforeUsingError = mustConsumeBeforeUsingError; } - - expression(tokenStream) + block() { - return this.expressionInternal(tokenStream, 0); + let { error:openError } = this.tokenStream.consume(lex.TokenType.Op, "{"); + if (openError) return this.getError(openError); + + let { ast:statements, error, numMatched } = this.attemptSubtree(this.statements); + if (error && numMatched > this.mustConsumeBeforeUsingError) return this.getError(this.error); + + let { error:closeError } = this.tokenStream.consume(lex.TokenType.Op, "}"); + if (closeError) return this.getError(closeError); + + return { ast: { node: ASTNode.Block, statements: statements }, error: null }; + } + + statements() + { + let statements = []; + + let error, mostErrorMatched = 0; + while (!error) + { + let stmt, errorMatched; + + ({ ast:stmt, error, numMatched:errorMatched } = this.attemptSubtree(this.statement)); + + if (error) + { + if (errorMatched > mostErrorMatched) mostErrorMatched = errorMatched; + } + else + { + statements.push(stmt); + } + } + + if (mostErrorMatched > this.mustConsumeBeforeUsingError) return { ast: null, error: error }; + + return { ast: { node: ASTNode.Statements, statements: statements }, error: null }; + } + + statement() + { + let { ast:stmt, error } = this.attemptSubtree(this.statementInternal); + if (error) + { + return this.getError(error); + } + else + { + if (error = this.tokenStream.consume(lex.TokenType.Op, ';').error) + { + return this.getError(error); + } + } + + return { ast:stmt, error: null }; + } + + statementInternal() + { + return this.choice( + [ + { parse: this.expression, handle: ast => ({ node: ASTNode.ExprStatement, expr: ast }) } + ], + "Expecting expression"); + let error, bestError, numMatched, bestNumMatched = 0; + } + + expression() + { + return this.expressionInternal(this.tokenStream, 0); } /* Parses an expression consisting of a set of values, all separated by infix operators, @@ -322,22 +393,48 @@ this.tokenStream.startMatch(); const subtree = parseFunction.apply(this, args); + let numMatched; if (subtree.error) { - this.tokenStream.failMatch(); + numMatched = this.tokenStream.failMatch(); } else { - this.tokenStream.finishMatch(); + numMatched = this.tokenStream.finishMatch(); } - return subtree; + return { numMatched: numMatched, ...subtree}; } getError(error) { return { ast: null, error: error }; } + + choice(choices, genericError) + { + let bestError, bestErrorNumMatched = 0; + + for (let choice of choices) + { + if (choice.parse && choice.handle) + { + let { ast, error, numMatched } = this.attemptSubtree(choice.parse); + if (!error) return { ast: choice.handle(ast), error: null }; + if (numMatched > bestErrorNumMatched) + { + bestError = error; + bestErrorNumMatched = numMatched; + } + } + } + + if (bestError && bestErrorNumMatched > this.mustConsumeBeforeUsingError) + { + return { ast: null, error: bestError }; + } + return { ast: null, error: { msg: genericError, ...this.tokenStream.currentLinePos() } }; + } } function getPrettyAST(ast) diff --git a/lex.js b/lex.js index e4afe2b..d24183c 100644 --- a/lex.js +++ b/lex.js @@ -16,7 +16,8 @@ this.tokens = lexed.tokens; this.newlines = lexed.newlines; - // when we attempt to match a sub-rule, save the location where we roll back to + // when we attempt to match a sub-rule, save the location where we roll back to, + // as well as the max number of tokens we ever matched this.savedLocations = []; } @@ -52,7 +53,11 @@ { if (token.type !== type) { - return { error: { msg: "Expecting " . TokenType.keyName(type) + ", not " + TokenType.keyName(token.type) + " '" + token.token + "'", ...this.tokenToLinePos(token) } }; + if (tokenValue !== undefined) + { + return { error: { msg: "Expecting " + TokenType.keyName(type) + " '" + tokenValue + "', not " + TokenType.keyName(token.type) + " '" + token.token + "'", ...this.tokenToLinePos(token) } }; + } + return { error: { msg: "Expecting " + TokenType.keyName(type) + ", not " + TokenType.keyName(token.type) + " '" + token.token + "'", ...this.tokenToLinePos(token) } }; } if (tokenValue !== undefined) @@ -72,25 +77,36 @@ const token = this.peek(type, tokenValue); if (token.error) return token; + for (let saved of this.savedLocations) + { + saved.matched++; + } + this.index++; return token; } startMatch() { - this.savedLocations.push(this.index); + this.savedLocations.push({ index: this.index, matched: 0 }); } + // returns the number of tokens in the match finishMatch() { console.assert(this.savedLocations.length > 0, "Unexpected finishMatch()"); - this.savedLocations.pop(); + return this.savedLocations.pop().matched; } + // returns the number of tokens we matched before we failed failMatch() { console.assert(this.savedLocations.length > 0, "Unexpected failMatch()"); - this.index = this.savedLocations.pop(); + + const saved = this.savedLocations.pop(); + this.index = saved.index; + + return saved.matched; } stringPosToLinePos(pos) @@ -122,6 +138,11 @@ { return this.stringPosToLinePos(token.pos); } + + currentLinePos() + { + return this.tokenIndexToLinePos(this.index); + } } /* takes a string, and returns a flat array of tokens, and a list of the locations of newlines in the source diff --git a/parse.js b/parse.js index 5ff99ad..3af758c 100644 --- a/parse.js +++ b/parse.js @@ -54,6 +54,9 @@ Index:5, // some sub-expression being indexed by another expression, i.e. sub_expr[3] Call:6, // some sub-expression representing a function being called with a list of expressions Ternary:7, // a ternary expression, with a condition expr, true expr, and false expr + ExprStatement:8, // a full statement that is just an expression + Statements:9, // a continuous text block of statements + Block:10, // a set of statements in a { } block }); // all reserved words that represent literals @@ -64,16 +67,84 @@ class Parser { - constructor(tokenStream) + constructor(tokenStream, mustConsumeBeforeUsingError = 4) { this.tokenStream = tokenStream; this.tokenStream.index = 0; + this.mustConsumeBeforeUsingError = mustConsumeBeforeUsingError; } - - expression(tokenStream) + block() { - return this.expressionInternal(tokenStream, 0); + let { error:openError } = this.tokenStream.consume(lex.TokenType.Op, "{"); + if (openError) return this.getError(openError); + + let { ast:statements, error, numMatched } = this.attemptSubtree(this.statements); + if (error && numMatched > this.mustConsumeBeforeUsingError) return this.getError(this.error); + + let { error:closeError } = this.tokenStream.consume(lex.TokenType.Op, "}"); + if (closeError) return this.getError(closeError); + + return { ast: { node: ASTNode.Block, statements: statements }, error: null }; + } + + statements() + { + let statements = []; + + let error, mostErrorMatched = 0; + while (!error) + { + let stmt, errorMatched; + + ({ ast:stmt, error, numMatched:errorMatched } = this.attemptSubtree(this.statement)); + + if (error) + { + if (errorMatched > mostErrorMatched) mostErrorMatched = errorMatched; + } + else + { + statements.push(stmt); + } + } + + if (mostErrorMatched > this.mustConsumeBeforeUsingError) return { ast: null, error: error }; + + return { ast: { node: ASTNode.Statements, statements: statements }, error: null }; + } + + statement() + { + let { ast:stmt, error } = this.attemptSubtree(this.statementInternal); + if (error) + { + return this.getError(error); + } + else + { + if (error = this.tokenStream.consume(lex.TokenType.Op, ';').error) + { + return this.getError(error); + } + } + + return { ast:stmt, error: null }; + } + + statementInternal() + { + return this.choice( + [ + { parse: this.expression, handle: ast => ({ node: ASTNode.ExprStatement, expr: ast }) } + ], + "Expecting expression"); + let error, bestError, numMatched, bestNumMatched = 0; + } + + expression() + { + return this.expressionInternal(this.tokenStream, 0); } /* Parses an expression consisting of a set of values, all separated by infix operators, @@ -322,22 +393,48 @@ this.tokenStream.startMatch(); const subtree = parseFunction.apply(this, args); + let numMatched; if (subtree.error) { - this.tokenStream.failMatch(); + numMatched = this.tokenStream.failMatch(); } else { - this.tokenStream.finishMatch(); + numMatched = this.tokenStream.finishMatch(); } - return subtree; + return { numMatched: numMatched, ...subtree}; } getError(error) { return { ast: null, error: error }; } + + choice(choices, genericError) + { + let bestError, bestErrorNumMatched = 0; + + for (let choice of choices) + { + if (choice.parse && choice.handle) + { + let { ast, error, numMatched } = this.attemptSubtree(choice.parse); + if (!error) return { ast: choice.handle(ast), error: null }; + if (numMatched > bestErrorNumMatched) + { + bestError = error; + bestErrorNumMatched = numMatched; + } + } + } + + if (bestError && bestErrorNumMatched > this.mustConsumeBeforeUsingError) + { + return { ast: null, error: bestError }; + } + return { ast: null, error: { msg: genericError, ...this.tokenStream.currentLinePos() } }; + } } function getPrettyAST(ast) diff --git a/test.js b/test.js index 83521b0..8ba2fe7 100644 --- a/test.js +++ b/test.js @@ -5,13 +5,13 @@ function debugExpr(expr) { const tokens = new lex.TokenStream(expr); - console.log(tokens.getPrettyTokenList()); + //console.log(tokens.getPrettyTokenList()); const parser = new parse.Parser(tokens); - const result = parser.expression(tokens); + const result = parser.block(); - console.format(parse.getPrettyAST(result)); //ast.mapKey("node", v => ASTNode.keyName(v))); + console.format(parse.getPrettyAST(result)); } debugExpr(fs.readFileSync(process.argv[2], "utf8"));