// side-effect import: utils.js is expected to provide the keyName()/mapKey()
// helpers used throughout this file
require('./utils.js');

const TokenType = Object.freeze({ Word:1, NumberLit:2, StringLit:3, Op:4 });

class TokenStream {
    constructor(input) {
        this.input = input;
        this.inputLength = input.length;
        // how far in the token stream we are
        this.index = 0;

        const lexed = lex(this.input);
        this.tokens = lexed.tokens;
        this.newlines = lexed.newlines;

        // when we attempt to match a sub-rule, save the location where we roll back to,
        // as well as the max number of tokens we ever matched
        this.savedLocations = [];
    }

    getPrettyTokenList() {
        return this.tokens.mapKey("type", v => TokenType.keyName(v));
    }

    eof() {
        return this.index < 0 || this.index >= this.tokens.length;
    }

    peek(type = undefined, tokenValue = undefined) {
        return this.peekAt(0, type, tokenValue);
    }

    peekAt(forwardAmount, type = undefined, tokenValue = undefined) {
        if (this.index + forwardAmount < 0 || this.index + forwardAmount >= this.tokens.length) {
            let msg = "Unexpected EOF";
            if (type !== undefined) {
                msg += ", expected " + TokenType.keyName(type);
                if (tokenValue !== undefined) {
                    msg += " of '" + tokenValue + "'";
                }
            }
            return { error: { msg: msg, ...this.stringPosToLinePos(this.inputLength - 1) } };
        }

        const token = this.tokens[this.index + forwardAmount];
        if (type) {
            if (token.type !== type) {
                if (tokenValue !== undefined) {
                    return { error: { msg: "Expecting " + TokenType.keyName(type) + " '" + tokenValue + "', not " + TokenType.keyName(token.type) + " '" + token.token + "'", ...this.tokenToLinePos(token) } };
                }
                return { error: { msg: "Expecting " + TokenType.keyName(type) + ", not " + TokenType.keyName(token.type) + " '" + token.token + "'", ...this.tokenToLinePos(token) } };
            }
            if (tokenValue !== undefined) {
                if (token.token !== tokenValue) {
                    return { error: { msg: "Expecting " + TokenType.keyName(type) + " '" + tokenValue + "', not '" + token.token + "'", ...this.tokenToLinePos(token) } };
                }
            }
        }
        return token;
    }

    consume(type = undefined, tokenValue = undefined) {
        const token = this.peek(type, tokenValue);
        if (token.error) return token;

        for (let saved of this.savedLocations) {
            saved.matched++;
        }
        this.index++;
        return token;
    }

    consumeCount(amount) {
        let { error } = this.peekAt(amount - 1);
        if (error) return { error: error };

        for (let saved of this.savedLocations) {
            saved.matched += amount;
        }
        this.index += amount;
        return { error: null };
    }

    startMatch() {
        this.savedLocations.push({ index: this.index, matched: 0 });
    }

    // returns the number of tokens in the match
    finishMatch() {
        console.assert(this.savedLocations.length > 0, "Unexpected finishMatch()");
        return this.savedLocations.pop().matched;
    }

    // returns the number of tokens we matched before we failed
    failMatch() {
        console.assert(this.savedLocations.length > 0, "Unexpected failMatch()");
        const saved = this.savedLocations.pop();
        this.index = saved.index;
        return saved.matched;
    }

    stringPosToLinePos(pos) {
        pos = Math.max(0, Math.min(this.inputLength - 1, pos));
        let line = this.newlines.length + 1;
        let previousLinesLength = 0;
        for (let i = 0; i < this.newlines.length; i++) {
            if (pos <= this.newlines[i]) {
                line = i + 1;
                break;
            }
            previousLinesLength = this.newlines[i] + 1;
        }
        return {inputPos:pos, line:line, pos:pos - previousLinesLength + 1};
    }

    tokenIndexToLinePos(index) {
        if (index < 0 || index >= this.tokens.length) return this.stringPosToLinePos(this.inputLength - 1);
        return this.tokenToLinePos(this.tokens[index]);
    }

    tokenToLinePos(token) {
        return this.stringPosToLinePos(token.pos);
    }

    currentLinePos() {
        return this.tokenIndexToLinePos(this.index);
    }
}
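
// Example: how a parser rule might use the startMatch()/finishMatch()/failMatch()
// backtracking helpers above. This is an illustrative sketch only; parseKeyValue
// and the "word : number" grammar it matches are hypothetical, not part of this module.
//
//   function parseKeyValue(stream) {
//       stream.startMatch();
//       const key = stream.consume(TokenType.Word);
//       if (key.error) { stream.failMatch(); return key; }        // roll back to the saved index
//       const colon = stream.consume(TokenType.Op, ":");
//       if (colon.error) { stream.failMatch(); return colon; }
//       const value = stream.consume(TokenType.NumberLit);
//       if (value.error) { stream.failMatch(); return value; }
//       stream.finishMatch();                                     // commit; returns the match length
//       return { key: key.token, value: value.token };
//   }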
/* takes a string, and returns a flat array of tokens, and a list of the locations of newlines in the source
 *
 * recognized types:
 * Word: as a token type, represents any symbol/word: variable names, or actual keywords
 * NumberLit: a literal value for the number type
 * StringLit: a literal value for the string type
 * Op: an operator (usually a single character of punctuation)
 */
function lex(input) {
    let inputLength = input.length;

    let newlines = [];
    let i = 0;
    while ((i = input.indexOf("\n", i)) != -1) {
        newlines.push(i);
        i++;
    }

    // break the input down into "parts": either words, digit strings, single character operators, or whitespace
    let parts = [];
    // track where we are in the original string
    let pos = 0;
    let match;
    while (input.length > 0) {
        // match our possible sequences of more than one character (words, digits, or whitespace)
        if ((match = input.match(/^\w+/)) || (match = input.match(/^\d+/)) || (match = input.match(/^\s+/))) {
            const matched = match[0];
            parts.push({token:matched, pos:pos});
            // advance the string
            input = input.substr(matched.length);
            pos += matched.length;
        } else {
            // everything else is a single character (punctuation)
            parts.push({token:input[0], pos:pos});
            input = input.substr(1);
            pos++;
        }
    }

    let tokens = [];
    for (let i = 0; i < parts.length; i++) {
        const partsLeft = parts.length - i - 1;
        let { token, pos } = parts[i];
        if (token.trim().length == 0) {
            // remove whitespace (whitespace in a string is handled in the block that handles the " token below)
            continue;
        }

        if (token.match(/^\d+$/)) {
            // try and get the decimal, if any
            if (partsLeft >= 2 && parts[i + 1].token == "." && parts[i + 2].token.match(/^\d+$/)) {
                token += "." + parts[i + 2].token;
                // skip the "." and the fractional part we just consumed
                i += 2;
            }
            tokens.push({type:TokenType.NumberLit, token:token, pos:pos});
        } else if (token == '"') {
            // we are starting a string literal
            // don't include the initial "
            token = "";
            let closed = false;
            for (let j = i + 1; j < parts.length; j++) {
                // find an escape character
                if (parts[j].token == '\\') {
                    if (j + 1 < parts.length) {
                        // the escaped character is the next character (all tokens are guaranteed length > 0)
                        const escaped = parts[j + 1].token[0];
                        // remove the escaped character from the next token, being sure to remove the token
                        // if it was only that 1 character
                        parts[j + 1].token = parts[j + 1].token.substr(1);
                        if (parts[j + 1].token.length == 0) parts.splice(j + 1, 1);
                        // for now, just put the escaped character in literally, we'll deal with escape sequences later
                        token += escaped;
                    }
                } else if (parts[j].token == '"') {
                    // close the string
                    i = j;
                    closed = true;
                    break;
                } else {
                    // build up the string out of all tokens before the closing "
                    token += parts[j].token;
                }
            }
            if (!closed) {
                throw new Error("Unclosed string");
            }
            tokens.push({type:TokenType.StringLit, token:token, pos:pos});
        } else if (token.match(/\w+/)) {
            // any keyword or identifier
            tokens.push({type:TokenType.Word, token:token, pos:pos});
        } else {
            // anything left is an operator
            tokens.push({type:TokenType.Op, token:token, pos:pos});
        }
    }

    // keywords and identifiers are case insensitive
    return {
        tokens: tokens.map(t => t.type == TokenType.Word ? { ...t, token:t.token.trim().toLowerCase() } : t),
        newlines: newlines,
    };
}

module.exports = { TokenStream: TokenStream, TokenType: TokenType };
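
// Example usage (illustrative sketch; the module path and sample input are
// hypothetical, and keyName() is assumed to come from utils.js as noted above):
//
//   const { TokenStream, TokenType } = require("./tokenstream.js");
//   const stream = new TokenStream('print "hello world" 1.5');
//   while (!stream.eof()) {
//       const token = stream.consume();              // no type given: accept any token
//       if (token.error) throw new Error(token.error.msg);
//       const { line, pos } = stream.tokenToLinePos(token);
//       console.log(TokenType.keyName(token.type), JSON.stringify(token.token), "at", line + ":" + pos);
//   }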