// side-effect import: utils.js is assumed to install the helper methods used below
// (e.g. Array.prototype.mapKey and the keyName() lookup used on TokenType)
require('./utils.js');
const TokenType = Object.freeze({ Word:1, NumberLit:2, StringLit:3, Op:4 });
class TokenStream
{
constructor(input)
{
this.input = input;
this.inputLength = input.length;
// how far in the token stream we are
this.index = 0;
const lexed = lex(this.input);
this.tokens = lexed.tokens;
this.newlines = lexed.newlines;
// when we attempt to match a sub-rule, save the location where we roll back to,
// as well as the max number of tokens we ever matched
this.savedLocations = [];
}
getPrettyTokenList()
{
return this.tokens.mapKey("type", v => TokenType.keyName(v));
}
eof()
{
return this.index < 0 || this.index >= this.tokens.length;
}
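// peek()/peekAt() return the token at the given offset, optionally checking its type and exact
// text; on EOF or a mismatch they return { error: { msg, inputPos, line, pos } } instead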
peek(type = undefined, tokenValue = undefined)
{
return this.peekAt(0, type, tokenValue);
}
peekAt(forwardAmount, type = undefined, tokenValue = undefined)
{
if (this.index + forwardAmount < 0 || this.index + forwardAmount >= this.tokens.length)
{
let msg = "Unexpected EOF";
if (type !== undefined)
{
msg += ", expected " + TokenType.keyName(type);
if (tokenValue !== undefined)
{
msg += " of '" + tokenValue + "'";
}
}
return { error: { msg: msg, ...this.stringPosToLinePos(this.inputLength - 1) } };
}
const token = this.tokens[this.index + forwardAmount];
if (type)
{
if (token.type !== type)
{
if (tokenValue !== undefined)
{
return { error: { msg: "Expecting " + TokenType.keyName(type) + " '" + tokenValue + "', not " + TokenType.keyName(token.type) + " '" + token.token + "'", ...this.tokenToLinePos(token) } };
}
return { error: { msg: "Expecting " + TokenType.keyName(type) + ", not " + TokenType.keyName(token.type) + " '" + token.token + "'", ...this.tokenToLinePos(token) } };
}
if (tokenValue !== undefined)
{
if (token.token !== tokenValue)
{
return { error: { msg: "Expecting " + TokenType.keyName(type) + " '" + tokenValue + "', not '" + token.token + "'", ...this.tokenToLinePos(token) } };
}
}
}
return token;
}
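// consume() behaves like peek(), but on success it also advances the stream
// and credits the consumed token to every in-progress match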
consume(type = undefined, tokenValue = undefined)
{
const token = this.peek(type, tokenValue);
if (token.error) return token;
for (let saved of this.savedLocations)
{
saved.matched++;
}
this.index++;
return token;
}
consumeCount(amount)
{
let { error } = this.peekAt(amount - 1);
if (error) return { error: error };
for (let saved of this.savedLocations)
{
saved.matched += amount;
}
this.index += amount;
return { error: null };
}
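// typical backtracking pattern when trying a sub-rule (illustrative sketch):
//   stream.startMatch();
//   const tok = stream.consume(TokenType.Word, "if");  // ...any rule-specific consumes...
//   if (tok.error) stream.failMatch();   // roll the stream back to where the match began
//   else           stream.finishMatch(); // keep the consumed tokens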
startMatch()
{
this.savedLocations.push({ index: this.index, matched: 0 });
}
// returns the number of tokens in the match
finishMatch()
{
console.assert(this.savedLocations.length > 0, "Unexpected finishMatch()");
return this.savedLocations.pop().matched;
}
// returns the number of tokens we matched before we failed
failMatch()
{
console.assert(this.savedLocations.length > 0, "Unexpected failMatch()");
const saved = this.savedLocations.pop();
this.index = saved.index;
return saved.matched;
}
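// converts a character offset in the original input into a 1-based line/column pair,
// e.g. for input "ab\ncd", stringPosToLinePos(3) -> { inputPos: 3, line: 2, pos: 1 }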
stringPosToLinePos(pos)
{
pos = Math.max(0, Math.min(this.inputLength - 1, pos));
let line = this.newlines.length + 1;
let previousLinesLength = 0;
for (let i = 0; i < this.newlines.length; i++)
{
if (pos <= this.newlines[i])
{
line = i + 1;
break;
}
previousLinesLength = this.newlines[i] + 1;
}
return {inputPos:pos, line:line, pos:pos - previousLinesLength + 1};
}
tokenIndexToLinePos(index)
{
if (index < 0 || index >= this.tokens.length) return this.stringPosToLinePos(this.inputLength - 1);
return this.tokenToLinePos(this.tokens[index]);
}
tokenToLinePos(token)
{
return this.stringPosToLinePos(token.pos);
}
currentLinePos()
{
return this.tokenIndexToLinePos(this.index);
}
}
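/* example usage (illustrative sketch):
*   const stream = new TokenStream('print "hi"');
*   while (!stream.eof())
*   {
*     const token = stream.consume();
*     if (token.error) break;
*     // token.type is a TokenType value; token.token is its text (lowercased for Words)
*   }
*/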
/* takes a string and returns a flat array of tokens, plus a list of the locations of newlines in the source
*
* recognized types:
* Word: as a token type, represents any symbol/word: variable names or actual keywords
* NumberLit: a literal value for the number type
* StringLit: a literal value for the string type
* Op: an operator (usually a single character of punctuation)
*/
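/* example (illustrative): lex('let x = 3.14') yields tokens roughly like
*   [ {type:Word, token:'let'}, {type:Word, token:'x'}, {type:Op, token:'='}, {type:NumberLit, token:'3.14'} ]
* plus newlines: [] (each token also carries its pos offset into the source)
*/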
function lex(input)
{
let newlines = [];
let i = 0;
while ((i = input.indexOf("\n", i)) != -1)
{
newlines.push(i);
i++;
}
// break the input down into "parts": either words, digit strings, single character operators, or whitespace
let parts = [];
// track where we are in the original string
let pos = 0;
let match;
while (input.length > 0)
{
// match our possible sequences of more than one character (words, digits, or whitespace)
// (note: \w+ already covers digit runs, so the \d+ branch below is kept only for readability)
if ((match = input.match(/^\w+/)) ||
(match = input.match(/^\d+/)) ||
(match = input.match(/^\s+/)))
{
const matched = match[0];
parts.push({token:matched, pos:pos});
// advance the string
input = input.slice(matched.length);
pos += matched.length;
}
else
{
// everything else is a single character (punctuation)
parts.push({token:input[0], pos:pos});
input = input.slice(1);
pos++;
}
}
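// at this point parts holds the raw split of the source,
// e.g. 'let x=1' -> parts with tokens ['let', ' ', 'x', '=', '1'] (each with its pos)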
let tokens = [];
for (let i = 0; i < parts.length; i++)
{
const partsLeft = parts.length - i - 1;
let { token, pos } = parts[i];
if (token.trim().length == 0)
{
// remove whitespace (whitespace in a string is handled in the block that handles the " token below)
continue;
}
if (token.match(/^\d+$/))
{
// try and get the decimal, if any
if (partsLeft >= 2 && parts[i + 1].token == "." && parts[i + 2].token.match(/^\d+$/))
{
token += "." + parts[i + 2].token;
// skip the "." and fractional-digit parts we just consumed, so they aren't emitted again
i += 2;
}
tokens.push({type:TokenType.NumberLit, token:token, pos:pos});
}
else if (token == '"')
{
// we are starting a string literal
// don't include the initial "
token = "";
let closed = false;
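// e.g. the raw source text  "a\"b"  (quotes included) is assembled below into the single StringLit value  a"b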
for (let j = i + 1; j < parts.length; j++)
{
// find an escape character
if (parts[j].token == '\\')
{
if (j + 1 < parts.length)
{
// the escaped character is the first character of the next part (all parts are guaranteed length > 0)
const escaped = parts[j + 1].token[0];
// remove the escaped character from the next token, being sure to remove the token
// if it was only that 1 character
parts[j + 1].token = parts[j + 1].token.slice(1);
if (parts[j + 1].token.length == 0) parts.splice(j + 1, 1);
// for now, just put the escaped character in literally, we'll deal with escape sequences later
token += escaped;
}
}
else if (parts[j].token == '"')
{
// close the string
i = j;
closed = true;
break;
}
else
{
// build up the string out of all tokens before the closing "
token += parts[j].token;
}
}
if (!closed)
{
throw "Unclosed string";
}
tokens.push({type:TokenType.StringLit, token:token, pos:pos});
}
else if (token.match(/\w+/))
{
// any keyword or identifier
tokens.push({type:TokenType.Word, token:token, pos:pos});
}
else
{
// anything left is an operator
tokens.push({type:TokenType.Op, token:token, pos:pos});
}
}
// keywords and identifiers are case insensitive
return {
tokens: tokens.map(t => t.type == TokenType.Word ? { ...t, token:t.token.trim().toLowerCase() } : t),
newlines: newlines,
};
}
module.exports = { TokenStream: TokenStream, TokenType: TokenType };