diff --git a/parse.js b/parse.js new file mode 100644 index 0000000..629285e --- /dev/null +++ b/parse.js @@ -0,0 +1,235 @@ +function lex(input) +{ + let tokens = []; + + const words = input.split(/(?!\w)|\b/); + console.log(words); + + const digits = /\d+/; + const keyword = /\w+/; + + for (let i = 0; i < words.length; i++) + { + const wordsLeft = words.length - i - 1; + let word = words[i]; + + if (word.match(digits)) + { + if (wordsLeft > 2 && words[i + 1] == "." && words[i + 2].match(digits)) + { + word += "." + words[i + 2]; + } + tokens.push({type:'number', value:word}); + } + else if (word == '"') + { + word = ""; + + let closed = false; + for (let j = i + 1; j < words.length; j++) + { + if (words[j] == '\\') + { + if (j + 1 < words.length) + { + const escaped = words[j + 1][0]; + words[j + 1] = words[j + 1].substr(1); + if (words[j + 1].length == 0) words.splice(j + 1, 1); + + word += escaped; + } + } + else if (words[j] == '"') + { + i = j; + closed = true; + break; + } + else + { + word += words[j]; + } + } + + if (!closed) + { + throw "Unclosed string"; + } + + tokens.push({type:'string', value:word}); + } + else if (word.match(keyword)) + { + tokens.push({type:'keyword', value:word}); + } + else + { + tokens.push({type:'op', value:word}); + } + } + + return tokens; +} + +function expect(tokens, type) +{ + if (tokens.length > 0 && tokens[0].type == type) + { + return tokens.shift().value; + } + return null; +} + +const infixBindingPower = { + "+": [10, 11], + "-": [10, 11], + "*": [20, 21], + ".": [100, 101], +}; +const prefixBindingPower = { + "-": 40, +}; +const postfixBindingPower = { + "[": 50, + "(": 60, +}; + +Object.defineProperty(Array.prototype, 'shiftThrow', { + enumerable: false, + value: function(err) { + if (this.length == 0) throw err; + return this.shift(); + } +}); + +const keywords = [ "if" ]; + +const literals = { + "true": {node:"literal", value:true}, + "false": {node:"literal", value:false}, +}; + +function parseExpression(tokens, minBindingPower) +{ + const {value, type} = tokens.shiftThrow("No more input"); + + let lhs = null; + if (type == "op") + { + if (value == "(") + { + lhs = parseExpressions(tokens, 0); + const {value:closeValue, type:closeType} = tokens.shiftThrow("No more input"); + if (closeType != "op" && closeValue != ")") + { + throw "Unclosed parenthesis"; + } + } + else if ((power = prefixBindingPower[value]) !== undefined) + { + const rhs = parseExpression(tokens, power); + lhs = {node:"prefix_op", operand:rhs, op:value}; + } + else + { + throw "Bad operator"; + } + } + else if (type == "keyword") + { + if (keywords.indexOf(value) != -1) + { + throw "Keyword " + value + " not expected here"; + } + else if (literals[value] !== undefined) + { + lhs = literals[value]; + } + else + { + lhs = {type:"identifier", value:value}; + } + } + else if (type == "number") + { + lhs = {node:"literal", value:parseFloat(value)}; + } + else if (type == "string") + { + lhs = {node:"literal", value:value}; + } + else + { + throw "Unexpcted token " + type + " of " + value; + } + + while (tokens.length > 0) + { + const {type:opType, value:opValue} = tokens[0]; + if (opType != "op") + { + throw "Operator expected here"; + } + + if (postfixBindingPower[opValue] !== undefined) + { + if (postfixBindingPower[opValue] < minBindingPower) + { + break; + } + + tokens.shift(); + + if (opValue == "[") + { + let index = parseExpression(tokens, 0); + const {value:closeValue, type:closeType} = tokens.shiftThrow("No more input"); + if (closeType != "op" && closeValue != "]") + { + throw "Unclosed indexing"; + } + lhs = {node:"index", value:lhs, index:index}; + } + if (opValue == "(") + { + while (true) + { + } + let args = parseExpression(tokens, 0); + const {value:closeValue, type:closeType} = tokens.shiftThrow("No more input"); + if (closeType != "op" && closeValue != ")") + { + throw "Unclosed function call"; + } + lhs = {node:"call", func:lhs, args:args}; + } + else + { + throw "Unexpected postfix operator " + opValue; + } + + continue; + } + + if (infixBindingPower[opValue] === undefined) + { + break; + } + + [leftBinding, rightBinding] = infixBindingPower[opValue]; + + if (leftBinding < minBindingPower) break; + + tokens.shift(); + + let rhs = parseExpression(tokens, rightBinding); + + lhs = {type:"op", left:lhs, right:rhs, op:opValue}; + } + + return lhs; +} + + +const tokens = lex("test.blah(3+\"4x\\\"4\\\"\")"); +console.log(parseExpression(tokens, 0));