Newer
Older
parsetest / parse.js
function lex(input)
{
    let tokens = [];
    
    const words = input.split(/(?!\w)|\b/);
    console.log(words);
    
    const digits = /\d+/;
    const keyword = /\w+/;
    
    for (let i = 0; i < words.length; i++)
    {
        const wordsLeft = words.length - i - 1;
        let word = words[i];
        
        if (word.match(digits))
        {
            if (wordsLeft > 2 && words[i + 1] == "." && words[i + 2].match(digits))
            {
                word += "." + words[i + 2];
            }
            tokens.push({type:'number', value:word});
        }       
        else if (word == '"')
        {
            word = "";

            let closed = false;
            for (let j = i + 1; j < words.length; j++)
            {
                if (words[j] == '\\')
                {
                    if (j + 1 < words.length)
                    {
                        const escaped = words[j + 1][0];
                        words[j + 1] = words[j + 1].substr(1);
                        if (words[j + 1].length == 0) words.splice(j + 1, 1);

                        word += escaped;
                    }
                }
                else if (words[j] == '"')
                {
                    i = j;
                    closed = true;
                    break;
                }
                else
                {
                    word += words[j];
                }
            }
            
            if (!closed)
            {
                throw "Unclosed string";
            }
            
            tokens.push({type:'string', value:word});
        }
        else if (word.match(keyword))
        {
            tokens.push({type:'keyword', value:word});
        }
        else
        {
            tokens.push({type:'op', value:word});
        }
    }
    
    return tokens;
}

function expect(tokens, type)
{
    if (tokens.length > 0 && tokens[0].type == type)
    {
        return tokens.shift().value;
    }
    return null;
}

const infixBindingPower = {
    "+": [10, 11],
    "-": [10, 11],
    "*": [20, 21],
    ".": [100, 101],
};
const prefixBindingPower = {
    "-": 40,
};
const postfixBindingPower = {
    "[": 50,
    "(": 60,
};

Object.defineProperty(Array.prototype, 'shiftThrow', {
    enumerable: false,
    value: function(err) { 
        if (this.length == 0) throw err;
        return this.shift();
    }
});

const keywords = [ "if" ];

const literals = {
    "true": {node:"literal", value:true},
    "false": {node:"literal", value:false},
};

function parseExpression(tokens, minBindingPower)
{
    const {value, type} = tokens.shiftThrow("No more input");

    let lhs = null;
    if (type == "op")
    {
        if (value == "(")
        {
            lhs = parseExpressions(tokens, 0);
            const {value:closeValue, type:closeType} = tokens.shiftThrow("No more input");
            if (closeType != "op" && closeValue != ")")
            {
                throw "Unclosed parenthesis";
            }
        }
        else if ((power = prefixBindingPower[value]) !== undefined)
        {
            const rhs = parseExpression(tokens, power);
            lhs = {node:"prefix_op", operand:rhs, op:value};
        }
        else
        {
            throw "Bad operator";
        }
    }
    else if (type == "keyword")
    {
        if (keywords.indexOf(value) != -1)
        {
            throw "Keyword " + value + " not expected here";
        }
        else if (literals[value] !== undefined)
        {
            lhs = literals[value];
        }
        else
        {
            lhs = {type:"identifier", value:value};
        }
    }
    else if (type == "number")
    {
        lhs = {node:"literal", value:parseFloat(value)};
    }
    else if (type == "string")
    {
        lhs = {node:"literal", value:value};
    }
    else
    {
        throw "Unexpcted token " + type + " of " + value;
    }

    while (tokens.length > 0)
    {
        const {type:opType, value:opValue} = tokens[0];
        if (opType != "op")
        {
            throw "Operator expected here";
        }

        if (postfixBindingPower[opValue] !== undefined)
        {
            if (postfixBindingPower[opValue] < minBindingPower)
            {
                break;
            }

            tokens.shift();

            if (opValue == "[")
            {
                let index = parseExpression(tokens, 0);
                const {value:closeValue, type:closeType} = tokens.shiftThrow("No more input");
                if (closeType != "op" && closeValue != "]")
                {
                    throw "Unclosed indexing";
                }
                lhs = {node:"index", value:lhs, index:index};
            }
            if (opValue == "(")
            {
                while (true)
                {
                }
                let args = parseExpression(tokens, 0);
                const {value:closeValue, type:closeType} = tokens.shiftThrow("No more input");
                if (closeType != "op" && closeValue != ")")
                {
                    throw "Unclosed function call";
                }
                lhs = {node:"call", func:lhs, args:args};
            }
            else
            {
                throw "Unexpected postfix operator " + opValue;
            }

            continue;
        }

        if (infixBindingPower[opValue] === undefined)
        {
            break;
        }

        [leftBinding, rightBinding] = infixBindingPower[opValue];

        if (leftBinding < minBindingPower) break;

        tokens.shift();

        let rhs = parseExpression(tokens, rightBinding);

        lhs = {type:"op", left:lhs, right:rhs, op:opValue};
    }

    return lhs;
}


const tokens = lex("test.blah(3+\"4x\\\"4\\\"\")");
console.log(parseExpression(tokens, 0));