// SPDX-FileCopyrightText: 2022 Johannes Loher
//
// SPDX-License-Identifier: MIT

import type { Token } from "./grammar";

/**
 * Splits an expression string into a stream of {@link Token}s.
 *
 * The lexer is consumed by iterating over it (`for (const token of new Lexer(src))`).
 * Whitespace between tokens is skipped. Iteration always ends with exactly one
 * terminating token: `eof` when the whole input was consumed, or `invalid` at the
 * position of the first character sequence that could not be tokenized.
 */
export class Lexer {
    /**
     * Characters that begin an operator or punctuation token.
     * `.` is deliberately absent: it may also start a number (`.5`) and is
     * disambiguated separately in {@link getNextToken}.
     */
    private static readonly operatorStartChars: ReadonlySet<string | undefined> = new Set<string | undefined>([
        "+",
        "-",
        "*",
        "/",
        "%",
        "=",
        "!",
        ">",
        "<",
        "&",
        "|",
        "~",
        "^",
        "?",
        ":",
        ",",
        "(",
        ")",
        "[",
        "]",
        "{",
        "}",
    ]);

    // The patterns are hoisted so that no RegExp is allocated per inspected character.
    // None of them uses the `g`/`y` flag, so `test` is stateless and sharing them is safe.
    private static readonly digitPattern = /\d/;
    private static readonly whiteSpacePattern = /\s/;
    private static readonly identifierStartPattern = /[$_\p{ID_Start}]/u;
    private static readonly identifierPartPattern = /[$\u200c\u200d\p{ID_Continue}]/u;

    /**
     * @param input - The source text to tokenize.
     */
    constructor(private readonly input: string) {}

    /**
     * Lazily yields the tokens of the input in order.
     *
     * The generator stops right after yielding an `eof` or `invalid` token, so
     * an `invalid` token is never followed by further tokens.
     */
    *[Symbol.iterator](): Generator<Token, void> {
        let pos = 0;
        while (true) {
            if (this.isWhiteSpace(this.input[pos])) {
                pos += 1;
                continue;
            }
            const [token, newPos] = this.getNextToken(pos);
            pos = newPos;
            yield token;
            if (token.type === "eof" || token.type === "invalid") {
                break;
            }
        }
    }

    /**
     * Reads the single token that starts at `pos` by dispatching on its first character.
     *
     * @param pos - Index of the first character of the token (must not be whitespace).
     * @returns The token and the index at which the next token search should resume.
     */
    private getNextToken(pos: number): [Token, number] {
        const current = this.input[pos];

        if (current === undefined) {
            // Reading past the end of the string: the input is exhausted.
            return [{ type: "eof", pos }, pos];
        }
        if (this.isOperatorStart(current)) {
            return this.getOperator(pos);
        }
        if (this.isDigit(current)) {
            return this.getNumber(pos);
        }
        if (current === "'" || current === '"' || current === "`") {
            return this.getString(pos);
        }
        if (current === ".") {
            // ".5" is a number, any other "." is the member access operator.
            const next = this.input[pos + 1];
            if (this.isDigit(next)) {
                return this.getNumber(pos);
            }
            return this.getOperator(pos);
        }
        if (this.isIdentifierStart(current)) {
            return this.getIdentifier(pos);
        }
        return this.invalidAt(pos);
    }

    /** Builds the result for a token that could not be read at `pos`; the position is not advanced. */
    private invalidAt(pos: number): [Token, number] {
        return [{ type: "invalid", pos }, pos];
    }

    /** Whether `char` can begin an operator or punctuation token (`.` excluded, see {@link operatorStartChars}). */
    private isOperatorStart(char: string) {
        return Lexer.operatorStartChars.has(char[0]);
    }

    /**
     * Reads the operator or punctuation token starting at `pos`, always preferring
     * the longest match (e.g. `>>>` over `>>` over `>`).
     *
     * A lone `=` is not a token of this grammar and yields `invalid`.
     *
     * @param pos - Index of the first character of the operator.
     * @returns The token and the index just past it, or an `invalid` token at `pos`.
     */
    private getOperator(pos: number): [Token, number] {
        const current = this.input[pos];
        const next = this.input[pos + 1];
        const nextButOne = this.input[pos + 2];
        switch (current) {
            // Operators that are never a prefix of a longer operator.
            case "+":
            case "-":
            case "/":
            case "%":
            case "~":
            case "^":
            case ".":
            case ":":
            case ",":
            case "(":
            case ")":
            case "[":
            case "]":
            case "{":
            case "}": {
                return [{ type: current, pos }, pos + 1];
            }
            case "*": {
                if (next === "*") {
                    return [{ type: "**", pos }, pos + 2];
                }
                return [{ type: "*", pos }, pos + 1];
            }
            case "=": {
                if (next === "=") {
                    if (nextButOne === "=") {
                        return [{ type: "===", pos }, pos + 3];
                    }
                    return [{ type: "==", pos }, pos + 2];
                }
                // A single "=" is not a token of this grammar.
                return this.invalidAt(pos);
            }
            case "!": {
                if (next === "=") {
                    if (nextButOne === "=") {
                        return [{ type: "!==", pos }, pos + 3];
                    }
                    return [{ type: "!=", pos }, pos + 2];
                }
                return [{ type: "!", pos }, pos + 1];
            }
            case ">": {
                switch (next) {
                    case ">": {
                        if (nextButOne === ">") {
                            return [{ type: ">>>", pos }, pos + 3];
                        }
                        return [{ type: ">>", pos }, pos + 2];
                    }
                    case "=": {
                        return [{ type: ">=", pos }, pos + 2];
                    }
                    default: {
                        return [{ type: ">", pos }, pos + 1];
                    }
                }
            }
            case "<": {
                switch (next) {
                    case "=": {
                        return [{ type: "<=", pos }, pos + 2];
                    }
                    case "<": {
                        return [{ type: "<<", pos }, pos + 2];
                    }
                    default: {
                        return [{ type: "<", pos }, pos + 1];
                    }
                }
            }
            case "&": {
                if (next === "&") {
                    return [{ type: "&&", pos }, pos + 2];
                }
                return [{ type: "&", pos }, pos + 1];
            }
            case "|": {
                if (next === "|") {
                    return [{ type: "||", pos }, pos + 2];
                }
                return [{ type: "|", pos }, pos + 1];
            }
            case "?": {
                switch (next) {
                    case ".": {
                        return [{ type: "?.", pos }, pos + 2];
                    }
                    case "?": {
                        return [{ type: "??", pos }, pos + 2];
                    }
                    default: {
                        return [{ type: "?", pos }, pos + 1];
                    }
                }
            }
        }
        return this.invalidAt(pos);
    }

    /** Whether `char` is a decimal digit; `undefined` (end of input) is not. */
    private isDigit(char: string | undefined): char is `${number}` {
        return Lexer.digitPattern.test(char?.[0] ?? "");
    }

    /**
     * Reads the decimal number literal starting at `pos`.
     *
     * Accepted: digits with at most one `.` (`12`, `1.5`, `.5`, `1.`), optionally
     * containing `_` numeric separators. A separator is only valid
     * - directly between two digits (so not doubled, not next to the `.`, and not
     *   at the start or end of the literal), and
     * - outside of an integer part that starts with `0` (`0_1` is rejected, while
     *   `0.1_5` is fine because the separator is in the fractional part).
     *
     * @param pos - Index of the first character of the literal (a digit or `.`).
     * @returns A `number` token whose `symbol` is the raw literal text together with
     *          the index just past it, or an `invalid` token at `pos`.
     */
    private getNumber(pos: number): [Token, number] {
        const hasLeadingZero = this.input[pos] === "0";
        let endPos = pos;
        let foundDot = false;

        for (;;) {
            const char = this.input[endPos];
            if (char === ".") {
                if (foundDot) {
                    // A second decimal point makes the whole literal malformed.
                    return this.invalidAt(pos);
                }
                foundDot = true;
            } else if (char === "_") {
                const isBetweenDigits =
                    endPos > pos && this.isDigit(this.input[endPos - 1]) && this.isDigit(this.input[endPos + 1]);
                // The leading-zero restriction only concerns the integer part.
                const isInZeroPrefixedInteger = hasLeadingZero && !foundDot;
                if (!isBetweenDigits || isInZeroPrefixedInteger) {
                    return this.invalidAt(pos);
                }
            } else if (!this.isDigit(char)) {
                // First character that cannot belong to the literal (or end of input).
                break;
            }
            endPos += 1;
        }

        if (pos === endPos) {
            return this.invalidAt(pos);
        }
        return [{ type: "number", symbol: this.input.slice(pos, endPos), pos }, endPos];
    }

    /** Whether `char` may begin an identifier: `$`, `_`, or a Unicode `ID_Start` character. */
    private isIdentifierStart(char: string | undefined) {
        return Lexer.identifierStartPattern.test(char?.[0] ?? "");
    }

    /**
     * Whether `char` may appear inside an identifier: `$`, ZWNJ (U+200C), ZWJ (U+200D),
     * or a Unicode `ID_Continue` character.
     */
    private isIdentifier(char: string | undefined) {
        return Lexer.identifierPartPattern.test(char?.[0] ?? "");
    }

    /**
     * Reads the identifier starting at `pos`, extending it over every following
     * identifier character.
     *
     * @param pos - Index of the first character of the identifier.
     * @returns An `iden` token whose `symbol` is the identifier text together with the
     *          index just past it, or an `invalid` token if no character could be consumed.
     */
    private getIdentifier(pos: number): [Token, number] {
        let endPos = pos;
        while (endPos < this.input.length && this.isIdentifier(this.input[endPos])) {
            endPos += 1;
        }
        if (endPos === pos) {
            return this.invalidAt(pos);
        }
        return [{ type: "iden", symbol: this.input.slice(pos, endPos), pos }, endPos];
    }

    /**
     * Reads the string literal starting at `pos`. The character at `pos` (`'`, `"` or
     * a backtick) is the delimiter, and the literal ends at the next unescaped
     * occurrence of that same character.
     *
     * A backslash escapes the character that follows it, whatever it is. Hence `\'`
     * does not terminate a single-quoted string, while in `'\\'` the two backslashes
     * form one escape and the following quote does terminate the string.
     *
     * @param pos - Index of the opening quote.
     * @returns A `string` token whose `symbol` is the raw literal including both quotes
     *          together with the index just past the closing quote, or an `invalid`
     *          token at `pos` if the literal is not terminated.
     */
    private getString(pos: number): [Token, number] {
        const quote = this.input[pos];
        let endPos = pos + 1;
        while (endPos < this.input.length && this.input[endPos] !== quote) {
            // Consume an escape sequence as a unit so that the escaped character is
            // never mistaken for the closing quote or for the start of another escape.
            endPos += this.input[endPos] === "\\" ? 2 : 1;
        }
        // Also covers a trailing backslash, which moves endPos past the end of the input.
        if (this.input[endPos] !== quote) {
            return this.invalidAt(pos);
        }
        return [{ type: "string", symbol: this.input.slice(pos, endPos + 1), pos }, endPos + 1];
    }

    /** Whether `char` is whitespace; `undefined` (end of input) is not. */
    private isWhiteSpace(char: string | undefined) {
        return Lexer.whiteSpacePattern.test(char?.[0] ?? "");
    }
}