ds4/src/expression-evaluation/lexer.ts

264 lines
8 KiB
TypeScript

// SPDX-FileCopyrightText: 2022 Johannes Loher
//
// SPDX-License-Identifier: MIT
import type { Token } from "./grammar";
/**
 * Splits an expression string into a stream of {@link Token}s.
 *
 * Iterating a `Lexer` yields tokens from left to right, skipping whitespace.
 * Iteration ends after the first `eof` token (end of input reached) or the
 * first `invalid` token (input that could not be tokenized); that final token
 * is still yielded so consumers can inspect its position.
 */
export class Lexer {
  /**
   * Characters that can begin an operator or punctuation token.
   * "." is intentionally absent: it is dispatched separately because it may
   * also start a number such as ".5".
   */
  private static readonly operatorStartChars: ReadonlySet<string | undefined> = new Set<string | undefined>([
    "+",
    "-",
    "*",
    "/",
    "%",
    "=",
    "!",
    ">",
    "<",
    "&",
    "|",
    "~",
    "^",
    "?",
    ":",
    ",",
    "(",
    ")",
    "[",
    "]",
    "{",
    "}",
  ]);

  // None of these patterns use the `g`/`y` flags, so sharing them is stateless.
  private static readonly digitPattern = /\d/;
  private static readonly identifierStartPattern = /[$_\p{ID_Start}]/u;
  private static readonly identifierPartPattern = /[$\u200c\u200d\p{ID_Continue}]/u;
  private static readonly whiteSpacePattern = /\s/;

  /**
   * @param input - The raw expression text to tokenize.
   */
  constructor(private readonly input: string) {}

  /**
   * Lazily yields the tokens of the input.
   *
   * The generator stops right after yielding an `eof` or `invalid` token.
   */
  *[Symbol.iterator](): Generator<Token, void> {
    let pos = 0;
    while (true) {
      if (this.isWhiteSpace(this.input[pos])) {
        pos += 1;
        continue;
      }
      const [token, newPos] = this.getNextToken(pos);
      pos = newPos;
      yield token;
      if (token.type === "eof" || token.type === "invalid") {
        break;
      }
    }
  }

  /**
   * Reads the token starting at `pos`, dispatching on its first character.
   *
   * @param pos - Index of the first character of the token.
   * @returns The token and the index right after it. For `eof` and `invalid`
   *          tokens the returned index equals `pos`.
   */
  private getNextToken(pos: number): [Token, number] {
    const current = this.input[pos];
    if (current === undefined) {
      return [{ type: "eof", pos }, pos];
    }
    if (this.isOperatorStart(current)) {
      return this.getOperator(pos);
    }
    if (this.isDigit(current)) {
      return this.getNumber(pos);
    }
    if (current === "'" || current === '"' || current === "`") {
      return this.getString(pos);
    }
    if (current === ".") {
      // ".5" is a number, a lone "." is the member access operator.
      const next = this.input[pos + 1];
      if (this.isDigit(next)) {
        return this.getNumber(pos);
      }
      return this.getOperator(pos);
    }
    if (this.isIdentifierStart(current)) {
      return this.getIdentifier(pos);
    }
    return this.invalidAt(pos);
  }

  /** Builds the "could not tokenize" result for position `pos`. */
  private invalidAt(pos: number): [Token, number] {
    return [{ type: "invalid", pos }, pos];
  }

  /** Whether the first character of `char` can begin an operator token. */
  private isOperatorStart(char: string) {
    return Lexer.operatorStartChars.has(char[0]);
  }

  /**
   * Reads the operator or punctuation token at `pos`, always preferring the
   * longest match (e.g. `>>>` over `>>` over `>`).
   *
   * A lone `=` is reported as `invalid`: only `==` and `===` are recognized.
   *
   * @param pos - Index of the first character of the operator.
   * @returns The token and the index right after it, or an `invalid` token.
   */
  private getOperator(pos: number): [Token, number] {
    const current = this.input[pos];
    const next = this.input[pos + 1];
    const nextButOne = this.input[pos + 2];
    switch (current) {
      // Single-character tokens that are never a prefix of a longer one.
      case "+":
      case "-":
      case "/":
      case "%":
      case "~":
      case "^":
      case ".":
      case ":":
      case ",":
      case "(":
      case ")":
      case "[":
      case "]":
      case "{":
      case "}": {
        return [{ type: current, pos }, pos + 1];
      }
      case "*": {
        if (next === "*") {
          return [{ type: "**", pos }, pos + 2];
        }
        return [{ type: "*", pos }, pos + 1];
      }
      case "=": {
        if (next === "=") {
          if (nextButOne === "=") {
            return [{ type: "===", pos }, pos + 3];
          }
          return [{ type: "==", pos }, pos + 2];
        }
        return this.invalidAt(pos);
      }
      case "!": {
        if (next === "=") {
          if (nextButOne === "=") {
            return [{ type: "!==", pos }, pos + 3];
          }
          return [{ type: "!=", pos }, pos + 2];
        }
        return [{ type: "!", pos }, pos + 1];
      }
      case ">": {
        switch (next) {
          case ">": {
            if (nextButOne === ">") {
              return [{ type: ">>>", pos }, pos + 3];
            }
            return [{ type: ">>", pos }, pos + 2];
          }
          case "=": {
            return [{ type: ">=", pos }, pos + 2];
          }
          default: {
            return [{ type: ">", pos }, pos + 1];
          }
        }
      }
      case "<": {
        switch (next) {
          case "=": {
            return [{ type: "<=", pos }, pos + 2];
          }
          case "<": {
            return [{ type: "<<", pos }, pos + 2];
          }
          default: {
            return [{ type: "<", pos }, pos + 1];
          }
        }
      }
      case "&": {
        if (next === "&") {
          return [{ type: "&&", pos }, pos + 2];
        }
        return [{ type: "&", pos }, pos + 1];
      }
      case "|": {
        if (next === "|") {
          return [{ type: "||", pos }, pos + 2];
        }
        return [{ type: "|", pos }, pos + 1];
      }
      case "?": {
        switch (next) {
          case ".": {
            return [{ type: "?.", pos }, pos + 2];
          }
          case "?": {
            return [{ type: "??", pos }, pos + 2];
          }
          default: {
            return [{ type: "?", pos }, pos + 1];
          }
        }
      }
    }
    return this.invalidAt(pos);
  }

  /** Whether the first character of `char` is a decimal digit. */
  private isDigit(char: string | undefined): char is `${number}` {
    return Lexer.digitPattern.test(char?.[0] ?? "");
  }

  /**
   * Reads the decimal number literal starting at `pos`.
   *
   * Accepted are digits, at most one decimal point and `_` numeric
   * separators. A separator is rejected when it
   * - is doubled (`1__0`),
   * - is adjacent to the decimal point (`1_.5`, `1._5`),
   * - ends the literal (`1_`), or
   * - appears in an integer part that starts with `0` (`0_1`).
   *
   * Separators in the fractional part are fine even if the integer part is
   * `0` (`0.1_2`), matching JavaScript's numeric literal rules.
   *
   * @param pos - Index of the first character of the literal (digit or ".").
   * @returns A `number` token carrying the raw literal text and the index
   *          right after it, or an `invalid` token.
   */
  private getNumber(pos: number): [Token, number] {
    // The leading-zero restriction only concerns the integer part.
    const leadingZero = this.input[pos] === "0";
    let endPos = pos;
    let foundDot = false;

    for (;;) {
      const char = this.input[endPos];
      // Only meaningful once we are inside the literal (endPos > pos).
      const prev = endPos > pos ? this.input[endPos - 1] : undefined;

      if (char === ".") {
        if (foundDot || prev === "_") {
          return this.invalidAt(pos);
        }
        foundDot = true;
      } else if (char === "_") {
        if (endPos === pos) {
          // A literal can never start with a separator.
          break;
        }
        if (prev === "_" || prev === "." || (leadingZero && !foundDot)) {
          return this.invalidAt(pos);
        }
      } else if (!this.isDigit(char)) {
        break;
      }
      endPos += 1;
    }

    if (pos === endPos || this.input[endPos - 1] === "_") {
      return this.invalidAt(pos);
    }
    return [{ type: "number", symbol: this.input.slice(pos, endPos), pos }, endPos];
  }

  /**
   * Whether the first UTF-16 code unit of `char` may start an identifier
   * (`$`, `_` or a Unicode `ID_Start` character).
   */
  private isIdentifierStart(char: string | undefined) {
    return Lexer.identifierStartPattern.test(char?.[0] ?? "");
  }

  /**
   * Whether the first UTF-16 code unit of `char` may continue an identifier
   * (`$`, ZWNJ, ZWJ or a Unicode `ID_Continue` character).
   */
  private isIdentifier(char: string | undefined) {
    return Lexer.identifierPartPattern.test(char?.[0] ?? "");
  }

  /**
   * Reads the identifier starting at `pos`.
   *
   * @param pos - Index of the first character of the identifier.
   * @returns An `iden` token carrying the identifier text and the index right
   *          after it, or an `invalid` token if no identifier character was
   *          consumed.
   */
  private getIdentifier(pos: number): [Token, number] {
    let endPos = pos;
    while (endPos < this.input.length && this.isIdentifier(this.input[endPos])) {
      endPos += 1;
    }
    if (endPos === pos) {
      return this.invalidAt(pos);
    }
    return [{ type: "iden", symbol: this.input.slice(pos, endPos), pos }, endPos];
  }

  /**
   * Reads the string literal starting at `pos`. The character at `pos` is
   * taken as the delimiter and the literal extends to the next unescaped
   * occurrence of that same character.
   *
   * A backslash escapes whatever character follows it, so both `'it\'s'` and
   * a literal ending in an escaped backslash (`'a\\'`) are recognized.
   * Backtick-delimited literals are scanned the same way; `${...}` receives
   * no special treatment.
   *
   * @param pos - Index of the opening quote.
   * @returns A `string` token whose symbol includes both quotes and the index
   *          right after the closing quote, or an `invalid` token when the
   *          literal is not terminated.
   */
  private getString(pos: number): [Token, number] {
    const quote = this.input[pos];
    let endPos = pos + 1;
    while (endPos < this.input.length && this.input[endPos] !== quote) {
      // Skip the escaped character together with its backslash so that it can
      // neither terminate the literal nor act as an escape itself.
      endPos += this.input[endPos] === "\\" ? 2 : 1;
    }
    if (endPos >= this.input.length) {
      return this.invalidAt(pos);
    }
    return [{ type: "string", symbol: this.input.slice(pos, endPos + 1), pos }, endPos + 1];
  }

  /** Whether the first character of `char` is whitespace. */
  private isWhiteSpace(char: string | undefined) {
    return Lexer.whiteSpacePattern.test(char?.[0] ?? "");
  }
}