264 lines
8 KiB
TypeScript
264 lines
8 KiB
TypeScript
// SPDX-FileCopyrightText: 2022 Johannes Loher
//
// SPDX-License-Identifier: MIT
|
|
|
|
import type { Token } from "./grammar";
|
|
|
|
/**
 * Splits an expression string into tokens.
 *
 * A `Lexer` is iterable: iterating it yields the tokens of the input in order,
 * skipping whitespace between them. Iteration ends after the first token of
 * type `"eof"` (end of input reached) or `"invalid"` (the input at that
 * position could not be tokenized).
 */
export class Lexer {
  /**
   * Characters that may begin an operator or punctuation token. `"."` is not
   * listed because it may also begin a number and is dispatched separately.
   */
  private static readonly operatorStartChars: ReadonlySet<string | undefined> = new Set<
    string | undefined
  >([
    "+",
    "-",
    "*",
    "/",
    "%",
    "=",
    "!",
    ">",
    "<",
    "&",
    "|",
    "~",
    "^",
    "?",
    ":",
    ",",
    "(",
    ")",
    "[",
    "]",
    "{",
    "}",
  ]);

  // The patterns are hoisted so they are compiled once instead of on every call.
  // None of them is global/sticky, so sharing them carries no `lastIndex` state.
  private static readonly digitPattern = /\d/;
  private static readonly whiteSpacePattern = /\s/;
  private static readonly identifierStartPattern = /^[$_\p{ID_Start}]/u;
  private static readonly identifierPartPattern = /^[$\u200c\u200d\p{ID_Continue}]/u;

  /**
   * @param input - The text to tokenize.
   */
  constructor(private readonly input: string) {}

  /**
   * Yields the tokens of the input, one at a time.
   *
   * The last token yielded is always either `"eof"` or `"invalid"`.
   */
  *[Symbol.iterator](): Generator<Token, void> {
    let pos = 0;
    while (true) {
      if (this.isWhiteSpace(this.input[pos])) {
        pos += 1;
        continue;
      }
      const [token, newPos] = this.getNextToken(pos);
      pos = newPos;
      yield token;
      if (token.type === "eof" || token.type === "invalid") {
        break;
      }
    }
  }

  /**
   * Reads the token that starts at `pos`.
   *
   * @param pos - Index into the input of the first character of the token.
   * @returns The token and the index of the first character after it.
   */
  private getNextToken(pos: number): [Token, number] {
    const current = this.input[pos];

    if (current === undefined) {
      return [{ type: "eof", pos }, pos];
    }
    if (this.isOperatorStart(current)) {
      return this.getOperator(pos);
    }
    if (this.isDigit(current)) {
      return this.getNumber(pos);
    }
    if (current === "'" || current === '"' || current === "`") {
      return this.getString(pos);
    }
    if (current === ".") {
      // A dot directly followed by a digit starts a number (".5"); any other
      // dot is the "." operator.
      const next = this.input[pos + 1];
      if (this.isDigit(next)) {
        return this.getNumber(pos);
      }
      return this.getOperator(pos);
    }
    // Look at the whole code point so that identifier characters outside the
    // Basic Multilingual Plane (stored as surrogate pairs) are recognized.
    if (this.isIdentifierStart(this.codePointAt(pos))) {
      return this.getIdentifier(pos);
    }
    return [{ type: "invalid", pos }, pos];
  }

  /**
   * Returns the full code point starting at `pos` as a string (one or two
   * UTF-16 code units), or `undefined` if `pos` is outside the input.
   */
  private codePointAt(pos: number): string | undefined {
    const codePoint = this.input.codePointAt(pos);
    return codePoint === undefined ? undefined : String.fromCodePoint(codePoint);
  }

  /** Whether the first character of `char` may begin an operator or punctuation token. */
  private isOperatorStart(char: string) {
    return Lexer.operatorStartChars.has(char[0]);
  }

  /**
   * Reads the operator or punctuation token that starts at `pos`, preferring
   * the longest match (e.g. `===` over `==`, `>>>` over `>>`).
   *
   * A lone `=` yields an `"invalid"` token, as does any character that is not
   * handled by one of the cases.
   *
   * @returns The token and the index of the first character after it.
   */
  private getOperator(pos: number): [Token, number] {
    const current = this.input[pos];
    const next = this.input[pos + 1];
    const nextButOne = this.input[pos + 2];
    switch (current) {
      case "+":
      case "-":
      case "/":
      case "%":
      case "~":
      case "^":
      case ".":
      case ":":
      case ",":
      case "(":
      case ")":
      case "[":
      case "]":
      case "{":
      case "}": {
        return [{ type: current, pos }, pos + 1];
      }
      case "*": {
        if (next === "*") {
          return [{ type: "**", pos }, pos + 2];
        }
        return [{ type: "*", pos }, pos + 1];
      }
      case "=": {
        if (next === "=") {
          if (nextButOne === "=") {
            return [{ type: "===", pos }, pos + 3];
          }
          return [{ type: "==", pos }, pos + 2];
        }
        return [{ type: "invalid", pos }, pos];
      }
      case "!": {
        if (next === "=") {
          if (nextButOne === "=") {
            return [{ type: "!==", pos }, pos + 3];
          }
          return [{ type: "!=", pos }, pos + 2];
        }
        return [{ type: "!", pos }, pos + 1];
      }
      case ">": {
        switch (next) {
          case ">": {
            if (nextButOne === ">") {
              return [{ type: ">>>", pos }, pos + 3];
            }
            return [{ type: ">>", pos }, pos + 2];
          }
          case "=": {
            return [{ type: ">=", pos }, pos + 2];
          }
          default: {
            return [{ type: ">", pos }, pos + 1];
          }
        }
      }
      case "<": {
        switch (next) {
          case "=": {
            return [{ type: "<=", pos }, pos + 2];
          }
          case "<": {
            return [{ type: "<<", pos }, pos + 2];
          }
          default: {
            return [{ type: "<", pos }, pos + 1];
          }
        }
      }
      case "&": {
        if (next === "&") {
          return [{ type: "&&", pos }, pos + 2];
        }
        return [{ type: "&", pos }, pos + 1];
      }
      case "|": {
        if (next === "|") {
          return [{ type: "||", pos }, pos + 2];
        }
        return [{ type: "|", pos }, pos + 1];
      }
      case "?": {
        switch (next) {
          case ".": {
            return [{ type: "?.", pos }, pos + 2];
          }
          case "?": {
            return [{ type: "??", pos }, pos + 2];
          }
          default: {
            return [{ type: "?", pos }, pos + 1];
          }
        }
      }
    }
    return [{ type: "invalid", pos }, pos];
  }

  /** Whether the first character of `char` is a digit; `undefined` is not a digit. */
  private isDigit(char: string | undefined): char is `${number}` {
    return Lexer.digitPattern.test(char?.[0] ?? "");
  }

  /**
   * Reads the number literal that starts at `pos`.
   *
   * A number consists of digits, at most one `.`, and `_` separators. The
   * result is an `"invalid"` token when
   * - a second `.` is encountered,
   * - a `_` is first, last, doubled, or directly next to the `.`, or
   * - a `_` appears in an integer part that starts with `0` (e.g. `0_1`).
   *
   * @returns The token (its `symbol` is the literal's source text) and the
   *   index of the first character after it.
   */
  private getNumber(pos: number): [Token, number] {
    let endPos = pos;
    let foundDot = false;
    // True while scanning an integer part that begins with "0". Separators are
    // rejected there, but are fine again in the fractional part ("0.5_5").
    let leadingZero = this.input[pos] === "0";

    for (;;) {
      const char = this.input[endPos];
      const prev = endPos > pos ? this.input[endPos - 1] : undefined;

      if (char === ".") {
        if (foundDot || prev === "_") {
          return [{ type: "invalid", pos }, pos];
        }
        foundDot = true;
        leadingZero = false;
      } else if (char === "_") {
        if (endPos === pos) {
          // A separator cannot start a number; fall through to the empty check.
          break;
        }
        if (prev === "_" || prev === "." || leadingZero) {
          return [{ type: "invalid", pos }, pos];
        }
      } else if (!this.isDigit(char)) {
        break;
      }

      endPos += 1;
    }

    if (pos === endPos) {
      return [{ type: "invalid", pos }, pos];
    }
    if (this.input[endPos - 1] === "_") {
      return [{ type: "invalid", pos }, pos];
    }
    return [{ type: "number", symbol: this.input.slice(pos, endPos), pos }, endPos];
  }

  /** Whether `char` begins with a code point that may start an identifier (`$`, `_`, or Unicode `ID_Start`). */
  private isIdentifierStart(char: string | undefined) {
    return Lexer.identifierStartPattern.test(char ?? "");
  }

  /**
   * Whether `char` begins with a code point that may appear inside an
   * identifier (`$`, ZWNJ, ZWJ, or Unicode `ID_Continue`).
   */
  private isIdentifier(char: string | undefined) {
    return Lexer.identifierPartPattern.test(char ?? "");
  }

  /**
   * Reads the identifier that starts at `pos`.
   *
   * @returns An `"iden"` token (its `symbol` is the identifier text) and the
   *   index of the first character after it, or an `"invalid"` token if no
   *   identifier character is found at `pos`.
   */
  private getIdentifier(pos: number): [Token, number] {
    let endPos = pos;
    let char = this.codePointAt(endPos);
    while (char !== undefined && this.isIdentifier(char)) {
      // Advance by the code point's length so surrogate pairs stay together.
      endPos += char.length;
      char = this.codePointAt(endPos);
    }
    if (endPos === pos) {
      return [{ type: "invalid", pos }, pos];
    }
    return [{ type: "iden", symbol: this.input.slice(pos, endPos), pos }, endPos];
  }

  /**
   * Reads the string literal that starts at `pos`. The character at `pos` is
   * taken as the quote character; the literal ends at the next occurrence of
   * that character that is not escaped by a backslash.
   *
   * @returns A `"string"` token (its `symbol` is the literal including both
   *   quotes) and the index of the first character after it, or an
   *   `"invalid"` token if the literal is not terminated.
   */
  private getString(pos: number): [Token, number] {
    const quote = this.input[pos];
    if (quote === undefined) {
      return [{ type: "invalid", pos }, pos];
    }
    let endPos = pos + 1;
    while (endPos < this.input.length && this.input[endPos] !== quote) {
      // A backslash escapes whatever follows it, so skip both characters. This
      // keeps an escaped backslash ("\\\\") from escaping the closing quote.
      endPos += this.input[endPos] === "\\" ? 2 : 1;
    }
    if (this.input[endPos] !== quote) {
      return [{ type: "invalid", pos }, pos];
    }
    return [{ type: "string", symbol: this.input.slice(pos, endPos + 1), pos }, endPos + 1];
  }

  /** Whether the first character of `char` is whitespace; `undefined` is not whitespace. */
  private isWhiteSpace(char: string | undefined) {
    return Lexer.whiteSpacePattern.test(char?.[0] ?? "");
  }
}
|