From 20ea70d96a19a154ff7d0ee6f60181f11ddba6fc Mon Sep 17 00:00:00 2001 From: Johannes Loher Date: Mon, 31 Oct 2022 22:58:04 +0100 Subject: [PATCH] fix: make expression evaluation in active effects more secure --- spec/expression-evaluation/evaluator.spec.ts | 90 +++ spec/expression-evaluation/lexer.spec.ts | 602 +++++++++++++++++++ spec/expression-evaluation/validator.spec.ts | 126 ++++ src/active-effect.ts | 12 +- src/expression-evaluation/evaluator.ts | 38 ++ src/expression-evaluation/grammar.ts | 61 ++ src/expression-evaluation/lexer.ts | 264 ++++++++ src/expression-evaluation/validator.ts | 30 + 8 files changed, 1222 insertions(+), 1 deletion(-) create mode 100644 spec/expression-evaluation/evaluator.spec.ts create mode 100644 spec/expression-evaluation/lexer.spec.ts create mode 100644 spec/expression-evaluation/validator.spec.ts create mode 100644 src/expression-evaluation/evaluator.ts create mode 100644 src/expression-evaluation/grammar.ts create mode 100644 src/expression-evaluation/lexer.ts create mode 100644 src/expression-evaluation/validator.ts diff --git a/spec/expression-evaluation/evaluator.spec.ts b/spec/expression-evaluation/evaluator.spec.ts new file mode 100644 index 00000000..bce5c0f5 --- /dev/null +++ b/spec/expression-evaluation/evaluator.spec.ts @@ -0,0 +1,90 @@ +// SPDX-FileCopyrightText: 2022 Johannes Loher +// +// SPDX-License-Identifier: MIT + +import { describe, expect, it } from "vitest"; + +import { defaultEvaluator, Evaluator, mathEvaluator } from "../../src/expression-evaluation/evaluator"; + +describe("Evaluator", () => { + it("evaluates expressions that only use identifiers according to the given predicate", () => { + // given + const expression = "typeof 'foo' === 'string' ? 
42 : null"; + + // when + const result = defaultEvaluator.evaluate(expression); + + // then + expect(result).toEqual(42); + }); + + it("fails to evaluate expressions that contain identifiers that are not allowed by the predicate", () => { + // given + const expression = "typeof 'foo' === 'string' ? 42 : function (){}"; + + // when + const evaluate = () => defaultEvaluator.evaluate(expression); + + // then + expect(evaluate).toThrowError("'function' is not an allowed identifier."); + }); + + it("fails to evaluate expressions that contain invalid tokens", () => { + // given + const expression = "1;"; + + // when + const evaluate = () => defaultEvaluator.evaluate(expression); + + // then + expect(evaluate).toThrowError("Invalid or unexpected token (1)"); + }); + + it("fails to evaluate expressions that contain arrow functions", () => { + // given + const expression = "(() => 1)()"; + + // when + const evaluate = () => defaultEvaluator.evaluate(expression); + + // then + expect(evaluate).toThrowError("Invalid or unexpected token (4)"); + }); + + it("makes the given context available", () => { + // given + const context = { floor: Math.floor }; + const evaluator = new Evaluator({ context }); + const expression = "floor(0.5)"; + + // when + const result = evaluator.evaluate(expression); + + // then + expect(result).toEqual(0); + }); + + describe("mathEvaluator", () => { + it("makes the given context available", () => { + // given + const expression = "sqrt(sin(PI))"; + + // when + const result = mathEvaluator.evaluate(expression); + + // then + expect(result).toEqual(Math.sqrt(Math.sin(Math.PI))); + }); + + it("does not give acces to the function constructor", () => { + // given + const expression = "sqrt.constructor"; + + // when + const evaluate = () => mathEvaluator.evaluate(expression); + + // then + expect(evaluate).toThrowError("'constructor' is not an allowed identifier."); + }); + }); +}); diff --git a/spec/expression-evaluation/lexer.spec.ts 
b/spec/expression-evaluation/lexer.spec.ts new file mode 100644 index 00000000..6004e467 --- /dev/null +++ b/spec/expression-evaluation/lexer.spec.ts @@ -0,0 +1,602 @@ +// SPDX-FileCopyrightText: 2022 Johannes Loher +// +// SPDX-License-Identifier: MIT + +import { describe, expect, it } from "vitest"; + +import { Lexer } from "../../src/expression-evaluation/lexer"; + +import type { Token } from "../../src/expression-evaluation/grammar"; + +describe("Lexer", () => { + const singleOperatorTestCases: { input: string; expected: Token[] }[] = [ + { + input: "+", + expected: [ + { type: "+", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "-", + expected: [ + { type: "-", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "*", + expected: [ + { type: "*", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "**", + expected: [ + { type: "**", pos: 0 }, + { type: "eof", pos: 2 }, + ], + }, + { + input: "/", + expected: [ + { type: "/", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "%", + expected: [ + { type: "%", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "===", + expected: [ + { type: "===", pos: 0 }, + { type: "eof", pos: 3 }, + ], + }, + { + input: "!==", + expected: [ + { type: "!==", pos: 0 }, + { type: "eof", pos: 3 }, + ], + }, + { + input: "==", + expected: [ + { type: "==", pos: 0 }, + { type: "eof", pos: 2 }, + ], + }, + { + input: "<", + expected: [ + { type: "<", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "<=", + expected: [ + { type: "<=", pos: 0 }, + { type: "eof", pos: 2 }, + ], + }, + { + input: ">", + expected: [ + { type: ">", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: ">=", + expected: [ + { type: ">=", pos: 0 }, + { type: "eof", pos: 2 }, + ], + }, + { + input: "&&", + expected: [ + { type: "&&", pos: 0 }, + { type: "eof", pos: 2 }, + ], + }, + { + input: "||", + expected: [ + { type: "||", pos: 0 }, + { type: "eof", pos: 2 }, + ], + }, + { + input: "&", + 
expected: [ + { type: "&", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "|", + expected: [ + { type: "|", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "<<", + expected: [ + { type: "<<", pos: 0 }, + { type: "eof", pos: 2 }, + ], + }, + { + input: ">>>", + expected: [ + { type: ">>>", pos: 0 }, + { type: "eof", pos: 3 }, + ], + }, + { + input: ".", + expected: [ + { type: ".", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "?.", + expected: [ + { type: "?.", pos: 0 }, + { type: "eof", pos: 2 }, + ], + }, + { + input: "??", + expected: [ + { type: "??", pos: 0 }, + { type: "eof", pos: 2 }, + ], + }, + { + input: "?", + expected: [ + { type: "?", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: ":", + expected: [ + { type: ":", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "(", + expected: [ + { type: "(", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: ")", + expected: [ + { type: ")", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "[", + expected: [ + { type: "[", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "]", + expected: [ + { type: "]", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: ",", + expected: [ + { type: ",", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "{", + expected: [ + { type: "{", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "}", + expected: [ + { type: "}", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + ]; + + const singleNumberTestCases: { input: string; expected: Token[] }[] = [ + { + input: "1", + expected: [ + { type: "number", symbol: "1", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "42", + expected: [ + { type: "number", symbol: "42", pos: 0 }, + { type: "eof", pos: 2 }, + ], + }, + { + input: "42.9", + expected: [ + { type: "number", symbol: "42.9", pos: 0 }, + { type: "eof", pos: 4 }, + ], + }, + { + input: ".9", + expected: [ + { type: "number", symbol: ".9", pos: 0 }, + { 
type: "eof", pos: 2 }, + ], + }, + { + input: "1_1", + expected: [ + { type: "number", symbol: "1_1", pos: 0 }, + { type: "eof", pos: 3 }, + ], + }, + { + input: "10_1", + expected: [ + { type: "number", symbol: "10_1", pos: 0 }, + { type: "eof", pos: 4 }, + ], + }, + { + input: "1_1_1", + expected: [ + { type: "number", symbol: "1_1_1", pos: 0 }, + { type: "eof", pos: 5 }, + ], + }, + { + input: ".1_1", + expected: [ + { type: "number", symbol: ".1_1", pos: 0 }, + { type: "eof", pos: 4 }, + ], + }, + ]; + + const invalidNumberTestCases: { input: string; expected: Token[] }[] = [ + { input: "1.1.1", expected: [{ type: "invalid", pos: 0 }] }, + { input: "1__1", expected: [{ type: "invalid", pos: 0 }] }, + { input: "1_", expected: [{ type: "invalid", pos: 0 }] }, + { input: "1._1", expected: [{ type: "invalid", pos: 0 }] }, + { input: "0_1", expected: [{ type: "invalid", pos: 0 }] }, + { input: "00_1", expected: [{ type: "invalid", pos: 0 }] }, + ]; + + const singleIdentifierTestCases: { input: string; expected: Token[] }[] = [ + { + input: "foo", + expected: [ + { type: "iden", symbol: "foo", pos: 0 }, + { type: "eof", pos: 3 }, + ], + }, + { + input: "_foo", + expected: [ + { type: "iden", symbol: "_foo", pos: 0 }, + { type: "eof", pos: 4 }, + ], + }, + { + input: "$foo", + expected: [ + { type: "iden", symbol: "$foo", pos: 0 }, + { type: "eof", pos: 4 }, + ], + }, + { + input: "foo1", + expected: [ + { type: "iden", symbol: "foo1", pos: 0 }, + { type: "eof", pos: 4 }, + ], + }, + { + input: "_foo1_", + expected: [ + { type: "iden", symbol: "_foo1_", pos: 0 }, + { type: "eof", pos: 6 }, + ], + }, + { + input: "μ", + expected: [ + { type: "iden", symbol: "μ", pos: 0 }, + { type: "eof", pos: 1 }, + ], + }, + { + input: "._1", + expected: [ + { type: ".", pos: 0 }, + { type: "iden", symbol: "_1", pos: 1 }, + { type: "eof", pos: 3 }, + ], + }, + { + input: "true", + expected: [ + { type: "iden", symbol: "true", pos: 0 }, + { type: "eof", pos: 4 }, + ], + }, + { + 
input: "false", + expected: [ + { type: "iden", symbol: "false", pos: 0 }, + { type: "eof", pos: 5 }, + ], + }, + { + input: "null", + expected: [ + { type: "iden", symbol: "null", pos: 0 }, + { type: "eof", pos: 4 }, + ], + }, + { + input: "undefined", + expected: [ + { type: "iden", symbol: "undefined", pos: 0 }, + { type: "eof", pos: 9 }, + ], + }, + ]; + + const invalidIdentifierTestCases: { input: string; expected: Token[] }[] = [ + { + input: "1foo", + expected: [ + { type: "number", symbol: "1", pos: 0 }, + { type: "iden", symbol: "foo", pos: 1 }, + { type: "eof", pos: 4 }, + ], + }, + { input: "↓", expected: [{ type: "invalid", pos: 0 }] }, + { input: '"', expected: [{ type: "invalid", pos: 0 }] }, + ]; + + const singleStringTestCases: { input: string; expected: Token[] }[] = [ + { + input: '""', + expected: [ + { type: "string", symbol: '""', pos: 0 }, + { type: "eof", pos: 2 }, + ], + }, + { + input: '"foo"', + expected: [ + { type: "string", symbol: '"foo"', pos: 0 }, + { type: "eof", pos: 5 }, + ], + }, + { + input: '"\\""', + expected: [ + { type: "string", symbol: '"\\""', pos: 0 }, + { type: "eof", pos: 4 }, + ], + }, + { + input: '"\\\'"', + expected: [ + { type: "string", symbol: '"\\\'"', pos: 0 }, + { type: "eof", pos: 4 }, + ], + }, + { + input: "''", + expected: [ + { type: "string", symbol: "''", pos: 0 }, + { type: "eof", pos: 2 }, + ], + }, + { + input: "'foo'", + expected: [ + { type: "string", symbol: "'foo'", pos: 0 }, + { type: "eof", pos: 5 }, + ], + }, + { + input: "'\\''", + expected: [ + { type: "string", symbol: "'\\''", pos: 0 }, + { type: "eof", pos: 4 }, + ], + }, + { + input: "'\\\"'", + expected: [ + { type: "string", symbol: "'\\\"'", pos: 0 }, + { type: "eof", pos: 4 }, + ], + }, + { + input: "``", + expected: [ + { type: "string", symbol: "``", pos: 0 }, + { type: "eof", pos: 2 }, + ], + }, + { + input: "`foo`", + expected: [ + { type: "string", symbol: "`foo`", pos: 0 }, + { type: "eof", pos: 5 }, + ], + }, + { + input: 
"`\\``", + expected: [ + { type: "string", symbol: "`\\``", pos: 0 }, + { type: "eof", pos: 4 }, + ], + }, + { + input: '`\\"`', + expected: [ + { type: "string", symbol: '`\\"`', pos: 0 }, + { type: "eof", pos: 4 }, + ], + }, + ]; + + const invalidStringTestCases: { input: string; expected: Token[] }[] = [ + { input: '"', expected: [{ type: "invalid", pos: 0 }] }, + { input: '"\\"', expected: [{ type: "invalid", pos: 0 }] }, + { input: "'", expected: [{ type: "invalid", pos: 0 }] }, + { input: "'\\'", expected: [{ type: "invalid", pos: 0 }] }, + ]; + + const whiteSpaceTestCases: { input: string; expected: Token[] }[] = [ + { input: " ", expected: [{ type: "eof", pos: 1 }] }, + { input: " ", expected: [{ type: "eof", pos: 3 }] }, + { input: "\n", expected: [{ type: "eof", pos: 1 }] }, + { input: " \n", expected: [{ type: "eof", pos: 2 }] }, + { input: " ", expected: [{ type: "eof", pos: 1 }] }, + ]; + + const complicatedTermTestCases: { input: string; expected: Token[] }[] = [ + { + input: "5x", + expected: [ + { type: "number", symbol: "5", pos: 0 }, + { type: "iden", symbol: "x", pos: 1 }, + { type: "eof", pos: 2 }, + ], + }, + { + input: "5*x", + expected: [ + { type: "number", symbol: "5", pos: 0 }, + { type: "*", pos: 1 }, + { type: "iden", symbol: "x", pos: 2 }, + { type: "eof", pos: 3 }, + ], + }, + { + input: "5 * x", + expected: [ + { type: "number", symbol: "5", pos: 0 }, + { type: "*", pos: 2 }, + { type: "iden", symbol: "x", pos: 4 }, + { type: "eof", pos: 5 }, + ], + }, + { + input: "(5 * 5 + 2) / 1.2 === 'foo'", + expected: [ + { type: "(", pos: 0 }, + { type: "number", symbol: "5", pos: 1 }, + { type: "*", pos: 3 }, + { type: "number", symbol: "5", pos: 5 }, + { type: "+", pos: 7 }, + { type: "number", symbol: "2", pos: 9 }, + { type: ")", pos: 10 }, + { type: "/", pos: 12 }, + { type: "number", symbol: "1.2", pos: 14 }, + { type: "===", pos: 18 }, + { type: "string", symbol: "'foo'", pos: 22 }, + { type: "eof", pos: 27 }, + ], + }, + { + input: "(() 
=> {console.log('foo'); return 1;})()",
+            expected: [
+                { type: "(", pos: 0 },
+                { type: "(", pos: 1 },
+                { type: ")", pos: 2 },
+                { type: "invalid", pos: 4 },
+            ],
+        },
+        {
+            input: "(function() {console.log('foo'); return 1;})()",
+            expected: [
+                { type: "(", pos: 0 },
+                { type: "iden", symbol: "function", pos: 1 },
+                { type: "(", pos: 9 },
+                { type: ")", pos: 10 },
+                { type: "{", pos: 12 },
+                { type: "iden", symbol: "console", pos: 13 },
+                { type: ".", pos: 20 },
+                { type: "iden", symbol: "log", pos: 21 },
+                { type: "(", pos: 24 },
+                { type: "string", symbol: "'foo'", pos: 25 },
+                { type: ")", pos: 30 },
+                { type: "invalid", pos: 31 },
+            ],
+        },
+        {
+            input: "'ranged' === 'ranged'",
+            expected: [
+                { type: "string", symbol: "'ranged'", pos: 0 },
+                { type: "===", pos: 9 },
+                { type: "string", symbol: "'ranged'", pos: 13 },
+                { type: "eof", pos: 21 },
+            ],
+        },
+    ];
+
+    it.each([
+        ...singleOperatorTestCases,
+        ...singleNumberTestCases,
+        ...invalidNumberTestCases,
+        ...singleIdentifierTestCases,
+        ...invalidIdentifierTestCases,
+        ...singleStringTestCases,
+        ...invalidStringTestCases,
+        ...whiteSpaceTestCases,
+        ...complicatedTermTestCases,
+    ])("lexes $input correctly", ({ input, expected }) => {
+        // when
+        const result = consume(new Lexer(input));
+
+        // then
+        expect(result).toEqual(expected);
+    });
+});
+
+/**
+ * Collects every value produced by `iterable` into an array, in iteration order.
+ * Used to turn the lazily evaluated Lexer into a token list that can be compared.
+ */
+function consume<T>(iterable: Iterable<T>): T[] {
+    return [...iterable];
+}
diff --git a/spec/expression-evaluation/validator.spec.ts b/spec/expression-evaluation/validator.spec.ts
new file mode 100644
index 00000000..b92de58f
--- /dev/null
+++ b/spec/expression-evaluation/validator.spec.ts
@@ -0,0 +1,126 @@
+// SPDX-FileCopyrightText: 2022 Johannes Loher
+//
+// SPDX-License-Identifier: MIT
+
+import { describe, expect, it } from "vitest";
+
+import { literals, safeOperators } from "../../src/expression-evaluation/grammar";
+import { Validator } from
"../../src/expression-evaluation/validator"; + +describe("Validator", () => { + it("allows identifier according to the given predicate", () => { + // given + const predicate = (identifier: string) => identifier === "true"; + const validator = new Validator(predicate); + const input = "true"; + + // when + const validate = () => validator.validate(input); + + // then + expect(validate).not.toThrow(); + }); + + it("disallows identifier according to the given predicate", () => { + // given + const predicate = (identifier: string) => identifier === "false"; + const validator = new Validator(predicate); + const input = "true"; + + // when + const validate = () => validator.validate(input); + + // then + expect(validate).toThrowError("'true' is not an allowed identifier"); + }); + + it("allows multiple identifiers according to the given predicate", () => { + // given + const predicate = (identifier: string) => identifier === "true" || identifier === "null"; + const validator = new Validator(predicate); + const input = "true null"; + + // when + const validate = () => validator.validate(input); + + // then + expect(validate).not.toThrow(); + }); + + it("allows multiple identifiers in a more complex expression according to the given rule", () => { + // given + const predicate = (identifier: string) => identifier === "true" || identifier === "null"; + const validator = new Validator(predicate); + const input = "true === null"; + + // when + const validate = () => validator.validate(input); + + // then + expect(validate).not.toThrow(); + }); + + it("mentions the first not allowed identifier in the thrown errror", () => { + // given + const predicate = (identifier: string) => identifier === "true" || identifier === "null"; + const validator = new Validator(predicate); + const input = "true === null && undefined === false"; + + // when + const validate = () => validator.validate(input); + + // then + expect(validate).toThrowError("'undefined' is not an allowed identifier."); + 
}); + + it("disallows invalid invalid tokens", () => { + // given + const validator = new Validator(); + const input = ";"; + + // when + const validate = () => validator.validate(input); + + // then + expect(validate).toThrowError("Invalid or unexpected token (0)"); + }); + + it("allows a complicated valid expression", () => { + // given + const predicate = (identifier: string) => + [...safeOperators, ...literals, "floor", "random"].includes(identifier); + const validator = new Validator(predicate); + const input = "typeof (floor(random() * 5) / 2) === 'number' ? 42 : 'foo'"; + + // when + const validate = () => validator.validate(input); + + // then + expect(validate).not.toThrow(); + }); + + it("disallows a complicated expression if it contains a disallowed identifier", () => { + // given + const predicate = (identifier: string) => [...safeOperators, ...literals, "ceil"].includes(identifier); + const validator = new Validator(predicate); + const input = "ceil.constructor('alert(1); return 1;')()"; + + // when + const validate = () => validator.validate(input); + + // then + expect(validate).toThrowError("'constructor' is not an allowed identifier."); + }); + + it("disallows arrow functions", () => { + // given + const validator = new Validator(); + const input = "() => {}"; + + // when + const validate = () => validator.validate(input); + + // then + expect(validate).toThrowError("Invalid or unexpected token (3)"); + }); +}); diff --git a/src/active-effect.ts b/src/active-effect.ts index d3a28380..0c587f96 100644 --- a/src/active-effect.ts +++ b/src/active-effect.ts @@ -3,6 +3,7 @@ // SPDX-License-Identifier: MIT import { DS4Actor } from "./actor/actor"; +import { mathEvaluator } from "./expression-evaluation/evaluator"; import { getGame } from "./helpers"; import type { DS4Item } from "./item/item"; @@ -62,8 +63,9 @@ export class DS4ActiveEffect extends ActiveEffect { override apply(actor: DS4Actor, change: foundry.data.ActiveEffectData["changes"][number]): 
unknown {
         change.value = Roll.replaceFormulaData(change.value, actor.data);
         try {
-            change.value = Roll.safeEval(change.value).toString();
+            change.value = DS4ActiveEffect.safeEval(change.value).toString();
         } catch (e) {
+            logger.warn(e);
             // this is a valid case, e.g., if the effect change simply is a string
         }
         return super.apply(actor, change);
@@ -104,4 +106,12 @@ export class DS4ActiveEffect extends ActiveEffect {
 
         return this.create(createData, { parent, pack: parent.pack ?? undefined });
     }
+
+    static safeEval(expression: string): number | `${number | boolean}` {
+        const result = mathEvaluator.evaluate(expression);
+        if (!Number.isNumeric(result)) {
+            throw new Error(`mathEvaluator.evaluate produced a non-numeric result from expression "${expression}"`);
+        }
+        return result as number | `${number | boolean}`;
+    }
 }
diff --git a/src/expression-evaluation/evaluator.ts b/src/expression-evaluation/evaluator.ts
new file mode 100644
index 00000000..4cb1e8b6
--- /dev/null
+++ b/src/expression-evaluation/evaluator.ts
@@ -0,0 +1,56 @@
+// SPDX-FileCopyrightText: 2022 Johannes Loher
+//
+// SPDX-License-Identifier: MIT
+
+import { Validator } from "./validator";
+
+/**
+ * Evaluates JavaScript expressions after checking them with a {@link Validator}.
+ *
+ * NOTE(review): evaluation goes through `new Function`, i.e. it is `eval` on the given expression. The validator
+ * is the only safeguard, so every identifier that the predicate or the context lets through must be safe to expose.
+ */
+export class Evaluator<Context extends object> {
+    context?: Context;
+    validator: Validator;
+
+    /**
+     * @param options - `context`: object whose own property names become allowed identifiers that resolve to its
+     *                  properties; `predicate`: decides which further identifiers are allowed
+     */
+    constructor({
+        context,
+        predicate = Validator.defaultPredicate,
+    }: { context?: Context; predicate?: (identifier: string) => boolean } = {}) {
+        let actualPredicate = predicate;
+        if (context) {
+            this.context = new Proxy(context, {
+                // Claim every name so that identifier lookups inside `with` never fall through to outer scopes.
+                has: () => true,
+                // `with` consults Symbol.unscopables to hide properties; undefined hides nothing.
+                get: (t, k) => (k === Symbol.unscopables ? undefined : t[k as keyof typeof t]),
+            });
+            // An identifier is allowed if it names an own property of the given context (not only of Math).
+            actualPredicate = (identifier: string) =>
+                predicate(identifier) || Object.getOwnPropertyNames(context).includes(identifier);
+        }
+        this.validator = new Validator(actualPredicate);
+    }
+
+    /**
+     * Validates `expression` and returns the value it evaluates to.
+     *
+     * @throws if the validator rejects the expression or if evaluating it throws
+     */
+    evaluate(expression: string): unknown {
+        this.validator.validate(expression);
+
+        const body = `with (sandbox) { return ${expression}; }`;
+        const evaluate = new Function("sandbox", body);
+        return evaluate(this.context ?? {});
+    }
+}
+
+export const defaultEvaluator = new Evaluator();
+
+export const mathEvaluator = new Evaluator({ context: Math });
diff --git a/src/expression-evaluation/grammar.ts b/src/expression-evaluation/grammar.ts
new file mode 100644
index 00000000..fc4d4b95
--- /dev/null
+++ b/src/expression-evaluation/grammar.ts
@@ -0,0 +1,61 @@
+// SPDX-FileCopyrightText: 2022 Johannes Loher
+//
+// SPDX-License-Identifier: MIT
+
+export type Token = TokenWithSymbol | TokenWithoutSymbol;
+
+export interface TokenWithSymbol {
+    type: TypeWithSymbol;
+    symbol: string;
+    pos: number;
+}
+
+interface TokenWithoutSymbol {
+    type: TypeWithoutSymbol;
+    pos: number;
+}
+
+type TypeWithSymbol = "iden" | "number" | "string";
+
+type TypeWithoutSymbol =
+    | "+"
+    | "-"
+    | "*"
+    | "**"
+    | "/"
+    | "%"
+    | "==="
+    | "!=="
+    | "=="
+    | "!="
+    | "<"
+    | "<="
+    | ">"
+    | ">="
+    | "&&"
+    | "||"
+    | "&"
+    | "|"
+    | "~"
+    | "^"
+    | "<<"
+    | ">>"
+    | ">>>"
+    | "."
+    | "?."
+    | "??"
+    | "!"
+    | "?"
+    | ":"
+    | "("
+    | ")"
+    | "["
+    | "]"
+    | ","
+    | "{"
+    | "}"
+    | "invalid"
+    | "eof";
+
+export const literals = ["true", "false", "null", "undefined"];
+export const safeOperators = ["in", "instanceof", "typeof", "void"];
diff --git a/src/expression-evaluation/lexer.ts b/src/expression-evaluation/lexer.ts
new file mode 100644
index 00000000..64195b93
--- /dev/null
+++ b/src/expression-evaluation/lexer.ts
@@ -0,0 +1,299 @@
+// SPDX-FileCopyrightText: 2022 Johannes Loher
+//
+// SPDX-License-Identifier: MIT
+
+import type { Token } from "./grammar";
+
+/**
+ * Splits an expression into {@link Token}s without evaluating it.
+ *
+ * Iterating a lexer yields the tokens in input order and stops after the first `eof` or `invalid` token.
+ *
+ * NOTE(review): expressions whose tokens pass validation are handed to `new Function` by the Evaluator, so the
+ * lexer must never disagree with the JavaScript tokenizer about where strings end and code begins. Input that
+ * cannot be checked token by token (comments, template substitutions) is reported as `invalid`.
+ */
+export class Lexer {
+    /** First characters of all operator tokens except ".", which may also start a number. */
+    private static readonly operatorStartChars = new Set<string | undefined>("+-*/%=!><&|~^?:,()[]{}");
+
+    constructor(private readonly input: string) {}
+
+    /** Yields the tokens of the input; the last token is always `eof` or `invalid`. */
+    *[Symbol.iterator](): Generator<Token, void> {
+        let pos = 0;
+        while (true) {
+            if (this.isWhiteSpace(this.input[pos])) {
+                pos += 1;
+                continue;
+            }
+            const [token, newPos] = this.getNextToken(pos);
+            pos = newPos;
+            yield token;
+            if (token.type === "eof" || token.type === "invalid") {
+                break;
+            }
+        }
+    }
+
+    /**
+     * Reads the token that starts at `pos`, which must not point at white space.
+     *
+     * @returns the token and the position right after it (`eof` and `invalid` tokens do not advance)
+     */
+    private getNextToken(pos: number): [Token, number] {
+        const current = this.input[pos];
+
+        if (current === undefined) {
+            return [{ type: "eof", pos }, pos];
+        }
+        if (this.isOperatorStart(current)) {
+            return this.getOperator(pos);
+        }
+        if (this.isDigit(current)) {
+            return this.getNumber(pos);
+        }
+        if (current === "'" || current === '"' || current === "`") {
+            return this.getString(pos);
+        }
+        if (current === ".") {
+            // ".5" is a number, any other dot is the member access operator.
+            const next = this.input[pos + 1];
+            if (this.isDigit(next)) {
+                return this.getNumber(pos);
+            }
+            return this.getOperator(pos);
+        }
+        if (this.isIdentifierStart(current)) {
+            return this.getIdentifier(pos);
+        }
+        return [{ type: "invalid", pos }, pos];
+    }
+
+    private isOperatorStart(char: string) {
+        return Lexer.operatorStartChars.has(char[0]);
+    }
+
+    /**
+     * Reads the operator or punctuation token that starts at `pos`.
+     *
+     * A "=" that is not part of a comparison operator is `invalid`, which rules out assignments and arrow
+     * functions. Comment openers are `invalid` too: JavaScript skips comments, but this lexer would go on to
+     * tokenize their content, so quotes inside a comment could hide code from validation.
+     */
+    private getOperator(pos: number): [Token, number] {
+        const current = this.input[pos];
+        const next = this.input[pos + 1];
+        const nextButOne = this.input[pos + 2];
+        switch (current) {
+            case "+":
+            case "%":
+            case "~":
+            case "^":
+            case ".":
+            case ":":
+            case ",":
+            case "(":
+            case ")":
+            case "[":
+            case "]":
+            case "{":
+            case "}": {
+                return [{ type: current, pos }, pos + 1];
+            }
+            case "-": {
+                // "-->" at the start of a line is an HTML-like comment for JavaScript outside of modules.
+                if (next === "-" && nextButOne === ">") {
+                    return [{ type: "invalid", pos }, pos];
+                }
+                return [{ type: "-", pos }, pos + 1];
+            }
+            case "/": {
+                // "//" and "/*" open comments.
+                if (next === "/" || next === "*") {
+                    return [{ type: "invalid", pos }, pos];
+                }
+                return [{ type: "/", pos }, pos + 1];
+            }
+            case "*": {
+                if (next === "*") {
+                    return [{ type: "**", pos }, pos + 2];
+                }
+                return [{ type: "*", pos }, pos + 1];
+            }
+            case "=": {
+                if (next === "=") {
+                    if (nextButOne === "=") {
+                        return [{ type: "===", pos }, pos + 3];
+                    }
+                    return [{ type: "==", pos }, pos + 2];
+                }
+                return [{ type: "invalid", pos }, pos];
+            }
+            case "!": {
+                if (next === "=") {
+                    if (nextButOne === "=") {
+                        return [{ type: "!==", pos }, pos + 3];
+                    }
+                    return [{ type: "!=", pos }, pos + 2];
+                }
+                return [{ type: "!", pos }, pos + 1];
+            }
+            case ">": {
+                switch (next) {
+                    case ">": {
+                        if (nextButOne === ">") {
+                            return [{ type: ">>>", pos }, pos + 3];
+                        }
+                        return [{ type: ">>", pos }, pos + 2];
+                    }
+                    case "=": {
+                        return [{ type: ">=", pos }, pos + 2];
+                    }
+                    default: {
+                        return [{ type: ">", pos }, pos + 1];
+                    }
+                }
+            }
+            case "<": {
+                // "<!--" is an HTML-like single-line comment for JavaScript outside of modules.
+                if (this.input.startsWith("<!--", pos)) {
+                    return [{ type: "invalid", pos }, pos];
+                }
+                switch (next) {
+                    case "=": {
+                        return [{ type: "<=", pos }, pos + 2];
+                    }
+                    case "<": {
+                        return [{ type: "<<", pos }, pos + 2];
+                    }
+                    default: {
+                        return [{ type: "<", pos }, pos + 1];
+                    }
+                }
+            }
+            case "&": {
+                if (next === "&") {
+                    return [{ type: "&&", pos }, pos + 2];
+                }
+                return [{ type: "&", pos }, pos + 1];
+            }
+            case "|": {
+                if (next === "|") {
+                    return [{ type: "||", pos }, pos + 2];
+                }
+                return [{ type: "|", pos }, pos + 1];
+            }
+            case "?": {
+                switch (next) {
+                    case ".": {
+                        return [{ type: "?.", pos }, pos + 2];
+                    }
+                    case "?": {
+                        return [{ type: "??", pos }, pos + 2];
+                    }
+                    default: {
+                        return [{ type: "?", pos }, pos + 1];
+                    }
+                }
+            }
+        }
+        return [{ type: "invalid", pos }, pos];
+    }
+
+    private isDigit(char: string | undefined): char is `${number}` {
+        return /\d/.test(char?.[0] ?? "");
+    }
+
+    /**
+     * Reads the number literal that starts at `pos`: digits with at most one "." and "_" as separator.
+     *
+     * A second ".", a "_" directly after "_" or ".", any "_" in a literal that starts with "0" and a trailing
+     * "_" make the token `invalid`.
+     */
+    private getNumber(pos: number): [Token, number] {
+        let endPos = pos;
+        let foundDot = false;
+        // true once the literal is known to start with "0"; such literals must not contain "_"
+        let only0s = false;
+        while (
+            this.isDigit(this.input[endPos]) ||
+            this.input[endPos] === "." ||
+            (this.input[endPos] === "_" && endPos > pos)
+        ) {
+            if (this.input[endPos] === ".") {
+                if (foundDot) {
+                    return [{ type: "invalid", pos }, pos];
+                }
+                foundDot = true;
+            }
+            if (this.input[endPos] === "0") {
+                only0s = endPos === pos ? true : only0s;
+            }
+
+            if (
+                this.input[endPos] === "_" &&
+                (this.input[endPos - 1] === "_" || this.input[endPos - 1] === "." || only0s)
+            ) {
+                return [{ type: "invalid", pos }, pos];
+            }
+
+            endPos += 1;
+        }
+        if (pos === endPos) {
+            return [{ type: "invalid", pos }, pos];
+        }
+        if (this.input[endPos - 1] === "_") {
+            return [{ type: "invalid", pos }, pos];
+        }
+        return [{ type: "number", symbol: this.input.slice(pos, endPos), pos }, endPos];
+    }
+
+    private isIdentifierStart(char: string | undefined) {
+        return /[$_\p{ID_Start}]/u.test(char?.[0] ?? "");
+    }
+
+    private isIdentifier(char: string | undefined) {
+        return /[$\u200c\u200d\p{ID_Continue}]/u.test(char?.[0] ?? "");
+    }
+
+    private getIdentifier(pos: number): [Token, number] {
+        let endPos = pos;
+        while (endPos < this.input.length && this.isIdentifier(this.input[endPos])) {
+            endPos += 1;
+        }
+        if (endPos === pos) {
+            return [{ type: "invalid", pos }, pos];
+        }
+        return [{ type: "iden", symbol: this.input.slice(pos, endPos), pos }, endPos];
+    }
+
+    /**
+     * Reads the string or template literal that starts with the quote character at `pos`.
+     *
+     * A backslash escapes exactly the character that follows it, so in '\\' the second backslash is escaped and
+     * the quote after it closes the string. Templates that contain a substitution ("${") are `invalid` because
+     * the code inside the substitution would not be validated.
+     */
+    private getString(pos: number): [Token, number] {
+        const quote = this.input[pos];
+        let endPos = pos + 1;
+        while (endPos < this.input.length && this.input[endPos] !== quote) {
+            const char = this.input[endPos];
+            if (quote === "`" && char === "$" && this.input[endPos + 1] === "{") {
+                return [{ type: "invalid", pos }, pos];
+            }
+            // Skip the escaped character together with the backslash so that it can neither close the string nor
+            // start an escape itself.
+            endPos += char === "\\" ? 2 : 1;
+        }
+        if (this.input[endPos] !== quote) {
+            return [{ type: "invalid", pos }, pos];
+        }
+        return [{ type: "string", symbol: this.input.slice(pos, endPos + 1), pos }, endPos + 1];
+    }
+
+    private isWhiteSpace(char: string | undefined) {
+        return /\s/.test(char?.[0] ?? "");
+    }
+}
diff --git a/src/expression-evaluation/validator.ts b/src/expression-evaluation/validator.ts
new file mode 100644
index 00000000..58a99845
--- /dev/null
+++ b/src/expression-evaluation/validator.ts
@@ -0,0 +1,72 @@
+// SPDX-FileCopyrightText: 2022 Johannes Loher
+//
+// SPDX-License-Identifier: MIT
+
+import { literals, safeOperators } from "./grammar";
+import { Lexer } from "./lexer";
+
+import type { Token } from "./grammar";
+
+/**
+ * Checks that an expression only consists of known tokens and allowed identifiers.
+ *
+ * NOTE(review): validation looks at tokens only, it does not parse the expression. Constructs that reach
+ * arbitrary properties or that the lexer cannot tokenize are therefore rejected wholesale: computed member
+ * access such as `''['constructor']` and regular expression literals.
+ */
+export class Validator {
+    constructor(private readonly predicate: (identifier: string) => boolean = Validator.defaultPredicate) {}
+
+    static readonly defaultPredicate = (identifier: string) => [...literals, ...safeOperators].includes(identifier);
+
+    /**
+     * Checks every token of `input`.
+     *
+     * @throws ValidationError if an identifier is rejected by the predicate
+     * @throws SyntaxError if the input contains an invalid token, computed member access or a regular expression
+     */
+    public validate(input: string): void {
+        const lexer = new Lexer(input);
+        let previous: Token | undefined;
+        for (const token of lexer) {
+            if (token.type === "iden" && !this.predicate(token.symbol)) {
+                throw new ValidationError(token.symbol);
+            }
+            if (token.type === "invalid") {
+                throw new SyntaxError(`Invalid or unexpected token (${token.pos})`);
+            }
+            const followsValue = previous !== undefined && Validator.endsValue(previous);
+            if (token.type === "[" && (followsValue || previous?.type === "?.")) {
+                // `value[key]` reaches any property by string key and thereby bypasses the identifier predicate.
+                throw new SyntaxError(`Computed member access is not allowed (${token.pos})`);
+            }
+            if (token.type === "/" && !followsValue) {
+                // A "/" in operand position starts a regular expression literal, which the lexer cannot tokenize.
+                throw new SyntaxError(`Invalid or unexpected token (${token.pos})`);
+            }
+            previous = token;
+        }
+    }
+
+    /** Whether `token` can end an operand, in which case a following "[" or "/" acts on that operand. */
+    private static endsValue(token: Token): boolean {
+        switch (token.type) {
+            case "iden":
+                return !safeOperators.includes(token.symbol);
+            case "number":
+            case "string":
+            case ")":
+            case "]":
+            case "}":
+                return true;
+            default:
+                return false;
+        }
+    }
+}
+
+class ValidationError extends Error {
+    constructor(identifier: string) {
+        super(`'${identifier}' is not an allowed identifier.`);
+    }
+}