// SPDX-FileCopyrightText: 2022 Johannes Loher
//
// SPDX-License-Identifier: MIT

import { describe, expect, it } from 'vitest';

import { Lexer } from '../../src/expression-evaluation/lexer';

import type { Token } from '../../src/expression-evaluation/grammar';

/** One table-driven case: the raw input and the exact token sequence expected from the lexer. */
interface LexerTestCase {
  input: string;
  expected: Token[];
}

describe('Lexer', () => {
  // Inputs consisting of exactly one operator / punctuation token, followed by `eof`.
  const singleOperatorTestCases: LexerTestCase[] = [
    {
      input: '+',
      expected: [
        { type: '+', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: '-',
      expected: [
        { type: '-', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: '*',
      expected: [
        { type: '*', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: '**',
      expected: [
        { type: '**', pos: 0 },
        { type: 'eof', pos: 2 },
      ],
    },
    {
      input: '/',
      expected: [
        { type: '/', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: '%',
      expected: [
        { type: '%', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: '===',
      expected: [
        { type: '===', pos: 0 },
        { type: 'eof', pos: 3 },
      ],
    },
    {
      input: '!==',
      expected: [
        { type: '!==', pos: 0 },
        { type: 'eof', pos: 3 },
      ],
    },
    {
      input: '==',
      expected: [
        { type: '==', pos: 0 },
        { type: 'eof', pos: 2 },
      ],
    },
    {
      input: '<',
      expected: [
        { type: '<', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: '<=',
      expected: [
        { type: '<=', pos: 0 },
        { type: 'eof', pos: 2 },
      ],
    },
    {
      input: '>',
      expected: [
        { type: '>', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: '>=',
      expected: [
        { type: '>=', pos: 0 },
        { type: 'eof', pos: 2 },
      ],
    },
    {
      input: '&&',
      expected: [
        { type: '&&', pos: 0 },
        { type: 'eof', pos: 2 },
      ],
    },
    {
      input: '||',
      expected: [
        { type: '||', pos: 0 },
        { type: 'eof', pos: 2 },
      ],
    },
    {
      input: '&',
      expected: [
        { type: '&', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: '|',
      expected: [
        { type: '|', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: '<<',
      expected: [
        { type: '<<', pos: 0 },
        { type: 'eof', pos: 2 },
      ],
    },
    {
      input: '>>>',
      expected: [
        { type: '>>>', pos: 0 },
        { type: 'eof', pos: 3 },
      ],
    },
    {
      input: '.',
      expected: [
        { type: '.', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: '?.',
      expected: [
        { type: '?.', pos: 0 },
        { type: 'eof', pos: 2 },
      ],
    },
    {
      input: '??',
      expected: [
        { type: '??', pos: 0 },
        { type: 'eof', pos: 2 },
      ],
    },
    {
      input: '?',
      expected: [
        { type: '?', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: ':',
      expected: [
        { type: ':', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: '(',
      expected: [
        { type: '(', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: ')',
      expected: [
        { type: ')', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: '[',
      expected: [
        { type: '[', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: ']',
      expected: [
        { type: ']', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: ',',
      expected: [
        { type: ',', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: '{',
      expected: [
        { type: '{', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: '}',
      expected: [
        { type: '}', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
  ];

  // Well-formed numeric literals: integers, decimals, leading-dot decimals and `_` separators.
  const singleNumberTestCases: LexerTestCase[] = [
    {
      input: '1',
      expected: [
        { type: 'number', symbol: '1', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      input: '42',
      expected: [
        { type: 'number', symbol: '42', pos: 0 },
        { type: 'eof', pos: 2 },
      ],
    },
    {
      input: '42.9',
      expected: [
        { type: 'number', symbol: '42.9', pos: 0 },
        { type: 'eof', pos: 4 },
      ],
    },
    {
      input: '.9',
      expected: [
        { type: 'number', symbol: '.9', pos: 0 },
        { type: 'eof', pos: 2 },
      ],
    },
    {
      input: '1_1',
      expected: [
        { type: 'number', symbol: '1_1', pos: 0 },
        { type: 'eof', pos: 3 },
      ],
    },
    {
      input: '10_1',
      expected: [
        { type: 'number', symbol: '10_1', pos: 0 },
        { type: 'eof', pos: 4 },
      ],
    },
    {
      input: '1_1_1',
      expected: [
        { type: 'number', symbol: '1_1_1', pos: 0 },
        { type: 'eof', pos: 5 },
      ],
    },
    {
      input: '.1_1',
      expected: [
        { type: 'number', symbol: '.1_1', pos: 0 },
        { type: 'eof', pos: 4 },
      ],
    },
  ];

  // Malformed numeric literals: the only expected token is `invalid` at position 0 (no `eof`).
  const invalidNumberTestCases: LexerTestCase[] = [
    { input: '1.1.1', expected: [{ type: 'invalid', pos: 0 }] },
    { input: '1__1', expected: [{ type: 'invalid', pos: 0 }] },
    { input: '1_', expected: [{ type: 'invalid', pos: 0 }] },
    { input: '1._1', expected: [{ type: 'invalid', pos: 0 }] },
    { input: '0_1', expected: [{ type: 'invalid', pos: 0 }] },
    { input: '00_1', expected: [{ type: 'invalid', pos: 0 }] },
  ];

  // Identifiers, including `_`/`$` prefixes, a non-ASCII letter, and the words
  // `true`, `false`, `null` and `undefined`, which are expected as plain `iden` tokens.
  const singleIdentifierTestCases: LexerTestCase[] = [
    {
      input: 'foo',
      expected: [
        { type: 'iden', symbol: 'foo', pos: 0 },
        { type: 'eof', pos: 3 },
      ],
    },
    {
      input: '_foo',
      expected: [
        { type: 'iden', symbol: '_foo', pos: 0 },
        { type: 'eof', pos: 4 },
      ],
    },
    {
      input: '$foo',
      expected: [
        { type: 'iden', symbol: '$foo', pos: 0 },
        { type: 'eof', pos: 4 },
      ],
    },
    {
      input: 'foo1',
      expected: [
        { type: 'iden', symbol: 'foo1', pos: 0 },
        { type: 'eof', pos: 4 },
      ],
    },
    {
      input: '_foo1_',
      expected: [
        { type: 'iden', symbol: '_foo1_', pos: 0 },
        { type: 'eof', pos: 6 },
      ],
    },
    {
      input: 'μ',
      expected: [
        { type: 'iden', symbol: 'μ', pos: 0 },
        { type: 'eof', pos: 1 },
      ],
    },
    {
      // A dot followed by `_1` is a member access on an identifier, not a number.
      input: '._1',
      expected: [
        { type: '.', pos: 0 },
        { type: 'iden', symbol: '_1', pos: 1 },
        { type: 'eof', pos: 3 },
      ],
    },
    {
      input: 'true',
      expected: [
        { type: 'iden', symbol: 'true', pos: 0 },
        { type: 'eof', pos: 4 },
      ],
    },
    {
      input: 'false',
      expected: [
        { type: 'iden', symbol: 'false', pos: 0 },
        { type: 'eof', pos: 5 },
      ],
    },
    {
      input: 'null',
      expected: [
        { type: 'iden', symbol: 'null', pos: 0 },
        { type: 'eof', pos: 4 },
      ],
    },
    {
      input: 'undefined',
      expected: [
        { type: 'iden', symbol: 'undefined', pos: 0 },
        { type: 'eof', pos: 9 },
      ],
    },
  ];

  // Inputs that are not a single identifier: `1foo` is expected to split into a
  // number followed by an identifier, the others are expected to be `invalid`.
  const invalidIdentifierTestCases: LexerTestCase[] = [
    {
      input: '1foo',
      expected: [
        { type: 'number', symbol: '1', pos: 0 },
        { type: 'iden', symbol: 'foo', pos: 1 },
        { type: 'eof', pos: 4 },
      ],
    },
    { input: '↓', expected: [{ type: 'invalid', pos: 0 }] },
    { input: '"', expected: [{ type: 'invalid', pos: 0 }] },
  ];

  // String literals with double quotes, single quotes and backticks, including
  // escaped quote characters; `symbol` is expected to include the delimiters.
  const singleStringTestCases: LexerTestCase[] = [
    {
      input: '""',
      expected: [
        { type: 'string', symbol: '""', pos: 0 },
        { type: 'eof', pos: 2 },
      ],
    },
    {
      input: '"foo"',
      expected: [
        { type: 'string', symbol: '"foo"', pos: 0 },
        { type: 'eof', pos: 5 },
      ],
    },
    {
      input: '"\\""',
      expected: [
        { type: 'string', symbol: '"\\""', pos: 0 },
        { type: 'eof', pos: 4 },
      ],
    },
    {
      input: '"\\\'"',
      expected: [
        { type: 'string', symbol: '"\\\'"', pos: 0 },
        { type: 'eof', pos: 4 },
      ],
    },
    {
      input: "''",
      expected: [
        { type: 'string', symbol: "''", pos: 0 },
        { type: 'eof', pos: 2 },
      ],
    },
    {
      input: "'foo'",
      expected: [
        { type: 'string', symbol: "'foo'", pos: 0 },
        { type: 'eof', pos: 5 },
      ],
    },
    {
      input: "'\\''",
      expected: [
        { type: 'string', symbol: "'\\''", pos: 0 },
        { type: 'eof', pos: 4 },
      ],
    },
    {
      input: "'\\\"'",
      expected: [
        { type: 'string', symbol: "'\\\"'", pos: 0 },
        { type: 'eof', pos: 4 },
      ],
    },
    {
      input: '``',
      expected: [
        { type: 'string', symbol: '``', pos: 0 },
        { type: 'eof', pos: 2 },
      ],
    },
    {
      input: '`foo`',
      expected: [
        { type: 'string', symbol: '`foo`', pos: 0 },
        { type: 'eof', pos: 5 },
      ],
    },
    {
      input: '`\\``',
      expected: [
        { type: 'string', symbol: '`\\``', pos: 0 },
        { type: 'eof', pos: 4 },
      ],
    },
    {
      input: '`\\"`',
      expected: [
        { type: 'string', symbol: '`\\"`', pos: 0 },
        { type: 'eof', pos: 4 },
      ],
    },
  ];

  // Unterminated strings (including ones whose closing quote is escaped).
  const invalidStringTestCases: LexerTestCase[] = [
    { input: '"', expected: [{ type: 'invalid', pos: 0 }] },
    { input: '"\\"', expected: [{ type: 'invalid', pos: 0 }] },
    { input: "'", expected: [{ type: 'invalid', pos: 0 }] },
    { input: "'\\'", expected: [{ type: 'invalid', pos: 0 }] },
  ];

  // Whitespace-only inputs: only `eof` is expected, positioned at the input length.
  const whiteSpaceTestCases: LexerTestCase[] = [
    { input: ' ', expected: [{ type: 'eof', pos: 1 }] },
    // Three spaces: the expected `eof` position (3) must equal the input length.
    { input: '   ', expected: [{ type: 'eof', pos: 3 }] },
    { input: '\n', expected: [{ type: 'eof', pos: 1 }] },
    { input: ' \n', expected: [{ type: 'eof', pos: 2 }] },
    // NOTE(review): identical to the first case as seen here; it may originally
    // have used a different whitespace character (e.g. a tab) — confirm.
    { input: ' ', expected: [{ type: 'eof', pos: 1 }] },
  ];

  // Multi-token inputs, including ones where lexing is expected to stop at an `invalid` token.
  const complicatedTermTestCases: LexerTestCase[] = [
    {
      input: '5x',
      expected: [
        { type: 'number', symbol: '5', pos: 0 },
        { type: 'iden', symbol: 'x', pos: 1 },
        { type: 'eof', pos: 2 },
      ],
    },
    {
      input: '5*x',
      expected: [
        { type: 'number', symbol: '5', pos: 0 },
        { type: '*', pos: 1 },
        { type: 'iden', symbol: 'x', pos: 2 },
        { type: 'eof', pos: 3 },
      ],
    },
    {
      input: '5 * x',
      expected: [
        { type: 'number', symbol: '5', pos: 0 },
        { type: '*', pos: 2 },
        { type: 'iden', symbol: 'x', pos: 4 },
        { type: 'eof', pos: 5 },
      ],
    },
    {
      input: "(5 * 5 + 2) / 1.2 === 'foo'",
      expected: [
        { type: '(', pos: 0 },
        { type: 'number', symbol: '5', pos: 1 },
        { type: '*', pos: 3 },
        { type: 'number', symbol: '5', pos: 5 },
        { type: '+', pos: 7 },
        { type: 'number', symbol: '2', pos: 9 },
        { type: ')', pos: 10 },
        { type: '/', pos: 12 },
        { type: 'number', symbol: '1.2', pos: 14 },
        { type: '===', pos: 18 },
        { type: 'string', symbol: "'foo'", pos: 22 },
        { type: 'eof', pos: 27 },
      ],
    },
    {
      // The `=` of `=>` at position 4 is expected to be reported as `invalid`.
      input: "(() => {console.log('foo'); return 1;})()",
      expected: [
        { type: '(', pos: 0 },
        { type: '(', pos: 1 },
        { type: ')', pos: 2 },
        { type: 'invalid', pos: 4 },
      ],
    },
    {
      // The `;` at position 31 is expected to be reported as `invalid`.
      input: "(function() {console.log('foo'); return 1;})()",
      expected: [
        { type: '(', pos: 0 },
        { type: 'iden', symbol: 'function', pos: 1 },
        { type: '(', pos: 9 },
        { type: ')', pos: 10 },
        { type: '{', pos: 12 },
        { type: 'iden', symbol: 'console', pos: 13 },
        { type: '.', pos: 20 },
        { type: 'iden', symbol: 'log', pos: 21 },
        { type: '(', pos: 24 },
        { type: 'string', symbol: "'foo'", pos: 25 },
        { type: ')', pos: 30 },
        { type: 'invalid', pos: 31 },
      ],
    },
    {
      input: "'ranged' === 'ranged'",
      expected: [
        { type: 'string', symbol: "'ranged'", pos: 0 },
        { type: '===', pos: 9 },
        { type: 'string', symbol: "'ranged'", pos: 13 },
        { type: 'eof', pos: 21 },
      ],
    },
  ];

  it.each([
    ...singleOperatorTestCases,
    ...singleNumberTestCases,
    ...invalidNumberTestCases,
    ...singleIdentifierTestCases,
    ...invalidIdentifierTestCases,
    ...singleStringTestCases,
    ...invalidStringTestCases,
    ...whiteSpaceTestCases,
    ...complicatedTermTestCases,
  ])('lexes $input correctly', ({ input, expected }) => {
    // when
    const result = consume(new Lexer(input));

    // then
    expect(result).toEqual(expected);
  });
});

/**
 * Drains an iterable into an array, preserving iteration order.
 *
 * @param iterable - The iterable to exhaust.
 * @returns A new array containing every value the iterable produced.
 */
function consume<T>(iterable: Iterable<T>): T[] {
  return Array.from(iterable);
}