forked from github/quartz
feat(bases): migrate from vault to upstream
Signed-off-by: Aaron Pham <contact@aarnphm.xyz>
This commit is contained in:
300
quartz/util/base/compiler/lexer.ts
Normal file
300
quartz/util/base/compiler/lexer.ts
Normal file
@@ -0,0 +1,300 @@
import { Position, Span } from "./ast"
import { Diagnostic } from "./errors"
import {
  Operator,
  Punctuation,
  Token,
  StringToken,
  RegexToken,
  NumberToken,
  BooleanToken,
  NullToken,
  ThisToken,
  IdentifierToken,
  OperatorToken,
  PunctuationToken,
  EofToken,
} from "./tokens"
// Value returned by `lex`: the token stream it produced plus every diagnostic
// recorded while scanning the input.
type LexResult = { tokens: Token[]; diagnostics: Diagnostic[] }
// Every operator spelling the lexer recognizes. Two-character operators are
// listed first for readability only: membership is what matters, because the
// lexer tests the two-character candidate before the single-character one.
const operatorTokens: Operator[] = [
  "==",
  "!=",
  ">=",
  "<=",
  "&&",
  "||",
  "+",
  "-",
  "*",
  "/",
  "%",
  "!",
  ">",
  "<",
]
// Single-character punctuation marks that the lexer emits as punctuation tokens.
const punctuationTokens: Punctuation[] = [".", ",", "(", ")", "[", "]"]
/**
 * Type guard: true when `value` is exactly one of the spellings listed in
 * `operatorTokens`, narrowing it to `Operator`.
 *
 * `some` with strict equality is used instead of `includes` so an arbitrary
 * `string` can be compared against the `Operator[]` table without a cast.
 */
const isOperator = (value: string): value is Operator =>
  operatorTokens.some((token) => token === value)
/**
 * Type guard: true when `value` is exactly one of the marks listed in
 * `punctuationTokens`, narrowing it to `Punctuation`.
 */
const isPunctuation = (value: string): value is Punctuation =>
  punctuationTokens.some((token) => token === value)
/**
 * Splits an expression source string into tokens.
 *
 * Scanning never throws: malformed input (unterminated string or regex
 * literals, `=>` arrows, unknown characters) is reported through
 * `diagnostics` and scanning continues with the next character.
 *
 * @param input - Source text to tokenize.
 * @param file - Optional file identifier copied onto every span.
 * @returns The tokens, always terminated by an `eof` token, and the
 *   diagnostics collected along the way.
 */
export function lex(input: string, file?: string): LexResult {
  const tokens: Token[] = []
  const diagnostics: Diagnostic[] = []
  let index = 0
  let line = 1
  let column = 1
  // "/" is ambiguous: it opens a regex literal only where an operand may
  // start (beginning of input, after an operator, after "(", "[" or ",").
  let canStartRegex = true

  // Hoisted so the pattern is not re-created for every flag character.
  const regexFlag = /^[gimsuy]$/

  const makePosition = (offset: number, lineValue: number, columnValue: number): Position => ({
    offset,
    line: lineValue,
    column: columnValue,
  })

  const currentPosition = (): Position => makePosition(index, line, column)

  const makeSpan = (start: Position, end: Position): Span => ({ start, end, file })

  // Consumes one character and keeps line/column in sync. At end of input it
  // returns "" and leaves the cursor untouched, so callers can never read
  // `undefined` or push positions past the end of the source.
  const advance = (): string => {
    if (index >= input.length) return ""
    const ch = input.charAt(index)
    index += 1
    if (ch === "\n") {
      line += 1
      column = 1
    } else {
      column += 1
    }
    return ch
  }

  // Looks ahead without consuming; "" signals end of input.
  const peek = (offset = 0): string => input[index + offset] ?? ""

  const addDiagnostic = (message: string, span: Span) => {
    diagnostics.push({ kind: "lex", message, span })
  }

  // Recomputes whether the next "/" may open a regex, based on the token
  // that was just produced.
  const updateRegexState = (token: Token | null) => {
    if (!token) {
      canStartRegex = true
      return
    }
    if (token.type === "operator") {
      canStartRegex = true
      return
    }
    if (token.type === "punctuation") {
      canStartRegex = token.value === "(" || token.value === "[" || token.value === ","
      return
    }
    canStartRegex = false
  }

  // Records a token and refreshes the regex/division disambiguation state.
  const emit = (token: Token) => {
    tokens.push(token)
    updateRegexState(token)
  }

  const isWhitespace = (ch: string) => ch === " " || ch === "\t" || ch === "\n" || ch === "\r"
  const isDigit = (ch: string) => ch >= "0" && ch <= "9"
  const isIdentStart = (ch: string) =>
    (ch >= "a" && ch <= "z") || (ch >= "A" && ch <= "Z") || ch === "_"
  const isIdentContinue = (ch: string) => isIdentStart(ch) || isDigit(ch)

  while (index < input.length) {
    const ch = peek()

    if (isWhitespace(ch)) {
      advance()
      continue
    }

    const start = currentPosition()

    // A lone "=" followed (possibly after whitespace) by ">" is an arrow
    // function: consume the whole arrow and report it instead of emitting
    // two confusing "unexpected character" diagnostics.
    if (ch === "=" && peek(1) !== "=") {
      let offset = 1
      while (isWhitespace(peek(offset))) {
        offset += 1
      }
      if (peek(offset) === ">") {
        advance()
        for (let step = 1; step < offset; step += 1) {
          advance()
        }
        if (peek() === ">") {
          advance()
        }
        const end = currentPosition()
        addDiagnostic(
          "arrow functions are not supported, use list.filter(expression)",
          makeSpan(start, end),
        )
        continue
      }
    }

    // String literal, single or double quoted.
    if (ch === '"' || ch === "'") {
      const quote = advance()
      let value = ""
      let closed = false

      while (index < input.length) {
        const curr = advance()
        if (curr === quote) {
          closed = true
          break
        }
        if (curr !== "\\") {
          value += curr
          continue
        }
        // A backslash that is the very last character has nothing to escape;
        // stop here so the literal is reported as unterminated without
        // reading past the end of the input.
        if (index >= input.length) break
        const escaped = advance()
        if (escaped === "n") value += "\n"
        else if (escaped === "t") value += "\t"
        else if (escaped === "r") value += "\r"
        // Quotes, backslashes and unknown escapes all stand for themselves.
        else value += escaped
      }

      const end = currentPosition()
      const span = makeSpan(start, end)
      if (!closed) addDiagnostic("unterminated string literal", span)
      const token: StringToken = { type: "string", value, span }
      emit(token)
      continue
    }

    // Regex literal: only where an operand is expected, and never for "//"
    // or a trailing "/".
    if (ch === "/" && canStartRegex) {
      const next = peek(1)
      if (next !== "/" && next !== "") {
        advance()
        let pattern = ""
        let closed = false
        // Inside a character class an unescaped "/" does not end the literal.
        let inClass = false
        while (index < input.length) {
          const curr = advance()
          if (curr === "\\" && index < input.length) {
            // Keep the escape verbatim so the pattern can be compiled later.
            const escaped = advance()
            pattern += `\\${escaped}`
            continue
          }
          if (curr === "[" && !inClass) inClass = true
          if (curr === "]" && inClass) inClass = false
          if (curr === "/" && !inClass) {
            closed = true
            break
          }
          pattern += curr
        }
        let flags = ""
        while (index < input.length) {
          const flag = peek()
          if (!regexFlag.test(flag)) break
          flags += advance()
        }
        const end = currentPosition()
        const span = makeSpan(start, end)
        if (!closed) addDiagnostic("unterminated regex literal", span)
        const token: RegexToken = { type: "regex", pattern, flags, span }
        emit(token)
        continue
      }
    }

    // Number literal: digits with an optional fractional part. The "." is
    // consumed only when a digit follows, so `1.foo` still lexes as member access.
    if (isDigit(ch)) {
      let num = ""
      while (index < input.length && isDigit(peek())) {
        num += advance()
      }
      if (peek() === "." && isDigit(peek(1))) {
        num += advance()
        while (index < input.length && isDigit(peek())) {
          num += advance()
        }
      }
      const end = currentPosition()
      const span = makeSpan(start, end)
      const token: NumberToken = { type: "number", value: Number(num), span }
      emit(token)
      continue
    }

    // Identifier or keyword (`true`, `false`, `null`, `this`).
    if (isIdentStart(ch)) {
      let ident = ""
      while (index < input.length && isIdentContinue(peek())) {
        ident += advance()
      }
      const end = currentPosition()
      const span = makeSpan(start, end)
      if (ident === "true" || ident === "false") {
        const token: BooleanToken = { type: "boolean", value: ident === "true", span }
        emit(token)
      } else if (ident === "null") {
        const token: NullToken = { type: "null", span }
        emit(token)
      } else if (ident === "this") {
        const token: ThisToken = { type: "this", span }
        emit(token)
      } else {
        const token: IdentifierToken = { type: "identifier", value: ident, span }
        emit(token)
      }
      continue
    }

    // Operators: try the two-character spelling first so ">=" is not split
    // into ">" followed by a stray "=".
    const twoChar = ch + peek(1)
    if (isOperator(twoChar)) {
      advance()
      advance()
      const end = currentPosition()
      const span = makeSpan(start, end)
      const token: OperatorToken = { type: "operator", value: twoChar, span }
      emit(token)
      continue
    }

    if (isOperator(ch)) {
      advance()
      const end = currentPosition()
      const span = makeSpan(start, end)
      const token: OperatorToken = { type: "operator", value: ch, span }
      emit(token)
      continue
    }

    if (isPunctuation(ch)) {
      advance()
      const end = currentPosition()
      const span = makeSpan(start, end)
      const token: PunctuationToken = { type: "punctuation", value: ch, span }
      emit(token)
      continue
    }

    // Anything else is skipped with a diagnostic so lexing can continue.
    advance()
    const end = currentPosition()
    addDiagnostic(`unexpected character: ${ch}`, makeSpan(start, end))
  }

  // Zero-width end-of-input marker.
  const eofPos = currentPosition()
  const eofSpan = makeSpan(eofPos, eofPos)
  const eofToken: EofToken = { type: "eof", span: eofSpan }
  emit(eofToken)

  return { tokens, diagnostics }
}
Reference in New Issue
Block a user