JsonRPC/src/tokenizer.ts

96 lines
2.5 KiB
TypeScript

/**
 * Every category a {@link Token} can belong to.
 *
 * `"space"` and `"comment"` tokens are recognised while scanning, but
 * `tokenize` leaves them out of the list it returns.
 */
export type TokenTypes =
  // Trivia
  | "space" // run of whitespace
  | "comment" // `/* ... */`, `// ...` or `# ...`
  // Literals and words
  | "string" // double-quoted text, quotes included
  | "number" // run of digits and dots
  | "keyword" // type, enum, import, service, define
  | "text" // identifier: letter or `_`, then letters, digits or `_`
  // Punctuation
  | "at" // `@`
  | "colon" // `:`
  | "semicolon" // `;`
  | "comma" // `,`
  | "equals" // `=`
  | "questionmark" // `?`
  | "curly_open" // `{`
  | "curly_close" // `}`
  | "bracket_open" // `(` (a parenthesis, despite the name)
  | "bracket_close" // `)`
  | "array"; // the two-character sequence `[]`
/**
 * One lexeme cut out of the tokenized input.
 *
 * Tokens built by `regexMatcher` satisfy
 * `endIdx - startIdx === value.length`.
 */
export type Token = {
  /** Category of the lexeme. */
  type: TokenTypes;
  /** The matched text exactly as it appears in the input. */
  value: string;
  /** Offset in the input of the first character of `value` (inclusive). */
  startIdx: number;
  /** Offset in the input just past the last character of `value` (exclusive). */
  endIdx: number;
};
/**
 * Tries to recognise a single token in `input` beginning at offset `index`.
 * Yields the token when one is found there and `undefined` otherwise.
 */
type Matcher = (input: string, index: number) => Token | undefined;
/**
 * Error thrown by `tokenize` when the text at the current position is not
 * recognised by any matcher.
 */
export class TokenizerError extends Error {
  /** Offset into the tokenized input at which the failure occurred. */
  index: number;

  /**
   * @param message Human-readable description of the failure.
   * @param index Offset into the input where tokenizing stopped.
   */
  constructor(message: string, index: number) {
    super(message);
    // Subclasses of Error inherit the name "Error"; set it explicitly so logs,
    // `toString()` and stack traces identify the failure as a tokenizer error.
    this.name = "TokenizerError";
    this.index = index;
  }
}
/**
 * Builds a matcher that recognises tokens of one category with a regular
 * expression.
 *
 * The expression is run against the text that starts at the requested offset,
 * so patterns are normally written with a leading `^`. A hit only counts when
 * it begins exactly at that offset and consumes at least one character.
 *
 * @param regex Pattern to look for; a string is compiled with `new RegExp`.
 * @param type Category assigned to every token this matcher produces.
 * @returns A matcher yielding the token found at the offset, or `undefined`
 *   when the pattern does not match there.
 */
function regexMatcher(regex: string | RegExp, type: TokenTypes): Matcher {
  const pattern = typeof regex === "string" ? new RegExp(regex) : regex;
  return (input: string, index: number): Token | undefined => {
    // Patterns carrying the `g` or `y` flag remember where the previous search
    // ended; always restart from the beginning of the slice.
    pattern.lastIndex = 0;
    // Slice the input so a leading `^` in the pattern anchors at `index`.
    const found = pattern.exec(input.substring(index));
    if (found === null) return undefined;
    const value = found[0];
    // A hit further along the text would be reported at the wrong position,
    // and an empty hit would never let the caller advance; treat both as
    // "no token here".
    if (found.index !== 0 || value.length === 0) return undefined;
    return { type, value, startIdx: index, endIdx: index + value.length };
  };
}
/**
 * Token recognisers in priority order: `tokenize` uses the first entry that
 * matches at the current position, so earlier entries win over later ones
 * (for example "keyword" is tried before "text").
 */
const matcher = [
  regexMatcher(/^\s+/, "space"),
  // Block comment. `[\s\S]` matches any character including newlines; the
  // previous `(.|\s)` alternation matched the same text but its two branches
  // overlap on whitespace, which makes an unterminated `/*` backtrack
  // exponentially.
  regexMatcher(/^\/\*[\s\S]*?\*\//, "comment"),
  // Line comments run to the end of the line. `.*` (not `.+`) so that an empty
  // comment such as a bare `//` or `#` is still recognised.
  regexMatcher(/^\/\/.*/, "comment"),
  regexMatcher(/^#.*/, "comment"),
  // Double-quoted string on a single line; the token value keeps the quotes.
  // There is no escape handling: the string ends at the next `"`.
  regexMatcher(/^".*?"/, "string"),
  // regexMatcher(/(?<=^")(.*?)(?=")/, "string"),
  // `\b` keeps identifiers that merely start with a keyword (e.g. `types`)
  // from being split.
  regexMatcher(/^(type|enum|import|service|define)\b/, "keyword"),
  regexMatcher(/^@/, "at"),
  regexMatcher(/^:/, "colon"),
  regexMatcher(/^;/, "semicolon"),
  regexMatcher(/^,/, "comma"),
  regexMatcher(/^=/, "equals"),
  regexMatcher(/^{/, "curly_open"),
  regexMatcher(/^}/, "curly_close"),
  regexMatcher(/^\(/, "bracket_open"),
  regexMatcher(/^\)/, "bracket_close"),
  regexMatcher(/^\[\]/, "array"),
  regexMatcher(/^\?/, "questionmark"),
  // NOTE(review): deliberately left loose - any run of digits and dots is
  // accepted, including `1.2.3` or a lone `.`; confirm against the parser
  // before tightening.
  regexMatcher(/^[.0-9]+/, "number"),
  regexMatcher(/^[a-zA-Z_][a-zA-Z0-9_]*/, "text"),
];
/**
 * Splits `input` into tokens by repeatedly applying the recognisers in
 * `matcher` at the current position; the first recogniser that matches wins.
 *
 * @param input Source text to tokenize.
 * @returns The tokens in input order. Tokens of type "space" and "comment"
 *   are consumed but not included.
 * @throws {TokenizerError} When no recogniser matches at some position; the
 *   error's `index` is that position.
 */
export default function tokenize(input: string): Token[] {
  const tokens: Token[] = [];
  let index = 0;
  while (index < input.length) {
    let match: Token | undefined;
    // Only the first hit is ever used, so stop probing as soon as one is found
    // instead of running every recogniser at every position.
    for (const attempt of matcher) {
      const candidate = attempt(input, index);
      // A zero-length token would never advance `index` and would loop
      // forever; treat it as "no match" and keep looking.
      if (candidate && candidate.value.length > 0) {
        match = candidate;
        break;
      }
    }
    if (!match) {
      throw new TokenizerError(
        `Unexpected token '${input.substring(index, index + 1)}'`,
        index
      );
    }
    // Whitespace and comments are skipped over but never reported.
    if (match.type !== "space" && match.type !== "comment") {
      tokens.push(match);
    }
    index += match.value.length;
  }
  return tokens;
}