JsonRPC/src/tokenizer.ts

/**
 * Category tag attached to every token the tokenizer recognises.
 *
 * `"space"` and `"comment"` tokens are matched but filtered out by
 * `tokenize`, so they never appear in its result.
 */
export type TokenTypes =
   | "space"
   | "comment"
   | "string" // double-quoted literal, quotes included in the value
   | "keyword" // type | enum | import | service | define
   | "at"
   | "colon"
   | "semicolon"
   | "comma"
   | "equals"
   | "curly_open"
   | "curly_close"
   | "bracket_open" // "(" — parentheses, despite the name
   | "bracket_close" // ")"
   | "array" // the literal two-character sequence "[]"
   | "questionmark"
   | "number" // any run of digits and dots
   | "text"; // identifier: letter or underscore, then letters/digits/underscores

/**
 * A single lexeme cut out of the input string.
 *
 * `startIdx` is the offset of the first matched character and `endIdx`
 * is `startIdx + value.length`, i.e. the range is half-open.
 */
export type Token = {
   /** Category of the lexeme. */
   type: TokenTypes;
   /** Exact matched text. */
   value: string;
   /** Offset in the input where the match begins (inclusive). */
   startIdx: number;
   /** Offset in the input just past the match (exclusive). */
   endIdx: number;
};

/**
 * Attempts to recognise one token in `input` beginning at offset `index`.
 * Returns the token on success, `undefined` when nothing matches there.
 */
type Matcher = (input: string, index: number) => undefined | Token;

/**
 * Error raised when the tokenizer meets input that no matcher accepts.
 *
 * Carries the offending offset so callers can point at the exact position
 * in the source text.
 */
export class TokenizerError extends Error {
   /** Offset into the tokenized input that was supplied by the thrower. */
   index: number;

   /**
    * @param message Human-readable description of the failure.
    * @param index   Offset into the input associated with the failure.
    */
   constructor(message: string, index: number) {
      super(message);
      // Subclasses of built-ins lose their prototype when compiled to ES5;
      // restoring it keeps `instanceof TokenizerError` reliable on any target.
      Object.setPrototypeOf(this, new.target.prototype);
      // Without this the error prints as a generic "Error".
      this.name = "TokenizerError";
      this.index = index;
   }
}

/**
 * Builds a {@link Matcher} from a regular expression.
 *
 * The expression is applied to the remainder of the input starting at the
 * requested offset, so patterns should be anchored with `^` to match only
 * at that offset.
 *
 * @param regex Pattern to apply; a string is compiled with `new RegExp`.
 * @param type  Token category assigned to every successful match.
 * @returns A matcher yielding a token for the matched text, or `undefined`
 *          when the pattern does not match or matches the empty string.
 */
function regexMatcher(regex: string | RegExp, type: TokenTypes): Matcher {
   // Compile once up front instead of reassigning the parameter.
   const pattern = typeof regex === "string" ? new RegExp(regex) : regex;

   return (input: string, index: number): Token | undefined => {
      const found = input.substring(index).match(pattern);
      // A zero-length match consumes nothing; reporting it as a token would
      // leave the caller stuck at the same offset forever.
      if (!found || !found[0]) return undefined;

      const value = found[0];
      return {
         type,
         value,
         startIdx: index,
         endIdx: index + value.length,
      };
   };
}

/**
 * Ordered list of token matchers. The tokenizer uses the first entry that
 * matches at the current offset, so order encodes priority: keywords must
 * stay ahead of the generic identifier ("text") pattern.
 */
const matcher = [
   regexMatcher(/^\s+/, "space"),
   // Block comment. `[\s\S]` matches any character including newlines; the
   // previous `(.|\s)*?` had overlapping alternatives and backtracked
   // exponentially on an unterminated `/*` followed by whitespace.
   regexMatcher(/^\/\*[\s\S]*?\*\//, "comment"),
   // Line comments use `.*` so that an empty comment (`//` or `#` directly
   // followed by a line break or end of input) is accepted as well.
   regexMatcher(/^\/\/.*/, "comment"),
   regexMatcher(/^#.*/, "comment"),
   // Double-quoted string on a single line; no escape sequences.
   regexMatcher(/^".*?"/, "string"),
   // regexMatcher(/(?<=^")(.*?)(?=")/, "string"),
   regexMatcher(/^(type|enum|import|service|define)\b/, "keyword"),
   regexMatcher(/^\@/, "at"),
   regexMatcher(/^\:/, "colon"),
   regexMatcher(/^\;/, "semicolon"),
   regexMatcher(/^\,/, "comma"),
   regexMatcher(/^\=/, "equals"),
   regexMatcher(/^{/, "curly_open"),
   regexMatcher(/^}/, "curly_close"),
   regexMatcher(/^\(/, "bracket_open"),
   regexMatcher(/^\)/, "bracket_close"),
   regexMatcher(/^\[\]/, "array"),
   regexMatcher(/^\?/, "questionmark"),
   // NOTE(review): accepts any run of digits and dots, e.g. "1.2.3" or ".";
   // left unchanged because downstream parsing may rely on it — confirm.
   regexMatcher(/^[\.0-9]+/, "number"),
   // Identifier; equivalent to the former `([a-zA-Z0-9_]?)+` without the
   // nested quantifier.
   regexMatcher(/^[a-zA-Z_][a-zA-Z0-9_]*/, "text"),
];

/**
 * Splits `input` into tokens by repeatedly applying the `matcher` list at
 * the current offset and advancing past whatever the first successful
 * matcher consumed.
 *
 * Whitespace and comment tokens are consumed but not returned.
 *
 * @param input Source text to tokenize.
 * @returns The significant tokens in input order.
 * @throws TokenizerError when no matcher accepts the text at some offset;
 *         the error's `index` is that offset.
 */
export default function tokenize(input: string): Token[] {
   const tokens: Token[] = [];
   let index = 0;

   while (index < input.length) {
      // Stop at the first matcher that succeeds instead of running all of
      // them; only the first hit was ever used.
      let match: Token | undefined;
      for (const attempt of matcher) {
         const candidate = attempt(input, index);
         // Ignore empty matches: they would not advance `index` and the
         // loop would never terminate.
         if (candidate && candidate.value.length > 0) {
            match = candidate;
            break;
         }
      }

      if (!match) {
         throw new TokenizerError(
            `Unexpected token '${input.substring(index, index + 1)}'`,
            index
         );
      }

      if (match.type !== "space" && match.type !== "comment") {
         tokens.push(match);
      }
      index += match.value.length;
   }

   return tokens;
}
First Commit Yes, that is what I do at 31.12.2021 at 22:39 local time... 2021-12-31 21:38:26 +00:00			`export type TokenTypes =`
			`\| "space"`
			`\| "comment"`
			`\| "string"`
			`\| "keyword"`
Adding Decorators for comments 2022-01-01 18:47:34 +00:00			`\| "at"`
First Commit Yes, that is what I do at 31.12.2021 at 22:39 local time... 2021-12-31 21:38:26 +00:00			`\| "colon"`
			`\| "semicolon"`
			`\| "comma"`
			`\| "equals"`
			`\| "curly_open"`
			`\| "curly_close"`
			`\| "bracket_open"`
			`\| "bracket_close"`
			`\| "array"`
			`\| "questionmark"`
			`\| "number"`
			`\| "text";`

			`export type Token = {`
			`type: TokenTypes;`
			`value: string;`
			`startIdx: number;`
			`endIdx: number;`
			`};`

			`type Matcher = (input: string, index: number) => undefined \| Token;`

			`export class TokenizerError extends Error {`
			`index: number;`
			`constructor(message: string, index: number) {`
			`super(message);`
			`this.index = index;`
			`}`
			`}`

			`function regexMatcher(regex: string \| RegExp, type: TokenTypes): Matcher {`
			`if (typeof regex === "string") regex = new RegExp(regex);`

			`return (input: string, index: number) => {`
			`let matches = input.substring(index).match(regex as RegExp);`
			`if (!matches \|\| matches.length <= 0) return undefined;`

			`return {`
			`type,`
			`value: matches[0],`
			`startIdx: index,`
			`endIdx: index + matches[0].length,`
			`} as Token;`
			`};`
			`}`

			`const matcher = [`
			`regexMatcher(/^\s+/, "space"),`
			`regexMatcher(/^(\/\*)(.\|\s)*?(\*\/)/g, "comment"),`
			`regexMatcher(/^\/\/.+/, "comment"),`
			`regexMatcher(/^#.+/, "comment"),`
			`regexMatcher(/^".*?"/, "string"),`
			`// regexMatcher(/(?<=^")(.*?)(?=")/, "string"),`
Adding C# Support. Badly tested currently, but kindof working 2022-01-05 21:16:17 +00:00			`regexMatcher(/^(type\|enum\|import\|service\|define)\b/, "keyword"),`
Adding Decorators for comments 2022-01-01 18:47:34 +00:00			`regexMatcher(/^\@/, "at"),`
First Commit Yes, that is what I do at 31.12.2021 at 22:39 local time... 2021-12-31 21:38:26 +00:00			`regexMatcher(/^\:/, "colon"),`
			`regexMatcher(/^\;/, "semicolon"),`
			`regexMatcher(/^\,/, "comma"),`
			`regexMatcher(/^\=/, "equals"),`
			`regexMatcher(/^{/, "curly_open"),`
			`regexMatcher(/^}/, "curly_close"),`
			`regexMatcher(/^\(/, "bracket_open"),`
			`regexMatcher(/^\)/, "bracket_close"),`
			`regexMatcher(/^\[\]/, "array"),`
			`regexMatcher(/^\?/, "questionmark"),`
			`regexMatcher(/^[\.0-9]+/, "number"),`
			`regexMatcher(/^[a-zA-Z_]([a-zA-Z0-9_]?)+/, "text"),`
			`];`

			`export default function tokenize(input: string) {`
			`let index = 0;`
			`let tokens: Token[] = [];`
			`while (index < input.length) {`
			`const matches = matcher.map((m) => m(input, index)).filter((e) => !!e);`
			`let match = matches[0];`
			`if (match) {`
			`if (match.type !== "space" && match.type !== "comment") {`
			`tokens.push(match);`
			`}`
			`index += match.value.length;`
			`} else {`
			`throw new TokenizerError(`
			`Unexpected token '${input.substring(index, index + 1)}'`,
			`index`
			`);`
			`}`
			`}`
			`return tokens;`
			`}`