DenReg/markdown/src/block-lexer.ts

521 lines
15 KiB
TypeScript

/**
* @license
*
* Copyright (c) 2011-2014, Christopher Jeffrey. (MIT Licensed)
* https://github.com/chjj/marked
*
* Copyright (c) 2018, Костя Третяк. (MIT Licensed)
* https://github.com/ts-stack/markdown
*/
import { ExtendRegexp } from "./extend-regexp.ts";
import {
Align,
LexerReturns,
Links,
MarkedOptions,
RulesBlockBase,
RulesBlockGfm,
RulesBlockTables,
Token,
TokenType,
Obj
} from "./interfaces.ts";
import { Marked } from "./marked.ts";
import { load } from "https://deno.land/std/encoding/_yaml/loader/loader.ts";
/**
 * Block-level Markdown lexer.
 *
 * Splits Markdown source into a flat list of block tokens (headings, code,
 * lists, tables, ...) and collects link reference definitions. A leading
 * `--- ... ---` block is handed to the YAML loader and exposed as `meta` in
 * the result.
 *
 * The grammar used depends on the `gfm` and `tables` options; each grammar is
 * built once and cached on the class.
 */
export class BlockLexer<T extends typeof BlockLexer> {
  /** Extra rules tried (in order) just before the paragraph rule. */
  static simpleRules: RegExp[] = [];
  /** Cached base block grammar (see `getRulesBase`). */
  protected static rulesBase: RulesBlockBase;
  /**
   * GFM Block Grammar.
   */
  protected static rulesGfm: RulesBlockGfm;
  /**
   * GFM + Tables Block Grammar.
   */
  protected static rulesTables: RulesBlockTables;
  /** Grammar selected for this instance by `setRules`. */
  protected rules!: RulesBlockBase | RulesBlockGfm | RulesBlockTables;
  protected options: MarkedOptions;
  /** Link reference definitions collected so far, keyed by lower-cased label. */
  protected links: Links = {};
  /** Tokens produced so far; shared by all recursive `getTokens` calls. */
  protected tokens: Token[] = [];
  /** Data parsed from a leading YAML front-matter block, if any. */
  protected frontmatter: Obj = {};
  /** True when the selected grammar defines `fences`. */
  protected hasRulesGfm!: boolean;
  /** True when the selected grammar defines `table`. */
  protected hasRulesTables!: boolean;

  /**
   * @param staticThis The class whose static grammars and `simpleRules` are used.
   * @param options Lexer options; falls back to `Marked.options` when omitted.
   */
  constructor(protected staticThis: typeof BlockLexer, options?: object) {
    this.options = options || Marked.options;
    this.setRules();
  }

  /**
   * Accepts Markdown text and returns object with tokens and links.
   *
   * @param src String of markdown source to be compiled.
   * @param options Hash of options.
   * @param top Whether `src` is lexed as top-level content (enables the
   * table, def and paragraph rules).
   * @param isBlockQuote Forwarded to `getTokens`.
   */
  static lex(
    src: string,
    options?: MarkedOptions,
    top?: boolean,
    isBlockQuote?: boolean,
  ): LexerReturns {
    const lexer = new this(this, options);
    return lexer.getTokens(src, top, isBlockQuote);
  }

  /**
   * Returns the base block grammar, building and caching it on first use.
   *
   * The literal patterns contain placeholder words (`bull`, `hr`, `def`,
   * `tag`, ...) that are filled in through `ExtendRegexp.setGroup`.
   */
  protected static getRulesBase(): RulesBlockBase {
    if (this.rulesBase) {
      return this.rulesBase;
    }

    const base: RulesBlockBase = {
      newline: /^\n+/,
      code: /^( {4}[^\n]+\n*)+/,
      hr: /^( *[-*_]){3,} *(?:\n+|$)/,
      heading: /^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)/,
      lheading: /^([^\n]+)\n *(=|-){2,} *(?:\n+|$)/,
      blockquote: /^( *>[^\n]+(\n[^\n]+)*\n*)+/,
      list: /^( *)(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?!\1bull )\n*|\s*$)/,
      html:
        /^ *(?:comment *(?:\n|\s*$)|closed *(?:\n{2,}|\s*$)|closing *(?:\n{2,}|\s*$))/,
      def: /^ *\[([^\]]+)\]: *<?([^\s>]+)>?(?: +["(]([^\n]+)[")])? *(?:\n+|$)/,
      paragraph:
        /^((?:[^\n]+\n?(?!hr|heading|lheading|blockquote|tag|def))+)\n*/,
      text: /^[^\n]+/,
      bullet: /(?:[*+-]|\d+\.)/,
      item: /^( *)(bull) [^\n]*(?:\n(?!\1bull )[^\n]*)*/,
    };

    base.item = new ExtendRegexp(base.item, "gm").setGroup(/bull/g, base.bullet)
      .getRegexp();

    base.list = new ExtendRegexp(base.list)
      .setGroup(/bull/g, base.bullet)
      .setGroup("hr", "\\n+(?=\\1?(?:[-*_] *){3,}(?:\\n+|$))")
      .setGroup("def", "\\n+(?=" + base.def.source + ")")
      .getRegexp();

    // Matches any tag name except the inline-level ones listed here.
    const tag = "(?!(?:" +
      "a|em|strong|small|s|cite|q|dfn|abbr|data|time|code" +
      "|var|samp|kbd|sub|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo" +
      "|span|br|wbr|ins|del|img)\\b)\\w+(?!:/|[^\\w\\s@]*@)\\b";

    base.html = new ExtendRegexp(base.html)
      .setGroup("comment", /<!--[\s\S]*?-->/)
      .setGroup("closed", /<(tag)[\s\S]+?<\/\1>/)
      .setGroup("closing", /<tag(?:"[^"]*"|'[^']*'|[^'">])*?>/)
      .setGroup(/tag/g, tag)
      .getRegexp();

    base.paragraph = new ExtendRegexp(base.paragraph)
      .setGroup("hr", base.hr)
      .setGroup("heading", base.heading)
      .setGroup("lheading", base.lheading)
      .setGroup("blockquote", base.blockquote)
      .setGroup("tag", "<" + tag)
      .setGroup("def", base.def)
      .getRegexp();

    return (this.rulesBase = base);
  }

  /**
   * Returns the GFM block grammar (base grammar plus fenced code, a stricter
   * heading rule and a paragraph rule that also stops before fences and
   * lists), building and caching it on first use.
   */
  protected static getRulesGfm(): RulesBlockGfm {
    if (this.rulesGfm) {
      return this.rulesGfm;
    }

    const base = this.getRulesBase();

    const gfm: RulesBlockGfm = {
      ...base,
      fences: /^ *(`{3,}|~{3,})[ \.]*(\S+)? *\n([\s\S]*?)\s*\1 *(?:\n+|$)/,
      paragraph: /^/,
      heading: /^ *(#{1,6}) +([^\n]+?) *#* *(?:\n+|$)/,
    };

    // The fence and list patterns are spliced into the paragraph pattern, so
    // their first back-reference is renumbered to fit the combined pattern.
    const group1 = gfm.fences.source.replace("\\1", "\\2");
    const group2 = base.list.source.replace("\\1", "\\3");

    gfm.paragraph = new ExtendRegexp(base.paragraph).setGroup(
      "(?!",
      `(?!${group1}|${group2}|`,
    ).getRegexp();

    return (this.rulesGfm = gfm);
  }

  /**
   * Returns the GFM + tables block grammar, building and caching it on first
   * use.
   */
  protected static getRulesTable(): RulesBlockTables {
    if (this.rulesTables) {
      return this.rulesTables;
    }

    return (this.rulesTables = {
      ...this.getRulesGfm(),
      nptable:
        /^ *(\S.*\|.*)\n *([-:]+ *\|[-| :]*)\n((?:.*\|.*(?:\n|$))*)\n*/,
      table: /^ *\|(.+)\n *\|( *[-:]+[-| :]*)\n((?: *\|.*(?:\n|$))*)\n*/,
    });
  }

  /**
   * Selects the grammar matching `options.gfm` / `options.tables` and records
   * which optional rule sets it provides.
   */
  protected setRules() {
    if (this.options.gfm) {
      this.rules = this.options.tables
        ? this.staticThis.getRulesTable()
        : this.staticThis.getRulesGfm();
    } else {
      this.rules = this.staticThis.getRulesBase();
    }

    this.hasRulesGfm = (this.rules as RulesBlockGfm).fences !== undefined;
    this.hasRulesTables = (this.rules as RulesBlockTables).table !== undefined;
  }

  /**
   * Builds a table token shared by both table syntaxes.
   *
   * @param headerRow Raw text of the header line.
   * @param delimiterRow Raw text of the `---|:--:|--:` line; each column is
   * mapped to "left", "center", "right" or "" (no alignment).
   * @param cells Body rows, already split into cells by the caller.
   */
  protected createTableToken(
    headerRow: string,
    delimiterRow: string,
    cells: string[][],
  ): Token {
    const align = delimiterRow.replace(/^ *|\| *$/g, "").split(
      / *\| */,
    ) as Align[];

    for (let i = 0; i < align.length; i++) {
      if (/^ *-+: *$/.test(align[i])) {
        align[i] = "right";
      } else if (/^ *:-+: *$/.test(align[i])) {
        align[i] = "center";
      } else if (/^ *:-+ *$/.test(align[i])) {
        align[i] = "left";
      } else {
        align[i] = "";
      }
    }

    return {
      type: TokenType.table,
      header: headerRow.replace(/^ *| *\| *$/g, "").split(/ *\| */),
      align,
      cells,
    };
  }

  /**
   * Lexing.
   *
   * Repeatedly tries each block rule against the start of the remaining
   * input, consumes the matched text and pushes the corresponding token(s)
   * onto `this.tokens`. Blockquotes and list items recurse into this method,
   * so all nested tokens end up in the same flat list.
   *
   * @param src Markdown source (or fragment) to lex.
   * @param top Whether this is top-level content; the table, def and
   * paragraph rules only apply when it is truthy.
   * @param isBlockQuote Only forwarded to the recursive call for list items.
   * @returns The accumulated tokens, link definitions and front-matter data.
   * @throws Error when no rule consumes the remaining input.
   */
  protected getTokens(
    src: string,
    top?: boolean,
    isBlockQuote?: boolean,
  ): LexerReturns {
    let nextPart = src;
    let execArr: RegExpExecArray | null;

    mainLoop:
    while (nextPart) {
      // newline (no `continue`: the following rules run on what is left)
      if ((execArr = this.rules.newline.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);

        if (execArr[0].length > 1) {
          this.tokens.push({ type: TokenType.space });
        }
      }

      // code
      if ((execArr = this.rules.code.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        const code = execArr[0].replace(/^ {4}/gm, "");

        this.tokens.push({
          type: TokenType.code,
          text: !this.options.pedantic ? code.replace(/\n+$/, "") : code,
        });
        continue;
      }

      // fences code (gfm)
      if (
        this.hasRulesGfm &&
        (execArr = (this.rules as RulesBlockGfm).fences.exec(nextPart))
      ) {
        nextPart = nextPart.substring(execArr[0].length);

        this.tokens.push({
          type: TokenType.code,
          lang: execArr[2],
          text: execArr[3] || "",
        });
        continue;
      }

      // heading
      if ((execArr = this.rules.heading.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        this.tokens.push({
          type: TokenType.heading,
          depth: execArr[1].length,
          text: execArr[2],
        });
        continue;
      }

      // table no leading pipe (gfm)
      if (
        top && this.hasRulesTables &&
        (execArr = (this.rules as RulesBlockTables).nptable.exec(nextPart))
      ) {
        nextPart = nextPart.substring(execArr[0].length);

        const cells = execArr[3].replace(/\n$/, "").split("\n").map(
          (row: string) => row.split(/ *\| */),
        );

        this.tokens.push(this.createTableToken(execArr[1], execArr[2], cells));
        continue;
      }

      // lheading
      if ((execArr = this.rules.lheading.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);

        this.tokens.push({
          type: TokenType.heading,
          depth: execArr[2] === "=" ? 1 : 2,
          text: execArr[1],
        });
        continue;
      }

      // hr (or front matter when nothing but blank space precedes it)
      if ((execArr = this.rules.hr.exec(nextPart))) {
        // `every` is true for an empty list, so this also covers "no tokens yet".
        const nothingBefore = this.tokens.every(
          (token) => token.type === TokenType.space,
        );
        const frontMatter = nothingBefore
          ? /^(?:\-\-\-)(.*?)(?:\-\-\-|\.\.\.)/s.exec(nextPart)
          : null;

        if (frontMatter) {
          // Grab the front-matter data and parse it into a JavaScript object.
          // NOTE(review): the text comes straight from the document; confirm
          // the loader's default schema is acceptable for untrusted input.
          nextPart = nextPart.substring(frontMatter[0].length);
          const parsed = load(frontMatter[1]);
          // Keep the `{}` default if the loader yields a nullish value
          // (presumably the case for an empty block).
          this.frontmatter = parsed == null ? {} : <Obj> parsed;
          continue;
        }

        // Not front matter: always consume the rule as a thematic break.
        // Without this, a leading `***` / `___` / unclosed `---` would never
        // be consumed and the loop would spin forever.
        nextPart = nextPart.substring(execArr[0].length);
        this.tokens.push({ type: TokenType.hr });
        continue;
      }

      // blockquote
      if ((execArr = this.rules.blockquote.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);

        this.tokens.push({ type: TokenType.blockquoteStart });
        const str = execArr[0].replace(/^ *> ?/gm, "");

        // NOTE(review): the comment inherited here said `top` is passed on to
        // keep the "toplevel" state (as markdown.pl does), but the call omits
        // it, so quoted content skips the top-only rules. Behaviour is kept
        // as-is; confirm which one is intended.
        this.getTokens(str);
        this.tokens.push({ type: TokenType.blockquoteEnd });
        continue;
      }

      // list
      if ((execArr = this.rules.list.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        const bull: string = execArr[2];

        // Bullets longer than one character are the numbered form (`1.`).
        this.tokens.push(
          { type: TokenType.listStart, ordered: bull.length > 1 },
        );

        // Get each top-level item.
        const items: string[] = execArr[0].match(this.rules.item) || [];
        const length = items.length;
        let next = false;

        for (let i = 0; i < length; i++) {
          let item = items[i];

          // Remove the list item's bullet so it is seen as the next token.
          let space = item.length;
          item = item.replace(/^ *([*+-]|\d+\.) +/, "");

          // Outdent whatever the list item contains. Hacky.
          if (item.indexOf("\n ") !== -1) {
            // `space` becomes the width of the removed bullet prefix.
            space -= item.length;
            item = !this.options.pedantic
              ? item.replace(new RegExp("^ {1," + space + "}", "gm"), "")
              : item.replace(/^ {1,4}/gm, "");
          }

          // Determine whether the next list item belongs here.
          // Backpedal if it does not belong in this list.
          if (this.options.smartLists && i !== length - 1) {
            const bb = this.staticThis.getRulesBase().bullet.exec(items[i + 1]);
            const blockBullet = bb ? bb[0] : "";

            if (
              bull !== blockBullet &&
              !(bull.length > 1 && blockBullet.length > 1)
            ) {
              // Push the remaining items back onto the input and stop after
              // the current one.
              nextPart = items.slice(i + 1).join("\n") + nextPart;
              i = length - 1;
            }
          }

          // Determine whether item is loose or not.
          // Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
          // for discount behavior.
          let loose = next || /\n\n(?!\s*$)/.test(item);

          if (i !== length - 1) {
            next = item.charAt(item.length - 1) === "\n";

            if (!loose) {
              loose = next;
            }
          }

          this.tokens.push(
            {
              type: loose ? TokenType.looseItemStart : TokenType.listItemStart,
            },
          );

          // Recurse.
          this.getTokens(item, false, isBlockQuote);
          this.tokens.push({ type: TokenType.listItemEnd });
        }

        this.tokens.push({ type: TokenType.listEnd });
        continue;
      }

      // html
      if ((execArr = this.rules.html.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        const attr = execArr[1];
        const isPre = attr === "pre" || attr === "script" || attr === "style";

        this.tokens.push({
          type: this.options.sanitize ? TokenType.paragraph : TokenType.html,
          pre: !this.options.sanitizer && isPre,
          text: execArr[0],
        });
        continue;
      }

      // def
      if (top && (execArr = this.rules.def.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);

        this.links[execArr[1].toLowerCase()] = {
          href: execArr[2],
          title: execArr[3],
        };
        continue;
      }

      // table (gfm)
      if (
        top && this.hasRulesTables &&
        (execArr = (this.rules as RulesBlockTables).table.exec(nextPart))
      ) {
        nextPart = nextPart.substring(execArr[0].length);

        // Unlike the pipe-less form, rows here carry leading/trailing pipes
        // that are stripped before splitting into cells.
        const cells = execArr[3].replace(/(?: *\| *)?\n$/, "").split("\n").map(
          (row: string) => row.replace(/^ *\| *| *\| *$/g, "").split(/ *\| */),
        );

        this.tokens.push(this.createTableToken(execArr[1], execArr[2], cells));
        continue;
      }

      // simple rules
      if (this.staticThis.simpleRules.length) {
        const simpleRules = this.staticThis.simpleRules;

        for (let i = 0; i < simpleRules.length; i++) {
          if ((execArr = simpleRules[i].exec(nextPart))) {
            nextPart = nextPart.substring(execArr[0].length);
            // Token type is derived from the rule's 1-based position.
            const type = "simpleRule" + (i + 1);
            this.tokens.push({ type, execArr });
            continue mainLoop;
          }
        }
      }

      // top-level paragraph
      if (top && (execArr = this.rules.paragraph.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);

        if (execArr[1].slice(-1) === "\n") {
          this.tokens.push({
            type: TokenType.paragraph,
            text: execArr[1].slice(0, -1),
          });
        } else {
          this.tokens.push({
            type: this.tokens.length > 0 ? TokenType.paragraph : TokenType.text,
            text: execArr[1],
          });
        }
        continue;
      }

      // text
      // Top-level should never reach here.
      if ((execArr = this.rules.text.exec(nextPart))) {
        nextPart = nextPart.substring(execArr[0].length);
        this.tokens.push({ type: TokenType.text, text: execArr[0] });
        continue;
      }

      // No rule consumed anything: bail out instead of looping forever.
      if (nextPart) {
        throw new Error(
          "Infinite loop on byte: " + nextPart.charCodeAt(0) +
            `, near text '${nextPart.slice(0, 30)}...'`,
        );
      }
    }

    return { tokens: this.tokens, links: this.links, meta: this.frontmatter };
  }
}