From 5be9d6d70ea35be6398ee35958fb8ac955a89fbe Mon Sep 17 00:00:00 2001 From: Trevor Buckner Date: Tue, 15 Jun 2021 19:22:00 -0400 Subject: [PATCH] feat: Custom Tokenizer/Renderer extensions (#2043) Co-authored-by: Tony Brix --- docs/USING_PRO.md | 333 ++++++++++++++++----- lib/marked.esm.js | 605 +++++++++++++++++++++++---------------- src/Lexer.js | 74 ++++- src/Parser.js | 27 +- src/defaults.js | 1 + src/marked.js | 145 +++++++--- test/unit/marked-spec.js | 509 ++++++++++++++++++++++++++++++++ 7 files changed, 1345 insertions(+), 349 deletions(-) diff --git a/docs/USING_PRO.md b/docs/USING_PRO.md index 0119911ff4..012f7634e8 100644 --- a/docs/USING_PRO.md +++ b/docs/USING_PRO.md @@ -1,24 +1,71 @@ ## Extending Marked -To champion the single-responsibility and open/closed principles, we have tried to make it relatively painless to extend marked. If you are looking to add custom functionality, this is the place to start. +To champion the single-responsibility and open/closed principles, we have tried to make it relatively painless to extend Marked. If you are looking to add custom functionality, this is the place to start.

<h2 id="use">marked.use()</h2>

-`marked.use(options)` is the recommended way to extend marked. The options object can contain any [option](/using_advanced#options) available in marked.
+`marked.use(extension)` is the recommended way to extend Marked. The `extension` object can contain any [option](/using_advanced#options) available in Marked:

-The `renderer` and `tokenizer` options can be an object with functions that will be merged into the `renderer` and `tokenizer` respectively.
-The `renderer` and `tokenizer` functions can return false to fallback to the previous function.
+```js
+const marked = require('marked');
+
+marked.use({
+  pedantic: false,
+  gfm: true,
+  breaks: false,
+  sanitize: false,
+  smartLists: true,
+  smartypants: false,
+  xhtml: false
+});
+```
+
+You can also supply multiple `extension` objects at once.
+
+```js
+marked.use(myExtension, extension2, extension3);
+
+// EQUIVALENT TO:
+
+marked.use(myExtension);
+marked.use(extension2);
+marked.use(extension3);
+```

-The `walkTokens` option can be a function that will be called with every token before rendering. When calling `use` multiple times with different `walkTokens` functions each function will be called in the **reverse** order in which they were assigned.
+All options will overwrite those previously set, except for the following options which will be merged with the existing framework and can be used to change or extend the functionality of Marked: `renderer`, `tokenizer`, `walkTokens`, and `extensions`.
+
+* The `renderer` and `tokenizer` options are objects with functions that will be merged into the built-in `renderer` and `tokenizer` respectively.
+
+* The `walkTokens` option is a function that will be called to post-process every token before rendering.
+
+* The `extensions` option is an array of objects that can contain additional custom `renderer` and `tokenizer` steps that will execute before any of the default parsing logic occurs.
+
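Putting the merge rules together, here is a minimal sketch (the handler bodies are hypothetical, for illustration only) of a single `use()` call that sets a plain option, merges a `renderer` override, and registers a `walkTokens` function:

```js
const marked = require('marked');

marked.use({
  breaks: true,          // plain option: simply overwrites the previous value
  renderer: {
    em(text) {           // merged into the built-in renderer
      return `<i>${text}</i>`;
    }
  },
  walkTokens(token) {    // composed with any previously registered walkTokens functions
    if (token.type === 'em') {
      console.log('em token:', token.raw);
    }
  }
});
```

***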

<h2 id="pipeline">The Marked Pipeline</h2>

+
+Before building your custom extensions, it is important to understand the components that Marked uses to translate from Markdown to HTML:
+
+1) The user supplies Marked with an input string to be translated.
+2) The `lexer` feeds segments of the input text string into each `tokenizer`, and from their output, generates a series of tokens in a nested tree structure.
+3) Each `tokenizer` receives a segment of Markdown text and, if it matches a particular pattern, generates a token object containing any relevant information.
+4) The `walkTokens` function will traverse every token in the tree and perform any final adjustments to the token contents.
+5) The `parser` traverses the token tree and feeds each token into the appropriate `renderer`, and concatenates their outputs into the final HTML result.
+6) Each `renderer` receives a token and manipulates its contents to generate a segment of HTML.
+
+Marked provides methods of directly overriding the `renderer` and `tokenizer` for any existing token type, as well as inserting additional custom `renderer` and `tokenizer` functions to handle entirely custom syntax.
+
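For orientation, the same pipeline can be driven manually through the public entry points, which is a convenient way to inspect what an extension produces at each stage; a minimal sketch:

```js
const marked = require('marked');

const tokens = marked.lexer('# heading');  // lexer + tokenizers: Markdown in, token tree out
marked.walkTokens(tokens, (token) => {     // visit every token in the tree
  console.log(token.type);
});
console.log(marked.parser(tokens));        // parser + renderers: token tree in, HTML out
```

***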

<h2 id="renderer">The Renderer : <code>renderer</code></h2>

-

<h2 id="renderer">The renderer</h2>

+The renderer defines the HTML output of a given token. If you supply a `renderer` object to the Marked options, it will be merged with the built-in renderer and any functions inside will override the default handling of that token type. -The renderer defines the output of the parser. +Calling `marked.use()` to override the same function multiple times will give priority to the version that was assigned *last*. Overriding functions can return `false` to fall back to the previous override in the sequence, or resume default behavior if all overrides return `false`. Returning any other value (including nothing) will prevent fallback behavior. -**Example:** Overriding default heading token by adding an embedded anchor tag like on GitHub. +**Example:** Overriding output of the default `heading` token by adding an embedded anchor tag like on GitHub. ```js // Create reference instance @@ -56,20 +103,31 @@ console.log(marked('# heading+')); ``` -### Block level renderer methods - -- code(*string* code, *string* infostring, *boolean* escaped) -- blockquote(*string* quote) -- html(*string* html) -- heading(*string* text, *number* level, *string* raw, *Slugger* slugger) -- hr() -- list(*string* body, *boolean* ordered, *number* start) -- listitem(*string* text, *boolean* task, *boolean* checked) -- checkbox(*boolean* checked) -- paragraph(*string* text) -- table(*string* header, *string* body) -- tablerow(*string* content) -- tablecell(*string* content, *object* flags) +### Block-level renderer methods + +- **code**(*string* code, *string* infostring, *boolean* escaped) +- **blockquote**(*string* quote) +- **html**(*string* html) +- **heading**(*string* text, *number* level, *string* raw, *Slugger* slugger) +- **hr**() +- **list**(*string* body, *boolean* ordered, *number* start) +- **listitem**(*string* text, *boolean* task, *boolean* checked) +- **checkbox**(*boolean* checked) +- **paragraph**(*string* text) +- **table**(*string* header, *string* body) +- **tablerow**(*string* content) +- **tablecell**(*string* content, *object* flags) + +### Inline-level renderer methods + +- **strong**(*string* text) +- **em**(*string* text) +- **codespan**(*string* code) +- **br**() +- **del**(*string* text) +- **link**(*string* href, *string* title, *string* text) +- **image**(*string* href, *string* title, *string* text) +- **text**(*string* text) `slugger` has the `slug` method to create a unique id from value: @@ -103,20 +161,13 @@ slugger.slug('foo') // foo-4 } ``` -### Inline level renderer methods +*** -- strong(*string* text) -- em(*string* text) -- codespan(*string* code) -- br() -- del(*string* text) -- link(*string* href, *string* title, *string* text) -- image(*string* href, *string* title, *string* text) -- text(*string* text) +
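For instance, here is a small sketch of the fallback behavior described above: an override that special-cases some input and returns `false` to defer everything else to the default renderer.

```js
marked.use({
  renderer: {
    codespan(code) {
      if (code.startsWith('$')) {  // only handle "math-like" code spans
        return `<code class="math">${code}</code>`;
      }
      return false;                // fall back to the default codespan renderer
    }
  }
});
```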

<h2 id="tokenizer">The Tokenizer : <code>tokenizer</code></h2>

-

<h2 id="tokenizer">The tokenizer</h2>

+The tokenizer defines how to turn markdown text into tokens. If you supply a `tokenizer` object to the Marked options, it will be merged with the built-in tokenizer and any functions inside will override the default handling of that token type. -The tokenizer defines how to turn markdown text into tokens. +Calling `marked.use()` to override the same function multiple times will give priority to the version that was assigned *last*. Overriding functions can return `false` to fall back to the previous override in the sequence, or resume default behavior if all overrides return `false`. Returning any other value (including nothing) will prevent fallback behavior. **Example:** Overriding default `codespan` tokenizer to include LaTeX. @@ -157,34 +208,34 @@ console.log(marked('$ latex code $\n\n` other code `')); ### Block level tokenizer methods -- space(*string* src) -- code(*string* src) -- fences(*string* src) -- heading(*string* src) -- nptable(*string* src) -- hr(*string* src) -- blockquote(*string* src) -- list(*string* src) -- html(*string* src) -- def(*string* src) -- table(*string* src) -- lheading(*string* src) -- paragraph(*string* src) -- text(*string* src) +- **space**(*string* src) +- **code**(*string* src) +- **fences**(*string* src) +- **heading**(*string* src) +- **nptable**(*string* src) +- **hr**(*string* src) +- **blockquote**(*string* src) +- **list**(*string* src) +- **html**(*string* src) +- **def**(*string* src) +- **table**(*string* src) +- **lheading**(*string* src) +- **paragraph**(*string* src) +- **text**(*string* src) ### Inline level tokenizer methods -- escape(*string* src) -- tag(*string* src, *bool* inLink, *bool* inRawBlock) -- link(*string* src) -- reflink(*string* src, *object* links) -- emStrong(*string* src, *string* maskedSrc, *string* prevChar) -- codespan(*string* src) -- br(*string* src) -- del(*string* src) -- autolink(*string* src, *function* mangle) -- url(*string* src, *function* mangle) -- inlineText(*string* src, *bool* inRawBlock, *function* smartypants) +- **escape**(*string* src) +- **tag**(*string* src, *bool* inLink, *bool* inRawBlock) +- **link**(*string* src) +- **reflink**(*string* src, *object* links) +- **emStrong**(*string* src, *string* maskedSrc, *string* prevChar) +- **codespan**(*string* src) +- **br**(*string* src) +- **del**(*string* src) +- **autolink**(*string* src, *function* mangle) +- **url**(*string* src, *function* mangle) +- **inlineText**(*string* src, *bool* inRawBlock, *function* smartypants) `mangle` is a method that changes text to HTML character references: @@ -202,10 +253,14 @@ smartypants('"this ... string"') // "“this … string”" ``` -
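A tokenizer override can also switch a built-in rule off entirely. A sketch (assuming the GFM `url` rule is active): returning no token from `url` leaves bare URLs to the `inlineText` tokenizer, so they render as plain text instead of links.

```js
marked.use({
  tokenizer: {
    url(src) {
      // Intentionally return undefined so no autolink token is ever produced.
    }
  }
});
```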

<h2 id="walk-tokens">Walk Tokens</h2>

+*** + +

<h2 id="walk-tokens">Walk Tokens : <code>walkTokens</code></h2>

The walkTokens function gets called with every token. Child tokens are called before moving on to sibling tokens. Each token is passed by reference so updates are persisted when passed to the parser. The return value of the function is ignored. +`marked.use()` can be called multiple times with different `walkTokens` functions. Each function will be called in order, starting with the function that was assigned *last*. + **Example:** Overriding heading tokens to start at h2. ```js @@ -231,17 +286,165 @@ console.log(marked('# heading 2\n\n## heading 3'));

<h3 id="heading-3">heading 3</h3>

``` -
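Ordering matters once several `walkTokens` functions are registered; a tiny sketch with two hypothetical functions:

```js
marked.use({ walkTokens(token) { /* assigned first, runs second */ } });
marked.use({ walkTokens(token) { /* assigned last, runs first */ } });
```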

<h2 id="lexer">The lexer</h2>

+*** -The lexer takes a markdown string and calls the tokenizer functions. +

<h2 id="extensions">Custom Extensions : <code>extensions</code></h2>

-

<h2 id="parser">The parser</h2>

+You may supply an `extensions` array to the `options` object. This array can contain any number of `extension` objects, using the following properties: -The parser takes tokens as input and calls the renderer functions. +
+
<dt><code><strong>name</strong></code></dt>
+
A string used to identify the token that will be handled by this extension.
+
+If the name matches an existing extension name, or an existing method in the tokenizer/renderer methods listed above, it will override the previously assigned behavior, with priority given to the extension that was assigned **last**. An extension can return `false` to fall back to the previous behavior.
+ +
<dt><code><strong>level</strong></code></dt>
+
A string to determine when to run the extension tokenizer. Must be equal to 'block' or 'inline'. + +A **block-level** extension will be handled before any of the block-level tokenizer methods listed above, and generally consists of 'container-type' text (paragraphs, tables, blockquotes, etc.). + +An **inline-level** extension will be handled inside each block-level token, before any of the inline-level tokenizer methods listed above. These generally consist of 'style-type' text (italics, bold, etc.).
+ +
<dt><code><strong>start(string src)</strong></code></dt>
+
A function that returns the index of the next potential start of the custom token.
+
+The index can be the result of a `src.match().index`, or even a simple `src.indexOf()`. Marked will use this function to ensure that it does not skip over any text that should be part of the custom token.
+ +
<dt><code><strong>tokenizer(string src, array tokens)</strong></code></dt>
+
A function that reads a string of Markdown text and returns a generated token. The tokens parameter contains the array of tokens that have been generated by the lexer up to that point, and can be used to access the previous token, for instance. + +The return value should be an object with the following parameters: + +
+
<dt><code><strong>type</strong></code></dt>
+
A string that matches the name parameter of the extension.
+ +
<dt><code><strong>raw</strong></code></dt>
+
A string containing all of the text that this token consumes from the source.
+ +
<dt><code><strong>tokens [optional]</strong></code></dt>
+
An array of child tokens that will be traversed by the walkTokens function by default.
+
+ +The returned token can also contain any other custom parameters of your choice that your custom `renderer` might need to access. + +The tokenizer function has access to the lexer in the `this` object, which can be used if any internal section of the string needs to be parsed further, such as in handling any inline syntax on the text within a block token. The key functions that may be useful include: + +
+
<dt><code><strong>this.blockTokens(string text)</strong></code></dt>
+
Runs the block tokenizer functions (including any extensions) on the provided text, and returns an array containing a nested tree of tokens.
+ +
<dt><code><strong>this.inlineTokens(string text)</strong></code></dt>
+
Runs the inline tokenizer functions (including any extensions) on the provided text, and returns an array containing a nested tree of tokens. This can be used to generate the tokens parameter.
+
+ +
<dt><code><strong>renderer(object token)</strong></code></dt>
+
A function that reads a token and returns the generated HTML output string.
+
+The renderer function has access to the parser in the `this` object, which can be used if any part of the token needs to be parsed further, such as any child tokens. The key functions that may be useful include:
+
+
<dt><code><strong>this.parse(array tokens)</strong></code></dt>
+
Runs the block renderer functions (including any extensions) on the provided array of tokens, and returns the resulting HTML string output.
+ +
<dt><code><strong>this.parseInline(array tokens)</strong></code></dt>
+
Runs the inline renderer functions (including any extensions) on the provided array of tokens, and returns the resulting HTML string output. This could be used to generate text from any child tokens, for example.
+
+ +
+ +
<dt><code><strong>childTokens [optional]</strong></code></dt>
+
An array of strings that match the names of any token parameters that should be traversed by the walkTokens functions. For instance, if you want to use a second custom parameter to contain child tokens in addition to tokens, it could be listed here. If childTokens is provided, the tokens array will not be walked by default unless it is also included in the childTokens array. +
+ + + + +**Example:** Add a custom syntax to generate `
<dl>` description lists.

``` js
const descriptionlist = {
  name: 'descriptionList',
  level: 'block',                                     // Is this a block-level or inline-level tokenizer?
  start(src) { return src.match(/:[^:\n]/)?.index; }, // Hint to Marked.js to stop and check for a match
  tokenizer(src, tokens) {
    const rule = /^(?::[^:\n]+:[^:\n]*(?:\n|$))+/;    // Regex for the complete token
    const match = rule.exec(src);
    if (match) {
      return {                                        // Token to generate
        type: 'descriptionList',                      // Should match "name" above
        raw: match[0],                                // Text to consume from the source
        text: match[0].trim(),                        // Additional custom properties
        tokens: this.inlineTokens(match[0].trim())    // inlineTokens to process **bold**, *italics*, etc.
      };
    }
  },
  renderer(token) {
    return `<dl>${this.parseInline(token.tokens)}\n</dl>`; // parseInline to turn child tokens into HTML
  }
};

const description = {
  name: 'description',
  level: 'inline',                                    // Is this a block-level or inline-level tokenizer?
  start(src) { return src.match(/:/)?.index; },       // Hint to Marked.js to stop and check for a match
  tokenizer(src, tokens) {
    const rule = /^:([^:\n]+):([^:\n]*)(?:\n|$)/;     // Regex for the complete token
    const match = rule.exec(src);
    if (match) {
      return {                                        // Token to generate
        type: 'description',                          // Should match "name" above
        raw: match[0],                                // Text to consume from the source
        dt: this.inlineTokens(match[1].trim()),       // Additional custom properties
        dd: this.inlineTokens(match[2].trim())
      };
    }
  },
  renderer(token) {
    return `\n<dt>${this.parseInline(token.dt)}</dt><dd>${this.parseInline(token.dd)}</dd>`;
  },
  childTokens: ['dt', 'dd'],                          // Any child tokens to be visited by walkTokens
  walkTokens(token) {                                 // Post-processing on the completed token tree
    if (token.type === 'strong') {
      token.text += ' walked';
    }
  }
};

marked.use({ extensions: [descriptionlist, description] });

// EQUIVALENT TO:

marked.use({ extensions: [descriptionlist] });
marked.use({ extensions: [description] });

console.log(marked('A Description List:\n'
                 + ': Topic 1 : Description 1\n'
                 + ': **Topic 2** : *Description 2*'));
```

**Output**

``` bash
<p>A Description List:</p>
<dl>
<dt>Topic 1</dt><dd>Description 1</dd>
<dt><strong>Topic 2 walked</strong></dt><dd><em>Description 2</em></dd>
</dl>
```
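Because extensions registered under an existing `name` take priority and may return `false` to defer, a later extension can selectively hand tokens back to an earlier one. A hedged sketch building on the example above:

```js
marked.use({
  extensions: [{
    name: 'description',       // same name as the extension above
    renderer(token) {
      if (!token.dd.length) {  // special-case empty descriptions
        return `\n<dt>${this.parseInline(token.dt)}</dt><dd></dd>`;
      }
      return false;            // otherwise defer to the earlier 'description' renderer
    }
  }]
});
```

***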

<h2 id="lexer-parser">Access to lexer and parser</h2>

+

<h2 id="lexer">The Lexer</h2>

+ +The lexer takes a markdown string and calls the tokenizer functions. + + +
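A quick sketch of calling it directly:

```js
const marked = require('marked');

const tokens = marked.lexer('**strong** and `code`');
console.log(tokens);  // nested token tree; reference links are attached as tokens.links
```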

<h2 id="parser">The Parser</h2>

+ +The parser takes tokens as input and calls the renderer functions. + +
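And the matching sketch for the parser, fed with the lexer's output:

```js
const html = marked.parser(marked.lexer('# heading'));
console.log(html);  // '<h1 id="heading">heading</h1>\n'
```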

<h2 id="lexer-parser">Access to Lexer and Parser</h2>

You also have direct access to the lexer and parser if you so desire. diff --git a/lib/marked.esm.js b/lib/marked.esm.js index 8d916b5c92..c707636dfe 100644 --- a/lib/marked.esm.js +++ b/lib/marked.esm.js @@ -11,10 +11,12 @@ var defaults$5 = {exports: {}}; -function getDefaults$1() { +var defaults = createCommonjsModule(function (module) { +function getDefaults() { return { baseUrl: null, breaks: false, + extensions: null, gfm: true, headerIds: true, headerPrefix: '', @@ -60,7 +62,7 @@ const escapeReplacements = { "'": ''' }; const getEscapeReplacement = (ch) => escapeReplacements[ch]; -function escape$3(html, encode) { +function escape(html, encode) { if (encode) { if (escapeTest.test(html)) { return html.replace(escapeReplace, getEscapeReplacement); @@ -76,7 +78,7 @@ function escape$3(html, encode) { const unescapeTest = /&(#(?:\d+)|(?:#x[0-9A-Fa-f]+)|(?:\w+));?/ig; -function unescape$1(html) { +function unescape(html) { // explicitly match decimal, hex, and named HTML entities return html.replace(unescapeTest, (_, n) => { n = n.toLowerCase(); @@ -91,7 +93,7 @@ function unescape$1(html) { } const caret = /(^|[^\[])\^/g; -function edit$1(regex, opt) { +function edit(regex, opt) { regex = regex.source || regex; opt = opt || ''; const obj = { @@ -110,11 +112,11 @@ function edit$1(regex, opt) { const nonWordAndColonTest = /[^\w:]/g; const originIndependentUrl = /^$|^[a-z][a-z0-9+.-]*:|^[?#]/i; -function cleanUrl$1(sanitize, base, href) { +function cleanUrl(sanitize, base, href) { if (sanitize) { let prot; try { - prot = decodeURIComponent(unescape$1(href)) + prot = decodeURIComponent(unescape(href)) .replace(nonWordAndColonTest, '') .toLowerCase(); } catch (e) { @@ -148,7 +150,7 @@ function resolveUrl(base, href) { if (justDomain.test(base)) { baseUrls[' ' + base] = base + '/'; } else { - baseUrls[' ' + base] = rtrim$1(base, '/', true); + baseUrls[' ' + base] = rtrim(base, '/', true); } } base = baseUrls[' ' + base]; @@ -169,9 +171,9 @@ function resolveUrl(base, href) { } } -const noopTest$1 = { exec: function noopTest() {} }; +const noopTest = { exec: function noopTest() {} }; -function merge$2(obj) { +function merge(obj) { let i = 1, target, key; @@ -188,7 +190,7 @@ function merge$2(obj) { return obj; } -function splitCells$1(tableRow, count) { +function splitCells(tableRow, count) { // ensure that every cell-delimiting pipe has a space // before it to distinguish it from an escaped pipe const row = tableRow.replace(/\|/g, (match, offset, str) => { @@ -223,7 +225,7 @@ function splitCells$1(tableRow, count) { // Remove trailing 'c's. Equivalent to str.replace(/c*$/, ''). // /c*$/ is vulnerable to REDOS. // invert: Remove suffix of non-c chars instead. Default falsey. -function rtrim$1(str, c, invert) { +function rtrim(str, c, invert) { const l = str.length; if (l === 0) { return ''; @@ -247,7 +249,7 @@ function rtrim$1(str, c, invert) { return str.substr(0, l - suffLen); } -function findClosingBracket$1(str, b) { +function findClosingBracket(str, b) { if (str.indexOf(b[1]) === -1) { return -1; } @@ -269,14 +271,14 @@ function findClosingBracket$1(str, b) { return -1; } -function checkSanitizeDeprecation$1(opt) { +function checkSanitizeDeprecation(opt) { if (opt && opt.sanitize && !opt.silent) { console.warn('marked(): sanitize and sanitizer parameters are deprecated since version 0.7.0, should not be used and will be removed in the future. 
Read more here: https://marked.js.org/#/USING_ADVANCED.md#options'); } } // copied from https://stackoverflow.com/a/5450113/806777 -function repeatString$1(pattern, count) { +function repeatString(pattern, count) { if (count < 1) { return ''; } @@ -292,31 +294,31 @@ function repeatString$1(pattern, count) { } var helpers = { - escape: escape$3, - unescape: unescape$1, - edit: edit$1, - cleanUrl: cleanUrl$1, + escape, + unescape, + edit, + cleanUrl, resolveUrl, - noopTest: noopTest$1, - merge: merge$2, - splitCells: splitCells$1, - rtrim: rtrim$1, - findClosingBracket: findClosingBracket$1, - checkSanitizeDeprecation: checkSanitizeDeprecation$1, - repeatString: repeatString$1 + noopTest, + merge, + splitCells, + rtrim, + findClosingBracket, + checkSanitizeDeprecation, + repeatString }; -const { defaults: defaults$4 } = defaults$5.exports; +const { defaults: defaults$1 } = defaults; const { - rtrim, - splitCells, - escape: escape$2, - findClosingBracket + rtrim: rtrim$1, + splitCells: splitCells$1, + escape: escape$1, + findClosingBracket: findClosingBracket$1 } = helpers; function outputLink(cap, link, raw) { const href = link.href; - const title = link.title ? escape$2(link.title) : null; + const title = link.title ? escape$1(link.title) : null; const text = cap[1].replace(/\\([\[\]])/g, '$1'); if (cap[0].charAt(0) !== '!') { @@ -333,7 +335,7 @@ function outputLink(cap, link, raw) { raw, href, title, - text: escape$2(text) + text: escape$1(text) }; } } @@ -371,7 +373,7 @@ function indentCodeCompensation(raw, text) { */ var Tokenizer_1 = class Tokenizer { constructor(options) { - this.options = options || defaults$4; + this.options = options || defaults$1; } space(src) { @@ -396,7 +398,7 @@ var Tokenizer_1 = class Tokenizer { raw: cap[0], codeBlockStyle: 'indented', text: !this.options.pedantic - ? rtrim(text, '\n') + ? rtrim$1(text, '\n') : text }; } @@ -424,7 +426,7 @@ var Tokenizer_1 = class Tokenizer { // remove trailing #s if (/#$/.test(text)) { - const trimmed = rtrim(text, '#'); + const trimmed = rtrim$1(text, '#'); if (this.options.pedantic) { text = trimmed.trim(); } else if (!trimmed || / $/.test(trimmed)) { @@ -447,7 +449,7 @@ var Tokenizer_1 = class Tokenizer { if (cap) { const item = { type: 'table', - header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')), + header: splitCells$1(cap[1].replace(/^ *| *\| *$/g, '')), align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [], raw: cap[0] @@ -470,7 +472,7 @@ var Tokenizer_1 = class Tokenizer { l = item.cells.length; for (i = 0; i < l; i++) { - item.cells[i] = splitCells(item.cells[i], item.header.length); + item.cells[i] = splitCells$1(item.cells[i], item.header.length); } return item; @@ -592,7 +594,7 @@ var Tokenizer_1 = class Tokenizer { } // trim item newlines at end - item = rtrim(item, '\n'); + item = rtrim$1(item, '\n'); if (i !== l - 1) { raw = raw + '\n'; } @@ -644,7 +646,7 @@ var Tokenizer_1 = class Tokenizer { raw: cap[0], pre: !this.options.sanitizer && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'), - text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape$2(cap[0])) : cap[0] + text: this.options.sanitize ? (this.options.sanitizer ? 
this.options.sanitizer(cap[0]) : escape$1(cap[0])) : cap[0] }; } } @@ -669,7 +671,7 @@ var Tokenizer_1 = class Tokenizer { if (cap) { const item = { type: 'table', - header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')), + header: splitCells$1(cap[1].replace(/^ *| *\| *$/g, '')), align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [] }; @@ -693,7 +695,7 @@ var Tokenizer_1 = class Tokenizer { l = item.cells.length; for (i = 0; i < l; i++) { - item.cells[i] = splitCells( + item.cells[i] = splitCells$1( item.cells[i].replace(/^ *\| *| *\| *$/g, ''), item.header.length); } @@ -745,7 +747,7 @@ var Tokenizer_1 = class Tokenizer { return { type: 'escape', raw: cap[0], - text: escape$2(cap[1]) + text: escape$1(cap[1]) }; } } @@ -774,7 +776,7 @@ var Tokenizer_1 = class Tokenizer { text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) - : escape$2(cap[0])) + : escape$1(cap[0])) : cap[0] }; } @@ -791,13 +793,13 @@ var Tokenizer_1 = class Tokenizer { } // ending angle bracket cannot be escaped - const rtrimSlash = rtrim(trimmedUrl.slice(0, -1), '\\'); + const rtrimSlash = rtrim$1(trimmedUrl.slice(0, -1), '\\'); if ((trimmedUrl.length - rtrimSlash.length) % 2 === 0) { return; } } else { // find closing parenthesis - const lastParenIndex = findClosingBracket(cap[2], '()'); + const lastParenIndex = findClosingBracket$1(cap[2], '()'); if (lastParenIndex > -1) { const start = cap[0].indexOf('!') === 0 ? 5 : 4; const linkLen = start + cap[1].length + lastParenIndex; @@ -925,7 +927,7 @@ var Tokenizer_1 = class Tokenizer { if (hasNonSpaceChars && hasSpaceCharsOnBothEnds) { text = text.substring(1, text.length - 1); } - text = escape$2(text, true); + text = escape$1(text, true); return { type: 'codespan', raw: cap[0], @@ -960,10 +962,10 @@ var Tokenizer_1 = class Tokenizer { if (cap) { let text, href; if (cap[2] === '@') { - text = escape$2(this.options.mangle ? mangle(cap[1]) : cap[1]); + text = escape$1(this.options.mangle ? mangle(cap[1]) : cap[1]); href = 'mailto:' + text; } else { - text = escape$2(cap[1]); + text = escape$1(cap[1]); href = text; } @@ -988,7 +990,7 @@ var Tokenizer_1 = class Tokenizer { if (cap = this.rules.inline.url.exec(src)) { let text, href; if (cap[2] === '@') { - text = escape$2(this.options.mangle ? mangle(cap[0]) : cap[0]); + text = escape$1(this.options.mangle ? mangle(cap[0]) : cap[0]); href = 'mailto:' + text; } else { // do extended autolink path validation @@ -997,7 +999,7 @@ var Tokenizer_1 = class Tokenizer { prevCapZero = cap[0]; cap[0] = this.rules.inline._backpedal.exec(cap[0])[0]; } while (prevCapZero !== cap[0]); - text = escape$2(cap[0]); + text = escape$1(cap[0]); if (cap[1] === 'www.') { href = 'http://' + text; } else { @@ -1025,9 +1027,9 @@ var Tokenizer_1 = class Tokenizer { if (cap) { let text; if (inRawBlock) { - text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape$2(cap[0])) : cap[0]; + text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape$1(cap[0])) : cap[0]; } else { - text = escape$2(this.options.smartypants ? smartypants(cap[0]) : cap[0]); + text = escape$1(this.options.smartypants ? 
smartypants(cap[0]) : cap[0]); } return { type: 'text', @@ -1039,15 +1041,15 @@ var Tokenizer_1 = class Tokenizer { }; const { - noopTest, - edit, + noopTest: noopTest$1, + edit: edit$1, merge: merge$1 } = helpers; /** * Block-Level Grammar */ -const block$1 = { +const block = { newline: /^(?: *(?:\n|$))+/, code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/, fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/, @@ -1066,8 +1068,8 @@ const block$1 = { + '|(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag + ')', def: /^ {0,3}\[(label)\]: *\n? *]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/, - nptable: noopTest, - table: noopTest, + nptable: noopTest$1, + table: noopTest$1, lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/, // regex template, placeholders will be replaced according to different paragraph // interruption rules of commonmark and the original markdown spec: @@ -1075,68 +1077,68 @@ const block$1 = { text: /^[^\n]+/ }; -block$1._label = /(?!\s*\])(?:\\[\[\]]|[^\[\]])+/; -block$1._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/; -block$1.def = edit(block$1.def) - .replace('label', block$1._label) - .replace('title', block$1._title) +block._label = /(?!\s*\])(?:\\[\[\]]|[^\[\]])+/; +block._title = /(?:"(?:\\"?|[^"\\])*"|'[^'\n]*(?:\n[^'\n]+)*\n?'|\([^()]*\))/; +block.def = edit$1(block.def) + .replace('label', block._label) + .replace('title', block._title) .getRegex(); -block$1.bullet = /(?:[*+-]|\d{1,9}[.)])/; -block$1.item = /^( *)(bull) ?[^\n]*(?:\n(?! *bull ?)[^\n]*)*/; -block$1.item = edit(block$1.item, 'gm') - .replace(/bull/g, block$1.bullet) +block.bullet = /(?:[*+-]|\d{1,9}[.)])/; +block.item = /^( *)(bull) ?[^\n]*(?:\n(?! *bull ?)[^\n]*)*/; +block.item = edit$1(block.item, 'gm') + .replace(/bull/g, block.bullet) .getRegex(); -block$1.listItemStart = edit(/^( *)(bull) */) - .replace('bull', block$1.bullet) +block.listItemStart = edit$1(/^( *)(bull) */) + .replace('bull', block.bullet) .getRegex(); -block$1.list = edit(block$1.list) - .replace(/bull/g, block$1.bullet) +block.list = edit$1(block.list) + .replace(/bull/g, block.bullet) .replace('hr', '\\n+(?=\\1?(?:(?:- *){3,}|(?:_ *){3,}|(?:\\* *){3,})(?:\\n+|$))') - .replace('def', '\\n+(?=' + block$1.def.source + ')') + .replace('def', '\\n+(?=' + block.def.source + ')') .getRegex(); -block$1._tag = 'address|article|aside|base|basefont|blockquote|body|caption' +block._tag = 'address|article|aside|base|basefont|blockquote|body|caption' + '|center|col|colgroup|dd|details|dialog|dir|div|dl|dt|fieldset|figcaption' + '|figure|footer|form|frame|frameset|h[1-6]|head|header|hr|html|iframe' + '|legend|li|link|main|menu|menuitem|meta|nav|noframes|ol|optgroup|option' + '|p|param|section|source|summary|table|tbody|td|tfoot|th|thead|title|tr' + '|track|ul'; -block$1._comment = /|$)/; -block$1.html = edit(block$1.html, 'i') - .replace('comment', block$1._comment) - .replace('tag', block$1._tag) +block._comment = /|$)/; +block.html = edit$1(block.html, 'i') + .replace('comment', block._comment) + .replace('tag', block._tag) .replace('attribute', / +[a-zA-Z:_][\w.:-]*(?: *= *"[^"\n]*"| *= *'[^'\n]*'| *= *[^\s"'=<>`]+)?/) .getRegex(); -block$1.paragraph = edit(block$1._paragraph) - .replace('hr', block$1.hr) +block.paragraph = edit$1(block._paragraph) + .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') .replace('|lheading', '') // setex headings don't interrupt commonmark paragraphs .replace('blockquote', ' {0,3}>') .replace('fences', ' 
{0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', ')|<(?:script|pre|style|!--)') - .replace('tag', block$1._tag) // pars can be interrupted by type (6) html blocks + .replace('tag', block._tag) // pars can be interrupted by type (6) html blocks .getRegex(); -block$1.blockquote = edit(block$1.blockquote) - .replace('paragraph', block$1.paragraph) +block.blockquote = edit$1(block.blockquote) + .replace('paragraph', block.paragraph) .getRegex(); /** * Normal Block Grammar */ -block$1.normal = merge$1({}, block$1); +block.normal = merge$1({}, block); /** * GFM Block Grammar */ -block$1.gfm = merge$1({}, block$1.normal, { +block.gfm = merge$1({}, block.normal, { nptable: '^ *([^|\\n ].*\\|.*)\\n' // Header + ' {0,3}([-:]+ *\\|[-| :]*)' // Align + '(?:\\n((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)', // Cells @@ -1145,38 +1147,38 @@ block$1.gfm = merge$1({}, block$1.normal, { + '(?:\\n *((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells }); -block$1.gfm.nptable = edit(block$1.gfm.nptable) - .replace('hr', block$1.hr) +block.gfm.nptable = edit$1(block.gfm.nptable) + .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') .replace('blockquote', ' {0,3}>') .replace('code', ' {4}[^\\n]') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', ')|<(?:script|pre|style|!--)') - .replace('tag', block$1._tag) // tables can be interrupted by type (6) html blocks + .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks .getRegex(); -block$1.gfm.table = edit(block$1.gfm.table) - .replace('hr', block$1.hr) +block.gfm.table = edit$1(block.gfm.table) + .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') .replace('blockquote', ' {0,3}>') .replace('code', ' {4}[^\\n]') .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt .replace('html', ')|<(?:script|pre|style|!--)') - .replace('tag', block$1._tag) // tables can be interrupted by type (6) html blocks + .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks .getRegex(); /** * Pedantic grammar (original John Gruber's loose markdown specification) */ -block$1.pedantic = merge$1({}, block$1.normal, { - html: edit( +block.pedantic = merge$1({}, block.normal, { + html: edit$1( '^ *(?:comment *(?:\\n|\\s*$)' + '|<(tag)[\\s\\S]+? *(?:\\n{2,}|\\s*$)' // closed tag + '|\\s]*)*?/?> *(?:\\n{2,}|\\s*$))') - .replace('comment', block$1._comment) + .replace('comment', block._comment) .replace(/tag/g, '(?!(?:' + 'a|em|strong|small|s|cite|q|dfn|abbr|data|time|code|var|samp|kbd|sub' + '|sup|i|b|u|mark|ruby|rt|rp|bdi|bdo|span|br|wbr|ins|del|img)' @@ -1184,11 +1186,11 @@ block$1.pedantic = merge$1({}, block$1.normal, { .getRegex(), def: /^ *\[([^\]]+)\]: *]+)>?(?: +(["(][^\n]+[")]))? 
*(?:\n+|$)/, heading: /^(#{1,6})(.*)(?:\n+|$)/, - fences: noopTest, // fences not supported - paragraph: edit(block$1.normal._paragraph) - .replace('hr', block$1.hr) + fences: noopTest$1, // fences not supported + paragraph: edit$1(block.normal._paragraph) + .replace('hr', block.hr) .replace('heading', ' *#{1,6} *[^\n]') - .replace('lheading', block$1.lheading) + .replace('lheading', block.lheading) .replace('blockquote', ' {0,3}>') .replace('|fences', '') .replace('|list', '') @@ -1199,10 +1201,10 @@ block$1.pedantic = merge$1({}, block$1.normal, { /** * Inline-Level Grammar */ -const inline$1 = { +const inline = { escape: /^\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/, autolink: /^<(scheme:[^\s\x00-\x1f<>]*|email)>/, - url: noopTest, + url: noopTest$1, tag: '^comment' + '|^' // self-closing tag + '|^<[a-zA-Z][\\w-]*(?:attribute)*?\\s*/?>' // open tag @@ -1222,80 +1224,80 @@ const inline$1 = { }, code: /^(`+)([^`]|[^`][\s\S]*?[^`])\1(?!`)/, br: /^( {2,}|\\)\n(?!\s*$)/, - del: noopTest, + del: noopTest$1, text: /^(`+|[^`])(?:(?= {2,}\n)|[\s\S]*?(?:(?=[\\?@\\[\\]`^{|}~'; -inline$1.punctuation = edit(inline$1.punctuation).replace(/punctuation/g, inline$1._punctuation).getRegex(); +inline._punctuation = '!"#$%&\'()+\\-.,/:;<=>?@\\[\\]`^{|}~'; +inline.punctuation = edit$1(inline.punctuation).replace(/punctuation/g, inline._punctuation).getRegex(); // sequences em should skip over [title](link), `code`, -inline$1.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g; -inline$1.escapedEmSt = /\\\*|\\_/g; +inline.blockSkip = /\[[^\]]*?\]\([^\)]*?\)|`[^`]*?`|<[^>]*?>/g; +inline.escapedEmSt = /\\\*|\\_/g; -inline$1._comment = edit(block$1._comment).replace('(?:-->|$)', '-->').getRegex(); +inline._comment = edit$1(block._comment).replace('(?:-->|$)', '-->').getRegex(); -inline$1.emStrong.lDelim = edit(inline$1.emStrong.lDelim) - .replace(/punct/g, inline$1._punctuation) +inline.emStrong.lDelim = edit$1(inline.emStrong.lDelim) + .replace(/punct/g, inline._punctuation) .getRegex(); -inline$1.emStrong.rDelimAst = edit(inline$1.emStrong.rDelimAst, 'g') - .replace(/punct/g, inline$1._punctuation) +inline.emStrong.rDelimAst = edit$1(inline.emStrong.rDelimAst, 'g') + .replace(/punct/g, inline._punctuation) .getRegex(); -inline$1.emStrong.rDelimUnd = edit(inline$1.emStrong.rDelimUnd, 'g') - .replace(/punct/g, inline$1._punctuation) +inline.emStrong.rDelimUnd = edit$1(inline.emStrong.rDelimUnd, 'g') + .replace(/punct/g, inline._punctuation) .getRegex(); -inline$1._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g; +inline._escapes = /\\([!"#$%&'()*+,\-./:;<=>?@\[\]\\^_`{|}~])/g; -inline$1._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/; -inline$1._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/; -inline$1.autolink = edit(inline$1.autolink) - .replace('scheme', inline$1._scheme) - .replace('email', inline$1._email) +inline._scheme = /[a-zA-Z][a-zA-Z0-9+.-]{1,31}/; +inline._email = /[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+(@)[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+(?![-_])/; +inline.autolink = edit$1(inline.autolink) + .replace('scheme', inline._scheme) + .replace('email', inline._email) .getRegex(); -inline$1._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/; +inline._attribute = /\s+[a-zA-Z:_][\w.:-]*(?:\s*=\s*"[^"]*"|\s*=\s*'[^']*'|\s*=\s*[^\s"'=<>`]+)?/; -inline$1.tag = edit(inline$1.tag) - .replace('comment', 
inline$1._comment) - .replace('attribute', inline$1._attribute) +inline.tag = edit$1(inline.tag) + .replace('comment', inline._comment) + .replace('attribute', inline._attribute) .getRegex(); -inline$1._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/; -inline$1._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/; -inline$1._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/; +inline._label = /(?:\[(?:\\.|[^\[\]\\])*\]|\\.|`[^`]*`|[^\[\]\\`])*?/; +inline._href = /<(?:\\.|[^\n<>\\])+>|[^\s\x00-\x1f]*/; +inline._title = /"(?:\\"?|[^"\\])*"|'(?:\\'?|[^'\\])*'|\((?:\\\)?|[^)\\])*\)/; -inline$1.link = edit(inline$1.link) - .replace('label', inline$1._label) - .replace('href', inline$1._href) - .replace('title', inline$1._title) +inline.link = edit$1(inline.link) + .replace('label', inline._label) + .replace('href', inline._href) + .replace('title', inline._title) .getRegex(); -inline$1.reflink = edit(inline$1.reflink) - .replace('label', inline$1._label) +inline.reflink = edit$1(inline.reflink) + .replace('label', inline._label) .getRegex(); -inline$1.reflinkSearch = edit(inline$1.reflinkSearch, 'g') - .replace('reflink', inline$1.reflink) - .replace('nolink', inline$1.nolink) +inline.reflinkSearch = edit$1(inline.reflinkSearch, 'g') + .replace('reflink', inline.reflink) + .replace('nolink', inline.nolink) .getRegex(); /** * Normal Inline Grammar */ -inline$1.normal = merge$1({}, inline$1); +inline.normal = merge$1({}, inline); /** * Pedantic Inline Grammar */ -inline$1.pedantic = merge$1({}, inline$1.normal, { +inline.pedantic = merge$1({}, inline.normal, { strong: { start: /^__|\*\*/, middle: /^__(?=\S)([\s\S]*?\S)__(?!_)|^\*\*(?=\S)([\s\S]*?\S)\*\*(?!\*)/, @@ -1308,11 +1310,11 @@ inline$1.pedantic = merge$1({}, inline$1.normal, { endAst: /\*(?!\*)/g, endUnd: /_(?!_)/g }, - link: edit(/^!?\[(label)\]\((.*?)\)/) - .replace('label', inline$1._label) + link: edit$1(/^!?\[(label)\]\((.*?)\)/) + .replace('label', inline._label) .getRegex(), - reflink: edit(/^!?\[(label)\]\s*\[([^\]]*)\]/) - .replace('label', inline$1._label) + reflink: edit$1(/^!?\[(label)\]\s*\[([^\]]*)\]/) + .replace('label', inline._label) .getRegex() }); @@ -1320,8 +1322,8 @@ inline$1.pedantic = merge$1({}, inline$1.normal, { * GFM Inline Grammar */ -inline$1.gfm = merge$1({}, inline$1.normal, { - escape: edit(inline$1.escape).replace('])', '~|])').getRegex(), +inline.gfm = merge$1({}, inline.normal, { + escape: edit$1(inline.escape).replace('])', '~|])').getRegex(), _extended_email: /[A-Za-z0-9._+-]+(@)[a-zA-Z0-9-_]+(?:\.[a-zA-Z0-9-_]*[a-zA-Z0-9])+(?![-_])/, url: /^((?:ftp|https?):\/\/|www\.)(?:[a-zA-Z0-9\-]+\.?)+[^\s<]*|^email/, _backpedal: /(?:[^?!.,:;*_~()&]+|\([^)]*\)|&(?![a-zA-Z0-9]+;$)|[?!.,:;*_~)]+(?!$))+/, @@ -1329,30 +1331,29 @@ inline$1.gfm = merge$1({}, inline$1.normal, { text: /^([`~]+|[^`~])(?:(?= {2,}\n)|(?=[a-zA-Z0-9.!#$%&'*+\/=?_`{\|}~-]+@)|[\s\S]*?(?:(?=[\\ { + if (token = extTokenizer.call(this, src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + return true; + } + return false; + })) { + continue; + } + // newline if (token = this.tokenizer.space(src)) { src = src.substring(token.raw.length); @@ -1581,9 +1594,30 @@ var Lexer_1 = class Lexer { } // top-level paragraph - if (top && (token = this.tokenizer.paragraph(src))) { + // prevent paragraph consuming extensions by clipping 'src' to extension start + cutSrc = src; + if (this.options.extensions?.startBlock) { + let startIndex = Infinity; + const tempSrc = src.slice(1); + let tempStart; + 
this.options.extensions.startBlock.forEach(function(getStartIndex) { + tempStart = getStartIndex(tempSrc); + if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); } + }); + if (startIndex < Infinity && startIndex >= 0) { + cutSrc = src.substring(0, startIndex + 1); + } + } + if (top && (token = this.tokenizer.paragraph(cutSrc))) { + lastToken = tokens[tokens.length - 1]; + if (lastParagraphClipped && lastToken.type === 'paragraph') { + lastToken.raw += '\n' + token.raw; + lastToken.text += '\n' + token.text; + } else { + tokens.push(token); + } + lastParagraphClipped = (cutSrc.length !== src.length); src = src.substring(token.raw.length); - tokens.push(token); continue; } @@ -1680,7 +1714,7 @@ var Lexer_1 = class Lexer { * Lexing/Compiling */ inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) { - let token, lastToken; + let token, lastToken, cutSrc; // String with links masked to avoid interference with em and strong let maskedSrc = src; @@ -1693,14 +1727,14 @@ var Lexer_1 = class Lexer { if (links.length > 0) { while ((match = this.tokenizer.rules.inline.reflinkSearch.exec(maskedSrc)) != null) { if (links.includes(match[0].slice(match[0].lastIndexOf('[') + 1, -1))) { - maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex); + maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString$1('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.reflinkSearch.lastIndex); } } } } // Mask out other blocks while ((match = this.tokenizer.rules.inline.blockSkip.exec(maskedSrc)) != null) { - maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex); + maskedSrc = maskedSrc.slice(0, match.index) + '[' + repeatString$1('a', match[0].length - 2) + ']' + maskedSrc.slice(this.tokenizer.rules.inline.blockSkip.lastIndex); } // Mask out escaped em & strong delimiters @@ -1714,6 +1748,19 @@ var Lexer_1 = class Lexer { } keepPrevChar = false; + // extensions + if (this.options?.extensions?.inline + && this.options.extensions.inline.some((extTokenizer) => { + if (token = extTokenizer.call(this, src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + return true; + } + return false; + })) { + continue; + } + // escape if (token = this.tokenizer.escape(src)) { src = src.substring(token.raw.length); @@ -1726,7 +1773,7 @@ var Lexer_1 = class Lexer { src = src.substring(token.raw.length); inLink = token.inLink; inRawBlock = token.inRawBlock; - const lastToken = tokens[tokens.length - 1]; + lastToken = tokens[tokens.length - 1]; if (lastToken && token.type === 'text' && lastToken.type === 'text') { lastToken.raw += token.raw; lastToken.text += token.text; @@ -1749,7 +1796,7 @@ var Lexer_1 = class Lexer { // reflink, nolink if (token = this.tokenizer.reflink(src, this.tokens.links)) { src = src.substring(token.raw.length); - const lastToken = tokens[tokens.length - 1]; + lastToken = tokens[tokens.length - 1]; if (token.type === 'link') { token.tokens = this.inlineTokens(token.text, [], true, inRawBlock); tokens.push(token); @@ -1807,7 +1854,18 @@ var Lexer_1 = class Lexer { } // text - if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) { + // prevent inlineText consuming extensions by clipping 'src' to extension start + cutSrc = src; + if 
(this.options.extensions?.startInline) { + let startIndex = Infinity; + this.options.extensions.startInline.forEach(function(getStartIndex) { + startIndex = Math.max(0, Math.min(getStartIndex(src), startIndex)); + }); + if (startIndex < Infinity && startIndex > 0) { + cutSrc = src.substring(0, startIndex); + } + } + if (token = this.tokenizer.inlineText(cutSrc, inRawBlock, smartypants)) { src = src.substring(token.raw.length); if (token.raw.slice(-1) !== '_') { // Track prevChar before string of ____ started prevChar = token.raw.slice(-1); @@ -1838,10 +1896,10 @@ var Lexer_1 = class Lexer { } }; -const { defaults: defaults$2 } = defaults$5.exports; +const { defaults: defaults$3 } = defaults; const { - cleanUrl, - escape: escape$1 + cleanUrl: cleanUrl$1, + escape: escape$2 } = helpers; /** @@ -1849,7 +1907,7 @@ const { */ var Renderer_1 = class Renderer { constructor(options) { - this.options = options || defaults$2; + this.options = options || defaults$3; } code(code, infostring, escaped) { @@ -1866,15 +1924,15 @@ var Renderer_1 = class Renderer { if (!lang) { return '
'
-        + (escaped ? code : escape$1(code, true))
+        + (escaped ? code : escape$2(code, true))
         + '
\n'; } return '
'
-      + (escaped ? code : escape$1(code, true))
+      + (escaped ? code : escape$2(code, true))
       + '
\n'; } @@ -1974,11 +2032,11 @@ var Renderer_1 = class Renderer { } link(href, title, text) { - href = cleanUrl(this.options.sanitize, this.options.baseUrl, href); + href = cleanUrl$1(this.options.sanitize, this.options.baseUrl, href); if (href === null) { return text; } - let out = 'An error occurred:

'
-        + escape(e.message + '', true)
+        + escape$3(e.message + '', true)
         + '
'; } throw e; @@ -2494,59 +2562,113 @@ function marked(src, opt, callback) { marked.options = marked.setOptions = function(opt) { - merge(marked.defaults, opt); + merge$2(marked.defaults, opt); changeDefaults(marked.defaults); return marked; }; marked.getDefaults = getDefaults; -marked.defaults = defaults; +marked.defaults = defaults$5; /** * Use Extension */ marked.use = function(extension) { - const opts = merge({}, extension); - if (extension.renderer) { - const renderer = marked.defaults.renderer || new Renderer(); - for (const prop in extension.renderer) { - const prevRenderer = renderer[prop]; - renderer[prop] = (...args) => { - let ret = extension.renderer[prop].apply(renderer, args); - if (ret === false) { - ret = prevRenderer.apply(renderer, args); - } - return ret; - }; - } - opts.renderer = renderer; + if (!Array.isArray(extension)) { // Wrap in array if not already to unify processing + extension = [extension]; } - if (extension.tokenizer) { - const tokenizer = marked.defaults.tokenizer || new Tokenizer(); - for (const prop in extension.tokenizer) { - const prevTokenizer = tokenizer[prop]; - tokenizer[prop] = (...args) => { - let ret = extension.tokenizer[prop].apply(tokenizer, args); - if (ret === false) { - ret = prevTokenizer.apply(tokenizer, args); + + const opts = merge$2({}, ...extension); + const extensions = marked.defaults.extensions || { renderers: {}, walkableTokens: {} }; + let hasExtensions; + + extension.forEach((pack) => { + // ==-- Parse "addon" extensions --== // + if (pack.extensions) { + hasExtensions = true; + pack.extensions.forEach((ext) => { + if (ext.renderer && ext.name) { // Renderers must have 'name' property + extensions.renderers[ext.name] = ext.renderer; } - return ret; - }; + if (ext.walkableTokens && ext.name) { // walkableTokens must have 'name' + extensions.walkableTokens[ext.name] = ext.walkableTokens; + } + if (ext.tokenizer && ext.level) { // Tokenizers must have 'level' property + if (extensions[ext.level]) { + extensions[ext.level].push(ext.tokenizer); + } else { + extensions[ext.level] = [ext.tokenizer]; + } + if (ext.start) { // Function to check for start of token + if (ext.level === 'block') { + if (extensions.startBlock) { + extensions.startBlock.push(ext.start); + } else { + extensions.startBlock = [ext.start]; + } + } else if (ext.level === 'inline') { + if (extensions.startInline) { + extensions.startInline.push(ext.start); + } else { + extensions.startInline = [ext.start]; + } + } + } + } + }); } - opts.tokenizer = tokenizer; - } - if (extension.walkTokens) { - const walkTokens = marked.defaults.walkTokens; - opts.walkTokens = (token) => { - extension.walkTokens(token); - if (walkTokens) { - walkTokens(token); + + // ==-- Parse "overwrite" extensions --== // + if (pack.renderer) { + const renderer = marked.defaults.renderer || new Renderer_1(); + for (const prop in pack.renderer) { + const prevRenderer = renderer[prop]; + // Replace renderer with func to run extension, but fall back if fail + renderer[prop] = (...args) => { + let ret = pack.renderer[prop].apply(renderer, args); + if (ret === false) { + ret = prevRenderer.apply(renderer, args); + } + return ret; + }; } - }; - } - marked.setOptions(opts); + opts.renderer = renderer; + } + if (pack.tokenizer) { + const tokenizer = marked.defaults.tokenizer || new Tokenizer_1(); + for (const prop in pack.tokenizer) { + const prevTokenizer = tokenizer[prop]; + // Replace tokenizer with func to run extension, but fall back if fail + tokenizer[prop] = (...args) => { + let ret = 
pack.tokenizer[prop].apply(tokenizer, args); + if (ret === false) { + ret = prevTokenizer.apply(tokenizer, args); + } + return ret; + }; + } + opts.tokenizer = tokenizer; + } + + // ==-- Parse WalkTokens extensions --== // + if (pack.walkTokens) { + const walkTokens = marked.defaults.walkTokens; + opts.walkTokens = (token) => { + pack.walkTokens(token); + if (walkTokens) { + walkTokens(token); + } + }; + } + + if (hasExtensions) { + opts.extensions = extensions; + } + + marked.setOptions(opts); + }); }; /** @@ -2573,7 +2695,12 @@ marked.walkTokens = function(tokens, callback) { break; } default: { - if (token.tokens) { + if (marked.defaults?.extensions?.walkableTokens?.[token.type]) { // Walk any extensions + marked.defaults?.extensions.walkableTokens[token.type].forEach(function(walkableTokens) { + marked.walkTokens(token[walkableTokens], callback); + }); + } + if (token.tokens && !marked.defaults?.extensions?.walkableTokens[token.type]?.tokens) { marked.walkTokens(token.tokens, callback); } } @@ -2594,8 +2721,8 @@ marked.parseInline = function(src, opt) { + Object.prototype.toString.call(src) + ', string expected'); } - opt = merge({}, marked.defaults, opt || {}); - checkSanitizeDeprecation(opt); + opt = merge$2({}, marked.defaults, opt || {}); + checkSanitizeDeprecation$1(opt); try { const tokens = Lexer.lexInline(src, opt); @@ -2607,7 +2734,7 @@ marked.parseInline = function(src, opt) { e.message += '\nPlease report this to https://github.com/markedjs/marked.'; if (opt.silent) { return '

An error occurred:

'
-        + escape(e.message + '', true)
+        + escape$3(e.message + '', true)
         + '
'; } throw e; diff --git a/src/Lexer.js b/src/Lexer.js index 6c02ed65e5..03a06960f6 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -123,9 +123,21 @@ module.exports = class Lexer { if (this.options.pedantic) { src = src.replace(/^ +$/gm, ''); } - let token, i, l, lastToken; + let token, i, l, lastToken, cutSrc, lastParagraphClipped; while (src) { + if (this.options?.extensions?.block + && this.options.extensions.block.some((extTokenizer) => { + if (token = extTokenizer.call(this, src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + return true; + } + return false; + })) { + continue; + } + // newline if (token = this.tokenizer.space(src)) { src = src.substring(token.raw.length); @@ -230,9 +242,30 @@ module.exports = class Lexer { } // top-level paragraph - if (top && (token = this.tokenizer.paragraph(src))) { + // prevent paragraph consuming extensions by clipping 'src' to extension start + cutSrc = src; + if (this.options.extensions?.startBlock) { + let startIndex = Infinity; + const tempSrc = src.slice(1); + let tempStart; + this.options.extensions.startBlock.forEach(function(getStartIndex) { + tempStart = getStartIndex.call(this, tempSrc); + if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); } + }); + if (startIndex < Infinity && startIndex >= 0) { + cutSrc = src.substring(0, startIndex + 1); + } + } + if (top && (token = this.tokenizer.paragraph(cutSrc))) { + lastToken = tokens[tokens.length - 1]; + if (lastParagraphClipped && lastToken.type === 'paragraph') { + lastToken.raw += '\n' + token.raw; + lastToken.text += '\n' + token.text; + } else { + tokens.push(token); + } + lastParagraphClipped = (cutSrc.length !== src.length); src = src.substring(token.raw.length); - tokens.push(token); continue; } @@ -332,7 +365,7 @@ module.exports = class Lexer { * Lexing/Compiling */ inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) { - let token, lastToken; + let token, lastToken, cutSrc; // String with links masked to avoid interference with em and strong let maskedSrc = src; @@ -366,6 +399,19 @@ module.exports = class Lexer { } keepPrevChar = false; + // extensions + if (this.options?.extensions?.inline + && this.options.extensions.inline.some((extTokenizer) => { + if (token = extTokenizer.call(this, src, tokens)) { + src = src.substring(token.raw.length); + tokens.push(token); + return true; + } + return false; + })) { + continue; + } + // escape if (token = this.tokenizer.escape(src)) { src = src.substring(token.raw.length); @@ -378,7 +424,7 @@ module.exports = class Lexer { src = src.substring(token.raw.length); inLink = token.inLink; inRawBlock = token.inRawBlock; - const lastToken = tokens[tokens.length - 1]; + lastToken = tokens[tokens.length - 1]; if (lastToken && token.type === 'text' && lastToken.type === 'text') { lastToken.raw += token.raw; lastToken.text += token.text; @@ -401,7 +447,7 @@ module.exports = class Lexer { // reflink, nolink if (token = this.tokenizer.reflink(src, this.tokens.links)) { src = src.substring(token.raw.length); - const lastToken = tokens[tokens.length - 1]; + lastToken = tokens[tokens.length - 1]; if (token.type === 'link') { token.tokens = this.inlineTokens(token.text, [], true, inRawBlock); tokens.push(token); @@ -459,7 +505,21 @@ module.exports = class Lexer { } // text - if (token = this.tokenizer.inlineText(src, inRawBlock, smartypants)) { + // prevent inlineText consuming extensions by clipping 'src' to extension start + cutSrc = src; + if 
(this.options.extensions?.startInline) { + let startIndex = Infinity; + const tempSrc = src.slice(1); + let tempStart; + this.options.extensions.startInline.forEach(function(getStartIndex) { + tempStart = getStartIndex.call(this, tempSrc); + if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); } + }); + if (startIndex < Infinity && startIndex >= 0) { + cutSrc = src.substring(0, startIndex + 1); + } + } + if (token = this.tokenizer.inlineText(cutSrc, inRawBlock, smartypants)) { src = src.substring(token.raw.length); if (token.raw.slice(-1) !== '_') { // Track prevChar before string of ____ started prevChar = token.raw.slice(-1); diff --git a/src/Parser.js b/src/Parser.js index 81fcb7da1e..f766269e0b 100644 --- a/src/Parser.js +++ b/src/Parser.js @@ -57,11 +57,22 @@ module.exports = class Parser { item, checked, task, - checkbox; + checkbox, + ret; const l = tokens.length; for (i = 0; i < l; i++) { token = tokens[i]; + + // Run any renderer extensions + if (this.options.extensions?.renderers?.[token.type]) { + ret = this.options.extensions.renderers[token.type].call(this, token); + if (ret !== false || !['space', 'hr', 'heading', 'code', 'table', 'blockquote', 'list', 'html', 'paragraph', 'text'].includes(token.type)) { + out += ret || ''; + continue; + } + } + switch (token.type) { case 'space': { continue; @@ -179,6 +190,7 @@ module.exports = class Parser { out += top ? this.renderer.paragraph(body) : body; continue; } + default: { const errMsg = 'Token with "' + token.type + '" type was not found.'; if (this.options.silent) { @@ -201,11 +213,22 @@ module.exports = class Parser { renderer = renderer || this.renderer; let out = '', i, - token; + token, + ret; const l = tokens.length; for (i = 0; i < l; i++) { token = tokens[i]; + + // Run any renderer extensions + if (this.options.extensions?.renderers?.[token.type]) { + ret = this.options.extensions.renderers[token.type].call(this, token); + if (ret !== false || !['escape', 'html', 'link', 'image', 'strong', 'em', 'codespan', 'br', 'del', 'text'].includes(token.type)) { + out += ret || ''; + continue; + } + } + switch (token.type) { case 'escape': { out += renderer.text(token.text); diff --git a/src/defaults.js b/src/defaults.js index fe376563da..a4b451fe2f 100644 --- a/src/defaults.js +++ b/src/defaults.js @@ -2,6 +2,7 @@ function getDefaults() { return { baseUrl: null, breaks: false, + extensions: null, gfm: true, headerIds: true, headerPrefix: '', diff --git a/src/marked.js b/src/marked.js index 0ba08156af..540eb20852 100644 --- a/src/marked.js +++ b/src/marked.js @@ -141,46 +141,114 @@ marked.defaults = defaults; * Use Extension */ -marked.use = function(extension) { - const opts = merge({}, extension); - if (extension.renderer) { - const renderer = marked.defaults.renderer || new Renderer(); - for (const prop in extension.renderer) { - const prevRenderer = renderer[prop]; - renderer[prop] = (...args) => { - let ret = extension.renderer[prop].apply(renderer, args); - if (ret === false) { - ret = prevRenderer.apply(renderer, args); +marked.use = function(...args) { + const opts = merge({}, ...args); + const extensions = marked.defaults.extensions || { renderers: {}, childTokens: {} }; + let hasExtensions; + + args.forEach((pack) => { + // ==-- Parse "addon" extensions --== // + if (pack.extensions) { + hasExtensions = true; + pack.extensions.forEach((ext) => { + if (!ext.name) { + throw new Error('extension name required'); } - return ret; - }; + if (ext.renderer) { // Renderer extensions 
+ const prevRenderer = extensions.renderers?.[ext.name]; + if (prevRenderer) { + // Replace extension with func to run new extension but fall back if false + extensions.renderers[ext.name] = function(...args) { + let ret = ext.renderer.apply(this, args); + if (ret === false) { + ret = prevRenderer.apply(this, args); + } + return ret; + }; + } else { + extensions.renderers[ext.name] = ext.renderer; + } + } + if (ext.tokenizer) { // Tokenizer Extensions + if (!ext.level || (ext.level !== 'block' && ext.level !== 'inline')) { + throw new Error("extension level must be 'block' or 'inline'"); + } + if (extensions[ext.level]) { + extensions[ext.level].unshift(ext.tokenizer); + } else { + extensions[ext.level] = [ext.tokenizer]; + } + if (ext.start) { // Function to check for start of token + if (ext.level === 'block') { + if (extensions.startBlock) { + extensions.startBlock.push(ext.start); + } else { + extensions.startBlock = [ext.start]; + } + } else if (ext.level === 'inline') { + if (extensions.startInline) { + extensions.startInline.push(ext.start); + } else { + extensions.startInline = [ext.start]; + } + } + } + } + if (ext.childTokens) { // Child tokens to be visited by walkTokens + extensions.childTokens[ext.name] = ext.childTokens; + } + }); } - opts.renderer = renderer; - } - if (extension.tokenizer) { - const tokenizer = marked.defaults.tokenizer || new Tokenizer(); - for (const prop in extension.tokenizer) { - const prevTokenizer = tokenizer[prop]; - tokenizer[prop] = (...args) => { - let ret = extension.tokenizer[prop].apply(tokenizer, args); - if (ret === false) { - ret = prevTokenizer.apply(tokenizer, args); + + // ==-- Parse "overwrite" extensions --== // + if (pack.renderer) { + const renderer = marked.defaults.renderer || new Renderer(); + for (const prop in pack.renderer) { + const prevRenderer = renderer[prop]; + // Replace renderer with func to run extension, but fall back if false + renderer[prop] = (...args) => { + let ret = pack.renderer[prop].apply(renderer, args); + if (ret === false) { + ret = prevRenderer.apply(renderer, args); + } + return ret; + }; + } + opts.renderer = renderer; + } + if (pack.tokenizer) { + const tokenizer = marked.defaults.tokenizer || new Tokenizer(); + for (const prop in pack.tokenizer) { + const prevTokenizer = tokenizer[prop]; + // Replace tokenizer with func to run extension, but fall back if false + tokenizer[prop] = (...args) => { + let ret = pack.tokenizer[prop].apply(tokenizer, args); + if (ret === false) { + ret = prevTokenizer.apply(tokenizer, args); + } + return ret; + }; + } + opts.tokenizer = tokenizer; + } + + // ==-- Parse WalkTokens extensions --== // + if (pack.walkTokens) { + const walkTokens = marked.defaults.walkTokens; + opts.walkTokens = (token) => { + pack.walkTokens.call(this, token); + if (walkTokens) { + walkTokens(token); } - return ret; }; } - opts.tokenizer = tokenizer; - } - if (extension.walkTokens) { - const walkTokens = marked.defaults.walkTokens; - opts.walkTokens = (token) => { - extension.walkTokens(token); - if (walkTokens) { - walkTokens(token); - } - }; - } - marked.setOptions(opts); + + if (hasExtensions) { + opts.extensions = extensions; + } + + marked.setOptions(opts); + }); }; /** @@ -207,7 +275,12 @@ marked.walkTokens = function(tokens, callback) { break; } default: { - if (token.tokens) { + if (marked.defaults?.extensions?.childTokens?.[token.type]) { // Walk any extensions + marked.defaults?.extensions.childTokens[token.type].forEach(function(childTokens) { + marked.walkTokens(token[childTokens], 
callback);
+        });
+      }
+      if (token.tokens && !marked.defaults?.extensions?.childTokens?.[token.type]) {
         marked.walkTokens(token.tokens, callback);
       }
     }
diff --git a/test/unit/marked-spec.js b/test/unit/marked-spec.js
index 86813cc29e..0845909222 100644
--- a/test/unit/marked-spec.js
+++ b/test/unit/marked-spec.js
@@ -137,6 +137,515 @@ describe('parseInline', () => {
 });
 
 describe('use extension', () => {
+  it('should use custom block tokenizer + renderer extensions', () => {
+    const underline = {
+      name: 'underline',
+      level: 'block',
+      tokenizer(src) {
+        const rule = /^:([^\n]*)(?:\n|$)/;
+        const match = rule.exec(src);
+        if (match) {
+          return {
+            type: 'underline',
+            raw: match[0], // This is the text that you want your token to consume from the source
+            text: match[1].trim() // You can add additional properties to your tokens to pass along to the renderer
+          };
+        }
+      },
+      renderer(token) {
+        return `<u>${token.text}</u>\n`;
+      }
+    };
+    marked.use({ extensions: [underline] });
+    let html = marked('Not Underlined\n:Underlined\nNot Underlined');
+    expect(html).toBe('<p>Not Underlined\n:Underlined\nNot Underlined</p>\n');
+
+    html = marked('Not Underlined\n\n:Underlined\n\nNot Underlined');
+    expect(html).toBe('<p>Not Underlined</p>\n<u>Underlined</u>\n<p>Not Underlined</p>\n');
+  });
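+
+  // The `start` function tells the lexer where a potential custom token begins,
+  // which is what lets the extension below interrupt a paragraph mid-stream.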
+  it('should interrupt paragraphs if using "start" property', () => {
+    const underline = {
+      extensions: [{
+        name: 'underline',
+        level: 'block',
+        start(src) { return src.match(/:/)?.index; },
+        tokenizer(src) {
+          const rule = /^:([^\n]*):(?:\n|$)/;
+          const match = rule.exec(src);
+          if (match) {
+            return {
+              type: 'underline',
+              raw: match[0], // This is the text that you want your token to consume from the source
+              text: match[1].trim() // You can add additional properties to your tokens to pass along to the renderer
+            };
+          }
+        },
+        renderer(token) {
+          return `<u>${token.text}</u>\n`;
+        }
+      }]
+    };
+    marked.use(underline);
+    const html = marked('Not Underlined A\n:Underlined B:\nNot Underlined C\n:Not Underlined D');
+    expect(html).toBe('<p>Not Underlined A</p>\n<u>Underlined B</u>\n<p>Not Underlined C\n:Not Underlined D</p>\n');
+  });
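+
+  // Inline-level extensions run inside paragraph content; `start` clips the
+  // plain-text token so the custom tokenizer gets a chance to match.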
+  it('should use custom inline tokenizer + renderer extensions', () => {
+    const underline = {
+      name: 'underline',
+      level: 'inline',
+      start(src) { return src.match(/=/)?.index; },
+      tokenizer(src) {
+        const rule = /^=([^=]+)=/;
+        const match = rule.exec(src);
+        if (match) {
+          return {
+            type: 'underline',
+            raw: match[0], // This is the text that you want your token to consume from the source
+            text: match[1].trim() // You can add additional properties to your tokens to pass along to the renderer
+          };
+        }
+      },
+      renderer(token) {
+        return `<u>${token.text}</u>`;
+      }
+    };
+    marked.use({ extensions: [underline] });
+    const html = marked('Not Underlined =Underlined= Not Underlined');
+    expect(html).toBe('<p>Not Underlined <u>Underlined</u> Not Underlined</p>\n');
+  });
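+
+  // Block- and inline-level extensions can interact: the block tokenizer builds
+  // child tokens with `this.inlineTokens`, and its renderer re-enters the
+  // parser with `this.parseInline`.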
+  it('should handle interacting block and inline extensions', () => {
+    const descriptionlist = {
+      name: 'descriptionList',
+      level: 'block',
+      start(src) { return src.match(/:[^:\n]/)?.index; },
+      tokenizer(src, tokens) {
+        const rule = /^(?::[^:\n]+:[^:\n]*(?:\n|$))+/;
+        const match = rule.exec(src);
+        if (match) {
+          return {
+            type: 'descriptionList',
+            raw: match[0], // This is the text that you want your token to consume from the source
+            text: match[0].trim(), // You can add additional properties to your tokens to pass along to the renderer
+            tokens: this.inlineTokens(match[0].trim())
+          };
+        }
+      },
+      renderer(token) {
+        return `<dl>${this.parseInline(token.tokens)}\n</dl>`;
+      }
+    };
+
+    const description = {
+      name: 'description',
+      level: 'inline',
+      start(src) { return src.match(/:/)?.index; },
+      tokenizer(src, tokens) {
+        const rule = /^:([^:\n]+):([^:\n]*)(?:\n|$)/;
+        const match = rule.exec(src);
+        if (match) {
+          return {
+            type: 'description',
+            raw: match[0], // This is the text that you want your token to consume from the source
+            dt: this.inlineTokens(match[1].trim()), // You can add additional properties to your tokens to pass along to the renderer
+            dd: this.inlineTokens(match[2].trim())
+          };
+        }
+      },
+      renderer(token) {
+        return `\n<dt>${this.parseInline(token.dt)}</dt><dd>${this.parseInline(token.dd)}</dd>`;
+      }
+    };
+    marked.use({ extensions: [descriptionlist, description] });
+    const html = marked('A Description List with One Description:\n' +
+      ': Topic 1 : Description 1\n' +
+      ': **Topic 2** : *Description 2*');
+    expect(html).toBe('<p>A Description List with One Description:</p>\n' +
+      '<dl>' +
+      '\n<dt>Topic 1</dt><dd>Description 1</dd>' +
+      '\n<dt><strong>Topic 2</strong></dt><dd><em>Description 2</em></dd>' +
+      '\n</dl>');
+  });
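+
+  // Ordinary options (here `sanitize` and `silent`) can ride along in the same
+  // object that carries the `extensions` array.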
+  it('should allow other options mixed into the extension', () => {
+    const extension = {
+      name: 'underline',
+      level: 'block',
+      start(src) { return src.indexOf(':'); },
+      tokenizer(src) {
+        const rule = /^:([^\n]*):(?:\n|$)/;
+        const match = rule.exec(src);
+        if (match) {
+          return {
+            type: 'underline',
+            raw: match[0], // This is the text that you want your token to consume from the source
+            text: match[1].trim() // You can add additional properties to your tokens to pass along to the renderer
+          };
+        }
+      },
+      renderer(token) {
+        return `<u>${token.text}</u>\n`;
+      }
+    };
+    marked.use({ sanitize: true, silent: true, extensions: [extension] });
+    const html = marked(':test:\ntest\n<div></div>');
+    expect(html).toBe('<u>test</u>\n<p>test\n&lt;div&gt;&lt;/div&gt;</p>\n');
+  });
+
+  it('should handle renderers that return false', () => {
+    const extension = {
+      name: 'test',
+      level: 'block',
+      tokenizer(src) {
+        const rule = /^:([^\n]*):(?:\n|$)/;
+        const match = rule.exec(src);
+        if (match) {
+          return {
+            type: 'test',
+            raw: match[0], // This is the text that you want your token to consume from the source
+            text: match[1].trim() // You can add additional properties to your tokens to pass along to the renderer
+          };
+        }
+      },
+      renderer(token) {
+        if (token.text === 'test') {
+          return 'test';
+        }
+        return false;
+      }
+    };
+    const fallbackRenderer = {
+      name: 'test',
+      level: 'block',
+      renderer(token) {
+        if (token.text === 'Test') {
+          return 'fallback';
+        }
+        return false;
+      }
+    };
+    marked.use({ extensions: [fallbackRenderer, extension] });
+    const html = marked(':Test:\n\n:test:\n\n:none:');
+    expect(html).toBe('fallbacktest');
+  });
+
+  it('should fall back when tokenizers return false', () => {
+    const extension = {
+      name: 'test',
+      level: 'block',
+      tokenizer(src) {
+        const rule = /^:([^\n]*):(?:\n|$)/;
+        const match = rule.exec(src);
+        if (match) {
+          return {
+            type: 'test',
+            raw: match[0], // This is the text that you want your token to consume from the source
+            text: match[1].trim() // You can add additional properties to your tokens to pass along to the renderer
+          };
+        }
+        return false;
+      },
+      renderer(token) {
+        return token.text;
+      }
+    };
+    const extension2 = {
+      name: 'test',
+      level: 'block',
+      tokenizer(src) {
+        const rule = /^:([^\n]*):(?:\n|$)/;
+        const match = rule.exec(src);
+        if (match) {
+          if (match[1].match(/^[A-Z]/)) {
+            return {
+              type: 'test',
+              raw: match[0],
+              text: match[1].trim().toUpperCase()
+            };
+          }
+        }
+        return false;
+      }
+    };
+    marked.use({ extensions: [extension, extension2] });
+    const html = marked(':Test:\n\n:test:');
+    expect(html).toBe('TESTtest');
+  });
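+
+  // An extension named after a built-in token type ('heading', 'code') overrides
+  // that token's handling; returning false defers to the built-in behavior.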
+  it('should override original tokenizer/renderer with same name, but fall back if returns false', () => {
+    const extension = {
+      extensions: [{
+        name: 'heading',
+        level: 'block',
+        tokenizer(src) {
+          return false; // fall back to default `heading` tokenizer
+        },
+        renderer(token) {
+          return '<h1>' + token.text + ' RENDERER EXTENSION</h1>\n';
+        }
+      },
+      {
+        name: 'code',
+        level: 'block',
+        tokenizer(src) {
+          const rule = /^:([^\n]*):(?:\n|$)/;
+          const match = rule.exec(src);
+          if (match) {
+            return {
+              type: 'code',
+              raw: match[0],
+              text: match[1].trim() + ' TOKENIZER EXTENSION'
+            };
+          }
+        },
+        renderer(token) {
+          return false; // fall back to default `code` renderer
+        }
+      }]
+    };
+    marked.use(extension);
+    const html = marked('# extension1\n:extension2:');
+    expect(html).toBe('<h1>extension1 RENDERER EXTENSION</h1>\n<pre><code>extension2 TOKENIZER EXTENSION\n</code></pre>\n');
+  });
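+
+  // `childTokens` names the token properties walkTokens should descend into
+  // instead of the default `token.tokens`.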
+  it('should walk only specified child tokens', () => {
+    const walkableDescription = {
+      extensions: [{
+        name: 'walkableDescription',
+        level: 'inline',
+        start(src) { return src.match(/:/)?.index; },
+        tokenizer(src, tokens) {
+          const rule = /^:([^:\n]+):([^:\n]*)(?:\n|$)/;
+          const match = rule.exec(src);
+          if (match) {
+            return {
+              type: 'walkableDescription',
+              raw: match[0], // This is the text that you want your token to consume from the source
+              dt: this.inlineTokens(match[1].trim()), // You can add additional properties to your tokens to pass along to the renderer
+              dd: this.inlineTokens(match[2].trim()),
+              tokens: this.inlineTokens('unwalked')
+            };
+          }
+        },
+        renderer(token) {
+          return `\n<dt>${this.parseInline(token.dt)} - ${this.parseInline(token.tokens)}</dt><dd>${this.parseInline(token.dd)}</dd>`;
+        },
+        childTokens: ['dd', 'dt']
+      }],
+      walkTokens(token) {
+        if (token.type === 'text') {
+          token.text += ' walked';
+        }
+      }
+    };
+    marked.use(walkableDescription);
+    const html = marked(': Topic 1 : Description 1\n' +
+      ': **Topic 2** : *Description 2*');
+    expect(html).toBe('<p>\n<dt>Topic 1 walked - unwalked</dt><dd>Description 1 walked</dd>' +
+      '\n<dt><strong>Topic 2 walked</strong> - unwalked</dt><dd><em>Description 2 walked</em></dd></p>\n');
+  });
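+
+  // Extensions registered by separate marked.use calls are merged: same-name
+  // extensions stack, with later registrations tried first and earlier ones
+  // serving as fallbacks when they return false.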
+  describe('multiple extensions', () => {
+    function createExtension(name) {
+      return {
+        extensions: [{
+          name: `block-${name}`,
+          level: 'block',
+          start(src) { return src.indexOf('::'); },
+          tokenizer(src, tokens) {
+            if (src.startsWith(`::${name}\n`)) {
+              const text = `:${name}`;
+              return {
+                type: `block-${name}`,
+                raw: `::${name}\n`,
+                text,
+                tokens: this.inlineTokens(text)
+              };
+            }
+          },
+          renderer(token) {
+            return `<${token.type}>${this.parseInline(token.tokens)}</${token.type}>\n`;
+          }
+        }, {
+          name: `inline-${name}`,
+          level: 'inline',
+          start(src) { return src.indexOf(':'); },
+          tokenizer(src, tokens) {
+            if (src.startsWith(`:${name}`)) {
+              return {
+                type: `inline-${name}`,
+                raw: `:${name}`,
+                text: `used ${name}`
+              };
+            }
+          },
+          renderer(token) {
+            return token.text;
+          }
+        }],
+        tokenizer: {
+          heading(src) {
+            if (src.startsWith(`# ${name}`)) {
+              return {
+                type: 'heading',
+                raw: `# ${name}`,
+                text: `used ${name}`,
+                depth: 1
+              };
+            }
+            return false;
+          }
+        },
+        renderer: {
+          heading(text, depth, raw, slugger) {
+            if (text === name) {
+              return `<h${depth}>${text}</h${depth}>\n`;
+            }
+            return false;
+          }
+        },
+        walkTokens(token) {
+          if (token.text === `used ${name}`) {
+            token.text += ' walked';
+          }
+        },
+        headerIds: false
+      };
+    }
+
+    function createFalseExtension(name) {
+      return {
+        extensions: [{
+          name: `block-${name}`,
+          level: 'block',
+          start(src) { return src.indexOf('::'); },
+          tokenizer(src, tokens) {
+            return false;
+          },
+          renderer(token) {
+            return false;
+          }
+        }, {
+          name: `inline-${name}`,
+          level: 'inline',
+          start(src) { return src.indexOf(':'); },
+          tokenizer(src, tokens) {
+            return false;
+          },
+          renderer(token) {
+            return false;
+          }
+        }],
+        headerIds: false
+      };
+    }
+
+    function runTest() {
+      const html = marked(`
+::extension1
+::extension2
+
+:extension1
+:extension2
+
+# extension1
+
+# extension2
+
+# no extension
+`);
+
+      expect(`\n${html}\n`.replace(/\n+/g, '\n')).toBe(`
+<block-extension1>used extension1 walked</block-extension1>
+<block-extension2>used extension2 walked</block-extension2>
+<p>used extension1 walked
+used extension2 walked</p>
+<h1>used extension1 walked</h1>
+<h1>used extension2 walked</h1>
+<h1>no extension</h1>
+`);
+    }
+
+    it('should merge extensions when calling marked.use multiple times', () => {
+      marked.use(createExtension('extension1'));
+      marked.use(createExtension('extension2'));
+
+      runTest();
+    });
+
+    it('should merge extensions when calling marked.use with multiple extensions', () => {
+      marked.use(
+        createExtension('extension1'),
+        createExtension('extension2')
+      );
+
+      runTest();
+    });
+
+    it('should fall back to any extensions with the same name if the first returns false', () => {
+      marked.use(
+        createExtension('extension1'),
+        createExtension('extension2'),
+        createFalseExtension('extension1'),
+        createFalseExtension('extension2')
+      );
+
+      runTest();
+    });
+  });
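+
+  // walkTokens may freely mutate or retype tokens; here it pops a trailing
+  // style token and retags its parent so the custom 'styled' renderer runs.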
+  it('should allow deleting/editing tokens', () => {
+    const styleTags = {
+      extensions: [{
+        name: 'inlineStyleTag',
+        level: 'inline',
+        start(src) { return src.match(/ *{[^\{]/)?.index; },
+        tokenizer(src, tokens) {
+          const rule = /^ *{([^\{\}\n]+)}$/;
+          const match = rule.exec(src);
+          if (match) {
+            return {
+              type: 'inlineStyleTag',
+              raw: match[0], // This is the text that you want your token to consume from the source
+              text: match[1]
+            };
+          }
+        }
+      },
+      {
+        name: 'styled',
+        renderer(token) {
+          token.type = token.originalType;
+          const text = this.parse([token]);
+          const openingTag = /(<[^\s<>]+)([^\n<>]*>.*)/s.exec(text);
+          if (openingTag) {
+            return `${openingTag[1]} ${token.style}${openingTag[2]}`;
+          }
+          return text;
+        }
+      }],
+      walkTokens(token) {
+        if (token.tokens) {
+          const finalChildToken = token.tokens[token.tokens.length - 1];
+          if (finalChildToken?.type === 'inlineStyleTag') {
+            token.originalType = token.type;
+            token.type = 'styled';
+            token.style = `style="color:${finalChildToken.text};"`;
+            token.tokens.pop();
+          }
+        }
+      },
+      headerIds: false
+    };
+    marked.use(styleTags);
+    const html = marked('This is a *paragraph* with blue text. {blue}\n' +
+      '# This is a *header* with red text {red}');
+    expect(html).toBe('<p style="color:blue;">This is a <em>paragraph</em> with blue text.</p>\n' +
+      '<h1 style="color:red;">This is a <em>header</em> with red text</h1>\n');
+  });
+
+  it('should use renderer', () => {
+    const extension = {
+      renderer: {