diff --git a/docs/USING_PRO.md b/docs/USING_PRO.md
index 2966aabf73..c134b2d2b7 100644
--- a/docs/USING_PRO.md
+++ b/docs/USING_PRO.md
@@ -226,7 +226,7 @@ console.log(marked('$ latex code $\n\n` other code `'));
 ### Inline level tokenizer methods

 - **escape**(*string* src)
-- **tag**(*string* src, *bool* inLink, *bool* inRawBlock)
+- **tag**(*string* src)
 - **link**(*string* src)
 - **reflink**(*string* src, *object* links)
 - **emStrong**(*string* src, *string* maskedSrc, *string* prevChar)
@@ -235,7 +235,7 @@ console.log(marked('$ latex code $\n\n` other code `'));
 - **del**(*string* src)
 - **autolink**(*string* src, *function* mangle)
 - **url**(*string* src, *function* mangle)
-- **inlineText**(*string* src, *bool* inRawBlock, *function* smartypants)
+- **inlineText**(*string* src, *function* smartypants)

 `mangle` is a method that changes text to HTML character references:

@@ -331,11 +335,15 @@ The returned token can also contain any other custom parameters of your choice t

 The tokenizer function has access to the lexer in the `this` object, which can be used if any internal section of the string needs to be parsed further, such as in handling any inline syntax on the text within a block token. The key functions that may be useful include:

 <dl>
-<dt><code>this.blockTokens(string text)</code></dt>
-<dd>Runs the block tokenizer functions (including any extensions) on the provided text, and returns an array containing a nested tree of tokens.</dd>
+<dt><code>this.lexer.blockTokens(string text, array tokens)</code></dt>
+<dd>This runs the block tokenizer functions (including any block-level extensions) on the provided text, and appends any resulting tokens onto the tokens array. The tokens array is also returned by the function. You might use this, for example, if your extension creates a "container"-type token (such as a blockquote) that can potentially include other block-level tokens inside.</dd>
-<dt><code>this.inlineTokens(string text)</code></dt>
-<dd>Runs the inline tokenizer functions (including any extensions) on the provided text, and returns an array containing a nested tree of tokens. This can be used to generate the tokens parameter.</dd>
+
+<dt><code>this.lexer.inline(string text, array tokens)</code></dt>
+<dd>Parsing of inline-level tokens only occurs after all block-level tokens have been generated. This function adds text and tokens to a queue to be processed using inline-level tokenizers (including any inline-level extensions) at that later step. Tokens will be generated using the provided text, and any resulting tokens will be appended to the tokens array. Note that this function does **NOT** return anything since the inline processing cannot happen until the block-level processing is complete.</dd>
+
+<dt><code>this.lexer.inlineTokens(string text, array tokens)</code></dt>
+<dd>Sometimes an inline-level token contains further nested inline tokens (such as a <code>**strong**</code> token inside of a <code>### Heading</code>). This runs the inline tokenizer functions (including any inline-level extensions) on the provided text, and appends any resulting tokens onto the tokens array. The tokens array is also returned by the function.</dd>
 </dl>
 </dd>
 <dt><code>renderer(object token)</code></dt>
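For reference, a minimal sketch of how a hypothetical block-level extension tokenizer would use the queueing API documented above (the `spoiler` name, rule, and output tag are illustrative only, not part of this change):

```js
// Hypothetical extension: child inline tokens are no longer built by the
// tokenizer itself; this.lexer.inline() queues the text so token.tokens is
// filled in once the block pass has finished.
const spoiler = {
  name: 'spoiler',
  level: 'block',
  tokenizer(src) {
    const match = /^!!([^\n]+)(?:\n|$)/.exec(src);   // illustrative rule
    if (match) {
      const token = {
        type: 'spoiler',
        raw: match[0],
        text: match[1].trim(),
        tokens: []                                   // filled later by the inline pass
      };
      this.lexer.inline(token.text, token.tokens);   // queue; returns nothing
      return token;
    }
  }
};
```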
@@ -344,11 +348,11 @@ The tokenizer function has access to the lexer in the `this` object, which can b

 The renderer function has access to the parser in the `this` object, which can be used if any part of the token needs to be parsed further, such as any child tokens. The key functions that may be useful include:

 <dl>
-<dt><code>this.parse(array tokens)</code></dt>
-<dd>Runs the block renderer functions (including any extensions) on the provided array of tokens, and returns the resulting HTML string output.</dd>
+<dt><code>this.parser.parse(array tokens)</code></dt>
+<dd>Runs the block renderer functions (including any extensions) on the provided array of tokens, and returns the resulting HTML string output. This is used to generate the HTML from any child block-level tokens, for example if your extension is a "container"-type token (such as a blockquote) that can potentially include other block-level tokens inside.</dd>
-<dt><code>this.parseInline(array tokens)</code></dt>
-<dd>Runs the inline renderer functions (including any extensions) on the provided array of tokens, and returns the resulting HTML string output. This could be used to generate text from any child tokens, for example.</dd>
+<dt><code>this.parser.parseInline(array tokens)</code></dt>
+<dd>Runs the inline renderer functions (including any extensions) on the provided array of tokens, and returns the resulting HTML string output. This is used to generate the HTML from any child inline-level tokens.</dd>
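And the matching renderer side of the same hypothetical extension, showing the parser now being reached through `this.parser` (again a sketch, not code from this diff):

```js
// By render time the queued inline pass has populated token.tokens, so the
// renderer only hands them back to the parser for HTML output.
const spoilerRenderer = {
  name: 'spoiler',
  renderer(token) {
    return `<details>${this.parser.parseInline(token.tokens)}</details>\n`;
  }
};

// marked.use({ extensions: [{ ...spoiler, ...spoilerRenderer }] });
```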
@@ -371,16 +375,18 @@ const descriptionlist = {
     const rule = /^(?::[^:\n]+:[^:\n]*(?:\n|$))+/;   // Regex for the complete token
     const match = rule.exec(src);
     if (match) {
-      return {                                       // Token to generate
+      const token = {                                // Token to generate
         type: 'descriptionList',                     // Should match "name" above
         raw: match[0],                               // Text to consume from the source
         text: match[0].trim(),                       // Additional custom properties
-        tokens: this.inlineTokens(match[0].trim())   // inlineTokens to process **bold**, *italics*, etc.
+        tokens: []                                   // Array where child inline tokens will be generated
       };
+      this.lexer.inline(token.text, token.tokens);   // Queue this data to be processed for inline tokens
+      return token;
     }
   },
   renderer(token) {
-    return `<dl>${this.parseInline(token.tokens)}\n</dl>`; // parseInline to turn child tokens into HTML
+    return `<dl>${this.parser.parseInline(token.tokens)}\n</dl>`; // parseInline to turn child tokens into HTML
   }
 };
@@ -392,16 +398,16 @@ const description = {
     const rule = /^:([^:\n]+):([^:\n]*)(?:\n|$)/;    // Regex for the complete token
     const match = rule.exec(src);
     if (match) {
-      return {                                       // Token to generate
-        type: 'description',                         // Should match "name" above
-        raw: match[0],                               // Text to consume from the source
-        dt: this.inlineTokens(match[1].trim()),      // Additional custom properties
-        dd: this.inlineTokens(match[2].trim())
+      return {                                            // Token to generate
+        type: 'description',                              // Should match "name" above
+        raw: match[0],                                    // Text to consume from the source
+        dt: this.lexer.inlineTokens(match[1].trim()),     // Additional custom properties, including
+        dd: this.lexer.inlineTokens(match[2].trim())      // any further-nested inline tokens
       };
     }
   },
   renderer(token) {
-    return `\n<dt>${this.parseInline(token.dt)}</dt><dd>${this.parseInline(token.dd)}</dd>`;
+    return `\n<dt>${this.parser.parseInline(token.dt)}</dt><dd>${this.parser.parseInline(token.dd)}</dd>
`; }, childTokens: ['dt', 'dd'], // Any child tokens to be visited by walkTokens walkTokens(token) { // Post-processing on the completed token tree diff --git a/src/Lexer.js b/src/Lexer.js index 9686e4624c..c1d8b03cb4 100644 --- a/src/Lexer.js +++ b/src/Lexer.js @@ -55,6 +55,13 @@ module.exports = class Lexer { this.options.tokenizer = this.options.tokenizer || new Tokenizer(); this.tokenizer = this.options.tokenizer; this.tokenizer.options = this.options; + this.tokenizer.lexer = this; + this.inlineQueue = []; + this.state = { + inLink: false, + inRawBlock: false, + top: true + }; const rules = { block: block.normal, @@ -109,9 +116,12 @@ module.exports = class Lexer { .replace(/\r\n|\r/g, '\n') .replace(/\t/g, ' '); - this.blockTokens(src, this.tokens, true); + this.blockTokens(src, this.tokens); - this.inline(this.tokens); + let next; + while (next = this.inlineQueue.shift()) { + this.inlineTokens(next.src, next.tokens); + } return this.tokens; } @@ -119,17 +129,17 @@ module.exports = class Lexer { /** * Lexing */ - blockTokens(src, tokens = [], top = true) { + blockTokens(src, tokens = []) { if (this.options.pedantic) { src = src.replace(/^ +$/gm, ''); } - let token, i, l, lastToken, cutSrc, lastParagraphClipped; + let token, lastToken, cutSrc, lastParagraphClipped; while (src) { if (this.options.extensions && this.options.extensions.block && this.options.extensions.block.some((extTokenizer) => { - if (token = extTokenizer.call(this, src, tokens)) { + if (token = extTokenizer.call({ lexer: this }, src, tokens)) { src = src.substring(token.raw.length); tokens.push(token); return true; @@ -156,6 +166,8 @@ module.exports = class Lexer { if (lastToken && lastToken.type === 'paragraph') { lastToken.raw += '\n' + token.raw; lastToken.text += '\n' + token.text; + this.inlineQueue.pop(); + this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text; } else { tokens.push(token); } @@ -176,13 +188,6 @@ module.exports = class Lexer { continue; } - // table no leading pipe (gfm) - if (token = this.tokenizer.nptable(src)) { - src = src.substring(token.raw.length); - tokens.push(token); - continue; - } - // hr if (token = this.tokenizer.hr(src)) { src = src.substring(token.raw.length); @@ -193,7 +198,6 @@ module.exports = class Lexer { // blockquote if (token = this.tokenizer.blockquote(src)) { src = src.substring(token.raw.length); - token.tokens = this.blockTokens(token.text, [], top); tokens.push(token); continue; } @@ -201,10 +205,6 @@ module.exports = class Lexer { // list if (token = this.tokenizer.list(src)) { src = src.substring(token.raw.length); - l = token.items.length; - for (i = 0; i < l; i++) { - token.items[i].tokens = this.blockTokens(token.items[i].text, [], false); - } tokens.push(token); continue; } @@ -217,7 +217,7 @@ module.exports = class Lexer { } // def - if (top && (token = this.tokenizer.def(src))) { + if (this.state.top && (token = this.tokenizer.def(src))) { src = src.substring(token.raw.length); if (!this.tokens.links[token.tag]) { this.tokens.links[token.tag] = { @@ -250,18 +250,20 @@ module.exports = class Lexer { const tempSrc = src.slice(1); let tempStart; this.options.extensions.startBlock.forEach(function(getStartIndex) { - tempStart = getStartIndex.call(this, tempSrc); + tempStart = getStartIndex.call({ lexer: this }, tempSrc); if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); } }); if (startIndex < Infinity && startIndex >= 0) { cutSrc = src.substring(0, startIndex + 1); } } - if (top && (token = 
this.tokenizer.paragraph(cutSrc))) { + if (this.state.top && (token = this.tokenizer.paragraph(cutSrc))) { lastToken = tokens[tokens.length - 1]; if (lastParagraphClipped && lastToken.type === 'paragraph') { lastToken.raw += '\n' + token.raw; lastToken.text += '\n' + token.text; + this.inlineQueue.pop(); + this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text; } else { tokens.push(token); } @@ -277,6 +279,8 @@ module.exports = class Lexer { if (lastToken && lastToken.type === 'text') { lastToken.raw += '\n' + token.raw; lastToken.text += '\n' + token.text; + this.inlineQueue.pop(); + this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text; } else { tokens.push(token); } @@ -294,78 +298,18 @@ module.exports = class Lexer { } } + this.state.top = true; return tokens; } - inline(tokens) { - let i, - j, - k, - l2, - row, - token; - - const l = tokens.length; - for (i = 0; i < l; i++) { - token = tokens[i]; - switch (token.type) { - case 'paragraph': - case 'text': - case 'heading': { - token.tokens = []; - this.inlineTokens(token.text, token.tokens); - break; - } - case 'table': { - token.tokens = { - header: [], - cells: [] - }; - - // header - l2 = token.header.length; - for (j = 0; j < l2; j++) { - token.tokens.header[j] = []; - this.inlineTokens(token.header[j], token.tokens.header[j]); - } - - // cells - l2 = token.cells.length; - for (j = 0; j < l2; j++) { - row = token.cells[j]; - token.tokens.cells[j] = []; - for (k = 0; k < row.length; k++) { - token.tokens.cells[j][k] = []; - this.inlineTokens(row[k], token.tokens.cells[j][k]); - } - } - - break; - } - case 'blockquote': { - this.inline(token.tokens); - break; - } - case 'list': { - l2 = token.items.length; - for (j = 0; j < l2; j++) { - this.inline(token.items[j].tokens); - } - break; - } - default: { - // do nothing - } - } - } - - return tokens; + inline(src, tokens) { + this.inlineQueue.push({ src, tokens }); } /** * Lexing/Compiling */ - inlineTokens(src, tokens = [], inLink = false, inRawBlock = false) { + inlineTokens(src, tokens = []) { let token, lastToken, cutSrc; // String with links masked to avoid interference with em and strong @@ -404,7 +348,7 @@ module.exports = class Lexer { if (this.options.extensions && this.options.extensions.inline && this.options.extensions.inline.some((extTokenizer) => { - if (token = extTokenizer.call(this, src, tokens)) { + if (token = extTokenizer.call({ lexer: this }, src, tokens)) { src = src.substring(token.raw.length); tokens.push(token); return true; @@ -422,10 +366,8 @@ module.exports = class Lexer { } // tag - if (token = this.tokenizer.tag(src, inLink, inRawBlock)) { + if (token = this.tokenizer.tag(src)) { src = src.substring(token.raw.length); - inLink = token.inLink; - inRawBlock = token.inRawBlock; lastToken = tokens[tokens.length - 1]; if (lastToken && token.type === 'text' && lastToken.type === 'text') { lastToken.raw += token.raw; @@ -439,9 +381,6 @@ module.exports = class Lexer { // link if (token = this.tokenizer.link(src)) { src = src.substring(token.raw.length); - if (token.type === 'link') { - token.tokens = this.inlineTokens(token.text, [], true, inRawBlock); - } tokens.push(token); continue; } @@ -450,10 +389,7 @@ module.exports = class Lexer { if (token = this.tokenizer.reflink(src, this.tokens.links)) { src = src.substring(token.raw.length); lastToken = tokens[tokens.length - 1]; - if (token.type === 'link') { - token.tokens = this.inlineTokens(token.text, [], true, inRawBlock); - tokens.push(token); - } else if (lastToken && token.type === 
'text' && lastToken.type === 'text') { + if (lastToken && token.type === 'text' && lastToken.type === 'text') { lastToken.raw += token.raw; lastToken.text += token.text; } else { @@ -465,7 +401,6 @@ module.exports = class Lexer { // em & strong if (token = this.tokenizer.emStrong(src, maskedSrc, prevChar)) { src = src.substring(token.raw.length); - token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock); tokens.push(token); continue; } @@ -487,7 +422,6 @@ module.exports = class Lexer { // del (gfm) if (token = this.tokenizer.del(src)) { src = src.substring(token.raw.length); - token.tokens = this.inlineTokens(token.text, [], inLink, inRawBlock); tokens.push(token); continue; } @@ -500,7 +434,7 @@ module.exports = class Lexer { } // url (gfm) - if (!inLink && (token = this.tokenizer.url(src, mangle))) { + if (!this.state.inLink && (token = this.tokenizer.url(src, mangle))) { src = src.substring(token.raw.length); tokens.push(token); continue; @@ -514,14 +448,14 @@ module.exports = class Lexer { const tempSrc = src.slice(1); let tempStart; this.options.extensions.startInline.forEach(function(getStartIndex) { - tempStart = getStartIndex.call(this, tempSrc); + tempStart = getStartIndex.call({ lexer: this }, tempSrc); if (typeof tempStart === 'number' && tempStart >= 0) { startIndex = Math.min(startIndex, tempStart); } }); if (startIndex < Infinity && startIndex >= 0) { cutSrc = src.substring(0, startIndex + 1); } } - if (token = this.tokenizer.inlineText(cutSrc, inRawBlock, smartypants)) { + if (token = this.tokenizer.inlineText(cutSrc, smartypants)) { src = src.substring(token.raw.length); if (token.raw.slice(-1) !== '_') { // Track prevChar before string of ____ started prevChar = token.raw.slice(-1); diff --git a/src/Parser.js b/src/Parser.js index f8f766b96f..e804c143c4 100644 --- a/src/Parser.js +++ b/src/Parser.js @@ -66,7 +66,7 @@ module.exports = class Parser { // Run any renderer extensions if (this.options.extensions && this.options.extensions.renderers && this.options.extensions.renderers[token.type]) { - ret = this.options.extensions.renderers[token.type].call(this, token); + ret = this.options.extensions.renderers[token.type].call({ parser: this }, token); if (ret !== false || !['space', 'hr', 'heading', 'code', 'table', 'blockquote', 'list', 'html', 'paragraph', 'text'].includes(token.type)) { out += ret || ''; continue; @@ -222,7 +222,7 @@ module.exports = class Parser { // Run any renderer extensions if (this.options.extensions && this.options.extensions.renderers && this.options.extensions.renderers[token.type]) { - ret = this.options.extensions.renderers[token.type].call(this, token); + ret = this.options.extensions.renderers[token.type].call({ parser: this }, token); if (ret !== false || !['escape', 'html', 'link', 'image', 'strong', 'em', 'codespan', 'br', 'del', 'text'].includes(token.type)) { out += ret || ''; continue; diff --git a/src/Tokenizer.js b/src/Tokenizer.js index 0177551af7..922452e7be 100644 --- a/src/Tokenizer.js +++ b/src/Tokenizer.js @@ -6,18 +6,20 @@ const { findClosingBracket } = require('./helpers.js'); -function outputLink(cap, link, raw) { +function outputLink(cap, link, raw, lexer) { const href = link.href; const title = link.title ? 
escape(link.title) : null; const text = cap[1].replace(/\\([\[\]])/g, '$1'); if (cap[0].charAt(0) !== '!') { + lexer.state.inLink = true; return { type: 'link', raw, href, title, - text + text, + tokens: lexer.inlineTokens(text, []) }; } else { return { @@ -125,48 +127,15 @@ module.exports = class Tokenizer { } } - return { + const token = { type: 'heading', raw: cap[0], depth: cap[1].length, - text: text + text: text, + tokens: [] }; - } - } - - nptable(src) { - const cap = this.rules.block.nptable.exec(src); - if (cap) { - const item = { - type: 'table', - header: splitCells(cap[1].replace(/^ *| *\| *$/g, '')), - align: cap[2].replace(/^ *|\| *$/g, '').split(/ *\| */), - cells: cap[3] ? cap[3].replace(/\n$/, '').split('\n') : [], - raw: cap[0] - }; - - if (item.header.length === item.align.length) { - let l = item.align.length; - let i; - for (i = 0; i < l; i++) { - if (/^ *-+: *$/.test(item.align[i])) { - item.align[i] = 'right'; - } else if (/^ *:-+: *$/.test(item.align[i])) { - item.align[i] = 'center'; - } else if (/^ *:-+ *$/.test(item.align[i])) { - item.align[i] = 'left'; - } else { - item.align[i] = null; - } - } - - l = item.cells.length; - for (i = 0; i < l; i++) { - item.cells[i] = splitCells(item.cells[i], item.header.length); - } - - return item; - } + this.lexer.inline(token.text, token.tokens); + return token; } } @@ -188,6 +157,7 @@ module.exports = class Tokenizer { return { type: 'blockquote', raw: cap[0], + tokens: this.lexer.blockTokens(text, []), text }; } @@ -312,16 +282,28 @@ module.exports = class Tokenizer { } } - list.items.push({ + this.lexer.state.top = false; + + const token = { type: 'list_item', raw, task: istask, checked: ischecked, loose: loose, - text: item - }); + text: item, + tokens: this.lexer.blockTokens(item, []) + }; + + // this.lexer.inline(token.text, ) + list.items.push(token); } + // l2 = token.items.length; + // for (j = 0; j < l2; j++) { + // this.inline(token.items[j].tokens); + // } + // break; + return list; } } @@ -329,15 +311,20 @@ module.exports = class Tokenizer { html(src) { const cap = this.rules.block.html.exec(src); if (cap) { - return { - type: this.options.sanitize - ? 'paragraph' - : 'html', + const token = { + type: 'html', raw: cap[0], pre: !this.options.sanitizer && (cap[1] === 'pre' || cap[1] === 'script' || cap[1] === 'style'), - text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0] + text: cap[0] }; + if (this.options.sanitize) { + token.type = 'paragraph'; + token.text = this.options.sanitizer ? 
this.options.sanitizer(cap[0]) : escape(cap[0]); + token.tokens = []; + this.lexer.inline(token.text, token.tokens); + } + return token; } } @@ -370,7 +357,7 @@ module.exports = class Tokenizer { item.raw = cap[0]; let l = item.align.length; - let i; + let i, j, k, row; for (i = 0; i < l; i++) { if (/^ *-+: *$/.test(item.align[i])) { item.align[i] = 'right'; @@ -385,9 +372,31 @@ module.exports = class Tokenizer { l = item.cells.length; for (i = 0; i < l; i++) { - item.cells[i] = splitCells( - item.cells[i].replace(/^ *\| *| *\| *$/g, ''), - item.header.length); + item.cells[i] = splitCells(item.cells[i], item.header.length); + } + + // parse child tokens inside headers and cells + item.tokens = { + header: [], + cells: [] + }; + + // header child tokens + l = item.header.length; + for (j = 0; j < l; j++) { + item.tokens.header[j] = []; + this.lexer.inlineTokens(item.header[j], item.tokens.header[j]); + } + + // cell child tokens + l = item.cells.length; + for (j = 0; j < l; j++) { + row = item.cells[j]; + item.tokens.cells[j] = []; + for (k = 0; k < row.length; k++) { + item.tokens.cells[j][k] = []; + this.lexer.inlineTokens(row[k], item.tokens.cells[j][k]); + } } return item; @@ -398,36 +407,45 @@ module.exports = class Tokenizer { lheading(src) { const cap = this.rules.block.lheading.exec(src); if (cap) { - return { + const token = { type: 'heading', raw: cap[0], depth: cap[2].charAt(0) === '=' ? 1 : 2, - text: cap[1] + text: cap[1], + tokens: [] }; + this.lexer.inline(token.text, token.tokens); + return token; } } paragraph(src) { const cap = this.rules.block.paragraph.exec(src); if (cap) { - return { + const token = { type: 'paragraph', raw: cap[0], text: cap[1].charAt(cap[1].length - 1) === '\n' ? cap[1].slice(0, -1) - : cap[1] + : cap[1], + tokens: [] }; + this.lexer.inline(token.text, token.tokens); + return token; } } text(src) { const cap = this.rules.block.text.exec(src); if (cap) { - return { + const token = { type: 'text', raw: cap[0], - text: cap[0] + text: cap[0], + tokens: [] }; + this.lexer.inline(token.text, token.tokens); + return token; } } @@ -442,18 +460,18 @@ module.exports = class Tokenizer { } } - tag(src, inLink, inRawBlock) { + tag(src) { const cap = this.rules.inline.tag.exec(src); if (cap) { - if (!inLink && /^/i.test(cap[0])) { - inLink = false; + if (!this.lexer.state.inLink && /^/i.test(cap[0])) { + this.lexer.state.inLink = false; } - if (!inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { - inRawBlock = true; - } else if (inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { - inRawBlock = false; + if (!this.lexer.state.inRawBlock && /^<(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { + this.lexer.state.inRawBlock = true; + } else if (this.lexer.state.inRawBlock && /^<\/(pre|code|kbd|script)(\s|>)/i.test(cap[0])) { + this.lexer.state.inRawBlock = false; } return { @@ -461,8 +479,8 @@ module.exports = class Tokenizer { ? 'text' : 'html', raw: cap[0], - inLink, - inRawBlock, + inLink: this.lexer.state.inLink, + inRawBlock: this.lexer.state.inRawBlock, text: this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) @@ -524,7 +542,7 @@ module.exports = class Tokenizer { return outputLink(cap, { href: href ? href.replace(this.rules.inline._escapes, '$1') : href, title: title ? 
title.replace(this.rules.inline._escapes, '$1') : title - }, cap[0]); + }, cap[0], this.lexer); } } @@ -542,7 +560,7 @@ module.exports = class Tokenizer { text }; } - return outputLink(cap, link, cap[0]); + return outputLink(cap, link, cap[0], this.lexer); } } @@ -591,18 +609,22 @@ module.exports = class Tokenizer { // Create `em` if smallest delimiter has odd char count. *a*** if (Math.min(lLength, rLength) % 2) { + const text = src.slice(1, lLength + match.index + rLength); return { type: 'em', raw: src.slice(0, lLength + match.index + rLength + 1), - text: src.slice(1, lLength + match.index + rLength) + text, + tokens: this.lexer.inlineTokens(text, []) }; } // Create 'strong' if smallest delimiter has even char count. **a*** + const text = src.slice(2, lLength + match.index + rLength - 1); return { type: 'strong', raw: src.slice(0, lLength + match.index + rLength + 1), - text: src.slice(2, lLength + match.index + rLength - 1) + text, + tokens: this.lexer.inlineTokens(text, []) }; } } @@ -642,7 +664,8 @@ module.exports = class Tokenizer { return { type: 'del', raw: cap[0], - text: cap[2] + text: cap[2], + tokens: this.lexer.inlineTokens(cap[2], []) }; } } @@ -712,11 +735,11 @@ module.exports = class Tokenizer { } } - inlineText(src, inRawBlock, smartypants) { + inlineText(src, smartypants) { const cap = this.rules.inline.text.exec(src); if (cap) { let text; - if (inRawBlock) { + if (this.lexer.state.inRawBlock) { text = this.options.sanitize ? (this.options.sanitizer ? this.options.sanitizer(cap[0]) : escape(cap[0])) : cap[0]; } else { text = escape(this.options.smartypants ? smartypants(cap[0]) : cap[0]); diff --git a/src/helpers.js b/src/helpers.js index 93c75a1b2e..e0b48d74ea 100644 --- a/src/helpers.js +++ b/src/helpers.js @@ -160,6 +160,10 @@ function splitCells(tableRow, count) { cells = row.split(/ \|/); let i = 0; + // First/last cell in a row cannot be empty if it has no leading/trailing pipe + if (!cells[0].trim()) { cells.shift(); } + if (!cells[cells.length - 1].trim()) { cells.pop(); } + if (cells.length > count) { cells.splice(count); } else { diff --git a/src/rules.js b/src/rules.js index 58751f0770..13961f2251 100644 --- a/src/rules.js +++ b/src/rules.js @@ -26,7 +26,6 @@ const block = { + '|(?=[ \\t]*(?:\\n|$))[\\s\\S]*?(?:(?:\\n *)+\\n|$)' // (7) closing tag + ')', def: /^ {0,3}\[(label)\]: *\n? *]+)>?(?:(?: +\n? *| *\n *)(title))? *(?:\n+|$)/, - nptable: noopTest, table: noopTest, lheading: /^([^\n]+)\n {0,3}(=+|-+) *(?:\n+|$)/, // regex template, placeholders will be replaced according to different paragraph @@ -97,25 +96,11 @@ block.normal = merge({}, block); */ block.gfm = merge({}, block.normal, { - nptable: '^ *([^|\\n ].*\\|.*)\\n' // Header - + ' {0,3}([-:]+ *\\|[-| :]*)' // Align - + '(?:\\n((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)', // Cells - table: '^ *\\|(.+)\\n' // Header - + ' {0,3}\\|?( *[-:]+[-| :]*)' // Align + table: '^ *([^\\n ].*\\|.*)\\n' // Header + + ' {0,3}(?:\\| *)?(:?-+:? *(?:\\| *:?-+:? *)*)\\|?' 
// Align + '(?:\\n *((?:(?!\\n|hr|heading|blockquote|code|fences|list|html).*(?:\\n|$))*)\\n*|$)' // Cells }); -block.gfm.nptable = edit(block.gfm.nptable) - .replace('hr', block.hr) - .replace('heading', ' {0,3}#{1,6} ') - .replace('blockquote', ' {0,3}>') - .replace('code', ' {4}[^\\n]') - .replace('fences', ' {0,3}(?:`{3,}(?=[^`\\n]*\\n)|~{3,})[^\\n]*\\n') - .replace('list', ' {0,3}(?:[*+-]|1[.)]) ') // only lists starting from 1 can interrupt - .replace('html', ')|<(?:script|pre|style|textarea|!--)') - .replace('tag', block._tag) // tables can be interrupted by type (6) html blocks - .getRegex(); - block.gfm.table = edit(block.gfm.table) .replace('hr', block.hr) .replace('heading', ' {0,3}#{1,6} ') diff --git a/test/specs/new/table_cells.html b/test/specs/new/table_cells.html index 694f7dddea..5b35bcc3cb 100644 --- a/test/specs/new/table_cells.html +++ b/test/specs/new/table_cells.html @@ -16,7 +16,7 @@
12
1|\2|\
-
12
2
+
12
2
12
12|
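Related context for the rules.js and helpers.js hunks above: the separate `nptable` rule is gone, the single `table` rule now also matches rows without leading or trailing pipes, and `splitCells` drops the empty edge cell such rows would otherwise produce. A rough usage sketch of the post-change behaviour (output abbreviated, not copied from the tests):

```js
const marked = require('marked');

// A pipe table written without leading pipes is now handled by the one
// table rule instead of the old nptable rule.
const md = [
  'a | b',
  '- | -',
  '1 | 2'
].join('\n');

console.log(marked(md)); // <table>...<td>1</td><td>2</td>...</table> (abbreviated)
```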
diff --git a/test/unit/Lexer-spec.js b/test/unit/Lexer-spec.js index f8361ace77..f94cd6e70d 100644 --- a/test/unit/Lexer-spec.js +++ b/test/unit/Lexer-spec.js @@ -17,12 +17,6 @@ function expectInlineTokens({ md, options, tokens = jasmine.any(Array), links = expect(outTokens).toEqual(tokens); } -function expectInline({ token, options, tokens }) { - const lexer = new Lexer(options); - lexer.inline([token]); - expect(token.tokens).toEqual(tokens); -} - describe('Lexer', () => { describe('paragraph', () => { it('space between paragraphs', () => { @@ -599,65 +593,6 @@ paragraph }); describe('inline', () => { - describe('inline', () => { - it('paragraph', () => { - expectInline({ - token: { type: 'paragraph', text: 'text' }, - tokens: [ - { type: 'text', raw: 'text', text: 'text' } - ] - }); - }); - - it('text', () => { - expectInline({ - token: { type: 'text', text: 'text' }, - tokens: [ - { type: 'text', raw: 'text', text: 'text' } - ] - }); - }); - - it('heading', () => { - expectInline({ - token: { type: 'heading', text: 'text' }, - tokens: [ - { type: 'text', raw: 'text', text: 'text' } - ] - }); - }); - - it('table', () => { - expectInline({ - token: { - type: 'table', - header: ['a', 'b'], - align: [null, null], - cells: [['1', '2']] - }, - tokens: { - header: [ - [{ type: 'text', raw: 'a', text: 'a' }], - [{ type: 'text', raw: 'b', text: 'b' }] - ], - cells: [ - [ - [{ type: 'text', raw: '1', text: '1' }], - [{ type: 'text', raw: '2', text: '2' }] - ] - ] - } - }); - }); - - it('code no inline tokens', () => { - expectInline({ - token: { type: 'code', text: 'code' }, - tokens: undefined - }); - }); - }); - describe('inlineTokens', () => { it('escape', () => { expectInlineTokens({ diff --git a/test/unit/marked-spec.js b/test/unit/marked-spec.js index 4e4b1b46f9..6b67a8fe61 100644 --- a/test/unit/marked-spec.js +++ b/test/unit/marked-spec.js @@ -230,16 +230,18 @@ describe('use extension', () => { const rule = /^(?::[^:\n]+:[^:\n]*(?:\n|$))+/; const match = rule.exec(src); if (match) { - return { + const token = { type: 'descriptionList', raw: match[0], // This is the text that you want your token to consume from the source text: match[0].trim(), // You can add additional properties to your tokens to pass along to the renderer - tokens: this.inlineTokens(match[0].trim()) + tokens: [] }; + this.lexer.inlineTokens(token.text, token.tokens); + return token; } }, renderer(token) { - return `
<dl>${this.parseInline(token.tokens)}\n</dl>`;
+        return `<dl>${this.parser.parseInline(token.tokens)}\n</dl>
`; } }; @@ -251,16 +253,19 @@ describe('use extension', () => { const rule = /^:([^:\n]+):([^:\n]*)(?:\n|$)/; const match = rule.exec(src); if (match) { - return { + const token = { type: 'description', - raw: match[0], // This is the text that you want your token to consume from the source - dt: this.inlineTokens(match[1].trim()), // You can add additional properties to your tokens to pass along to the renderer - dd: this.inlineTokens(match[2].trim()) + raw: match[0], + dt: [], + dd: [] }; + this.lexer.inline(match[1].trim(), token.dt); + this.lexer.inline(match[2].trim(), token.dd); + return token; } }, renderer(token) { - return `\n
<dt>${this.parseInline(token.dt)}</dt><dd>${this.parseInline(token.dd)}</dd>`;
+        return `\n<dt>${this.parser.parseInline(token.dt)}</dt><dd>${this.parser.parseInline(token.dd)}</dd>
`; } }; marked.use({ extensions: [descriptionlist, description] }); @@ -425,17 +430,21 @@ describe('use extension', () => { const rule = /^:([^:\n]+):([^:\n]*)(?:\n|$)/; const match = rule.exec(src); if (match) { - return { + const token = { type: 'walkableDescription', - raw: match[0], // This is the text that you want your token to consume from the source - dt: this.inlineTokens(match[1].trim()), // You can add additional properties to your tokens to pass along to the renderer - dd: this.inlineTokens(match[2].trim()), - tokens: this.inlineTokens('unwalked') + raw: match[0], + dt: [], + dd: [], + tokens: [] }; + this.lexer.inline(match[1].trim(), token.dt); + this.lexer.inline(match[2].trim(), token.dd); + this.lexer.inline('unwalked', token.tokens); + return token; } }, renderer(token) { - return `\n
<dt>${this.parseInline(token.dt)} - ${this.parseInline(token.tokens)}</dt><dd>${this.parseInline(token.dd)}</dd>`;
+        return `\n<dt>${this.parser.parseInline(token.dt)} - ${this.parser.parseInline(token.tokens)}</dt><dd>${this.parser.parseInline(token.dd)}</dd>
`; }, childTokens: ['dd', 'dt'] }], @@ -462,16 +471,18 @@ describe('use extension', () => { tokenizer(src, tokens) { if (src.startsWith(`::${name}\n`)) { const text = `:${name}`; - return { + const token = { type: `block-${name}`, raw: `::${name}\n`, text, - tokens: this.inlineTokens(text) + tokens: [] }; + this.lexer.inline(token.text, token.tokens); + return token; } }, renderer(token) { - return `<${token.type}>${this.parseInline(token.tokens)}\n`; + return `<${token.type}>${this.parser.parseInline(token.tokens)}\n`; } }, { name: `inline-${name}`, @@ -493,12 +504,15 @@ describe('use extension', () => { tokenizer: { heading(src) { if (src.startsWith(`# ${name}`)) { - return { + const token = { type: 'heading', raw: `# ${name}`, text: `used ${name}`, - depth: 1 + depth: 1, + tokens: [] }; + this.lexer.inline(token.text, token.tokens); + return token; } return false; } @@ -628,7 +642,7 @@ used extension2 walked

name: 'styled', renderer(token) { token.type = token.originalType; - const text = this.parse([token]); + const text = this.parser.parse([token]); const openingTag = /(<[^\s<>]+)([^\n<>]*>.*)/s.exec(text); if (openingTag) { return `${openingTag[1]} ${token.style}${openingTag[2]}`; @@ -675,11 +689,14 @@ used extension2 walked

const extension = { tokenizer: { paragraph(text) { - return { + const token = { type: 'paragraph', raw: text, - text: 'extension' + text: 'extension', + tokens: [] }; + this.lexer.inline(token.text, token.tokens); + return token; } } };
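Taken together, the Lexer changes in this diff replace the old recursive `inline()` walk with a queue that is drained after the block pass; inline state (`inLink`, `inRawBlock`) now lives on `this.state` instead of being threaded through arguments. A condensed paraphrase of the new `Lexer.lex` flow (a summary of the hunks above, not a verbatim excerpt):

```js
lex(src) {
  src = src.replace(/\r\n|\r/g, '\n').replace(/\t/g, '    ');

  // Phase 1: block pass. Tokenizers call this.inline(text, tokens), which
  // only pushes { src, tokens } onto this.inlineQueue.
  this.blockTokens(src, this.tokens);

  // Phase 2: drain the queue once every block token exists, sharing
  // this.state across all inline tokenization.
  let next;
  while (next = this.inlineQueue.shift()) {
    this.inlineTokens(next.src, next.tokens);
  }

  return this.tokens;
}
```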