Skip to content

Commit

Permalink
fix: Full Commonmark compliance for Lists (#2112)
Browse files Browse the repository at this point in the history
  • Loading branch information
calculuschild committed Aug 10, 2021
1 parent edc2e6d commit eb33d3b
Show file tree
Hide file tree
Showing 15 changed files with 1,114 additions and 1,267 deletions.
962 changes: 453 additions & 509 deletions lib/marked.esm.js

Large diffs are not rendered by default.

986 changes: 481 additions & 505 deletions lib/marked.js

Large diffs are not rendered by default.

12 changes: 8 additions & 4 deletions src/Lexer.js
Expand Up @@ -163,10 +163,9 @@ module.exports = class Lexer {
src = src.substring(token.raw.length);
lastToken = tokens[tokens.length - 1];
// An indented code block cannot interrupt a paragraph.
if (lastToken && lastToken.type === 'paragraph') {
if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
lastToken.raw += '\n' + token.raw;
lastToken.text += '\n' + token.text;
this.inlineQueue.pop();
this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
} else {
tokens.push(token);
Expand Down Expand Up @@ -217,9 +216,14 @@ module.exports = class Lexer {
}

// def
if (this.state.top && (token = this.tokenizer.def(src))) {
if (token = this.tokenizer.def(src)) {
src = src.substring(token.raw.length);
if (!this.tokens.links[token.tag]) {
lastToken = tokens[tokens.length - 1];
if (lastToken && (lastToken.type === 'paragraph' || lastToken.type === 'text')) {
lastToken.raw += '\n' + token.raw;
lastToken.text += '\n' + token.raw;
this.inlineQueue[this.inlineQueue.length - 1].src = lastToken.text;
} else if (!this.tokens.links[token.tag]) {
this.tokens.links[token.tag] = {
href: token.href,
title: token.title
Expand Down
2 changes: 1 addition & 1 deletion src/Parser.js
Expand Up @@ -149,7 +149,7 @@ module.exports = class Parser {
if (item.task) {
checkbox = this.renderer.checkbox(checked);
if (loose) {
if (item.tokens.length > 0 && item.tokens[0].type === 'text') {
if (item.tokens.length > 0 && item.tokens[0].type === 'paragraph') {
item.tokens[0].text = checkbox + ' ' + item.tokens[0].text;
if (item.tokens[0].tokens && item.tokens[0].tokens.length > 0 && item.tokens[0].tokens[0].type === 'text') {
item.tokens[0].tokens[0].text = checkbox + ' ' + item.tokens[0].tokens[0].text;
Expand Down
214 changes: 109 additions & 105 deletions src/Tokenizer.js
Expand Up @@ -164,145 +164,149 @@ module.exports = class Tokenizer {
}

list(src) {
const cap = this.rules.block.list.exec(src);
let cap = this.rules.block.list.exec(src);
if (cap) {
let raw = cap[0];
const bull = cap[2];
let raw, istask, ischecked, indent, i, blankLine, endsWithBlankLine,
line, lines, itemContents;

let bull = cap[1].trim();
const isordered = bull.length > 1;

const list = {
type: 'list',
raw,
raw: '',
ordered: isordered,
start: isordered ? +bull.slice(0, -1) : '',
loose: false,
items: []
};

// Get each top-level item.
const itemMatch = cap[0].match(this.rules.block.item);

let next = false,
item,
space,
bcurr,
bnext,
addBack,
loose,
istask,
ischecked,
endMatch;

let l = itemMatch.length;
bcurr = this.rules.block.listItemStart.exec(itemMatch[0]);
for (let i = 0; i < l; i++) {
item = itemMatch[i];
raw = item;

if (!this.options.pedantic) {
// Determine if current item contains the end of the list
endMatch = item.match(new RegExp('\\n\\s*\\n {0,' + (bcurr[0].length - 1) + '}\\S'));
if (endMatch) {
addBack = item.length - endMatch.index + itemMatch.slice(i + 1).join('\n').length;
list.raw = list.raw.substring(0, list.raw.length - addBack);

item = item.substring(0, endMatch.index);
raw = item;
l = i + 1;
}
bull = isordered ? `\\d{1,9}\\${bull.slice(-1)}` : `\\${bull}`;

if (this.options.pedantic) {
bull = isordered ? bull : '[*+-]';
}

// Get next list item
const itemRegex = new RegExp(`^( {0,3}${bull})((?: [^\\n]*| *)(?:\\n[^\\n]*)*(?:\\n|$))`);

// Get each top-level item
while (src) {
if (this.rules.block.hr.test(src)) { // End list if we encounter an HR (possibly move into itemRegex?)
break;
}

// Determine whether the next list item belongs here.
// Backpedal if it does not belong in this list.
if (i !== l - 1) {
bnext = this.rules.block.listItemStart.exec(itemMatch[i + 1]);
if (
!this.options.pedantic
? bnext[1].length >= bcurr[0].length || bnext[1].length > 3
: bnext[1].length > bcurr[1].length
) {
// nested list or continuation
itemMatch.splice(i, 2, itemMatch[i] + (!this.options.pedantic && bnext[1].length < bcurr[0].length && !itemMatch[i].match(/\n$/) ? '' : '\n') + itemMatch[i + 1]);
i--;
l--;
continue;
} else if (
// different bullet style
!this.options.pedantic || this.options.smartLists
? bnext[2][bnext[2].length - 1] !== bull[bull.length - 1]
: isordered === (bnext[2].length === 1)
) {
addBack = itemMatch.slice(i + 1).join('\n').length;
list.raw = list.raw.substring(0, list.raw.length - addBack);
i = l - 1;
}
bcurr = bnext;
if (!(cap = itemRegex.exec(src))) {
break;
}

// Remove the list item's bullet
// so it is seen as the next token.
space = item.length;
item = item.replace(/^ *([*+-]|\d+[.)]) ?/, '');

// Outdent whatever the
// list item contains. Hacky.
if (~item.indexOf('\n ')) {
space -= item.length;
item = !this.options.pedantic
? item.replace(new RegExp('^ {1,' + space + '}', 'gm'), '')
: item.replace(/^ {1,4}/gm, '');
lines = cap[2].split('\n');

if (this.options.pedantic) {
indent = 2;
itemContents = lines[0].trimLeft();
} else {
indent = cap[2].search(/[^ ]/); // Find first non-space char
indent = cap[1].length + (indent > 4 ? 1 : indent); // intented code blocks after 4 spaces; indent is always 1
itemContents = lines[0].slice(indent - cap[1].length);
}

// trim item newlines at end
item = rtrim(item, '\n');
if (i !== l - 1) {
raw = raw + '\n';
blankLine = false;
raw = cap[0];

if (!lines[0] && /^ *$/.test(lines[1])) { // items begin with at most one blank line
raw = cap[1] + lines.slice(0, 2).join('\n') + '\n';
list.loose = true;
lines = [];
}

// Determine whether item is loose or not.
// Use: /(^|\n)(?! )[^\n]+\n\n(?!\s*$)/
// for discount behavior.
loose = next || /\n\n(?!\s*$)/.test(raw);
if (i !== l - 1) {
next = raw.slice(-2) === '\n\n';
if (!loose) loose = next;
const nextBulletRegex = new RegExp(`^ {0,${Math.min(3, indent - 1)}}(?:[*+-]|\\d{1,9}[.)])`);

for (i = 1; i < lines.length; i++) {
line = lines[i];

if (this.options.pedantic) { // Re-align to follow commonmark nesting rules
line = line.replace(/^ {1,4}(?=( {4})*[^ ])/g, ' ');
}

// End list item if found start of new bullet
if (nextBulletRegex.test(line)) {
raw = cap[1] + lines.slice(0, i).join('\n') + '\n';
break;
}

// Until we encounter a blank line, item contents do not need indentation
if (!blankLine) {
if (!line.trim()) { // Check if current line is empty
blankLine = true;
}

// Dedent if possible
if (line.search(/[^ ]/) >= indent) {
itemContents += '\n' + line.slice(indent);
} else {
itemContents += '\n' + line;
}
continue;
}

// Dedent this line
if (line.search(/[^ ]/) >= indent || !line.trim()) {
itemContents += '\n' + line.slice(indent);
continue;
} else { // Line was not properly indented; end of this item
raw = cap[1] + lines.slice(0, i).join('\n') + '\n';
break;
}
}

if (loose) {
list.loose = true;
if (!list.loose) {
// If the previous item ended with a blank line, the list is loose
if (endsWithBlankLine) {
list.loose = true;
} else if (/\n *\n *$/.test(raw)) {
endsWithBlankLine = true;
}
}

// Check for task list items
if (this.options.gfm) {
istask = /^\[[ xX]\] /.test(item);
ischecked = undefined;
istask = /^\[[ xX]\] /.exec(itemContents);
if (istask) {
ischecked = item[1] !== ' ';
item = item.replace(/^\[[ xX]\] +/, '');
ischecked = istask[0] !== '[ ] ';
itemContents = itemContents.replace(/^\[[ xX]\] +/, '');
}
}

this.lexer.state.top = false;

const token = {
list.items.push({
type: 'list_item',
raw,
task: istask,
raw: raw,
task: !!istask,
checked: ischecked,
loose: loose,
text: item,
tokens: this.lexer.blockTokens(item, [])
};
loose: false,
text: itemContents
});

// this.lexer.inline(token.text, )
list.items.push(token);
list.raw += raw;
src = src.slice(raw.length);
}

// l2 = token.items.length;
// for (j = 0; j < l2; j++) {
// this.inline(token.items[j].tokens);
// }
// break;
// Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic
list.items[list.items.length - 1].raw = raw.trimRight();
list.items[list.items.length - 1].text = itemContents.trimRight();
list.raw = list.raw.trimRight();

const l = list.items.length;

// Item child tokens handled here at end because we needed to have the final item to trim it first
for (i = 0; i < l; i++) {
this.lexer.state.top = false;
list.items[i].tokens = this.lexer.blockTokens(list.items[i].text, []);
if (list.items[i].tokens.some(t => t.type === 'space')) {
list.loose = true;
list.items[i].loose = true;
}
}

return list;
}
Expand Down
9 changes: 2 additions & 7 deletions src/rules.js
Expand Up @@ -10,11 +10,11 @@ const {
const block = {
newline: /^(?: *(?:\n|$))+/,
code: /^( {4}[^\n]+(?:\n(?: *(?:\n|$))*)?)+/,
fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?:\n+|$)|$)/,
fences: /^ {0,3}(`{3,}(?=[^`\n]*\n)|~{3,})([^\n]*)\n(?:|([\s\S]*?)\n)(?: {0,3}\1[~`]* *(?=\n|$)|$)/,
hr: /^ {0,3}((?:- *){3,}|(?:_ *){3,}|(?:\* *){3,})(?:\n+|$)/,
heading: /^ {0,3}(#{1,6})(?=\s|$)(.*)(?:\n+|$)/,
blockquote: /^( {0,3}> ?(paragraph|[^\n]*)(?:\n|$))+/,
list: /^( {0,3})(bull) [\s\S]+?(?:hr|def|\n{2,}(?! )(?! {0,3}bull )\n*|\s*$)/,
list: /^( {0,3}bull)( [^\n]+?)?(?:\n|$)/,
html: '^ {0,3}(?:' // optional indentation
+ '<(script|pre|style|textarea)[\\s>][\\s\\S]*?(?:</\\1>[^\\n]*\\n+|$)' // (1)
+ '|comment[^\\n]*(\\n+|$)' // (2)
Expand Down Expand Up @@ -42,11 +42,6 @@ block.def = edit(block.def)
.getRegex();

block.bullet = /(?:[*+-]|\d{1,9}[.)])/;
block.item = /^( *)(bull) ?[^\n]*(?:\n(?! *bull ?)[^\n]*)*/;
block.item = edit(block.item, 'gm')
.replace(/bull/g, block.bullet)
.getRegex();

block.listItemStart = edit(/^( *)(bull) */)
.replace('bull', block.bullet)
.getRegex();
Expand Down

1 comment on commit eb33d3b

@vercel
Copy link

@vercel vercel bot commented on eb33d3b Aug 10, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.