Skip to content

Commit

Permalink
fix(parser): Keep html and body end locations (inikulin#436)
Browse files Browse the repository at this point in the history
Before, we overrode the end locations for `html` and `body` tags on EOF. `body` will now take the end position from a closing `html` tag, if there was no closing `body` tag.
  • Loading branch information
fb55 authored and jmbpwtw committed Feb 16, 2023
1 parent 7db9b34 commit b2540d1
Show file tree
Hide file tree
Showing 2 changed files with 85 additions and 3 deletions.
31 changes: 28 additions & 3 deletions packages/parse5/lib/parser/index.ts
Expand Up @@ -1419,12 +1419,31 @@ function appendCommentToDocument<T extends TreeAdapterTypeMap>(p: Parser<T>, tok
/**
 * Marks the parser as stopped and, when the EOF token carries location
 * information, records an end location for the elements that are still on
 * the stack of open elements.
 *
 * When parsing a full document (no fragment context), the two bottom stack
 * entries are treated as `html` and `body`. Their end location is only set
 * here if no end tag has been recorded for them, so that positions taken from
 * explicit closing tags are not overwritten by the EOF position.
 *
 * @param p - Parser to stop; its `stopped` flag is set to `true`.
 * @param token - The EOF token. Nothing besides `stopped` changes when
 *   `token.location` is not set.
 */
function stopParsing<T extends TreeAdapterTypeMap>(p: Parser<T>, token: EOFToken): void {
    p.stopped = true;

    // NOTE: Set end locations for elements that remain on the open element stack.
    if (token.location) {
        // NOTE: If we are not in a fragment, `html` and `body` will stay on the stack.
        // This is a problem, as we might overwrite their end position here.
        // Stop the generic loop above those two entries and handle them separately.
        const target = p.fragmentContext ? 0 : 2;
        for (let i = p.openElements.stackTop; i >= target; i--) {
            p._setEndLocation(p.openElements.items[i], token);
        }

        // Handle `html` and `body`
        if (!p.fragmentContext && p.openElements.stackTop >= 0) {
            const htmlElement = p.openElements.items[0];
            const htmlLocation = p.treeAdapter.getNodeSourceCodeLocation(htmlElement);

            // Only fall back to the EOF position if `html` has no recorded end tag.
            if (htmlLocation && !htmlLocation.endTag) {
                p._setEndLocation(htmlElement, token);

                // `body` is only considered when `html` itself was left unclosed.
                if (p.openElements.stackTop >= 1) {
                    const bodyElement = p.openElements.items[1];
                    const bodyLocation = p.treeAdapter.getNodeSourceCodeLocation(bodyElement);
                    if (bodyLocation && !bodyLocation.endTag) {
                        p._setEndLocation(bodyElement, token);
                    }
                }
            }
        }
    }
}

Expand Down Expand Up @@ -3321,6 +3340,12 @@ function endTagAfterBody<T extends TreeAdapterTypeMap>(p: Parser<T>, token: TagT
//the end location explicitly.
if (p.options.sourceCodeLocationInfo && p.openElements.tagIDs[0] === $.HTML) {
p._setEndLocation(p.openElements.items[0], token);

// Update the body element, if it doesn't have an end tag
const bodyElement = p.openElements.items[1];
if (bodyElement && !p.treeAdapter.getNodeSourceCodeLocation(bodyElement)?.endTag) {
p._setEndLocation(bodyElement, token);
}
}
} else {
tokenAfterBody(p, token);
Expand Down
57 changes: 57 additions & 0 deletions packages/parse5/lib/parser/parser-location-info.test.ts
Expand Up @@ -172,6 +172,63 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => {

assertNodeLocation(textLocation, html.slice(8, 15), html, [html]);
});

// Regression test: the input closes `html` explicitly but never closes `body`,
// and a comment follows the `</html>` tag. BODY's end position is expected to
// coincide with the start of the `</html>` end tag rather than with EOF.
test("Should use the HTML element's position for BODY, if BODY isn't closed", () => {
const html = outdent`
<html>
<body>
<p>test</p>
</html>
<!-- comment -->
`;

// Location tracking must be enabled for source code locations to be recorded.
const opts = {
treeAdapter,
sourceCodeLocationInfo: true,
};

const document = parse5.parse(html, opts);
// First child of the document is taken as the `html` element.
const htmlEl = treeAdapter.getChildNodes(document)[0];
// Second child of `html` is taken as `body` (presumably the first is the implied `head` — confirm).
const bodyEl = treeAdapter.getChildNodes(htmlEl)[1];

const htmlLocation = treeAdapter.getNodeSourceCodeLocation(htmlEl);
const bodyLocation = treeAdapter.getNodeSourceCodeLocation(bodyEl);

// Both locations, and an end tag location for `html`, must exist before comparing.
assert.ok(htmlLocation?.endTag && bodyLocation);

// HTML element's end tag's start location should be BODY's end location
assert.strictEqual(htmlLocation.endTag.startOffset, bodyLocation.endOffset);
assert.strictEqual(htmlLocation.endTag.startLine, bodyLocation.endLine);
assert.strictEqual(htmlLocation.endTag.startCol, bodyLocation.endCol);

// The HTML element's location should not be the location of EOF
assert.notStrictEqual(htmlLocation.endOffset, html.length);
});

// Counterpart test: neither `html` nor `body` is closed in the input, so both
// elements are expected to end at EOF (offset equal to the input length).
test('Should set HTML location to EOF if no end tag is supplied', () => {
const html = outdent`
<html>
<body>
<p>test</p>
<!-- comment -->
`;

// Location tracking must be enabled for source code locations to be recorded.
const opts = {
treeAdapter,
sourceCodeLocationInfo: true,
};

const document = parse5.parse(html, opts);
// First child of the document is taken as the `html` element.
const htmlEl = treeAdapter.getChildNodes(document)[0];
// Second child of `html` is taken as `body` (presumably the first is the implied `head` — confirm).
const bodyEl = treeAdapter.getChildNodes(htmlEl)[1];

const htmlLocation = treeAdapter.getNodeSourceCodeLocation(htmlEl);
const bodyLocation = treeAdapter.getNodeSourceCodeLocation(bodyEl);

// Both elements must carry location info; their end offsets must equal the input length.
assert.ok(htmlLocation && bodyLocation);
assert.strictEqual(htmlLocation.endOffset, html.length);
assert.strictEqual(bodyLocation.endOffset, html.length);
});
});

describe('location-info-parser', () => {
Expand Down

0 comments on commit b2540d1

Please sign in to comment.