diff --git a/packages/parse5/lib/parser/index.ts b/packages/parse5/lib/parser/index.ts
index 98ea7401c..877ff5e35 100644
--- a/packages/parse5/lib/parser/index.ts
+++ b/packages/parse5/lib/parser/index.ts
@@ -1419,12 +1419,31 @@ function appendCommentToDocument(p: Parser, tok
 function stopParsing(p: Parser, token: EOFToken): void {
     p.stopped = true;
 
+    // NOTE: Set end locations for elements that remain on the open element stack.
     if (token.location) {
-        // NOTE: generate location info for elements
-        // that remains on open element stack
-        for (let i = p.openElements.stackTop; i >= 0; i--) {
+        // NOTE: If we are not in a fragment, `html` and `body` will stay on the stack.
+        // This is a problem, as we might overwrite their end position here.
+        const target = p.fragmentContext ? 0 : 2;
+        for (let i = p.openElements.stackTop; i >= target; i--) {
             p._setEndLocation(p.openElements.items[i], token);
         }
+
+        // Handle `html` and `body`
+        if (!p.fragmentContext && p.openElements.stackTop >= 0) {
+            const htmlElement = p.openElements.items[0];
+            const htmlLocation = p.treeAdapter.getNodeSourceCodeLocation(htmlElement);
+            if (htmlLocation && !htmlLocation.endTag) {
+                p._setEndLocation(htmlElement, token);
+
+                if (p.openElements.stackTop >= 1) {
+                    const bodyElement = p.openElements.items[1];
+                    const bodyLocation = p.treeAdapter.getNodeSourceCodeLocation(bodyElement);
+                    if (bodyLocation && !bodyLocation.endTag) {
+                        p._setEndLocation(bodyElement, token);
+                    }
+                }
+            }
+        }
     }
 }
 
@@ -3321,6 +3340,12 @@ function endTagAfterBody(p: Parser, token: TagT
         //the end location explicitly.
         if (p.options.sourceCodeLocationInfo && p.openElements.tagIDs[0] === $.HTML) {
             p._setEndLocation(p.openElements.items[0], token);
+
+            // Update the body element, if it doesn't have an end tag
+            const bodyElement = p.openElements.items[1];
+            if (!p.treeAdapter.getNodeSourceCodeLocation(bodyElement)?.endTag) {
+                p._setEndLocation(bodyElement, token);
+            }
         }
     } else {
         tokenAfterBody(p, token);
diff --git a/packages/parse5/lib/parser/parser-location-info.test.ts b/packages/parse5/lib/parser/parser-location-info.test.ts
index 90cd70bd7..4559014ae 100644
--- a/packages/parse5/lib/parser/parser-location-info.test.ts
+++ b/packages/parse5/lib/parser/parser-location-info.test.ts
@@ -172,6 +172,63 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => {
 
         assertNodeLocation(textLocation, html.slice(8, 15), html, [html]);
     });
+
+    test("Should use the HTML element's position for BODY, if BODY isn't closed", () => {
+        const html = outdent`
+            <html>
+            <body>
+              <p>test</p>
+            </html>
+            <!-- comment -->
+        `;
+
+        const opts = {
+            treeAdapter,
+            sourceCodeLocationInfo: true,
+        };
+
+        const document = parse5.parse(html, opts);
+        const htmlEl = treeAdapter.getChildNodes(document)[0];
+        const bodyEl = treeAdapter.getChildNodes(htmlEl)[1];
+
+        const htmlLocation = treeAdapter.getNodeSourceCodeLocation(htmlEl);
+        const bodyLocation = treeAdapter.getNodeSourceCodeLocation(bodyEl);
+
+        assert.ok(htmlLocation?.endTag && bodyLocation);
+
+        // HTML element's end tag's start location should be BODY's end location
+        assert.strictEqual(htmlLocation.endTag.startOffset, bodyLocation.endOffset);
+        assert.strictEqual(htmlLocation.endTag.startLine, bodyLocation.endLine);
+        assert.strictEqual(htmlLocation.endTag.startCol, bodyLocation.endCol);
+
+        // The HTML element's location should not be the location of EOF
+        assert.notStrictEqual(htmlLocation.endOffset, html.length);
+    });
+
+    test('Should set HTML location to EOF if no end tag is supplied', () => {
+        const html = outdent`
+            <html>
+            <body>
+              <p>test</p>
+            <!-- comment -->
+        `;
+
+        const opts = {
+            treeAdapter,
+            sourceCodeLocationInfo: true,
+        };
+
+        const document = parse5.parse(html, opts);
+        const htmlEl = treeAdapter.getChildNodes(document)[0];
+        const bodyEl = treeAdapter.getChildNodes(htmlEl)[1];
+
+        const htmlLocation = treeAdapter.getNodeSourceCodeLocation(htmlEl);
+        const bodyLocation = treeAdapter.getNodeSourceCodeLocation(bodyEl);
+
+        assert.ok(htmlLocation && bodyLocation);
+        assert.strictEqual(htmlLocation.endOffset, html.length);
+        assert.strictEqual(bodyLocation.endOffset, html.length);
+    });
 });
 
 describe('location-info-parser', () => {