From b2540d155f2caa2e97e4642ea868855e2f75596b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Felix=20B=C3=B6hm?= <188768+fb55@users.noreply.github.com> Date: Mon, 7 Mar 2022 11:02:54 +0000 Subject: [PATCH] fix(parser): Keep `html` and `body` end locations (#436) Before, we overrode the end locations for `html` and `body` tags on EOF. `body` will now take the end position from a closing `html` tag, if there was no closing `body` tag. --- packages/parse5/lib/parser/index.ts | 31 +++++++++- .../lib/parser/parser-location-info.test.ts | 57 +++++++++++++++++++ 2 files changed, 85 insertions(+), 3 deletions(-) diff --git a/packages/parse5/lib/parser/index.ts b/packages/parse5/lib/parser/index.ts index 98ea7401c..5035c2852 100644 --- a/packages/parse5/lib/parser/index.ts +++ b/packages/parse5/lib/parser/index.ts @@ -1419,12 +1419,31 @@ function appendCommentToDocument(p: Parser, tok function stopParsing(p: Parser, token: EOFToken): void { p.stopped = true; + // NOTE: Set end locations for elements that remain on the open element stack. if (token.location) { - // NOTE: generate location info for elements - // that remains on open element stack - for (let i = p.openElements.stackTop; i >= 0; i--) { + // NOTE: If we are not in a fragment, `html` and `body` will stay on the stack. + // This is a problem, as we might overwrite their end position here. + const target = p.fragmentContext ? 0 : 2; + for (let i = p.openElements.stackTop; i >= target; i--) { p._setEndLocation(p.openElements.items[i], token); } + + // Handle `html` and `body` + if (!p.fragmentContext && p.openElements.stackTop >= 0) { + const htmlElement = p.openElements.items[0]; + const htmlLocation = p.treeAdapter.getNodeSourceCodeLocation(htmlElement); + if (htmlLocation && !htmlLocation.endTag) { + p._setEndLocation(htmlElement, token); + + if (p.openElements.stackTop >= 1) { + const bodyElement = p.openElements.items[1]; + const bodyLocation = p.treeAdapter.getNodeSourceCodeLocation(bodyElement); + if (bodyLocation && !bodyLocation.endTag) { + p._setEndLocation(bodyElement, token); + } + } + } + } } } @@ -3321,6 +3340,12 @@ function endTagAfterBody(p: Parser, token: TagT //the end location explicitly. if (p.options.sourceCodeLocationInfo && p.openElements.tagIDs[0] === $.HTML) { p._setEndLocation(p.openElements.items[0], token); + + // Update the body element, if it doesn't have an end tag + const bodyElement = p.openElements.items[1]; + if (bodyElement && !p.treeAdapter.getNodeSourceCodeLocation(bodyElement)?.endTag) { + p._setEndLocation(bodyElement, token); + } } } else { tokenAfterBody(p, token); diff --git a/packages/parse5/lib/parser/parser-location-info.test.ts b/packages/parse5/lib/parser/parser-location-info.test.ts index 90cd70bd7..4559014ae 100644 --- a/packages/parse5/lib/parser/parser-location-info.test.ts +++ b/packages/parse5/lib/parser/parser-location-info.test.ts @@ -172,6 +172,63 @@ generateTestsForEachTreeAdapter('location-info-parser', (treeAdapter) => { assertNodeLocation(textLocation, html.slice(8, 15), html, [html]); }); + + test("Should use the HTML element's position for BODY, if BODY isn't closed", () => { + const html = outdent` + + +

test

+ + + `; + + const opts = { + treeAdapter, + sourceCodeLocationInfo: true, + }; + + const document = parse5.parse(html, opts); + const htmlEl = treeAdapter.getChildNodes(document)[0]; + const bodyEl = treeAdapter.getChildNodes(htmlEl)[1]; + + const htmlLocation = treeAdapter.getNodeSourceCodeLocation(htmlEl); + const bodyLocation = treeAdapter.getNodeSourceCodeLocation(bodyEl); + + assert.ok(htmlLocation?.endTag && bodyLocation); + + // HTML element's end tag's start location should be BODY's end location + assert.strictEqual(htmlLocation.endTag.startOffset, bodyLocation.endOffset); + assert.strictEqual(htmlLocation.endTag.startLine, bodyLocation.endLine); + assert.strictEqual(htmlLocation.endTag.startCol, bodyLocation.endCol); + + // The HTML element's location should not be the location of EOF + assert.notStrictEqual(htmlLocation.endOffset, html.length); + }); + + test('Should set HTML location to EOF if no end tag is supplied', () => { + const html = outdent` + + +

test

+ + `; + + const opts = { + treeAdapter, + sourceCodeLocationInfo: true, + }; + + const document = parse5.parse(html, opts); + const htmlEl = treeAdapter.getChildNodes(document)[0]; + const bodyEl = treeAdapter.getChildNodes(htmlEl)[1]; + + const htmlLocation = treeAdapter.getNodeSourceCodeLocation(htmlEl); + const bodyLocation = treeAdapter.getNodeSourceCodeLocation(bodyEl); + + assert.ok(htmlLocation && bodyLocation); + assert.strictEqual(htmlLocation.endOffset, html.length); + assert.strictEqual(bodyLocation.endOffset, html.length); + }); }); describe('location-info-parser', () => {