From 1fbf1184fed57df02640aad4659afb54dc26a2e9 Mon Sep 17 00:00:00 2001
From: Francesco Trotta <github@fasttime.org>
Date: Wed, 22 Mar 2023 12:48:51 +0100
Subject: [PATCH] fix: `getFirstToken`/`getLastToken` on comment-only node
 (#16889)

* fix: `getFirstToken`/`getLastToken` on comment-only node

* Remove unnecessary array bound checks

* Split `if`-statement for clarity, remove inaccurate unit test

* Fix for trailing comments in root node
---
 lib/source-code/token-store/utils.js          | 18 ++++-
 tests/fixtures/parsers/all-comments-parser.js | 28 +++++++
 tests/lib/source-code/token-store.js          | 80 +++++++++++++++++--
 3 files changed, 114 insertions(+), 12 deletions(-)
 create mode 100644 tests/fixtures/parsers/all-comments-parser.js

diff --git a/lib/source-code/token-store/utils.js b/lib/source-code/token-store/utils.js
index a2bd77de71a..859831916ea 100644
--- a/lib/source-code/token-store/utils.js
+++ b/lib/source-code/token-store/utils.js
@@ -49,13 +49,18 @@ exports.getFirstIndex = function getFirstIndex(tokens, indexMap, startLoc) {
     }
     if ((startLoc - 1) in indexMap) {
         const index = indexMap[startLoc - 1];
-        const token = (index >= 0 && index < tokens.length) ? tokens[index] : null;
+        const token = tokens[index];
+
+        // If the mapped index is out of bounds, the returned cursor index will point after the end of the tokens array.
+        if (!token) {
+            return tokens.length;
+        }
 
         /*
          * For the map of "comment's location -> token's index", it points the next token of a comment.
          * In that case, +1 is unnecessary.
          */
-        if (token && token.range[0] >= startLoc) {
+        if (token.range[0] >= startLoc) {
             return index;
         }
         return index + 1;
@@ -77,13 +82,18 @@ exports.getLastIndex = function getLastIndex(tokens, indexMap, endLoc) {
     }
     if ((endLoc - 1) in indexMap) {
         const index = indexMap[endLoc - 1];
-        const token = (index >= 0 && index < tokens.length) ? tokens[index] : null;
+        const token = tokens[index];
+
+        // If the mapped index is out of bounds, the returned cursor index will point before the end of the tokens array.
+        if (!token) {
+            return tokens.length - 1;
+        }
 
         /*
          * For the map of "comment's location -> token's index", it points the next token of a comment.
          * In that case, -1 is necessary.
          */
-        if (token && token.range[1] > endLoc) {
+        if (token.range[1] > endLoc) {
             return index - 1;
         }
         return index;
diff --git a/tests/fixtures/parsers/all-comments-parser.js b/tests/fixtures/parsers/all-comments-parser.js
new file mode 100644
index 00000000000..595392a0970
--- /dev/null
+++ b/tests/fixtures/parsers/all-comments-parser.js
@@ -0,0 +1,28 @@
+// Similar to the default parser, but considers leading and trailing comments to be part of the root node.
+// Some custom parsers like @typescript-eslint/parser behave in this way.
+
+const espree = require("espree");
+exports.parse = function(code, options) {
+    const ast = espree.parse(code, options);
+
+    if (ast.range && ast.comments && ast.comments.length > 0) {
+        const firstComment = ast.comments[0];
+        const lastComment = ast.comments[ast.comments.length - 1];
+
+        if (ast.range[0] > firstComment.range[0]) {
+            ast.range[0] = firstComment.range[0];
+            ast.start = firstComment.start;
+            if (ast.loc) {
+                ast.loc.start = firstComment.loc.start;
+            }
+        }
+        if (ast.range[1] < lastComment.range[1]) {
+            ast.range[1] = lastComment.range[1];
+            ast.end = lastComment.end;
+            if (ast.loc) {
+                ast.loc.end = lastComment.loc.end;
+            }
+        }
+    }
+    return ast;
+};
diff --git a/tests/lib/source-code/token-store.js b/tests/lib/source-code/token-store.js
index ff54a6a838a..3b9db7cb95f 100644
--- a/tests/lib/source-code/token-store.js
+++ b/tests/lib/source-code/token-store.js
@@ -627,8 +627,8 @@ describe("TokenStore", () => {
             const tokenStore = new TokenStore(ast.tokens, ast.comments);
 
             /*
-             * Actually, the first of nodes is always tokens, not comments.
-             * But I think this test case is needed for completeness.
+             * A node must not start with a token: it can start with a comment or be empty.
+             * This test case is needed for completeness.
              */
             const token = tokenStore.getFirstToken(
                 { range: [ast.comments[0].range[0], ast.tokens[5].range[1]] },
@@ -644,8 +644,8 @@ describe("TokenStore", () => {
             const tokenStore = new TokenStore(ast.tokens, ast.comments);
 
             /*
-             * Actually, the first of nodes is always tokens, not comments.
-             * But I think this test case is needed for completeness.
+             * A node must not start with a token: it can start with a comment or be empty.
+             * This test case is needed for completeness.
              */
             const token = tokenStore.getFirstToken(
                 { range: [ast.comments[0].range[0], ast.tokens[5].range[1]] }
@@ -654,6 +654,38 @@ describe("TokenStore", () => {
             assert.strictEqual(token.value, "c");
         });
 
+        it("should retrieve the first token if the root node contains a trailing comment", () => {
+            const parser = require("../../fixtures/parsers/all-comments-parser");
+            const code = "foo // comment";
+            const ast = parser.parse(code, { loc: true, range: true, tokens: true, comment: true });
+            const tokenStore = new TokenStore(ast.tokens, ast.comments);
+            const token = tokenStore.getFirstToken(ast);
+
+            assert.strictEqual(token, ast.tokens[0]);
+        });
+
+        it("should return null if the source contains only comments", () => {
+            const code = "// comment";
+            const ast = espree.parse(code, { loc: true, range: true, tokens: true, comment: true });
+            const tokenStore = new TokenStore(ast.tokens, ast.comments);
+            const token = tokenStore.getFirstToken(ast, {
+                filter() {
+                    assert.fail("Unexpected call to filter callback");
+                }
+            });
+
+            assert.strictEqual(token, null);
+        });
+
+        it("should return null if the source is empty", () => {
+            const code = "";
+            const ast = espree.parse(code, { loc: true, range: true, tokens: true, comment: true });
+            const tokenStore = new TokenStore(ast.tokens, ast.comments);
+            const token = tokenStore.getFirstToken(ast);
+
+            assert.strictEqual(token, null);
+        });
+
     });
 
     describe("when calling getLastTokens", () => {
@@ -814,8 +846,8 @@ describe("TokenStore", () => {
             const tokenStore = new TokenStore(ast.tokens, ast.comments);
 
             /*
-             * Actually, the last of nodes is always tokens, not comments.
-             * But I think this test case is needed for completeness.
+             * A node must not end with a token: it can end with a comment or be empty.
+             * This test case is needed for completeness.
              */
             const token = tokenStore.getLastToken(
                 { range: [ast.tokens[0].range[0], ast.comments[0].range[1]] },
@@ -831,8 +863,8 @@ describe("TokenStore", () => {
             const tokenStore = new TokenStore(ast.tokens, ast.comments);
 
             /*
-             * Actually, the last of nodes is always tokens, not comments.
-             * But I think this test case is needed for completeness.
+             * A node must not end with a token: it can end with a comment or be empty.
+             * This test case is needed for completeness.
              */
             const token = tokenStore.getLastToken(
                 { range: [ast.tokens[0].range[0], ast.comments[0].range[1]] }
@@ -841,6 +873,38 @@ describe("TokenStore", () => {
             assert.strictEqual(token.value, "b");
         });
 
+        it("should retrieve the last token if the root node contains a trailing comment", () => {
+            const parser = require("../../fixtures/parsers/all-comments-parser");
+            const code = "foo // comment";
+            const ast = parser.parse(code, { loc: true, range: true, tokens: true, comment: true });
+            const tokenStore = new TokenStore(ast.tokens, ast.comments);
+            const token = tokenStore.getLastToken(ast);
+
+            assert.strictEqual(token, ast.tokens[0]);
+        });
+
+        it("should return null if the source contains only comments", () => {
+            const code = "// comment";
+            const ast = espree.parse(code, { loc: true, range: true, tokens: true, comment: true });
+            const tokenStore = new TokenStore(ast.tokens, ast.comments);
+            const token = tokenStore.getLastToken(ast, {
+                filter() {
+                    assert.fail("Unexpected call to filter callback");
+                }
+            });
+
+            assert.strictEqual(token, null);
+        });
+
+        it("should return null if the source is empty", () => {
+            const code = "";
+            const ast = espree.parse(code, { loc: true, range: true, tokens: true, comment: true });
+            const tokenStore = new TokenStore(ast.tokens, ast.comments);
+            const token = tokenStore.getLastToken(ast);
+
+            assert.strictEqual(token, null);
+        });
+
     });
 
     describe("when calling getFirstTokensBetween", () => {