From bad0e6ff01b5ce2c563d77c67cb7f0ddcc3bb3c4 Mon Sep 17 00:00:00 2001 From: Vladimir Date: Fri, 29 Dec 2023 13:03:43 +0100 Subject: [PATCH] feat!: use js-tokens instead of acorn (#6) Co-authored-by: Anthony Fu --- README.md | 35 +---- bench/index.bench.ts | 12 +- package.json | 2 +- pnpm-lock.yaml | 11 +- src/acorn.ts | 148 ---------------------- src/index.ts | 33 +---- src/js-tokens.ts | 86 +++++++++++++ src/regex.ts | 46 ------- test/__snapshots__/index.test.ts.snap | 40 +++--- test/createIsLiteralPosition.test.ts | 53 -------- test/custom-fill.test.ts | 40 ++---- test/filter.test.ts | 4 +- test/fixtures.test.ts | 2 +- test/fixtures/backtick-escape.output.js | 1 - test/fixtures/backtick-in-regex.output.js | 1 - test/fixtures/comment-in-string.output.js | 1 - test/fixtures/forgiving.output.vue | 1 - test/fixtures/issue4.output.js | 1 - test/index.test.ts | 57 ++++----- test/utils.ts | 9 +- 20 files changed, 158 insertions(+), 425 deletions(-) delete mode 100644 src/acorn.ts create mode 100644 src/js-tokens.ts delete mode 100644 src/regex.ts delete mode 100644 test/createIsLiteralPosition.test.ts diff --git a/README.md b/README.md index b2b2208..1af1379 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![NPM version](https://img.shields.io/npm/v/strip-literal?color=a1b858&label=)](https://www.npmjs.com/package/strip-literal) -Strip comments and string literals from JavaScript code. Powered by [acorn](https://github.com/acornjs/acorn)'s tokenizer. +Strip comments and string literals from JavaScript code. Powered by [`js-tokens`](https://github.com/lydell/js-tokens). ## Usage @@ -16,39 +16,6 @@ stripLiteral('const foo = `//foo ${bar}`') // 'const foo = ` ${bar}`' Comments, string literals will be replaced by spaces with the same length to keep the source map untouched. -## Functions - -### `stripLiteralAcorn` - -Strip literal using [Acorn](https://github.com/acornjs/acorn)'s tokenizer. - -Will throw error if the input is not valid JavaScript. - -[Source](./src/acorn.ts) - -### `stripLiteralRegex` - -Strip literal using RegExp. - -This will be faster and can work on non-JavaScript input. But will have some caveats on distinguish strings and comments. - -[Source](./src/regex.ts) - -### `stripLiteral` - -Strip literal from code. - -Try to use `stripLiteralAcorn` first, and fallback to `stripLiteralRegex` if Acorn fails. - -[Source](./src/index.ts) - -### `createIsLiteralPositionAcorn` -Returns a function that returns whether the position is in a literal using [Acorn](https://github.com/acornjs/acorn)'s tokenizer. - -Will throw error if the input is not valid JavaScript. - -[Source](./src/acorn.ts) - ## Sponsors

diff --git a/bench/index.bench.ts b/bench/index.bench.ts index 2d36a5c..50ba154 100644 --- a/bench/index.bench.ts +++ b/bench/index.bench.ts @@ -1,7 +1,7 @@ /* eslint-disable test/consistent-test-it */ import { readFile } from 'node:fs/promises' import { bench, describe } from 'vitest' -import { createIsLiteralPositionAcorn, stripLiteralAcorn, stripLiteralRegex } from '../src' +import { stripLiteralJsTokens } from '../src' const modules = { 'vue-global': './node_modules/vue/dist/vue.runtime.global.js', @@ -11,14 +11,8 @@ const modules = { Object.entries(modules).forEach(([name, path]) => { describe(`bench ${name}`, async () => { const code = await readFile(path, 'utf-8') - bench('stripLiteral (regex)', () => { - stripLiteralRegex(code) - }) - bench('stripLiteral (acorn)', () => { - stripLiteralAcorn(code) - }) - bench('createIsLiteralPositionAcorn (acorn)', () => { - createIsLiteralPositionAcorn(code) + bench('stripLiteral (js-tokens)', () => { + stripLiteralJsTokens(code) }) }) }) diff --git a/package.json b/package.json index 0eee844..970b6a6 100644 --- a/package.json +++ b/package.json @@ -41,7 +41,7 @@ "typecheck": "tsc --noEmit" }, "dependencies": { - "acorn": "^8.11.2" + "js-tokens": "^8.0.2" }, "devDependencies": { "@antfu/eslint-config": "^2.6.1", diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index c26a471..433cf7a 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -8,9 +8,9 @@ importers: .: dependencies: - acorn: - specifier: ^8.11.2 - version: 8.11.2 + js-tokens: + specifier: ^8.0.2 + version: 8.0.2 devDependencies: '@antfu/eslint-config': specifier: ^2.6.1 @@ -1510,6 +1510,7 @@ packages: resolution: {integrity: sha512-nc0Axzp/0FILLEVsm4fNwLCwMttvhEI263QtVPQcbpfZZ3ts0hLsZGOpE6czNlid7CJ9MlyH8reXkpsf3YUY4w==} engines: {node: '>=0.4.0'} hasBin: true + dev: true /ajv@6.12.6: resolution: {integrity: sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==} @@ -2910,6 +2911,10 @@ packages: requiresBuild: true dev: true + /js-tokens@8.0.2: + resolution: {integrity: sha512-Olnt+V7xYdvGze9YTbGFZIfQXuGV4R3nQwwl8BrtgaPE/wq8UFpUHWuTNc05saowhSr1ZO6tx+V6RjE9D5YQog==} + dev: false + /js-yaml@4.1.0: resolution: {integrity: sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==} hasBin: true diff --git a/src/acorn.ts b/src/acorn.ts deleted file mode 100644 index dfbd0bc..0000000 --- a/src/acorn.ts +++ /dev/null @@ -1,148 +0,0 @@ -import { tokenizer } from 'acorn' -import type { Parser, Token } from 'acorn' -import type { StripLiteralOptions } from './types' - -/** - * Strip literal using Acorn's tokenizer. - * - * Will throw error if the input is not valid JavaScript. - */ -export function _stripLiteralAcorn(code: string, options?: StripLiteralOptions) { - const FILL = options?.fillChar ?? ' ' - const FILL_COMMENT = ' ' - let result = '' - - const filter = options?.filter ?? (() => true) - - function fillupTo(index: number) { - if (index > result.length) - result += code.slice(result.length, index).replace(/[^\n]/g, FILL_COMMENT) - } - - const tokens: Token[] = [] - const pasers = tokenizer(code, { - ecmaVersion: 'latest', - sourceType: 'module', - allowHashBang: true, - allowAwaitOutsideFunction: true, - allowImportExportEverywhere: true, - }) as Parser & ReturnType - const iter = pasers[Symbol.iterator]() - - let error: any - try { - while (true) { - const { done, value: token } = iter.next() - if (done) - break - - tokens.push(token) - fillupTo(token.start) - - if (token.type.label === 'string') { - const body = code.slice(token.start + 1, token.end - 1) - if (filter(body)) { - result += code[token.start] + FILL.repeat(token.end - token.start - 2) + code[token.end - 1] - continue - } - } - - else if (token.type.label === 'template') { - const body = code.slice(token.start, token.end) - if (filter(body)) { - result += FILL.repeat(token.end - token.start) - continue - } - } - - else if (token.type.label === 'regexp') { - const body = code.slice(token.start, token.end) - if (filter(body)) { - result += body.replace(/\/(.*)\/(\w?)$/g, (_, $1, $2) => `/${FILL.repeat($1.length)}/${$2}`) - continue - } - } - - result += code.slice(token.start, token.end) - } - - fillupTo(code.length) - } - catch (e) { - error = e - } - - return { - error, - result, - tokens, - } -} - -/** - * Strip literal using Acorn's tokenizer. - * - * Will throw error if the input is not valid JavaScript. - */ -export function stripLiteralAcorn(code: string, options?: StripLiteralOptions) { - const result = _stripLiteralAcorn(code, options) - if (result.error) - throw result.error - return result.result -} - -/** - * Returns a function that returns whether the position is - * in a literal using Acorn's tokenizer. - * - * Will throw error if the input is not valid JavaScript. - */ -export function createIsLiteralPositionAcorn(code: string) { - // literal start position, non-literal start position, literal start position, ... - const positionList: number[] = [] - - const tokens = tokenizer(code, { - ecmaVersion: 'latest', - sourceType: 'module', - allowHashBang: true, - allowAwaitOutsideFunction: true, - allowImportExportEverywhere: true, - onComment(_isBlock, _text, start, end) { - positionList.push(start) - positionList.push(end) - }, - }) - const inter = tokens[Symbol.iterator]() - - while (true) { - const { done, value: token } = inter.next() - if (done) - break - if (token.type.label === 'string') { - positionList.push(token.start + 1) - positionList.push(token.end - 1) - } - else if (token.type.label === 'template') { - positionList.push(token.start) - positionList.push(token.end) - } - } - - return (position: number) => { - const i = binarySearch(positionList, v => position < v) - return (i - 1) % 2 === 0 - } -} - -function binarySearch(array: ArrayLike, pred: (v: number) => boolean) { - let low = -1 - let high = array.length - while (1 + low < high) { - const mid = low + ((high - low) >> 1) - if (pred(array[mid])) - high = mid - else - low = mid - } - return high -} diff --git a/src/index.ts b/src/index.ts index 3cb412b..e16a710 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,15 +1,11 @@ -import { _stripLiteralAcorn } from './acorn' -import { stripLiteralRegex } from './regex' +import { stripLiteralJsTokens } from './js-tokens' import type { StripLiteralOptions } from './types' -export { stripLiteralAcorn, createIsLiteralPositionAcorn } from './acorn' -export { stripLiteralRegex } from './regex' +export { stripLiteralJsTokens } from './js-tokens' export * from './types' /** * Strip literal from code. - * - * Using Acorn's tokenizer first, and fallback to Regex if Acorn fails. */ export function stripLiteral(code: string, options?: StripLiteralOptions) { return stripLiteralDetailed(code, options).result @@ -17,28 +13,7 @@ export function stripLiteral(code: string, options?: StripLiteralOptions) { /** * Strip literal from code, return more detailed information. - * - * Using Acorn's tokenizer first, and fallback to Regex if Acorn fails. */ -export function stripLiteralDetailed(code: string, options?: StripLiteralOptions): { - mode: 'acorn' | 'regex' - result: string - acorn: { - tokens: any[] - error?: any - } -} { - const acorn = _stripLiteralAcorn(code, options) - if (!acorn.error) { - return { - mode: 'acorn', - result: acorn.result, - acorn, - } - } - return { - mode: 'regex', - result: stripLiteralRegex(acorn.result + code.slice(acorn.result.length), options), - acorn, - } +export function stripLiteralDetailed(code: string, options?: StripLiteralOptions) { + return stripLiteralJsTokens(code, options) } diff --git a/src/js-tokens.ts b/src/js-tokens.ts new file mode 100644 index 0000000..2f0f9d4 --- /dev/null +++ b/src/js-tokens.ts @@ -0,0 +1,86 @@ +import type { Token as JSToken } from 'js-tokens' +import jsTokens from 'js-tokens' +import type { StripLiteralOptions } from './types' + +export function stripLiteralJsTokens(code: string, options?: StripLiteralOptions) { + const FILL = options?.fillChar ?? ' ' + const FILL_COMMENT = ' ' + let result = '' + + const filter = options?.filter ?? (() => true) + + const tokens: JSToken[] = [] + + // jsx: false is more correct when parsing html + for (const token of jsTokens(code, { jsx: false })) { + tokens.push(token) + + if (token.type === 'SingleLineComment') { + result += FILL_COMMENT.repeat(token.value.length) + continue + } + + if (token.type === 'MultiLineComment') { + result += token.value.replace(/[^\n]/g, FILL_COMMENT) + continue + } + + if (token.type === 'StringLiteral') { + const body = token.value.slice(1, -1) + if (filter(body)) { + result += token.value[0] + FILL.repeat(body.length) + token.value[token.value.length - 1] + continue + } + } + + if (token.type === 'NoSubstitutionTemplate') { + const body = token.value.slice(1, -1) + if (filter(body)) { + result += `\`${body.replace(/[^\n]/g, FILL)}\`` + continue + } + } + + if (token.type === 'RegularExpressionLiteral') { + const body = token.value + if (filter(body)) { + result += body.replace(/\/(.*)\/(\w?)$/g, (_, $1, $2) => `/${FILL.repeat($1.length)}/${$2}`) + continue + } + } + + // `start${ + if (token.type === 'TemplateHead') { + const body = token.value.slice(1, -2) + if (filter(body)) { + result += `\`${body.replace(/[^\n]/g, FILL)}\${` + continue + } + } + + // }end` + if (token.type === 'TemplateTail') { + const body = token.value.slice(0, -2) + if (filter(body)) { + result += `}${body.replace(/[^\n]/g, FILL)}\`` + continue + } + } + + // }middle${ + if (token.type === 'TemplateMiddle') { + const body = token.value.slice(1, -2) + if (filter(body)) { + result += `}${body.replace(/[^\n]/g, FILL)}\${` + continue + } + } + + result += token.value + } + + return { + result, + tokens, + } +} diff --git a/src/regex.ts b/src/regex.ts deleted file mode 100644 index e178d43..0000000 --- a/src/regex.ts +++ /dev/null @@ -1,46 +0,0 @@ -import type { StripLiteralOptions } from './types' - -const multilineCommentsRE = /\/\*([^*\/])*?\*\//gms -const singlelineCommentsRE = /(?:^|\n|\r)\s*\/\/.*(?:\r|\n|$)/gm -const templateLiteralRE = /\$\{(\s*(?:|{.*}|(?!\$\{).|\n|\r)*?\s*)\}/g -const quotesRE = [ - /(["'`])((?:\\\1|(?!\1)|.|\r)*?)\1/gm, - /([`])((?:\\\1|(?!\1)|.|\n|\r)*?)\1/gm, // multi-line strings (i.e. template literals only) -] - -/** - * Strip literal using RegExp. - * - * This will be faster and can work on non-JavaScript input. - * But will have some caveats on distinguish strings and comments. - */ -export function stripLiteralRegex(code: string, options?: StripLiteralOptions) { - const FILL_COMMENT = ' ' - const FILL = options?.fillChar ?? ' ' - const filter = options?.filter ?? (() => true) - - code = code - .replace(multilineCommentsRE, s => filter(s) ? FILL_COMMENT.repeat(s.length) : s) - .replace(singlelineCommentsRE, s => filter(s) ? FILL_COMMENT.repeat(s.length) : s) - - let expanded = code - // Recursively replace ${} to support nested constructs (e.g. ${`${x}`}) - for (let i = 0; i < 16; i++) { - const before = expanded - expanded = expanded.replace(templateLiteralRE, '` $1`') - if (expanded === before) - break - } - - quotesRE.forEach((re) => { - expanded = expanded - .replace(re, (s, quote, body, index) => { - if (!filter(s.slice(1, -1))) - return s - code = code.slice(0, index + 1) + FILL.repeat(s.length - 2) + code.slice(index + s.length - 1) - return quote + FILL.repeat(s.length - 2) + quote - }) - }) - - return code -} diff --git a/test/__snapshots__/index.test.ts.snap b/test/__snapshots__/index.test.ts.snap index 80780ba..2d693e2 100644 --- a/test/__snapshots__/index.test.ts.snap +++ b/test/__snapshots__/index.test.ts.snap @@ -1,8 +1,7 @@ // Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html exports[`escape character 1`] = ` -"// mode: acorn -' ' +"' ' " " " " " " @@ -17,9 +16,18 @@ exports[`escape character 1`] = ` " "" `; +exports[`multiline string 1`] = ` +"const a = \` + + + + + +\`" +`; + exports[`regexp affect 1`] = ` -"// mode: acorn -[ +"[ / /, ' ', / /, @@ -28,56 +36,48 @@ exports[`regexp affect 1`] = ` `; exports[`strings comment nested 1`] = ` -"// mode: acorn - +" const a = " " " `; exports[`strings comment nested 2`] = ` -"// mode: acorn - +" const a = " " " `; exports[`strings comment nested 3`] = ` -"// mode: acorn - +" const a = " " " `; exports[`strings comment nested 4`] = ` -"// mode: acorn -const a = " " +"const a = " " console.log(" ")" `; exports[`strings comment nested 5`] = ` -"// mode: acorn -const a = " " +"const a = " " console.log(" ") const b = " "" `; exports[`strings comment nested 6`] = ` -"// mode: acorn -const a = " " +"const a = " " console.log(" ") const b = " "" `; exports[`strings comment nested 7`] = ` -"// mode: acorn -const a = " " +"const a = " " console.log(" ") const b = " "" `; exports[`works 1`] = ` -"// mode: acorn - +" const a = ' ' const b = " " diff --git a/test/createIsLiteralPosition.test.ts b/test/createIsLiteralPosition.test.ts deleted file mode 100644 index 84cd8b7..0000000 --- a/test/createIsLiteralPosition.test.ts +++ /dev/null @@ -1,53 +0,0 @@ -/* eslint-disable no-template-curly-in-string */ -import { expect, it } from 'vitest' -import { createIsLiteralPositionAcorn } from '../src' - -function execute(code: string) { - const isLiteralPosition = createIsLiteralPositionAcorn(code) - - const positions = Array.from({ length: code.length }, (_, i) => i) - const result = positions - .map((pos) => { - if (code[pos] === '\n') - return isLiteralPosition(pos) ? '+\n' : '\n' - return isLiteralPosition(pos) ? '*' : code[pos] - }) - .join('') - - return result -} - -it('works', () => { - expect(execute(` -const a = 0 - `)).toMatchSnapshot() - expect(execute(` -// a -const a = 0 - `)).toMatchSnapshot() - expect(execute(` -/* a */ -const a = 0 - `)).toMatchSnapshot() - expect(execute(` -/* - a -*/ -const a = 0 - `)).toMatchSnapshot() - expect(execute(` -const a = 'a' - `)).toMatchSnapshot() - expect(execute(` -const a = "a" - `)).toMatchSnapshot() - expect(execute(` -const a = \`c\${b}\` - `)).toMatchSnapshot() -}) - -it('template string nested', () => { - expect(execute( - '`aa${a + `a`}aa`', - )).toMatchSnapshot() -}) diff --git a/test/custom-fill.test.ts b/test/custom-fill.test.ts index 6462a6a..bc19d95 100644 --- a/test/custom-fill.test.ts +++ b/test/custom-fill.test.ts @@ -3,21 +3,23 @@ import { executeWithVerify } from './utils' describe('custom-fill', () => { it('acorn', () => { - const result = executeWithVerify(` + const result = executeWithVerify( + ` // comment1 const a = 'aaaa' /* comment2 */ const b = "bbbb" const c = \`aaaa\${foo}dddd\${bar}\` - `.trim(), false, { - fillChar: '-', - filter: s => s !== 'aaaa', - }) + `.trim(), + { + fillChar: '-', + filter: s => s !== 'aaaa', + }, + ) expect(result).toMatchInlineSnapshot(` - "// mode: acorn - + " const a = 'aaaa' const b = "----" @@ -25,28 +27,4 @@ const c = \`aaaa\${foo}dddd\${bar}\` const c = \`aaaa\${foo}----\${bar}\`" `) }) - - it('regex', () => { - const result = executeWithVerify(` -\\ -// comment1 -const a = 'aaaa' -/* comment2 */ -const b = "bbbb" - -const c = \`aaaa\${foo}dddd\${bar}\` - `.trim(), false, { - fillChar: '-', - filter: s => s !== 'aaaa', - }) - - expect(result).toMatchInlineSnapshot(` - "// mode: regex - \\ const a = 'aaaa' - - const b = "----" - - const c = \`aaaa\${foo}----\${bar}\`" - `) - }) }) diff --git a/test/filter.test.ts b/test/filter.test.ts index fda3354..b0fbea2 100644 --- a/test/filter.test.ts +++ b/test/filter.test.ts @@ -13,7 +13,6 @@ const b = "bbbb" const c = \`aaaa\${foo}dddd\${bar}\` `.trim(), - true, { filter: (s) => { items.push(s) @@ -23,8 +22,7 @@ const c = \`aaaa\${foo}dddd\${bar}\` ) expect(result).toMatchInlineSnapshot(` - "// mode: acorn - + " const a = 'aaaa' const b = " " diff --git a/test/fixtures.test.ts b/test/fixtures.test.ts index 7f9b1ff..5992768 100644 --- a/test/fixtures.test.ts +++ b/test/fixtures.test.ts @@ -8,7 +8,7 @@ describe('fixtures', () => { continue it(path, async () => { const raw = await input() - const code = executeWithVerify(raw, !!path.match(/\.(ts|js)$/) && !raw.includes('skip-verify')) + const code = executeWithVerify(raw) await expect(code) .toMatchFileSnapshot(path.replace(/\.(\w+)$/, '.output.$1')) }) diff --git a/test/fixtures/backtick-escape.output.js b/test/fixtures/backtick-escape.output.js index d74e620..cf6910a 100644 --- a/test/fixtures/backtick-escape.output.js +++ b/test/fixtures/backtick-escape.output.js @@ -1,4 +1,3 @@ -// mode: acorn this.error(` `) this.error(` `) this.error(` `) \ No newline at end of file diff --git a/test/fixtures/backtick-in-regex.output.js b/test/fixtures/backtick-in-regex.output.js index 466c6e9..c8497a4 100644 --- a/test/fixtures/backtick-in-regex.output.js +++ b/test/fixtures/backtick-in-regex.output.js @@ -1,4 +1,3 @@ -// mode: acorn var r = / /; foobar(`${foo({ class: " " })}`); diff --git a/test/fixtures/comment-in-string.output.js b/test/fixtures/comment-in-string.output.js index 0fea1b7..8efdec3 100644 --- a/test/fixtures/comment-in-string.output.js +++ b/test/fixtures/comment-in-string.output.js @@ -1,4 +1,3 @@ -// mode: acorn const url= ` `; const url1= ' '; onMounted(() => console.log(123)) diff --git a/test/fixtures/forgiving.output.vue b/test/fixtures/forgiving.output.vue index b23790b..0b62042 100644 --- a/test/fixtures/forgiving.output.vue +++ b/test/fixtures/forgiving.output.vue @@ -1,4 +1,3 @@ -// mode: regex