From 4604f3c1d5849af27f6b74635249907472e8df4c Mon Sep 17 00:00:00 2001 From: Anthony Fu Date: Tue, 14 Jun 2022 13:54:24 +0800 Subject: [PATCH] feat!: introduce strip using regex as fallback (#1) --- .github/workflows/release.yml | 8 +++--- README.md | 26 +++++++++++++++++ src/acorn.ts | 42 +++++++++++++++++++++++++++ src/index.ts | 54 +++++++++++------------------------ src/regex.ts | 38 ++++++++++++++++++++++++ test/index.test.ts | 16 ++++++++--- tsconfig.json | 6 +++- 7 files changed, 143 insertions(+), 47 deletions(-) create mode 100644 src/acorn.ts create mode 100644 src/regex.ts diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index b4b0d56..216ba3b 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -12,11 +12,11 @@ jobs: - uses: actions/checkout@v3 with: fetch-depth: 0 - - name: Set node - uses: actions/setup-node@v3 + + - uses: actions/setup-node@v3 with: node-version: 16.x - - run: npx conventional-github-releaser -p angular + - run: npx changelogithub env: - CONVENTIONAL_GITHUB_RELEASER_TOKEN: ${{secrets.GITHUB_TOKEN}} + GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} diff --git a/README.md b/README.md index 590de41..2b252f9 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,32 @@ stripLiteral('const foo = `//foo ${bar}`') // 'const foo = ` ${bar}`' Comments, string literals will be replaced by spaces with the same length to keep the source map untouched. +## Functions + +### `stripLiteralAcorn` + +Strip literal using [Acorn](https://github.com/acornjs/acorn)'s tokenizer. + +Throws an error if the input is not valid JavaScript. + +[Source](./src/acorn.ts) + +### `stripLiteralRegex` + +Strip literal using RegExp. + +This will be faster and can work on non-JavaScript input, but it has some caveats when distinguishing strings from comments. + +[Source](./src/regex.ts) + +### `stripLiteral` + +Strip literal from code. + +Tries `stripLiteralAcorn` first, and falls back to `stripLiteralRegex` if Acorn fails.
+ +[Source](./src/index.ts) + ## Sponsors

diff --git a/src/acorn.ts b/src/acorn.ts new file mode 100644 index 0000000..f247732 --- /dev/null +++ b/src/acorn.ts @@ -0,0 +1,42 @@ +import { tokenizer } from 'acorn' + +/** + * Strip literal using Acorn's tokenizer. + * + * Will throw error if the input is not valid JavaScript. + */ +export function stripLiteralAcorn(code: string) { + const FILL = ' ' + let result = '' + function fulfill(index: number) { + if (index > result.length) + result += code.slice(result.length, index).replace(/[^\n]/g, FILL) + } + + const tokens = tokenizer(code, { + ecmaVersion: 'latest', + sourceType: 'module', + allowHashBang: true, + allowAwaitOutsideFunction: true, + allowImportExportEverywhere: true, + }) + const inter = tokens[Symbol.iterator]() + + while (true) { + const { done, value: token } = inter.next() + if (done) + break + fulfill(token.start) + if (token.type.label === 'string') + result += code[token.start] + FILL.repeat(token.end - token.start - 2) + code[token.end - 1] + else if (token.type.label === 'template') + result += FILL.repeat(token.end - token.start) + + else + result += code.slice(token.start, token.end) + } + + fulfill(code.length) + + return result +} diff --git a/src/index.ts b/src/index.ts index 97bded4..87659be 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,42 +1,20 @@ -import { tokenizer } from 'acorn' +import { stripLiteralAcorn } from './acorn' +import { stripLiteralRegex } from './regex' -export function stripLiteral(code: string, forgiving = false) { - const FILL = ' ' - let result = '' - function fulfill(index: number) { - if (index > result.length) - result += code.slice(result.length, index).replace(/[^\n]/g, FILL) - } - - const tokens = tokenizer(code, { - ecmaVersion: 'latest', - sourceType: 'module', - allowHashBang: true, - allowAwaitOutsideFunction: true, - allowImportExportEverywhere: true, - }) - const inter = tokens[Symbol.iterator]() +export { stripLiteralAcorn } from './acorn' +export { stripLiteralRegex } from './regex' - while 
(true) { - try { - const { done, value: token } = inter.next() - if (done) - break - fulfill(token.start) - if (token.type.label === 'string') - result += code[token.start] + FILL.repeat(token.end - token.start - 2) + code[token.end - 1] - else if (token.type.label === 'template') - result += FILL.repeat(token.end - token.start) - else - result += code.slice(token.start, token.end) - } - catch (e) { - if (!forgiving) - throw e - } +/** + * Strip literal from code. + * + * Using Acorn's tokenizer first, and fallback to Regex if Acorn fails. + */ +export function stripLiteral(code: string) { + try { + return stripLiteralAcorn(code) + } + catch (e) { + return stripLiteralRegex(code) } - - fulfill(code.length) - - return result } + diff --git a/src/regex.ts b/src/regex.ts new file mode 100644 index 0000000..7acc773 --- /dev/null +++ b/src/regex.ts @@ -0,0 +1,38 @@ +const multilineCommentsRE = /\/\*.*?\*\//gms +const singlelineCommentsRE = /\/\/.*$/gm +const templateLiteralRE = /\$\{(\s*(?:(?!\$\{).|\n|\r)*?\s*)\}/g +const quotesRE = [ + /(["'`])((?:\\\1|(?!\1)|.|\r)*?)\1/gm, + /([`])((?:\\\1|(?!\1)|.|\n|\r)*?)\1/gm, // multi-line strings (i.e. template literals only) +] + +/** + * Strip literal using RegExp. + * + * This will be faster and can work on non-JavaScript input. + * But will have some caveats on distinguish strings and comments. + */ +export function stripLiteralRegex(code: string) { + code = code + .replace(multilineCommentsRE, s => ' '.repeat(s.length)) + .replace(singlelineCommentsRE, s => ' '.repeat(s.length)) + + let expanded = code + // Recursively replace ${} to support nested constructs (e.g. 
${`${x}`}) + for (let i = 0; i < 16; i++) { + const before = expanded + expanded = expanded.replace(templateLiteralRE, '` $1`') + if (expanded === before) + break + } + + quotesRE.forEach((re) => { + expanded = expanded + .replace(re, (s, quote, body, index) => { + code = code.slice(0, index + 1) + ' '.repeat(s.length - 2) + code.slice(index + s.length - 1) + return quote + ' '.repeat(s.length - 2) + quote + }) + }) + + return code +} diff --git a/test/index.test.ts b/test/index.test.ts index 58e7476..d0ae0a8 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -3,7 +3,7 @@ import { parse } from 'acorn' import { expect, test } from 'vitest' import { stripLiteral } from '../src' -function executeWithVerify(code: string) { +function executeWithVerify(code: string, verifyAst = true) { code = code.trim() const result = stripLiteral(code) @@ -15,7 +15,8 @@ function executeWithVerify(code: string) { expect(result.length).toBe(code.length) // make sure no syntax errors - parse(result, { ecmaVersion: 'latest', sourceType: 'module' }) + if (verifyAst) + parse(result, { ecmaVersion: 'latest', sourceType: 'module' }) return result } @@ -109,6 +110,13 @@ const b = "b \` " `)).toMatchSnapshot() }) +test('acorn syntax error', () => { + expect(executeWithVerify(` +foo(\`fooo \${foo({ class: "foo" })} bar\`) + `, false)) + .toMatchInlineSnapshot('"foo(` \${foo({ class: \\" \\" } `)"') +}) + test('template string nested', () => { let str = '`aaaa`' expect(executeWithVerify(str)).toMatchInlineSnapshot('"` `"') @@ -161,7 +169,7 @@ test('forgiving', () => { 2 ) -`, true)).toMatchInlineSnapshot(` +`)).toMatchInlineSnapshot(` " " `) }) diff --git a/tsconfig.json b/tsconfig.json index 68f2e4a..2ced197 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -10,5 +10,9 @@ "resolveJsonModule": true, "skipLibCheck": true, "skipDefaultLibCheck": true - } + }, + "include": [ + "./src/**/*.*", + "./test/**/*.*" + ] }