feat!: introduce strip using regex as fallback (#1)

antfu · Jun 14, 2022 · 4604f3c · 4604f3c
1 parent dd0754f
commit 4604f3c
Show file tree

Hide file tree

Showing 7 changed files with 143 additions and 47 deletions.
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
@@ -12,11 +12,11 @@ jobs:
       - uses: actions/checkout@v3
         with:
           fetch-depth: 0
-      - name: Set node
-        uses: actions/setup-node@v3
+
+      - uses: actions/setup-node@v3
         with:
           node-version: 16.x
 
-      - run: npx conventional-github-releaser -p angular
+      - run: npx changelogithub
         env:
-          CONVENTIONAL_GITHUB_RELEASER_TOKEN: ${{secrets.GITHUB_TOKEN}}
+          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
diff --git a/README.md b/README.md
@@ -16,6 +16,32 @@ stripLiteral('const foo = `//foo ${bar}`') // 'const foo = `       ${bar}`'
 
 Comments, string literals will be replaced by spaces with the same length to keep the source map untouched.
 
+## Functions
+
+### `stripLiteralAcorn`
+
+Strip literal using [Acorn](https://github.com/acornjs/acorn)'s tokenizer.
+
+Throws an error if the input is not valid JavaScript.
+
+[Source](./src/acorn.ts)
+
+### `stripLiteralRegex`
+
+Strip literal using RegExp.
+
+This is faster and also works on non-JavaScript input, but has some caveats when distinguishing strings from comments.
+
+[Source](./src/regex.ts)
+
+### `stripLiteral`
+
+Strip literal from code.
+
+Tries `stripLiteralAcorn` first, and falls back to `stripLiteralRegex` if Acorn fails.
+
+[Source](./src/index.ts)
+
 ## Sponsors
 
 <p align="center">

diff --git a/src/acorn.ts b/src/acorn.ts
@@ -0,0 +1,42 @@
+import { tokenizer } from 'acorn'
+
+/**
+ * Strip literal using Acorn's tokenizer.
+ *
+ * Throws an error if the input is not valid JavaScript.
+ */
+export function stripLiteralAcorn(code: string) {
+  const FILL = ' '
+  let result = ''
+  function fulfill(index: number) {
+    if (index > result.length)
+      result += code.slice(result.length, index).replace(/[^\n]/g, FILL)
+  }
+
+  const tokens = tokenizer(code, {
+    ecmaVersion: 'latest',
+    sourceType: 'module',
+    allowHashBang: true,
+    allowAwaitOutsideFunction: true,
+    allowImportExportEverywhere: true,
+  })
+  const inter = tokens[Symbol.iterator]()
+
+  while (true) {
+    const { done, value: token } = inter.next()
+    if (done)
+      break
+    fulfill(token.start)
+    if (token.type.label === 'string')
+      result += code[token.start] + FILL.repeat(token.end - token.start - 2) + code[token.end - 1]
+    else if (token.type.label === 'template')
+      result += FILL.repeat(token.end - token.start)
+
+    else
+      result += code.slice(token.start, token.end)
+  }
+
+  fulfill(code.length)
+
+  return result
+}
diff --git a/src/index.ts b/src/index.ts
@@ -1,42 +1,20 @@
-import { tokenizer } from 'acorn'
+import { stripLiteralAcorn } from './acorn'
+import { stripLiteralRegex } from './regex'
 
-export function stripLiteral(code: string, forgiving = false) {
-  const FILL = ' '
-  let result = ''
-  function fulfill(index: number) {
-    if (index > result.length)
-      result += code.slice(result.length, index).replace(/[^\n]/g, FILL)
-  }
-
-  const tokens = tokenizer(code, {
-    ecmaVersion: 'latest',
-    sourceType: 'module',
-    allowHashBang: true,
-    allowAwaitOutsideFunction: true,
-    allowImportExportEverywhere: true,
-  })
-  const inter = tokens[Symbol.iterator]()
+export { stripLiteralAcorn } from './acorn'
+export { stripLiteralRegex } from './regex'
 
-  while (true) {
-    try {
-      const { done, value: token } = inter.next()
-      if (done)
-        break
-      fulfill(token.start)
-      if (token.type.label === 'string')
-        result += code[token.start] + FILL.repeat(token.end - token.start - 2) + code[token.end - 1]
-      else if (token.type.label === 'template')
-        result += FILL.repeat(token.end - token.start)
-      else
-        result += code.slice(token.start, token.end)
-    }
-    catch (e) {
-      if (!forgiving)
-        throw e
-    }
+/**
+ * Strip literal from code.
+ *
+ * Uses Acorn's tokenizer first, and falls back to RegExp if Acorn fails.
+ */
+export function stripLiteral(code: string) {
+  try {
+    return stripLiteralAcorn(code)
+  }
+  catch (e) {
+    return stripLiteralRegex(code)
   }
-
-  fulfill(code.length)
-
-  return result
 }
+
diff --git a/src/regex.ts b/src/regex.ts
@@ -0,0 +1,38 @@
+const multilineCommentsRE = /\/\*.*?\*\//gms
+const singlelineCommentsRE = /\/\/.*$/gm
+const templateLiteralRE = /\$\{(\s*(?:(?!\$\{).|\n|\r)*?\s*)\}/g
+const quotesRE = [
+  /(["'`])((?:\\\1|(?!\1)|.|\r)*?)\1/gm,
+  /([`])((?:\\\1|(?!\1)|.|\n|\r)*?)\1/gm, // multi-line strings (i.e. template literals only)
+]
+
+/**
+ * Strip literal using RegExp.
+ *
+ * This is faster and also works on non-JavaScript input,
+ * but has some caveats when distinguishing strings from comments.
+ */
+export function stripLiteralRegex(code: string) {
+  code = code
+    .replace(multilineCommentsRE, s => ' '.repeat(s.length))
+    .replace(singlelineCommentsRE, s => ' '.repeat(s.length))
+
+  let expanded = code
+  // Recursively replace ${} to support nested constructs (e.g. ${`${x}`})
+  for (let i = 0; i < 16; i++) {
+    const before = expanded
+    expanded = expanded.replace(templateLiteralRE, '` $1`')
+    if (expanded === before)
+      break
+  }
+
+  quotesRE.forEach((re) => {
+    expanded = expanded
+      .replace(re, (s, quote, body, index) => {
+        code = code.slice(0, index + 1) + ' '.repeat(s.length - 2) + code.slice(index + s.length - 1)
+        return quote + ' '.repeat(s.length - 2) + quote
+      })
+  })
+
+  return code
+}
diff --git a/test/index.test.ts b/test/index.test.ts
@@ -3,7 +3,7 @@ import { parse } from 'acorn'
 import { expect, test } from 'vitest'
 import { stripLiteral } from '../src'
 
-function executeWithVerify(code: string) {
+function executeWithVerify(code: string, verifyAst = true) {
   code = code.trim()
   const result = stripLiteral(code)
 
@@ -15,7 +15,8 @@ function executeWithVerify(code: string) {
   expect(result.length).toBe(code.length)
 
   // make sure no syntax errors
-  parse(result, { ecmaVersion: 'latest', sourceType: 'module' })
+  if (verifyAst)
+    parse(result, { ecmaVersion: 'latest', sourceType: 'module' })
 
   return result
 }
@@ -109,6 +110,13 @@ const b = "b \` "
   `)).toMatchSnapshot()
 })
 
+test('acorn syntax error', () => {
+  expect(executeWithVerify(`
+foo(\`fooo \${foo({ class: "foo" })} bar\`)
+  `, false))
+    .toMatchInlineSnapshot('"foo(`     \${foo({ class: \\"   \\" }      `)"')
+})
+
 test('template string nested', () => {
   let str = '`aaaa`'
   expect(executeWithVerify(str)).toMatchInlineSnapshot('"`    `"')
@@ -161,7 +169,7 @@ test('forgiving', () => {
     2
   )
 </script>
-`, true)).toMatchInlineSnapshot(`
+`)).toMatchInlineSnapshot(`
   "
   <script type=\\"      \\">
     const rawModules = import.meta.globEager('           ', {
@@ -176,7 +184,7 @@ test('forgiving', () => {
       null,
       2
     )
-  <        
+  </script>
   "
 `)
 })
diff --git a/tsconfig.json b/tsconfig.json
@@ -10,5 +10,9 @@
     "resolveJsonModule": true,
     "skipLibCheck": true,
     "skipDefaultLibCheck": true
-  }
+  },
+  "include": [
+    "./src/**/*.*",
+    "./test/**/*.*"
+  ]
 }