Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat!: introduce strip using regex as fallback (#1)
- Loading branch information
Showing
7 changed files
with
143 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
import { tokenizer } from 'acorn' | ||
|
||
/**
 * Strip literal using Acorn's tokenizer.
 *
 * The returned string has the same length as `code`:
 * - text the tokenizer skips between tokens (e.g. comments) is replaced with
 *   spaces, except for `\n`, which is kept;
 * - `string` tokens keep their opening and closing quote characters and have
 *   everything in between replaced with spaces;
 * - `template` tokens (the text chunks of a template literal) are replaced
 *   with spaces in full;
 * - every other token is copied through unchanged.
 *
 * Will throw error if the input is not valid JavaScript.
 */
export function stripLiteralAcorn(code: string): string {
  const FILL = ' '
  let result = ''

  // Pad `result` up to `index` with blanked-out source text. Newlines are
  // preserved so the output keeps the same line structure as the input.
  function fulfill(index: number) {
    if (index > result.length)
      result += code.slice(result.length, index).replace(/[^\n]/g, FILL)
  }

  const tokens = tokenizer(code, {
    ecmaVersion: 'latest',
    sourceType: 'module',
    allowHashBang: true,
    allowAwaitOutsideFunction: true,
    allowImportExportEverywhere: true,
  })

  for (const token of tokens) {
    // Blank whatever sits between the previous token and this one.
    fulfill(token.start)

    const { label } = token.type
    if (label === 'string') {
      // Keep the two quote characters, blank the string body.
      result += code[token.start] + FILL.repeat(token.end - token.start - 2) + code[token.end - 1]
    }
    else if (label === 'template') {
      result += FILL.repeat(token.end - token.start)
    }
    else {
      result += code.slice(token.start, token.end)
    }
  }

  // Blank anything left after the final token.
  fulfill(code.length)

  return result
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,42 +1,20 @@ | ||
import { tokenizer } from 'acorn' | ||
import { stripLiteralAcorn } from './acorn' | ||
import { stripLiteralRegex } from './regex' | ||
|
||
export function stripLiteral(code: string, forgiving = false) { | ||
const FILL = ' ' | ||
let result = '' | ||
function fulfill(index: number) { | ||
if (index > result.length) | ||
result += code.slice(result.length, index).replace(/[^\n]/g, FILL) | ||
} | ||
|
||
const tokens = tokenizer(code, { | ||
ecmaVersion: 'latest', | ||
sourceType: 'module', | ||
allowHashBang: true, | ||
allowAwaitOutsideFunction: true, | ||
allowImportExportEverywhere: true, | ||
}) | ||
const inter = tokens[Symbol.iterator]() | ||
export { stripLiteralAcorn } from './acorn' | ||
export { stripLiteralRegex } from './regex' | ||
|
||
while (true) { | ||
try { | ||
const { done, value: token } = inter.next() | ||
if (done) | ||
break | ||
fulfill(token.start) | ||
if (token.type.label === 'string') | ||
result += code[token.start] + FILL.repeat(token.end - token.start - 2) + code[token.end - 1] | ||
else if (token.type.label === 'template') | ||
result += FILL.repeat(token.end - token.start) | ||
else | ||
result += code.slice(token.start, token.end) | ||
} | ||
catch (e) { | ||
if (!forgiving) | ||
throw e | ||
} | ||
/** | ||
* Strip literal from code. | ||
* | ||
* Using Acorn's tokenizer first, and fallback to Regex if Acorn fails. | ||
*/ | ||
export function stripLiteral(code: string) { | ||
try { | ||
return stripLiteralAcorn(code) | ||
} | ||
catch (e) { | ||
return stripLiteralRegex(code) | ||
} | ||
|
||
fulfill(code.length) | ||
|
||
return result | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
const multilineCommentsRE = /\/\*.*?\*\//gms
const singlelineCommentsRE = /\/\/.*$/gm
const templateLiteralRE = /\$\{(\s*(?:(?!\$\{).|\n|\r)*?\s*)\}/g
const quotesRE = [
  /(["'`])((?:\\\1|(?!\1)|.|\r)*?)\1/gm,
  /([`])((?:\\\1|(?!\1)|.|\n|\r)*?)\1/gm, // multi-line strings (i.e. template literals only)
]
const nonNewlineRE = /[^\n]/g
// Upper bound on how many levels of nested `${}` get unwrapped.
const MAX_TEMPLATE_DEPTH = 16

/**
 * Strip literal using RegExp.
 *
 * This is faster and can work on non-JavaScript input, but it has caveats
 * when distinguishing strings from comments (for example, `//` inside a
 * string is treated as the start of a comment).
 *
 * The returned string has the same length as `code`. Comments and the bodies
 * of quoted strings are replaced with spaces; newlines inside multi-line
 * comments are kept so the line structure of the input is preserved.
 *
 * @param code - Source text to strip.
 * @returns `code` with comments and string contents blanked out.
 */
export function stripLiteralRegex(code: string): string {
  let stripped = code
    // Keep `\n` so a blanked multi-line comment still spans the same lines.
    .replace(multilineCommentsRE, s => s.replace(nonNewlineRE, ' '))
    .replace(singlelineCommentsRE, s => ' '.repeat(s.length))

  // `expanded` is a same-length working copy in which every `${...}` is
  // rewritten to "` ...`", so the text around an interpolation reads as
  // separate back-tick strings while the interpolated expression does not.
  let expanded = stripped
  // Recursively replace ${} to support nested constructs (e.g. ${`${x}`})
  for (let i = 0; i < MAX_TEMPLATE_DEPTH; i++) {
    const before = expanded
    expanded = expanded.replace(templateLiteralRE, '` $1`')
    if (expanded === before)
      break
  }

  for (const re of quotesRE) {
    expanded = expanded.replace(re, (s: string, quote: string, _body: string, index: number) => {
      const blank = ' '.repeat(s.length - 2)
      // Offsets in `expanded` and `stripped` line up because every rewrite
      // above is length-preserving; blank the same range in the real output.
      stripped = stripped.slice(0, index + 1) + blank + stripped.slice(index + s.length - 1)
      return quote + blank + quote
    })
  }

  return stripped
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters