Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(core): don't parse 'requires' from strings or comments or regexes #2393

Draft
wants to merge 9 commits into
base: 0.21
Choose a base branch
from
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ git:
depth: 1
language: node_js
node_js:
- '4'
- '6'
- '8'

before_install:
- npm install
Expand Down
59 changes: 59 additions & 0 deletions bench/get-cjs-deps-one-regex.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
var fs = require('fs');

// require('...') || exports[''] = ... || exports.asd = ... || module.exports = ...
var cjsExportsRegEx = /(?:^\uFEFF?|[^$_a-zA-Z\xA0-\uFFFF.])(exports\s*(\[['"]|\.)|module(\.exports|\['exports'\]|\["exports"\])\s*(\[['"]|[=,\.]))/;
// RegEx adjusted from https://github.com/jbrantly/yabble/blob/master/lib/yabble.js#L339
var cjsRequireRegEx = /(?:^\uFEFF?|[^$_a-zA-Z\xA0-\uFFFF."'])require\s*\(\s*("[^"\\]*(?:\\.[^"\\]*)*"|'[^'\\]*(?:\\.[^'\\]*)*')\s*\)/g;

/** This regex is used to find strings, comments and regex expressions that should be excluded from the deps search.
* All the different kinds are used as alternatives so that they can be detected in a mutually exclusive way without interfering with eachother
* Here is an example of how this expression handles complex js: https://regex101.com/r/r4A4is/1
* ( double quoted string | single quoted string | string literal | comment | multi-line comment | regular expression ) */
var depExclusionsRegEx = /("(?:\\.|[^"\\\n\r]*)*"|'(?:\\.|[^'\\\n\r]*)*'|\`(?:\\.|[^\`\\]*)*\`|\/\/[^\n\r]*|\/\*(?:\*[^\/]|[^*])*\*\/|\/(?:\\.|[^\/\\\n\r]*)*\/)/g;


function getCJSDeps(source) {
cjsRequireRegEx.lastIndex = depExclusionsRegEx.lastIndex = 0;

var deps = [];

var match;

// track string and comment locations for unminified source
var exclusionLocations = [];

function inLocation (locations, index) {
for (var i = 0; i < locations.length; i++)
if (locations[i][0] < index && locations[i][1] > index)
return true;
return false;
}

while (match = depExclusionsRegEx.exec(source))
exclusionLocations.push([match.index, match.index + match[0].length]);

while (match = cjsRequireRegEx.exec(source)) {
// ensure we're not within a string or comment location
// 1 is added to the match index to align with the `require` word
if (!inLocation(exclusionLocations, match.index + 1) ) {
var dep = match[1].substr(1, match[1].length - 2);
// skip cases like require('" + file + "')
if (dep.match(/"|'/))
continue;
// trailing slash requires are removed as they don't map mains in SystemJS
if (dep[dep.length - 1] == '/')
dep = dep.substr(0, dep.length - 1);
deps.push(dep);
}
}

return deps;
}

var cjs = fs.readFileSync('./cjs-sample/cjs.js').toString();

var startTime = Date.now();
for (var i = 0; i < 1000; i++)
getCJSDeps(cjs);
console.log(Date.now() - startTime);

36 changes: 16 additions & 20 deletions src/format-helpers.js
Original file line number Diff line number Diff line change
Expand Up @@ -200,46 +200,42 @@ export function getPathVars (moduleId) {
}

var commentRegEx = /(^|[^\\])(\/\*([\s\S]*?)\*\/|([^:]|^)\/\/(.*)$)/mg;
var stringRegEx = /("[^"\\\n\r]*(\\.[^"\\\n\r]*)*"|'[^'\\\n\r]*(\\.[^'\\\n\r]*)*')/g;

/** This regex is used to find strings, comments and regex expressions that should be excluded from the deps search.
* All the different kinds are used as alternatives so that they can be detected in a mutually exclusive way without interfering with eachother
* Here is an example of how this expression handles complex js: https://regex101.com/r/r4A4is/1
* ( double quoted string | single quoted string | string literal | comment | multi-line comment | regular expression ) */
var depExclusionsRegEx = /("(?:\\.|[^"\\\n\r]*)*"|'(?:\\.|[^'\\\n\r]*)*'|\`(?:\\.|[^\`\\]*)*\`|\/\/[^\n\r]*|\/\*(?:\*[^\/]|[^*])*\*\/|\/(?:\\.|[^\/\\\n\r]*)*\/)/g;

// used to support leading #!/usr/bin/env in scripts as supported in Node
var hashBangRegEx = /^\#\!.*/;

// extract CJS dependencies from source text via regex static analysis
// read require('x') statements not in comments or strings
// read require('x') statements not in comments or strings or regex expressions
export function getCJSDeps (source) {
cjsRequireRegEx.lastIndex = commentRegEx.lastIndex = stringRegEx.lastIndex = 0;
cjsRequireRegEx.lastIndex = depExclusionsRegEx.lastIndex = 0;

var deps = [];

var match;

// track string and comment locations for unminified source
var stringLocations = [], commentLocations = [];
// track exclusion locations
var exclusionLocations = [];

function inLocation (locations, match) {
function inLocation (locations, index) {
for (var i = 0; i < locations.length; i++)
if (locations[i][0] < match.index && locations[i][1] > match.index)
if (locations[i][0] < index && locations[i][1] > index)
return true;
return false;
}

if (source.length / source.split('\n').length < 200) {
while (match = stringRegEx.exec(source))
stringLocations.push([match.index, match.index + match[0].length]);

// TODO: track template literals here before comments

while (match = commentRegEx.exec(source)) {
// only track comments not starting in strings
if (!inLocation(stringLocations, match))
commentLocations.push([match.index + match[1].length, match.index + match[0].length - 1]);
}
}
while (match = depExclusionsRegEx.exec(source))
exclusionLocations.push([match.index, match.index + match[0].length]);

while (match = cjsRequireRegEx.exec(source)) {
// ensure we're not within a string or comment location
if (!inLocation(stringLocations, match) && !inLocation(commentLocations, match)) {
// 1 is added to the match index to align with the `require` word
if (!inLocation(exclusionLocations, match.index + 1) ) {
var dep = match[1].substr(1, match[1].length - 2);
// skip cases like require('" + file + "')
if (dep.match(/"|'/))
Expand Down
22 changes: 22 additions & 0 deletions test/test.js
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"format global";

const assert = require('assert');

suite('SystemJS Standard Tests', function() {

function ok(assertion, message) {
Expand Down Expand Up @@ -634,6 +636,26 @@ suite('SystemJS Standard Tests', function() {
ok(m.d5 == 'text \'quote\' require("yet still not a dep")');
ok(m.d6 == 'd6');
ok(m.d7 == 'export');
ok(m.d8 == `string literal require("string literal not a dep")`);
});
});

test('CommonJS require variations in minified files', function () {
return System.import('tests/commonjs-minified-requires.js').then(function (m) {
assert.equal(m.d0, 'd6');
assert.equal(m.d1, 'd');
assert.equal(m.d2, 'd');
assert.equal(m.d3, "require('not a dep')");
assert.equal(m.d4, "text/* require('still not a dep') text");
assert.equal(m.d5, 'text \'quote\' require("yet still not a dep")');
assert.equal(m.d6, `in single quoted text require("also not a dep 1")`);
assert.equal(m.d6a, 'd');
assert.equal(m.d7, `in double quoted text require('also not a dep 2')`);
assert.equal(m.d7a, 'd');
assert.equal(m.d8, `in string literal text require('also not a dep 3')`);
assert.equal(m.d8a, 'd');
assert.equal(m.d9, 'd');
assert.equal(m.d10, 'end');
});
});

Expand Down
22 changes: 22 additions & 0 deletions test/tests/commonjs-minified-requires.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions test/tests/commonjs-requires.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,21 @@ var regexWithString = /asdfasdf " /;

var regexClose = /asdf " */;

// This comment triggered SystemJS to do a require because of this -> require('')
// This comment should not trigger SystemJS to do a require because of this -> require('comment is not a dep')
exports.d7 = 'export';

var p = false && require('" + "test" + "');

// this line shouldn't be detected
" = require(", "),\n ";

exports.d8 = `string literal require("string literal not a dep")`;

var regexWithRequireText = / require('regex string not a dep') /;

/*

Unsolved breaking cases:
Mixed exclusion blocks are handled correctly

var regex = / " /; var string = " /* " // one line;
require('asdf') // <- this will now be skipped as it will be in the '/*' comment
Expand Down