diff --git a/semver.js b/semver.js index 636fa436..39319c13 100644 --- a/semver.js +++ b/semver.js @@ -26,8 +26,11 @@ var MAX_SAFE_INTEGER = Number.MAX_SAFE_INTEGER || // Max safe segment length for coercion. var MAX_SAFE_COMPONENT_LENGTH = 16 +var MAX_SAFE_BUILD_LENGTH = MAX_LENGTH - 6 + // The actual regexps go on exports.re var re = exports.re = [] +var safeRe = exports.safeRe = [] var src = exports.src = [] var t = exports.tokens = {} var R = 0 @@ -36,6 +39,31 @@ function tok (n) { t[n] = R++ } +var LETTERDASHNUMBER = '[a-zA-Z0-9-]' + +// Replace some greedy regex tokens to prevent regex dos issues. These regex are +// used internally via the safeRe object since all inputs in this library get +// normalized first to trim and collapse all extra whitespace. The original +// regexes are exported for userland consumption and lower level usage. A +// future breaking change could export the safer regex only with a note that +// all input should have extra whitespace removed. +var safeRegexReplacements = [ + ['\\s', 1], + ['\\d', MAX_LENGTH], + [LETTERDASHNUMBER, MAX_SAFE_BUILD_LENGTH], +] + +function makeSafeRe (value) { + for (var i = 0; i < safeRegexReplacements.length; i++) { + var token = safeRegexReplacements[i][0] + var max = safeRegexReplacements[i][1] + value = value + .split(token + '*').join(token + '{0,' + max + '}') + .split(token + '+').join(token + '{1,' + max + '}') + } + return value +} + // The following Regular Expressions can be used for tokenizing, // validating, and parsing SemVer version strings. @@ -45,14 +73,14 @@ function tok (n) { tok('NUMERICIDENTIFIER') src[t.NUMERICIDENTIFIER] = '0|[1-9]\\d*' tok('NUMERICIDENTIFIERLOOSE') -src[t.NUMERICIDENTIFIERLOOSE] = '[0-9]+' +src[t.NUMERICIDENTIFIERLOOSE] = '\\d+' // ## Non-numeric Identifier // Zero or more digits, followed by a letter or hyphen, and then zero or // more letters, digits, or hyphens. tok('NONNUMERICIDENTIFIER') -src[t.NONNUMERICIDENTIFIER] = '\\d*[a-zA-Z-][a-zA-Z0-9-]*' +src[t.NONNUMERICIDENTIFIER] = '\\d*[a-zA-Z-]' + LETTERDASHNUMBER + '*' // ## Main Version // Three dot-separated numeric identifiers. @@ -94,7 +122,7 @@ src[t.PRERELEASELOOSE] = '(?:-?(' + src[t.PRERELEASEIDENTIFIERLOOSE] + // Any combination of digits, letters, or hyphens. tok('BUILDIDENTIFIER') -src[t.BUILDIDENTIFIER] = '[0-9A-Za-z-]+' +src[t.BUILDIDENTIFIER] = LETTERDASHNUMBER + '+' // ## Build Metadata // Plus sign, followed by one or more period-separated build metadata @@ -174,6 +202,7 @@ src[t.COERCE] = '(^|[^\\d])' + '(?:$|[^\\d])' tok('COERCERTL') re[t.COERCERTL] = new RegExp(src[t.COERCE], 'g') +safeRe[t.COERCERTL] = new RegExp(makeSafeRe(src[t.COERCE]), 'g') // Tilde ranges. // Meaning is "reasonably at or greater than" @@ -183,6 +212,7 @@ src[t.LONETILDE] = '(?:~>?)' tok('TILDETRIM') src[t.TILDETRIM] = '(\\s*)' + src[t.LONETILDE] + '\\s+' re[t.TILDETRIM] = new RegExp(src[t.TILDETRIM], 'g') +safeRe[t.TILDETRIM] = new RegExp(makeSafeRe(src[t.TILDETRIM]), 'g') var tildeTrimReplace = '$1~' tok('TILDE') @@ -198,6 +228,7 @@ src[t.LONECARET] = '(?:\\^)' tok('CARETTRIM') src[t.CARETTRIM] = '(\\s*)' + src[t.LONECARET] + '\\s+' re[t.CARETTRIM] = new RegExp(src[t.CARETTRIM], 'g') +safeRe[t.CARETTRIM] = new RegExp(makeSafeRe(src[t.CARETTRIM]), 'g') var caretTrimReplace = '$1^' tok('CARET') @@ -219,6 +250,7 @@ src[t.COMPARATORTRIM] = '(\\s*)' + src[t.GTLT] + // this one has to use the /g flag re[t.COMPARATORTRIM] = new RegExp(src[t.COMPARATORTRIM], 'g') +safeRe[t.COMPARATORTRIM] = new RegExp(makeSafeRe(src[t.COMPARATORTRIM]), 'g') var comparatorTrimReplace = '$1$2$3' // Something like `1.2.3 - 1.2.4` @@ -247,6 +279,14 @@ for (var i = 0; i < R; i++) { debug(i, src[i]) if (!re[i]) { re[i] = new RegExp(src[i]) + + // Replace all greedy whitespace to prevent regex dos issues. These regex are + // used internally via the safeRe object since all inputs in this library get + // normalized first to trim and collapse all extra whitespace. The original + // regexes are exported for userland consumption and lower level usage. A + // future breaking change could export the safer regex only with a note that + // all input should have extra whitespace removed. + safeRe[i] = new RegExp(makeSafeRe(src[i])) } } @@ -271,7 +311,7 @@ function parse (version, options) { return null } - var r = options.loose ? re[t.LOOSE] : re[t.FULL] + var r = options.loose ? safeRe[t.LOOSE] : safeRe[t.FULL] if (!r.test(version)) { return null } @@ -326,7 +366,7 @@ function SemVer (version, options) { this.options = options this.loose = !!options.loose - var m = version.trim().match(options.loose ? re[t.LOOSE] : re[t.FULL]) + var m = version.trim().match(options.loose ? safeRe[t.LOOSE] : safeRe[t.FULL]) if (!m) { throw new TypeError('Invalid Version: ' + version) @@ -771,6 +811,7 @@ function Comparator (comp, options) { return new Comparator(comp, options) } + comp = comp.trim().split(/\s+/).join(' ') debug('comparator', comp, options) this.options = options this.loose = !!options.loose @@ -787,7 +828,7 @@ function Comparator (comp, options) { var ANY = {} Comparator.prototype.parse = function (comp) { - var r = this.options.loose ? re[t.COMPARATORLOOSE] : re[t.COMPARATOR] + var r = this.options.loose ? safeRe[t.COMPARATORLOOSE] : safeRe[t.COMPARATOR] var m = comp.match(r) if (!m) { @@ -911,9 +952,16 @@ function Range (range, options) { this.loose = !!options.loose this.includePrerelease = !!options.includePrerelease - // First, split based on boolean or || + // First reduce all whitespace as much as possible so we do not have to rely + // on potentially slow regexes like \s*. This is then stored and used for + // future error messages as well. this.raw = range - this.set = range.split(/\s*\|\|\s*/).map(function (range) { + .trim() + .split(/\s+/) + .join(' ') + + // First, split based on boolean or || + this.set = this.raw.split('||').map(function (range) { return this.parseRange(range.trim()) }, this).filter(function (c) { // throw out any that are not relevant for whatever reason @@ -921,7 +969,7 @@ function Range (range, options) { }) if (!this.set.length) { - throw new TypeError('Invalid SemVer Range: ' + range) + throw new TypeError('Invalid SemVer Range: ' + this.raw) } this.format() @@ -940,20 +988,19 @@ Range.prototype.toString = function () { Range.prototype.parseRange = function (range) { var loose = this.options.loose - range = range.trim() // `1.2.3 - 1.2.4` => `>=1.2.3 <=1.2.4` - var hr = loose ? re[t.HYPHENRANGELOOSE] : re[t.HYPHENRANGE] + var hr = loose ? safeRe[t.HYPHENRANGELOOSE] : safeRe[t.HYPHENRANGE] range = range.replace(hr, hyphenReplace) debug('hyphen replace', range) // `> 1.2.3 < 1.2.5` => `>1.2.3 <1.2.5` - range = range.replace(re[t.COMPARATORTRIM], comparatorTrimReplace) - debug('comparator trim', range, re[t.COMPARATORTRIM]) + range = range.replace(safeRe[t.COMPARATORTRIM], comparatorTrimReplace) + debug('comparator trim', range, safeRe[t.COMPARATORTRIM]) // `~ 1.2.3` => `~1.2.3` - range = range.replace(re[t.TILDETRIM], tildeTrimReplace) + range = range.replace(safeRe[t.TILDETRIM], tildeTrimReplace) // `^ 1.2.3` => `^1.2.3` - range = range.replace(re[t.CARETTRIM], caretTrimReplace) + range = range.replace(safeRe[t.CARETTRIM], caretTrimReplace) // normalize spaces range = range.split(/\s+/).join(' ') @@ -961,7 +1008,7 @@ Range.prototype.parseRange = function (range) { // At this point, the range is completely trimmed and // ready to be split into comparators. - var compRe = loose ? re[t.COMPARATORLOOSE] : re[t.COMPARATOR] + var compRe = loose ? safeRe[t.COMPARATORLOOSE] : safeRe[t.COMPARATOR] var set = range.split(' ').map(function (comp) { return parseComparator(comp, this.options) }, this).join(' ').split(/\s+/) @@ -1061,7 +1108,7 @@ function replaceTildes (comp, options) { } function replaceTilde (comp, options) { - var r = options.loose ? re[t.TILDELOOSE] : re[t.TILDE] + var r = options.loose ? safeRe[t.TILDELOOSE] : safeRe[t.TILDE] return comp.replace(r, function (_, M, m, p, pr) { debug('tilde', comp, _, M, m, p, pr) var ret @@ -1102,7 +1149,7 @@ function replaceCarets (comp, options) { function replaceCaret (comp, options) { debug('caret', comp, options) - var r = options.loose ? re[t.CARETLOOSE] : re[t.CARET] + var r = options.loose ? safeRe[t.CARETLOOSE] : safeRe[t.CARET] return comp.replace(r, function (_, M, m, p, pr) { debug('caret', comp, _, M, m, p, pr) var ret @@ -1161,7 +1208,7 @@ function replaceXRanges (comp, options) { function replaceXRange (comp, options) { comp = comp.trim() - var r = options.loose ? re[t.XRANGELOOSE] : re[t.XRANGE] + var r = options.loose ? safeRe[t.XRANGELOOSE] : safeRe[t.XRANGE] return comp.replace(r, function (ret, gtlt, M, m, p, pr) { debug('xRange', comp, ret, gtlt, M, m, p, pr) var xM = isX(M) @@ -1236,7 +1283,7 @@ function replaceXRange (comp, options) { function replaceStars (comp, options) { debug('replaceStars', comp, options) // Looseness is ignored here. star is always as loose as it gets! - return comp.trim().replace(re[t.STAR], '') + return comp.trim().replace(safeRe[t.STAR], '') } // This function is passed to string.replace(re[t.HYPHENRANGE]) @@ -1562,7 +1609,7 @@ function coerce (version, options) { var match = null if (!options.rtl) { - match = version.match(re[t.COERCE]) + match = version.match(safeRe[t.COERCE]) } else { // Find the right-most coercible string that does not share // a terminus with a more left-ward coercible string. @@ -1573,17 +1620,17 @@ function coerce (version, options) { // Stop when we get a match that ends at the string end, since no // coercible string can be more right-ward without the same terminus. var next - while ((next = re[t.COERCERTL].exec(version)) && + while ((next = safeRe[t.COERCERTL].exec(version)) && (!match || match.index + match[0].length !== version.length) ) { if (!match || next.index + next[0].length !== match.index + match[0].length) { match = next } - re[t.COERCERTL].lastIndex = next.index + next[1].length + next[2].length + safeRe[t.COERCERTL].lastIndex = next.index + next[1].length + next[2].length } // leave it in a clean state - re[t.COERCERTL].lastIndex = -1 + safeRe[t.COERCERTL].lastIndex = -1 } if (match === null) { diff --git a/test/index.js b/test/index.js index cd325556..0f6f1501 100644 --- a/test/index.js +++ b/test/index.js @@ -355,6 +355,7 @@ test('negative range tests', function (t) { ['*', 'not a version'], ['>=2', 'glorp'], ['2.x', '3.0.0-pre.0', { includePrerelease: true }], + ['== 1.0.0 || foo', '2.0.0', { loose: true }] ].forEach(function (v) { var range = v[0] var ver = v[1] @@ -1092,3 +1093,12 @@ test('really big numeric prerelease value', function (t) { t.strictSame(r.prerelease, [ 'beta', '90071992547409910' ]) t.end() }) + +test('long build id', function (t) { + var longBuild = '-928490632884417731e7af463c92b034d6a78268fc993bcb88a57944' + var shortVersion = '1.1.1' + var longVersion = Number.MAX_SAFE_INTEGER + '.' + Number.MAX_SAFE_INTEGER + '.' + Number.MAX_SAFE_INTEGER + t.equal(semver.valid(shortVersion + longBuild), shortVersion + longBuild) + t.equal(semver.valid(longVersion + longBuild), longVersion + longBuild) + t.end() +}) \ No newline at end of file diff --git a/test/re.js b/test/re.js new file mode 100644 index 00000000..8943e503 --- /dev/null +++ b/test/re.js @@ -0,0 +1,14 @@ +var test = require('tap').test +var semver = require('../') + +test('has a list of src, re, and safeRe', function (t) { + semver.re.forEach(function (r) { return t.match(r, RegExp, 'regexps are regexps') }) + semver.src.forEach(function (s) { return t.match(s, String, 'src is strings') }) + + semver.safeRe.forEach(function (r) { + t.notMatch(r.source, '\\s+', 'safe regex do not contain greedy whitespace') + t.notMatch(r.source, '\\s*', 'safe regex do not contain greedy whitespace') + }) + + t.end() +}) diff --git a/test/whitespace.js b/test/whitespace.js new file mode 100644 index 00000000..9ee6f719 --- /dev/null +++ b/test/whitespace.js @@ -0,0 +1,63 @@ +var test = require('tap').test +var semver = require('../') + +var validRange = semver.validRange +var SemVer = semver.SemVer +var Range = semver.Range +var Comparator = semver.Comparator +var minVersion = semver.minVersion +var minSatisfying = semver.minSatisfying +var maxSatisfying = semver.maxSatisfying + +function s(n, char) { + if (!n) { + n = 500000 + } + if (!char) { + char = ' ' + } + var c = '' + for (var i = 0; i < n; i++) { + c += char + } + return c +} + +test('regex dos via range whitespace', function (t) { + // a range with this much whitespace would take a few minutes to process if + // any redos susceptible regexes were used. there is a global tap timeout per + // file set in the package.json that will error if this test takes too long. + var r = `1.2.3 ${s()} <1.3.0` + + t.equal(new Range(r).range, '1.2.3 <1.3.0') + t.equal(validRange(r), '1.2.3 <1.3.0') + t.equal(minVersion(r).version, '1.2.3') + t.equal(minSatisfying(['1.2.3'], r), '1.2.3') + t.equal(maxSatisfying(['1.2.3'], r), '1.2.3') + + t.end() +}) + +test('range with 0', function (t) { + var r = `1.2.3 ${s(null, '0')} <1.3.0` + t.throws(function () { return new Range(r).range }) + t.equal(validRange(r), null) + t.throws(function () { return minVersion(r).version }) + t.equal(minSatisfying(['1.2.3'], r), null) + t.equal(maxSatisfying(['1.2.3'], r), null) + t.end() +}) + +test('semver version', function (t) { + var v = `${s(125)}1.2.3${s(125)}` + var tooLong = `${s()}1.2.3${s()}` + t.equal(new SemVer(v).version, '1.2.3') + t.throws(function () { return new SemVer(tooLong) }) + t.end() +}) + +test('comparator', function (t) { + var c = `${s()}<${s()}1.2.3${s()}` + t.equal(new Comparator(c).value, '<1.2.3') + t.end() +})