From 26f3d0b04fec438400d337c2d4ace218225b7ecb Mon Sep 17 00:00:00 2001 From: Luke Karrys Date: Tue, 1 Nov 2022 09:45:37 -0700 Subject: [PATCH] fix: use hosted-git-info to parse registry urls (#5761) Previously this was using `new URL` which would fail on some urls that `hosted-git-info` is able to parse. But if we still get a url that can't be parsed, we now set it to be removed from the tree instead of erroring. Fixes: #5278 --- DEPENDENCIES.md | 2 + node_modules/hosted-git-info/lib/index.js | 144 ++++-------------- node_modules/hosted-git-info/lib/parse-url.js | 78 ++++++++++ node_modules/hosted-git-info/package.json | 23 ++- package-lock.json | 9 +- package.json | 2 +- workspaces/arborist/lib/arborist/reify.js | 26 +++- workspaces/arborist/package.json | 1 + workspaces/arborist/test/arborist/reify.js | 79 +++++++++- 9 files changed, 229 insertions(+), 135 deletions(-) create mode 100644 node_modules/hosted-git-info/lib/parse-url.js diff --git a/DEPENDENCIES.md b/DEPENDENCIES.md index f8e89b92fa46a..46547a18604a8 100644 --- a/DEPENDENCIES.md +++ b/DEPENDENCIES.md @@ -149,6 +149,7 @@ graph LR; npm-registry-fetch-->proc-log; npmcli-arborist-->bin-links; npmcli-arborist-->cacache; + npmcli-arborist-->hosted-git-info; npmcli-arborist-->nopt; npmcli-arborist-->npm-install-checks; npmcli-arborist-->npm-package-arg; @@ -578,6 +579,7 @@ graph LR; npmcli-arborist-->cacache; npmcli-arborist-->chalk; npmcli-arborist-->common-ancestor-path; + npmcli-arborist-->hosted-git-info; npmcli-arborist-->isaacs-string-locale-compare["@isaacs/string-locale-compare"]; npmcli-arborist-->json-parse-even-better-errors; npmcli-arborist-->json-stringify-nice; diff --git a/node_modules/hosted-git-info/lib/index.js b/node_modules/hosted-git-info/lib/index.js index d5d63c66839b0..f6c66ff3298eb 100644 --- a/node_modules/hosted-git-info/lib/index.js +++ b/node_modules/hosted-git-info/lib/index.js @@ -1,32 +1,25 @@ 'use strict' -const url = require('url') const gitHosts = require('./git-host-info.js') const GitHost = module.exports = require('./git-host.js') const LRU = require('lru-cache') -const cache = new LRU({ max: 1000 }) - -const protocolToRepresentationMap = { - 'git+ssh:': 'sshurl', - 'git+https:': 'https', - 'ssh:': 'sshurl', - 'git:': 'git', -} +const parseUrl = require('./parse-url.js') -function protocolToRepresentation (protocol) { - return protocolToRepresentationMap[protocol] || protocol.slice(0, -1) -} +const cache = new LRU({ max: 1000 }) -const authProtocols = { - 'git:': true, - 'https:': true, - 'git+https:': true, - 'http:': true, - 'git+http:': true, +const protocols = { + 'git+ssh:': { name: 'sshurl' }, + 'ssh:': { name: 'sshurl' }, + 'git+https:': { name: 'https', auth: true }, + 'git:': { auth: true }, + 'http:': { auth: true }, + 'https:': { auth: true }, + 'git+http:': { auth: true }, + ...Object.keys(gitHosts.byShortcut).reduce((acc, key) => { + acc[key] = { name: gitHosts.byShortcut[key] } + return acc + }, {}), } -const knownProtocols = Object.keys(gitHosts.byShortcut) - .concat(['http:', 'https:', 'git:', 'git+ssh:', 'git+https:', 'ssh:']) - module.exports.fromUrl = function (giturl, opts) { if (typeof giturl !== 'string') { return @@ -41,22 +34,23 @@ module.exports.fromUrl = function (giturl, opts) { return cache.get(key) } +module.exports.parseUrl = parseUrl + function fromUrl (giturl, opts) { if (!giturl) { return } - const correctedUrl = isGitHubShorthand(giturl) ? 'github:' + giturl : correctProtocol(giturl) - const parsed = parseGitUrl(correctedUrl) + const correctedUrl = isGitHubShorthand(giturl) ? `github:${giturl}` : giturl + const parsed = parseUrl(correctedUrl, protocols) if (!parsed) { - return parsed + return } const gitHostShortcut = gitHosts.byShortcut[parsed.protocol] - const gitHostDomain = - gitHosts.byDomain[parsed.hostname.startsWith('www.') ? - parsed.hostname.slice(4) : - parsed.hostname] + const gitHostDomain = gitHosts.byDomain[parsed.hostname.startsWith('www.') + ? parsed.hostname.slice(4) + : parsed.hostname] const gitHostName = gitHostShortcut || gitHostDomain if (!gitHostName) { return @@ -64,7 +58,10 @@ function fromUrl (giturl, opts) { const gitHostInfo = gitHosts[gitHostShortcut || gitHostDomain] let auth = null - if (authProtocols[parsed.protocol] && (parsed.username || parsed.password)) { + if (protocols[parsed.protocol] && + protocols[parsed.protocol].auth && + (parsed.username || parsed.password) + ) { auth = `${parsed.username}${parsed.password ? ':' + parsed.password : ''}` } @@ -116,7 +113,8 @@ function fromUrl (giturl, opts) { user = segments.user && decodeURIComponent(segments.user) project = decodeURIComponent(segments.project) committish = decodeURIComponent(segments.committish) - defaultRepresentation = protocolToRepresentation(parsed.protocol) + defaultRepresentation = (protocols[parsed.protocol] && protocols[parsed.protocol].name) + || parsed.protocol.slice(0, -1) } } catch (err) { /* istanbul ignore else */ @@ -130,31 +128,6 @@ function fromUrl (giturl, opts) { return new GitHost(gitHostName, user, auth, project, committish, defaultRepresentation, opts) } -// accepts input like git:github.com:user/repo and inserts the // after the first : -const correctProtocol = (arg) => { - const firstColon = arg.indexOf(':') - const proto = arg.slice(0, firstColon + 1) - if (knownProtocols.includes(proto)) { - return arg - } - - const firstAt = arg.indexOf('@') - if (firstAt > -1) { - if (firstAt > firstColon) { - return `git+ssh://${arg}` - } else { - return arg - } - } - - const doubleSlash = arg.indexOf('//') - if (doubleSlash === firstColon + 1) { - return arg - } - - return arg.slice(0, firstColon + 1) + '//' + arg.slice(firstColon + 1) -} - // look for github shorthand inputs, such as npm/cli const isGitHubShorthand = (arg) => { // it cannot contain whitespace before the first # @@ -185,64 +158,3 @@ const isGitHubShorthand = (arg) => { doesNotStartWithDot && atOnlyAfterHash && colonOnlyAfterHash && secondSlashOnlyAfterHash } - -// attempt to correct an scp style url so that it will parse with `new URL()` -const correctUrl = (giturl) => { - const firstAt = giturl.indexOf('@') - const lastHash = giturl.lastIndexOf('#') - let firstColon = giturl.indexOf(':') - let lastColon = giturl.lastIndexOf(':', lastHash > -1 ? lastHash : Infinity) - - let corrected - if (lastColon > firstAt) { - // the last : comes after the first @ (or there is no @) - // like it would in: - // proto://hostname.com:user/repo - // username@hostname.com:user/repo - // :password@hostname.com:user/repo - // username:password@hostname.com:user/repo - // proto://username@hostname.com:user/repo - // proto://:password@hostname.com:user/repo - // proto://username:password@hostname.com:user/repo - // then we replace the last : with a / to create a valid path - corrected = giturl.slice(0, lastColon) + '/' + giturl.slice(lastColon + 1) - // // and we find our new : positions - firstColon = corrected.indexOf(':') - lastColon = corrected.lastIndexOf(':') - } - - if (firstColon === -1 && giturl.indexOf('//') === -1) { - // we have no : at all - // as it would be in: - // username@hostname.com/user/repo - // then we prepend a protocol - corrected = `git+ssh://${corrected}` - } - - return corrected -} - -// try to parse the url as its given to us, if that throws -// then we try to clean the url and parse that result instead -// THIS FUNCTION SHOULD NEVER THROW -const parseGitUrl = (giturl) => { - let result - try { - result = new url.URL(giturl) - } catch { - // this fn should never throw - } - - if (result) { - return result - } - - const correctedUrl = correctUrl(giturl) - try { - result = new url.URL(correctedUrl) - } catch { - // this fn should never throw - } - - return result -} diff --git a/node_modules/hosted-git-info/lib/parse-url.js b/node_modules/hosted-git-info/lib/parse-url.js new file mode 100644 index 0000000000000..7d5489c008ab4 --- /dev/null +++ b/node_modules/hosted-git-info/lib/parse-url.js @@ -0,0 +1,78 @@ +const url = require('url') + +const lastIndexOfBefore = (str, char, beforeChar) => { + const startPosition = str.indexOf(beforeChar) + return str.lastIndexOf(char, startPosition > -1 ? startPosition : Infinity) +} + +const safeUrl = (u) => { + try { + return new url.URL(u) + } catch { + // this fn should never throw + } +} + +// accepts input like git:github.com:user/repo and inserts the // after the first : +const correctProtocol = (arg, protocols) => { + const firstColon = arg.indexOf(':') + const proto = arg.slice(0, firstColon + 1) + if (Object.prototype.hasOwnProperty.call(protocols, proto)) { + return arg + } + + const firstAt = arg.indexOf('@') + if (firstAt > -1) { + if (firstAt > firstColon) { + return `git+ssh://${arg}` + } else { + return arg + } + } + + const doubleSlash = arg.indexOf('//') + if (doubleSlash === firstColon + 1) { + return arg + } + + return `${arg.slice(0, firstColon + 1)}//${arg.slice(firstColon + 1)}` +} + +// attempt to correct an scp style url so that it will parse with `new URL()` +const correctUrl = (giturl) => { + // ignore @ that come after the first hash since the denotes the start + // of a committish which can contain @ characters + const firstAt = lastIndexOfBefore(giturl, '@', '#') + // ignore colons that come after the hash since that could include colons such as: + // git@github.com:user/package-2#semver:^1.0.0 + const lastColonBeforeHash = lastIndexOfBefore(giturl, ':', '#') + + if (lastColonBeforeHash > firstAt) { + // the last : comes after the first @ (or there is no @) + // like it would in: + // proto://hostname.com:user/repo + // username@hostname.com:user/repo + // :password@hostname.com:user/repo + // username:password@hostname.com:user/repo + // proto://username@hostname.com:user/repo + // proto://:password@hostname.com:user/repo + // proto://username:password@hostname.com:user/repo + // then we replace the last : with a / to create a valid path + giturl = giturl.slice(0, lastColonBeforeHash) + '/' + giturl.slice(lastColonBeforeHash + 1) + } + + if (lastIndexOfBefore(giturl, ':', '#') === -1 && giturl.indexOf('//') === -1) { + // we have no : at all + // as it would be in: + // username@hostname.com/user/repo + // then we prepend a protocol + giturl = `git+ssh://${giturl}` + } + + return giturl +} + +module.exports = (giturl, protocols) => { + const withProtocol = protocols ? correctProtocol(giturl, protocols) : giturl + return safeUrl(withProtocol) || safeUrl(correctUrl(withProtocol)) +} diff --git a/node_modules/hosted-git-info/package.json b/node_modules/hosted-git-info/package.json index 07a5587ca76ef..ffd5c7d1b6bcf 100644 --- a/node_modules/hosted-git-info/package.json +++ b/node_modules/hosted-git-info/package.json @@ -1,6 +1,6 @@ { "name": "hosted-git-info", - "version": "5.1.0", + "version": "5.2.1", "description": "Provides metadata and conversions from repository urls for GitHub, Bitbucket and GitLab", "main": "./lib/index.js", "repository": { @@ -21,9 +21,6 @@ "homepage": "https://github.com/npm/hosted-git-info", "scripts": { "posttest": "npm run lint", - "postversion": "npm publish", - "prepublishOnly": "git push origin --follow-tags", - "preversion": "npm test", "snap": "tap", "test": "tap", "test:coverage": "tap --coverage-report=html", @@ -37,7 +34,7 @@ }, "devDependencies": { "@npmcli/eslint-config": "^3.0.1", - "@npmcli/template-oss": "3.5.0", + "@npmcli/template-oss": "4.7.1", "tap": "^16.0.1" }, "files": [ @@ -49,10 +46,22 @@ }, "tap": { "color": 1, - "coverage": true + "coverage": true, + "nyc-arg": [ + "--exclude", + "tap-snapshots/**" + ] }, "templateOSS": { "//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.", - "version": "3.5.0" + "version": "4.7.1", + "ciVersions": [ + "12.13.0", + "12.x", + "14.15.0", + "14.x", + "16.0.0", + "16.x" + ] } } diff --git a/package-lock.json b/package-lock.json index 5a11e5a8b44c1..6edb72c6cfdb0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -109,7 +109,7 @@ "fastest-levenshtein": "^1.0.12", "glob": "^8.0.1", "graceful-fs": "^4.2.10", - "hosted-git-info": "^5.1.0", + "hosted-git-info": "^5.2.1", "ini": "^3.0.1", "init-package-json": "^3.0.2", "is-cidr": "^4.0.2", @@ -5955,9 +5955,9 @@ } }, "node_modules/hosted-git-info": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-5.1.0.tgz", - "integrity": "sha512-Ek+QmMEqZF8XrbFdwoDjSbm7rT23pCgEMOJmz6GPk/s4yH//RQfNPArhIxbguNxROq/+5lNBwCDHMhA903Kx1Q==", + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-5.2.1.tgz", + "integrity": "sha512-xIcQYMnhcx2Nr4JTjsFmwwnr9vldugPy9uVm0o87bjqqWMv9GaqsTeT+i99wTl0mk1uLxJtHxLb8kymqTENQsw==", "inBundle": true, "dependencies": { "lru-cache": "^7.5.1" @@ -13880,6 +13880,7 @@ "bin-links": "^3.0.3", "cacache": "^16.1.3", "common-ancestor-path": "^1.0.1", + "hosted-git-info": "^5.2.1", "json-parse-even-better-errors": "^2.3.1", "json-stringify-nice": "^1.1.4", "minimatch": "^5.1.0", diff --git a/package.json b/package.json index 8db35cfa0c17b..299ea7dee6699 100644 --- a/package.json +++ b/package.json @@ -74,7 +74,7 @@ "fastest-levenshtein": "^1.0.12", "glob": "^8.0.1", "graceful-fs": "^4.2.10", - "hosted-git-info": "^5.1.0", + "hosted-git-info": "^5.2.1", "ini": "^3.0.1", "init-package-json": "^3.0.2", "is-cidr": "^4.0.2", diff --git a/workspaces/arborist/lib/arborist/reify.js b/workspaces/arborist/lib/arborist/reify.js index 0c9026f5e4d1e..4e8b4fc16f4aa 100644 --- a/workspaces/arborist/lib/arborist/reify.js +++ b/workspaces/arborist/lib/arborist/reify.js @@ -9,6 +9,7 @@ const semver = require('semver') const debug = require('../debug.js') const walkUp = require('walk-up-path') const log = require('proc-log') +const hgi = require('hosted-git-info') const { dirname, resolve, relative } = require('path') const { depth: dfwalk } = require('treeverse') @@ -638,10 +639,15 @@ module.exports = cls => class Reifier extends cls { // and no 'bundled: true' setting. // Do the best with what we have, or else remove it from the tree // entirely, since we can't possibly reify it. - const res = node.resolved ? `${node.name}@${this[_registryResolved](node.resolved)}` - : node.packageName && node.version - ? `${node.packageName}@${node.version}` - : null + let res = null + if (node.resolved) { + const registryResolved = this[_registryResolved](node.resolved) + if (registryResolved) { + res = `${node.name}@${registryResolved}` + } + } else if (node.packageName && node.version) { + res = `${node.packageName}@${node.version}` + } // no idea what this thing is. remove it from the tree. if (!res) { @@ -718,12 +724,20 @@ module.exports = cls => class Reifier extends cls { // ${REGISTRY} or something. This has to be threaded through the // Shrinkwrap and Node classes carefully, so for now, just treat // the default reg as the magical animal that it has been. - const resolvedURL = new URL(resolved) + const resolvedURL = hgi.parseUrl(resolved) + + if (!resolvedURL) { + // if we could not parse the url at all then returning nothing + // here means it will get removed from the tree in the next step + return + } + if ((this.options.replaceRegistryHost === resolvedURL.hostname) || this.options.replaceRegistryHost === 'always') { // this.registry always has a trailing slash - resolved = `${this.registry.slice(0, -1)}${resolvedURL.pathname}${resolvedURL.searchParams}` + return `${this.registry.slice(0, -1)}${resolvedURL.pathname}${resolvedURL.searchParams}` } + return resolved } diff --git a/workspaces/arborist/package.json b/workspaces/arborist/package.json index c4eec90d73e53..31cfad06d1919 100644 --- a/workspaces/arborist/package.json +++ b/workspaces/arborist/package.json @@ -16,6 +16,7 @@ "bin-links": "^3.0.3", "cacache": "^16.1.3", "common-ancestor-path": "^1.0.1", + "hosted-git-info": "^5.2.1", "json-parse-even-better-errors": "^2.3.1", "json-stringify-nice": "^1.1.4", "minimatch": "^5.1.0", diff --git a/workspaces/arborist/test/arborist/reify.js b/workspaces/arborist/test/arborist/reify.js index 01945f7136c41..21bbaff4b8898 100644 --- a/workspaces/arborist/test/arborist/reify.js +++ b/workspaces/arborist/test/arborist/reify.js @@ -2925,7 +2925,20 @@ t.test('installLinks', (t) => { }) t.only('should preserve exact ranges, missing actual tree', async (t) => { - const Arborist = require('../../lib/index.js') + const Pacote = require('pacote') + const Arborist = t.mock('../../lib/arborist', { + pacote: { + ...Pacote, + extract: async (...args) => { + if (args[0].startsWith('gitssh')) { + // we just want to test that this url is handled properly + // but its not a real git url we can clone so return early + return true + } + return Pacote.extract(...args) + }, + }, + }) const abbrev = resolve(__dirname, '../fixtures/registry-mocks/content/abbrev/-/abbrev-1.1.1.tgz') const abbrevTGZ = fs.readFileSync(abbrev) @@ -2962,6 +2975,40 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { }, }) + const gitSshPackument = JSON.stringify({ + _id: 'gitssh', + _rev: 'lkjadflkjasdf', + name: 'gitssh', + 'dist-tags': { latest: '1.1.1' }, + versions: { + '1.1.1': { + name: 'gitssh', + version: '1.1.1', + dist: { + // this is a url that `new URL()` cant parse + // https://github.com/npm/cli/issues/5278 + tarball: 'git+ssh://git@github.com:a/b/c.git#lkjadflkjasdf', + }, + }, + }, + }) + + const notAUrlPackument = JSON.stringify({ + _id: 'notaurl', + _rev: 'lkjadflkjasdf', + name: 'notaurl', + 'dist-tags': { latest: '1.1.1' }, + versions: { + '1.1.1': { + name: 'notaurl', + version: '1.1.1', + dist: { + tarball: 'hey been trying to break this test', + }, + }, + }, + }) + t.only('host should not be replaced replaceRegistryHost=never', async (t) => { const testdir = t.testdir({ project: { @@ -2970,6 +3017,8 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { version: '1.0.0', dependencies: { abbrev: '1.1.1', + gitssh: '1.1.1', + notaurl: '1.1.1', }, }), }, @@ -2983,6 +3032,14 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { .get('/abbrev/-/abbrev-1.1.1.tgz') .reply(200, abbrevTGZ) + tnock(t, 'https://registry.github.com') + .get('/gitssh') + .reply(200, gitSshPackument) + + tnock(t, 'https://registry.github.com') + .get('/notaurl') + .reply(200, notAUrlPackument) + const arb = new Arborist({ path: resolve(testdir, 'project'), registry: 'https://registry.github.com', @@ -3000,6 +3057,8 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { version: '1.0.0', dependencies: { abbrev: '1.1.1', + gitssh: '1.1.1', + notaurl: '1.1.1', }, }), }, @@ -3009,10 +3068,18 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { .get('/abbrev') .reply(200, abbrevPackument) + tnock(t, 'https://registry.github.com') + .get('/gitssh') + .reply(200, gitSshPackument) + tnock(t, 'https://registry.github.com') .get('/abbrev/-/abbrev-1.1.1.tgz') .reply(200, abbrevTGZ) + tnock(t, 'https://registry.github.com') + .get('/notaurl') + .reply(200, notAUrlPackument) + const arb = new Arborist({ path: resolve(testdir, 'project'), registry: 'https://registry.github.com', @@ -3030,6 +3097,8 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { version: '1.0.0', dependencies: { abbrev: '1.1.1', + gitssh: '1.1.1', + notaurl: '1.1.1', }, }), }, @@ -3039,10 +3108,18 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => { .get('/abbrev') .reply(200, abbrevPackument2) + tnock(t, 'https://registry.github.com') + .get('/gitssh') + .reply(200, gitSshPackument) + tnock(t, 'https://registry.github.com') .get('/abbrev/-/abbrev-1.1.1.tgz') .reply(200, abbrevTGZ) + tnock(t, 'https://registry.github.com') + .get('/notaurl') + .reply(200, notAUrlPackument) + const arb = new Arborist({ path: resolve(testdir, 'project'), registry: 'https://registry.github.com',