Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
fix: use hosted-git-info to parse registry urls (#5758)
Previously this was using `new URL` which would fail on some urls that
`hosted-git-info` is able to parse. But if we still get a url that can't
be parsed, we now set it to be removed from the tree instead of
erroring.

Fixes: #5278
  • Loading branch information
lukekarrys committed Nov 1, 2022
1 parent ce6745c commit 0c5834e
Show file tree
Hide file tree
Showing 10 changed files with 119 additions and 27 deletions.
2 changes: 2 additions & 0 deletions DEPENDENCIES.md
Expand Up @@ -157,6 +157,7 @@ graph LR;
npm-registry-fetch-->proc-log;
npmcli-arborist-->bin-links;
npmcli-arborist-->cacache;
npmcli-arborist-->hosted-git-info;
npmcli-arborist-->json-parse-even-better-errors;
npmcli-arborist-->minify-registry-metadata;
npmcli-arborist-->nopt;
Expand Down Expand Up @@ -790,6 +791,7 @@ graph LR;
npmcli-arborist-->cacache;
npmcli-arborist-->chalk;
npmcli-arborist-->common-ancestor-path;
npmcli-arborist-->hosted-git-info;
npmcli-arborist-->isaacs-string-locale-compare["@isaacs/string-locale-compare"];
npmcli-arborist-->json-parse-even-better-errors;
npmcli-arborist-->json-stringify-nice;
Expand Down
11 changes: 9 additions & 2 deletions node_modules/hosted-git-info/lib/index.js
Expand Up @@ -4,7 +4,6 @@ const LRU = require('lru-cache')
const hosts = require('./hosts.js')
const fromUrl = require('./from-url.js')
const parseUrl = require('./parse-url.js')
const getProtocols = require('./protocols.js')

const cache = new LRU({ max: 1000 })

Expand All @@ -22,7 +21,15 @@ class GitHost {
}

static #gitHosts = { byShortcut: {}, byDomain: {} }
static #protocols = getProtocols()
static #protocols = {
'git+ssh:': { name: 'sshurl' },
'ssh:': { name: 'sshurl' },
'git+https:': { name: 'https', auth: true },
'git:': { auth: true },
'http:': { auth: true },
'https:': { auth: true },
'git+http:': { auth: true },
}

static addHost (name, host) {
GitHost.#gitHosts[name] = host
Expand Down
5 changes: 2 additions & 3 deletions node_modules/hosted-git-info/lib/parse-url.js
@@ -1,5 +1,4 @@
const url = require('url')
const getProtocols = require('./protocols.js')

const lastIndexOfBefore = (str, char, beforeChar) => {
const startPosition = str.indexOf(beforeChar)
Expand Down Expand Up @@ -73,7 +72,7 @@ const correctUrl = (giturl) => {
return giturl
}

module.exports = (giturl, protocols = getProtocols()) => {
const withProtocol = correctProtocol(giturl, protocols)
module.exports = (giturl, protocols) => {
const withProtocol = protocols ? correctProtocol(giturl, protocols) : giturl
return safeUrl(withProtocol) || safeUrl(correctUrl(withProtocol))
}
9 changes: 0 additions & 9 deletions node_modules/hosted-git-info/lib/protocols.js

This file was deleted.

2 changes: 1 addition & 1 deletion node_modules/hosted-git-info/package.json
@@ -1,6 +1,6 @@
{
"name": "hosted-git-info",
"version": "6.1.0",
"version": "6.1.1",
"description": "Provides metadata and conversions from repository urls for GitHub, Bitbucket and GitLab",
"main": "./lib/index.js",
"repository": {
Expand Down
9 changes: 5 additions & 4 deletions package-lock.json
Expand Up @@ -104,7 +104,7 @@
"fs-minipass": "^2.1.0",
"glob": "^8.0.1",
"graceful-fs": "^4.2.10",
"hosted-git-info": "^6.1.0",
"hosted-git-info": "^6.1.1",
"ini": "^3.0.1",
"init-package-json": "^4.0.1",
"is-cidr": "^4.0.2",
Expand Down Expand Up @@ -6018,9 +6018,9 @@
}
},
"node_modules/hosted-git-info": {
"version": "6.1.0",
"resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-6.1.0.tgz",
"integrity": "sha512-HGLEbnDnxaXOoVjyE4gR+zEzQ/jvdPBVbVvDiRedZsn7pKx45gic0G1HGZBZ94RyJz0e6pBMeInIh349TAvHCQ==",
"version": "6.1.1",
"resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-6.1.1.tgz",
"integrity": "sha512-r0EI+HBMcXadMrugk0GCQ+6BQV39PiWAZVfq7oIckeGiN7sjRGyQxPdft3nQekFTCQbYxLBH+/axZMeH8UX6+w==",
"inBundle": true,
"dependencies": {
"lru-cache": "^7.5.1"
Expand Down Expand Up @@ -14050,6 +14050,7 @@
"bin-links": "^4.0.1",
"cacache": "^17.0.1",
"common-ancestor-path": "^1.0.1",
"hosted-git-info": "^6.1.1",
"json-parse-even-better-errors": "^3.0.0",
"json-stringify-nice": "^1.1.4",
"minimatch": "^5.1.0",
Expand Down
2 changes: 1 addition & 1 deletion package.json
Expand Up @@ -73,7 +73,7 @@
"fs-minipass": "^2.1.0",
"glob": "^8.0.1",
"graceful-fs": "^4.2.10",
"hosted-git-info": "^6.1.0",
"hosted-git-info": "^6.1.1",
"ini": "^3.0.1",
"init-package-json": "^4.0.1",
"is-cidr": "^4.0.2",
Expand Down
26 changes: 20 additions & 6 deletions workspaces/arborist/lib/arborist/reify.js
Expand Up @@ -9,6 +9,7 @@ const semver = require('semver')
const debug = require('../debug.js')
const walkUp = require('walk-up-path')
const log = require('proc-log')
const hgi = require('hosted-git-info')

const { dirname, resolve, relative } = require('path')
const { depth: dfwalk } = require('treeverse')
Expand Down Expand Up @@ -640,10 +641,15 @@ module.exports = cls => class Reifier extends cls {
// and no 'bundled: true' setting.
// Do the best with what we have, or else remove it from the tree
// entirely, since we can't possibly reify it.
const res = node.resolved ? `${node.name}@${this[_registryResolved](node.resolved)}`
: node.packageName && node.version
? `${node.packageName}@${node.version}`
: null
let res = null
if (node.resolved) {
const registryResolved = this[_registryResolved](node.resolved)
if (registryResolved) {
res = `${node.name}@${registryResolved}`
}
} else if (node.packageName && node.version) {
res = `${node.packageName}@${node.version}`
}

// no idea what this thing is. remove it from the tree.
if (!res) {
Expand Down Expand Up @@ -721,12 +727,20 @@ module.exports = cls => class Reifier extends cls {
// ${REGISTRY} or something. This has to be threaded through the
// Shrinkwrap and Node classes carefully, so for now, just treat
// the default reg as the magical animal that it has been.
const resolvedURL = new URL(resolved)
const resolvedURL = hgi.parseUrl(resolved)

if (!resolvedURL) {
// if we could not parse the url at all then returning nothing
// here means it will get removed from the tree in the next step
return
}

if ((this.options.replaceRegistryHost === resolvedURL.hostname)
|| this.options.replaceRegistryHost === 'always') {
// this.registry always has a trailing slash
resolved = `${this.registry.slice(0, -1)}${resolvedURL.pathname}${resolvedURL.searchParams}`
return `${this.registry.slice(0, -1)}${resolvedURL.pathname}${resolvedURL.searchParams}`
}

return resolved
}

Expand Down
1 change: 1 addition & 0 deletions workspaces/arborist/package.json
Expand Up @@ -16,6 +16,7 @@
"bin-links": "^4.0.1",
"cacache": "^17.0.1",
"common-ancestor-path": "^1.0.1",
"hosted-git-info": "^6.1.1",
"json-parse-even-better-errors": "^3.0.0",
"json-stringify-nice": "^1.1.4",
"minimatch": "^5.1.0",
Expand Down
79 changes: 78 additions & 1 deletion workspaces/arborist/test/arborist/reify.js
Expand Up @@ -2936,7 +2936,20 @@ t.test('installLinks', (t) => {
})

t.only('should preserve exact ranges, missing actual tree', async (t) => {
const Arborist = require('../../lib/index.js')
const Pacote = require('pacote')
const Arborist = t.mock('../../lib/arborist', {
pacote: {
...Pacote,
extract: async (...args) => {
if (args[0].startsWith('gitssh')) {
// we just want to test that this url is handled properly
// but its not a real git url we can clone so return early
return true
}
return Pacote.extract(...args)
},
},
})
const abbrev = resolve(__dirname,
'../fixtures/registry-mocks/content/abbrev/-/abbrev-1.1.1.tgz')
const abbrevTGZ = fs.readFileSync(abbrev)
Expand Down Expand Up @@ -2973,6 +2986,40 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => {
},
})

const gitSshPackument = JSON.stringify({
_id: 'gitssh',
_rev: 'lkjadflkjasdf',
name: 'gitssh',
'dist-tags': { latest: '1.1.1' },
versions: {
'1.1.1': {
name: 'gitssh',
version: '1.1.1',
dist: {
// this is a url that `new URL()` cant parse
// https://github.com/npm/cli/issues/5278
tarball: 'git+ssh://git@github.com:a/b/c.git#lkjadflkjasdf',
},
},
},
})

const notAUrlPackument = JSON.stringify({
_id: 'notaurl',
_rev: 'lkjadflkjasdf',
name: 'notaurl',
'dist-tags': { latest: '1.1.1' },
versions: {
'1.1.1': {
name: 'notaurl',
version: '1.1.1',
dist: {
tarball: 'hey been trying to break this test',
},
},
},
})

t.only('host should not be replaced replaceRegistryHost=never', async (t) => {
const testdir = t.testdir({
project: {
Expand All @@ -2981,6 +3028,8 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => {
version: '1.0.0',
dependencies: {
abbrev: '1.1.1',
gitssh: '1.1.1',
notaurl: '1.1.1',
},
}),
},
Expand All @@ -2994,6 +3043,14 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => {
.get('/abbrev/-/abbrev-1.1.1.tgz')
.reply(200, abbrevTGZ)

tnock(t, 'https://registry.github.com')
.get('/gitssh')
.reply(200, gitSshPackument)

tnock(t, 'https://registry.github.com')
.get('/notaurl')
.reply(200, notAUrlPackument)

const arb = new Arborist({
path: resolve(testdir, 'project'),
registry: 'https://registry.github.com',
Expand All @@ -3011,6 +3068,8 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => {
version: '1.0.0',
dependencies: {
abbrev: '1.1.1',
gitssh: '1.1.1',
notaurl: '1.1.1',
},
}),
},
Expand All @@ -3020,10 +3079,18 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => {
.get('/abbrev')
.reply(200, abbrevPackument)

tnock(t, 'https://registry.github.com')
.get('/gitssh')
.reply(200, gitSshPackument)

tnock(t, 'https://registry.github.com')
.get('/abbrev/-/abbrev-1.1.1.tgz')
.reply(200, abbrevTGZ)

tnock(t, 'https://registry.github.com')
.get('/notaurl')
.reply(200, notAUrlPackument)

const arb = new Arborist({
path: resolve(testdir, 'project'),
registry: 'https://registry.github.com',
Expand All @@ -3041,6 +3108,8 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => {
version: '1.0.0',
dependencies: {
abbrev: '1.1.1',
gitssh: '1.1.1',
notaurl: '1.1.1',
},
}),
},
Expand All @@ -3050,10 +3119,18 @@ t.only('should preserve exact ranges, missing actual tree', async (t) => {
.get('/abbrev')
.reply(200, abbrevPackument2)

tnock(t, 'https://registry.github.com')
.get('/gitssh')
.reply(200, gitSshPackument)

tnock(t, 'https://registry.github.com')
.get('/abbrev/-/abbrev-1.1.1.tgz')
.reply(200, abbrevTGZ)

tnock(t, 'https://registry.github.com')
.get('/notaurl')
.reply(200, notAUrlPackument)

const arb = new Arborist({
path: resolve(testdir, 'project'),
registry: 'https://registry.github.com',
Expand Down

0 comments on commit 0c5834e

Please sign in to comment.