Skip to content

Commit

Permalink
fix: use hosted-git-info to parse registry urls (#5761)
Browse files Browse the repository at this point in the history
Previously this was using `new URL` which would fail on some urls that
`hosted-git-info` is able to parse. But if we still get a url that can't
be parsed, we now set it to be removed from the tree instead of
erroring.

Fixes: #5278
  • Loading branch information
lukekarrys committed Nov 1, 2022
1 parent 292156c commit 26f3d0b
Show file tree
Hide file tree
Showing 9 changed files with 229 additions and 135 deletions.
2 changes: 2 additions & 0 deletions DEPENDENCIES.md
Expand Up @@ -149,6 +149,7 @@ graph LR;
npm-registry-fetch-->proc-log;
npmcli-arborist-->bin-links;
npmcli-arborist-->cacache;
npmcli-arborist-->hosted-git-info;
npmcli-arborist-->nopt;
npmcli-arborist-->npm-install-checks;
npmcli-arborist-->npm-package-arg;
Expand Down Expand Up @@ -578,6 +579,7 @@ graph LR;
npmcli-arborist-->cacache;
npmcli-arborist-->chalk;
npmcli-arborist-->common-ancestor-path;
npmcli-arborist-->hosted-git-info;
npmcli-arborist-->isaacs-string-locale-compare["@isaacs/string-locale-compare"];
npmcli-arborist-->json-parse-even-better-errors;
npmcli-arborist-->json-stringify-nice;
Expand Down
144 changes: 28 additions & 116 deletions node_modules/hosted-git-info/lib/index.js
@@ -1,32 +1,25 @@
'use strict'
const url = require('url')
const gitHosts = require('./git-host-info.js')
const GitHost = module.exports = require('./git-host.js')
const LRU = require('lru-cache')
const cache = new LRU({ max: 1000 })

const protocolToRepresentationMap = {
'git+ssh:': 'sshurl',
'git+https:': 'https',
'ssh:': 'sshurl',
'git:': 'git',
}
const parseUrl = require('./parse-url.js')

function protocolToRepresentation (protocol) {
return protocolToRepresentationMap[protocol] || protocol.slice(0, -1)
}
const cache = new LRU({ max: 1000 })

const authProtocols = {
'git:': true,
'https:': true,
'git+https:': true,
'http:': true,
'git+http:': true,
const protocols = {
'git+ssh:': { name: 'sshurl' },
'ssh:': { name: 'sshurl' },
'git+https:': { name: 'https', auth: true },
'git:': { auth: true },
'http:': { auth: true },
'https:': { auth: true },
'git+http:': { auth: true },
...Object.keys(gitHosts.byShortcut).reduce((acc, key) => {
acc[key] = { name: gitHosts.byShortcut[key] }
return acc
}, {}),
}

const knownProtocols = Object.keys(gitHosts.byShortcut)
.concat(['http:', 'https:', 'git:', 'git+ssh:', 'git+https:', 'ssh:'])

module.exports.fromUrl = function (giturl, opts) {
if (typeof giturl !== 'string') {
return
Expand All @@ -41,30 +34,34 @@ module.exports.fromUrl = function (giturl, opts) {
return cache.get(key)
}

module.exports.parseUrl = parseUrl

function fromUrl (giturl, opts) {
if (!giturl) {
return
}

const correctedUrl = isGitHubShorthand(giturl) ? 'github:' + giturl : correctProtocol(giturl)
const parsed = parseGitUrl(correctedUrl)
const correctedUrl = isGitHubShorthand(giturl) ? `github:${giturl}` : giturl
const parsed = parseUrl(correctedUrl, protocols)
if (!parsed) {
return parsed
return
}

const gitHostShortcut = gitHosts.byShortcut[parsed.protocol]
const gitHostDomain =
gitHosts.byDomain[parsed.hostname.startsWith('www.') ?
parsed.hostname.slice(4) :
parsed.hostname]
const gitHostDomain = gitHosts.byDomain[parsed.hostname.startsWith('www.')
? parsed.hostname.slice(4)
: parsed.hostname]
const gitHostName = gitHostShortcut || gitHostDomain
if (!gitHostName) {
return
}

const gitHostInfo = gitHosts[gitHostShortcut || gitHostDomain]
let auth = null
if (authProtocols[parsed.protocol] && (parsed.username || parsed.password)) {
if (protocols[parsed.protocol] &&
protocols[parsed.protocol].auth &&
(parsed.username || parsed.password)
) {
auth = `${parsed.username}${parsed.password ? ':' + parsed.password : ''}`
}

Expand Down Expand Up @@ -116,7 +113,8 @@ function fromUrl (giturl, opts) {
user = segments.user && decodeURIComponent(segments.user)
project = decodeURIComponent(segments.project)
committish = decodeURIComponent(segments.committish)
defaultRepresentation = protocolToRepresentation(parsed.protocol)
defaultRepresentation = (protocols[parsed.protocol] && protocols[parsed.protocol].name)
|| parsed.protocol.slice(0, -1)
}
} catch (err) {
/* istanbul ignore else */
Expand All @@ -130,31 +128,6 @@ function fromUrl (giturl, opts) {
return new GitHost(gitHostName, user, auth, project, committish, defaultRepresentation, opts)
}

// normalise the protocol part of a git url before it is handed to the parser.
// accepts input like git:github.com:user/repo and inserts the // after the first :
//
// arg: the url string to inspect
// returns: `arg` untouched when it already starts with a known protocol, when
// its first @ comes before its first colon, or when a // directly follows the
// first colon; `git+ssh://` + arg when an @ appears after the first colon (or
// there is no colon); otherwise arg with // inserted after the first colon
const correctProtocol = (arg) => {
  const colonIndex = arg.indexOf(':')
  // everything up to and including the first colon, e.g. `git:`
  const prefix = arg.slice(0, colonIndex + 1)
  if (knownProtocols.includes(prefix)) {
    return arg
  }

  const atIndex = arg.indexOf('@')
  if (atIndex !== -1) {
    // an @ after the first colon (or with no colon at all) is treated as
    // missing a protocol entirely
    return atIndex > colonIndex ? `git+ssh://${arg}` : arg
  }

  // the // is already in place right after the first colon
  if (arg.indexOf('//') === colonIndex + 1) {
    return arg
  }

  return `${prefix}//${arg.slice(colonIndex + 1)}`
}

// look for github shorthand inputs, such as npm/cli
const isGitHubShorthand = (arg) => {
// it cannot contain whitespace before the first #
Expand Down Expand Up @@ -185,64 +158,3 @@ const isGitHubShorthand = (arg) => {
doesNotStartWithDot && atOnlyAfterHash && colonOnlyAfterHash &&
secondSlashOnlyAfterHash
}

// attempt to correct an scp style url so that it will parse with `new URL()`
//
// giturl: the url string to correct
// returns: the corrected url string. when no correction applies the input is
// returned unchanged, so the result is always a string and never `undefined`
// (and never the literal `git+ssh://undefined`)
const correctUrl = (giturl) => {
  const firstAt = giturl.indexOf('@')
  const lastHash = giturl.lastIndexOf('#')
  // only look for colons up to the last #, anything after it is a committish
  const lastColon = giturl.lastIndexOf(':', lastHash > -1 ? lastHash : Infinity)

  // start from the input so every branch below operates on a real string
  let corrected = giturl
  if (lastColon > firstAt) {
    // the last : comes after the first @ (or there is no @)
    // like it would in:
    // proto://hostname.com:user/repo
    // username@hostname.com:user/repo
    // :password@hostname.com:user/repo
    // username:password@hostname.com:user/repo
    // proto://username@hostname.com:user/repo
    // proto://:password@hostname.com:user/repo
    // proto://username:password@hostname.com:user/repo
    // then we replace the last : with a / to create a valid path
    corrected = `${giturl.slice(0, lastColon)}/${giturl.slice(lastColon + 1)}`
  }

  if (corrected.indexOf(':') === -1 && giturl.indexOf('//') === -1) {
    // we have no : at all
    // as it would be in:
    // username@hostname.com/user/repo
    // then we prepend a protocol
    corrected = `git+ssh://${corrected}`
  }

  return corrected
}

// parse the url exactly as given first; only when that fails is the url
// cleaned up with `correctUrl` and parsed a second time
// returns the parsed URL object, or undefined when both attempts fail
// THIS FUNCTION SHOULD NEVER THROW
const parseGitUrl = (giturl) => {
  const attempt = (candidate) => {
    try {
      return new url.URL(candidate)
    } catch {
      // this fn should never throw
      return undefined
    }
  }

  // the right hand side (and so the correction) only runs when the
  // direct parse did not produce a result
  return attempt(giturl) || attempt(correctUrl(giturl))
}
78 changes: 78 additions & 0 deletions node_modules/hosted-git-info/lib/parse-url.js
@@ -0,0 +1,78 @@
const url = require('url')

// find the last index of `char` in `str` that starts at or before the first
// occurrence of `beforeChar`. when `beforeChar` is not present the whole
// string is searched. returns -1 when there is no such `char`.
const lastIndexOfBefore = (str, char, beforeChar) => {
  const boundary = str.indexOf(beforeChar)
  if (boundary > -1) {
    return str.lastIndexOf(char, boundary)
  }
  // no boundary found, search backwards from the very end
  return str.lastIndexOf(char, Infinity)
}

// parse `u` with `new URL()` without ever throwing
// returns the URL object, or undefined when `u` can not be parsed
const safeUrl = (u) => {
  let parsed
  try {
    parsed = new url.URL(u)
  } catch {
    // this fn should never throw, `parsed` stays undefined
  }
  return parsed
}

// normalise the protocol part of a git url before it is handed to the parser.
// accepts input like git:github.com:user/repo and inserts the // after the first :
//
// arg: the url string to inspect
// protocols: object whose own keys are the known protocols (e.g. `git+ssh:`)
// returns: `arg` untouched when it already starts with a known protocol, when
// its first @ comes before its first colon, or when a // directly follows the
// first colon; `git+ssh://` + arg when an @ appears after the first colon (or
// there is no colon); otherwise arg with // inserted after the first colon
const correctProtocol = (arg, protocols) => {
  const colonIndex = arg.indexOf(':')
  // everything up to and including the first colon, e.g. `git:`
  const prefix = arg.slice(0, colonIndex + 1)
  // own properties only, so inherited keys like `constructor` never match
  const isKnown = Object.prototype.hasOwnProperty.call(protocols, prefix)
  if (isKnown) {
    return arg
  }

  const atIndex = arg.indexOf('@')
  if (atIndex !== -1) {
    // an @ after the first colon (or with no colon at all) is treated as
    // missing a protocol entirely
    return atIndex > colonIndex ? `git+ssh://${arg}` : arg
  }

  // the // is already in place right after the first colon
  if (arg.indexOf('//') === colonIndex + 1) {
    return arg
  }

  return `${prefix}//${arg.slice(colonIndex + 1)}`
}

// attempt to correct an scp style url so that it will parse with `new URL()`
// returns the corrected string, or the input unchanged when nothing applies
const correctUrl = (giturl) => {
  // an @ after the first hash is ignored, since the hash denotes the start
  // of a committish which can itself contain @ characters
  const atIndex = lastIndexOfBefore(giturl, '@', '#')
  // colons after the hash are ignored too, the committish may have them as in:
  // git@github.com:user/package-2#semver:^1.0.0
  const colonIndex = lastIndexOfBefore(giturl, ':', '#')

  let fixed = giturl
  if (colonIndex > atIndex) {
    // the last : comes after the @ (or there is no @)
    // like it would in:
    // proto://hostname.com:user/repo
    // username@hostname.com:user/repo
    // :password@hostname.com:user/repo
    // username:password@hostname.com:user/repo
    // proto://username@hostname.com:user/repo
    // proto://:password@hostname.com:user/repo
    // proto://username:password@hostname.com:user/repo
    // then we replace the last : with a / to create a valid path
    fixed = `${giturl.slice(0, colonIndex)}/${giturl.slice(colonIndex + 1)}`
  }

  const hasColonBeforeHash = lastIndexOfBefore(fixed, ':', '#') !== -1
  const hasDoubleSlash = fixed.indexOf('//') !== -1
  if (!hasColonBeforeHash && !hasDoubleSlash) {
    // we have no : at all
    // as it would be in:
    // username@hostname.com/user/repo
    // then we prepend a protocol
    fixed = `git+ssh://${fixed}`
  }

  return fixed
}

module.exports = (giturl, protocols) => {
const withProtocol = protocols ? correctProtocol(giturl, protocols) : giturl
return safeUrl(withProtocol) || safeUrl(correctUrl(withProtocol))
}
23 changes: 16 additions & 7 deletions node_modules/hosted-git-info/package.json
@@ -1,6 +1,6 @@
{
"name": "hosted-git-info",
"version": "5.1.0",
"version": "5.2.1",
"description": "Provides metadata and conversions from repository urls for GitHub, Bitbucket and GitLab",
"main": "./lib/index.js",
"repository": {
Expand All @@ -21,9 +21,6 @@
"homepage": "https://github.com/npm/hosted-git-info",
"scripts": {
"posttest": "npm run lint",
"postversion": "npm publish",
"prepublishOnly": "git push origin --follow-tags",
"preversion": "npm test",
"snap": "tap",
"test": "tap",
"test:coverage": "tap --coverage-report=html",
Expand All @@ -37,7 +34,7 @@
},
"devDependencies": {
"@npmcli/eslint-config": "^3.0.1",
"@npmcli/template-oss": "3.5.0",
"@npmcli/template-oss": "4.7.1",
"tap": "^16.0.1"
},
"files": [
Expand All @@ -49,10 +46,22 @@
},
"tap": {
"color": 1,
"coverage": true
"coverage": true,
"nyc-arg": [
"--exclude",
"tap-snapshots/**"
]
},
"templateOSS": {
"//@npmcli/template-oss": "This file is partially managed by @npmcli/template-oss. Edits may be overwritten.",
"version": "3.5.0"
"version": "4.7.1",
"ciVersions": [
"12.13.0",
"12.x",
"14.15.0",
"14.x",
"16.0.0",
"16.x"
]
}
}
9 changes: 5 additions & 4 deletions package-lock.json
Expand Up @@ -109,7 +109,7 @@
"fastest-levenshtein": "^1.0.12",
"glob": "^8.0.1",
"graceful-fs": "^4.2.10",
"hosted-git-info": "^5.1.0",
"hosted-git-info": "^5.2.1",
"ini": "^3.0.1",
"init-package-json": "^3.0.2",
"is-cidr": "^4.0.2",
Expand Down Expand Up @@ -5955,9 +5955,9 @@
}
},
"node_modules/hosted-git-info": {
"version": "5.1.0",
"resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-5.1.0.tgz",
"integrity": "sha512-Ek+QmMEqZF8XrbFdwoDjSbm7rT23pCgEMOJmz6GPk/s4yH//RQfNPArhIxbguNxROq/+5lNBwCDHMhA903Kx1Q==",
"version": "5.2.1",
"resolved": "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-5.2.1.tgz",
"integrity": "sha512-xIcQYMnhcx2Nr4JTjsFmwwnr9vldugPy9uVm0o87bjqqWMv9GaqsTeT+i99wTl0mk1uLxJtHxLb8kymqTENQsw==",
"inBundle": true,
"dependencies": {
"lru-cache": "^7.5.1"
Expand Down Expand Up @@ -13880,6 +13880,7 @@
"bin-links": "^3.0.3",
"cacache": "^16.1.3",
"common-ancestor-path": "^1.0.1",
"hosted-git-info": "^5.2.1",
"json-parse-even-better-errors": "^2.3.1",
"json-stringify-nice": "^1.1.4",
"minimatch": "^5.1.0",
Expand Down
2 changes: 1 addition & 1 deletion package.json
Expand Up @@ -74,7 +74,7 @@
"fastest-levenshtein": "^1.0.12",
"glob": "^8.0.1",
"graceful-fs": "^4.2.10",
"hosted-git-info": "^5.1.0",
"hosted-git-info": "^5.2.1",
"ini": "^3.0.1",
"init-package-json": "^3.0.2",
"is-cidr": "^4.0.2",
Expand Down

0 comments on commit 26f3d0b

Please sign in to comment.