Skip to content

Commit

Permalink
Automatically detecting _smart Git hosts_.
Browse files Browse the repository at this point in the history
Added logic to automatically detect smart Git hosts that allow shallow
cloning. This is done by sending an `ls-remote` request to the server
and then evaluating the returned HTTP header fields. For this, Curl
verbose logging is enabled for the `ls-remote` request, since Curl
verbose logging sends the returned HTTP headers to `stderr`.

If the `stderr` output contains the desired header

  Content-Type: application/x-git-upload-pack-advertisement

then the server supports shallow cloning.

This approach uses Git and Curl for the heavy lifting. Instead of
implementing the request to the server using a simple HTTP client, Git
is used, since it takes care of authentication using stored credentials.

The used approach should also work for BitBucket, which only sends the
Content-Type header when a specific user agent is used. Using Git to
make the request enables this behavior.

The function to detect the smart Git host
(`GitRemoteResolver.prototype._supportsShallowCloning`) returns a
promise that is resolved once the server's response has been evaluated.
Handling this promise required a change to `GitHubResolver.js`, which now
always resolves the promise to `true`, since GitHub supports shallow cloning.

Added test cases to verify the new functionality.
  • Loading branch information
nwinkler committed Jan 13, 2015
1 parent f458114 commit 6f0028b
Show file tree
Hide file tree
Showing 3 changed files with 254 additions and 28 deletions.
3 changes: 2 additions & 1 deletion lib/core/resolvers/GitHubResolver.js
@@ -1,6 +1,7 @@
var util = require('util');
var path = require('path');
var mout = require('mout');
var Q = require('q');
var GitRemoteResolver = require('./GitRemoteResolver');
var download = require('../../util/download');
var extract = require('../../util/extract');
Expand Down Expand Up @@ -37,7 +38,7 @@ function GitHubResolver(decEndpoint, config, logger) {
}

// Enable shallow clones for GitHub repos
this._shallowClone = true;
this._shallowClone = Q.resolve(true);
}

util.inherits(GitHubResolver, GitRemoteResolver);
Expand Down
107 changes: 80 additions & 27 deletions lib/core/resolvers/GitRemoteResolver.js
Expand Up @@ -26,8 +26,10 @@ function GitRemoteResolver(decEndpoint, config, logger) {
this._host = url.parse(this._source).host;
}

// Disable shallow clones
this._shallowClone = false;
this._remote = url.parse(this._source);

// Verify whether the server supports shallow cloning
this._shallowClone = this._supportsShallowCloning();
}

util.inherits(GitRemoteResolver, GitResolver);
Expand Down Expand Up @@ -115,34 +117,36 @@ GitRemoteResolver.prototype._fastClone = function (resolution) {
branch = resolution.tag || resolution.branch;
args = ['clone', this._source, '-b', branch, '--progress', '.'];

// If the host does not support shallow clones, we don't use --depth=1
if (this._shallowClone && !GitRemoteResolver._noShallow.get(this._host)) {
args.push('--depth', 1);
}

return cmd('git', args, { cwd: this._tempDir })
.spread(function (stdout, stderr) {
// Only after 1.7.10 --branch accepts tags
// Detect those cases and inform the user to update git otherwise it's
// a lot slower than newer versions
if (!/branch .+? not found/i.test(stderr)) {
return;
return this._shallowClone.then(function (shallowCloningSupported) {
// If the host does not support shallow clones, we don't use --depth=1
if (shallowCloningSupported && !GitRemoteResolver._noShallow.get(this._host)) {
args.push('--depth', 1);
}

that._logger.warn('old-git', 'It seems you are using an old version of git, it will be slower and propitious to errors!');
return cmd('git', ['checkout', resolution.commit], { cwd: that._tempDir });
}, function (err) {
// Some git servers do not support shallow clones
// When that happens, we mark this host and try again
if (!GitRemoteResolver._noShallow.has(that._source) &&
err.details &&
/(rpc failed|shallow|--depth)/i.test(err.details)
) {
GitRemoteResolver._noShallow.set(that._host, true);
return that._fastClone(resolution);
}
return cmd('git', args, { cwd: that._tempDir })
.spread(function (stdout, stderr) {
// Only after 1.7.10 --branch accepts tags
// Detect those cases and inform the user to update git otherwise it's
// a lot slower than newer versions
if (!/branch .+? not found/i.test(stderr)) {
return;
}

throw err;
that._logger.warn('old-git', 'It seems you are using an old version of git, it will be slower and propitious to errors!');
return cmd('git', ['checkout', resolution.commit], { cwd: that._tempDir });
}, function (err) {
// Some git servers do not support shallow clones
// When that happens, we mark this host and try again
if (!GitRemoteResolver._noShallow.has(that._source) &&
err.details &&
/(rpc failed|shallow|--depth)/i.test(err.details)
) {
GitRemoteResolver._noShallow.set(that._host, true);
return that._fastClone(resolution);
}

throw err;
});
});
};

Expand All @@ -160,6 +164,55 @@ GitRemoteResolver.prototype._suggestProxyWorkaround = function (err) {
}
};

// Verifies whether the server supports shallow cloning.
// This is done according to the rules found in the following links:
// * https://github.com/dimitri/el-get/pull/1921/files
// * http://stackoverflow.com/questions/9270488/is-it-possible-to-detect-whether-a-http-git-remote-is-smart-or-dumb
//
// Summary of the rules:
// * Protocols like ssh or git always support shallow cloning
// * HTTP-based protocols can be verified by sending a HEAD or GET request to the URI (appended to the URL of the Git repo):
//   /info/refs?service=git-upload-pack
// * If the server responds with a 'Content-Type' header of 'application/x-git-upload-pack-advertisement',
//   the server supports shallow cloning ("smart server")
// * If the server responds with a different content type, the server does not support shallow cloning ("dumb server")
// * Instead of doing the HEAD or GET request using an HTTP client, we're letting Git and Curl do the heavy lifting.
//   Calling Git with the GIT_CURL_VERBOSE=2 env variable will provide the Git and Curl output, which includes
//   the content type. This has the advantage that Git will take care of using stored credentials and any additional
//   negotiation that needs to take place.
// * HTTP header field names are case-insensitive (HTTP/2 responses carry them in lower case), so the
//   header line is matched without regard to case or to the amount of blanks after the colon.
//
// The above should cover most cases, including BitBucket.
//
// Returns a promise that resolves to `true` when shallow cloning can be used and to `false` when an
// HTTP(S) remote did not answer with the smart content type. The promise is rejected if the
// `ls-remote` command itself fails.
GitRemoteResolver.prototype._supportsShallowCloning = function () {
    var that = this;
    var processEnv;

    // Anything that is not HTTP(S) is assumed to support shallow cloning, no request needed.
    if (!mout.string.startsWith(this._remote.protocol, 'http')) {
        return Q.resolve(true);
    }

    // Provide GIT_CURL_VERBOSE=2 environment variable to capture curl output.
    // Calling ls-remote includes a call to the git-upload-pack service, which returns the content type in the response.
    processEnv = mout.object.merge(process.env, { 'GIT_CURL_VERBOSE': 2 });

    return cmd('git', ['ls-remote', '--heads', this._source], {
        env: processEnv
    })
    .spread(function (stdout, stderr) {
        // Check stderr for content-type, ignore stdout.
        // If the content type is 'x-git', then the server supports shallow cloning.
        var isSmartServer = /content-type:[ \t]*application\/x-git-upload-pack-advertisement/i.test(stderr);

        that._logger.debug('detect-smart-git', 'Smart Git host detected: ' + isSmartServer);

        return isSmartServer;
    });
};

// ------------------------------

// Grab refs remotely
Expand Down
172 changes: 172 additions & 0 deletions test/core/resolvers/gitRemoteResolver.js
Expand Up @@ -2,6 +2,10 @@ var expect = require('expect.js');
var path = require('path');
var fs = require('graceful-fs');
var Logger = require('bower-logger');
var helpers = require('../../helpers');
var Q = require('q');
var mout = require('mout');
var multiline = require('multiline').stripIndent;
var GitRemoteResolver = require('../../../lib/core/resolvers/GitRemoteResolver');
var defaultConfig = require('../../../lib/config');

Expand Down Expand Up @@ -106,6 +110,92 @@ describe('GitRemoteResolver', function () {
.done();
});

// Verifies that the clone command only receives "--depth 1" when the resolver
// reports that the remote supports shallow cloning.
describe('shallow cloning', function () {
    var testSource = 'http://foo/bar.git';
    var gitRemoteResolverFactory;

    beforeEach(function () {
        // Loads GitRemoteResolver with the `cmd` utility replaced by the given handler.
        gitRemoteResolverFactory = function (handler) {
            return helpers.require('lib/core/resolvers/GitRemoteResolver', {
                '../../util/cmd': handler
            });
        };
    });

    // Builds a fake `cmd` that answers the ref listing with one branch and one tag
    // and checks that the subsequent clone call receives exactly `expectedCloneArgs`.
    function createCloneHandlerFn(expectedCloneArgs) {
        return function (cmd, args) {
            // The first git call fetches the tags for the provided source
            if (mout.array.equals(args, ['ls-remote', '--tags', '--heads', testSource])) {
                // Return list of commits, including one tag.
                // The tag will be used for the clone call.
                return Q.all([multiline(function () {/*
                    e4655d250f2a3f64ef2d712f25dafa60652bb93e refs/heads/some-branch
                    0a7daf646d4fd743b6ef701d63bdbe20eee422de refs/tags/0.0.1
                */
                })]);
            }

            if (args[0] === 'clone') {
                // Verify parameters of the clone call.
                expect(args).to.eql(expectedCloneArgs);

                // In this case, only the stderr content is evaluated. Everything's fine as long as it
                // does not contain any error description.
                return Q.all(['stdout', 'stderr']);
            }
        };
    }

    // Resolves `testSource` with shallow-clone detection mocked to `supported`.
    // The chain ends in `.done()` so that a failed expectation inside the fake
    // `cmd` is reported instead of being swallowed until the test times out.
    function resolveWithShallowSupport(supported, expectedCloneArgs, next) {
        var MyGitRemoteResolver = gitRemoteResolverFactory(createCloneHandlerFn(expectedCloneArgs));
        var resolver;

        // Mock the detection call, return the requested value for this test.
        MyGitRemoteResolver.prototype._supportsShallowCloning = function () {
            return Q.resolve(supported);
        };

        resolver = new MyGitRemoteResolver({ source: testSource }, defaultConfig(), logger);

        resolver.resolve()
        .then(function () {
            next();
        })
        .done();
    }

    it('should add --depth=1 when shallow cloning is supported', function (next) {
        // In this case, the arguments need to contain "--depth 1".
        resolveWithShallowSupport(
            true,
            ['clone', 'http://foo/bar.git', '-b', '0.0.1', '--progress', '.', '--depth', 1],
            next
        );
    });

    it('should not add --depth=1 when shallow cloning is not supported', function (next) {
        // In this case, the arguments should not contain "--depth 1".
        resolveWithShallowSupport(
            false,
            ['clone', 'http://foo/bar.git', '-b', '0.0.1', '--progress', '.'],
            next
        );
    });
});

it.skip('should handle gracefully servers that do not support --depth=1');
it.skip('should report progress when it takes too long to clone');
});
Expand Down Expand Up @@ -162,4 +252,86 @@ describe('GitRemoteResolver', function () {
.done();
});
});

// Verifies the smart-host detection: for HTTP(S) sources the resolver must run
// `git ls-remote --heads` with GIT_CURL_VERBOSE=2 and derive the result from
// the Content-Type header found in stderr.
describe('#_supportsShallowCloning', function () {
    var gitRemoteResolverFactory;

    beforeEach(function () {
        // Loads GitRemoteResolver with the `cmd` utility replaced by the given handler.
        gitRemoteResolverFactory = function (handler) {
            return helpers.require('lib/core/resolvers/GitRemoteResolver', {
                '../../util/cmd': handler
            });
        };
    });

    // Builds a fake `cmd` that checks the detection call and answers with `stderr`.
    function createCmdHandlerFn (testSource, stderr) {
        return function (cmd, args, options) {
            expect(cmd).to.be('git');
            expect(args).to.eql([ 'ls-remote', '--heads', testSource ]);
            expect(options.env.GIT_CURL_VERBOSE).to.be(2);

            return Q.all(['stdout', stderr]);
        };
    }

    // Creates a resolver for `testSource` whose git call yields `stderr` and checks
    // the detected value. The chain ends in `.done()` so that a failed expectation
    // is reported instead of being swallowed until the test times out.
    function expectShallowCloning (testSource, stderr, expected, next) {
        var MyGitRemoteResolver = gitRemoteResolverFactory(createCmdHandlerFn(testSource, stderr));
        var resolver = new MyGitRemoteResolver({ source: testSource }, defaultConfig(), logger);

        resolver._shallowClone
        .then(function (shallowCloningSupported) {
            expect(shallowCloningSupported).to.be(expected);

            next();
        })
        .done();
    }

    it('should call ls-remote when using http protocol', function (next) {
        expectShallowCloning('http://foo/bar.git', multiline(function () {/*
            foo: bar
            Content-Type: none
            1234: 5678
        */}), false, next);
    });

    it('should call ls-remote when using https protocol', function (next) {
        expectShallowCloning('https://foo/bar.git', multiline(function () {/*
            foo: bar
            Content-Type: none
            1234: 5678
        */}), false, next);
    });

    it('should evaluate to true when the smart content type is returned', function (next) {
        expectShallowCloning('https://foo/bar.git', multiline(function () {/*
            foo: bar
            Content-Type: application/x-git-upload-pack-advertisement
            1234: 5678
        */}), true, next);
    });
});
});

0 comments on commit 6f0028b

Please sign in to comment.