Skip to content

Commit

Permalink
Merge pull request #5699 from bk2204/ls-files-optimization
Browse files Browse the repository at this point in the history
Optimize performance for scanning trees in partial clones
  • Loading branch information
bk2204 committed Apr 17, 2024
2 parents 3d1ca20 + beae114 commit 6fc94ca
Show file tree
Hide file tree
Showing 6 changed files with 84 additions and 3 deletions.
2 changes: 1 addition & 1 deletion commands/command_checkout.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func checkoutCommand(cmd *cobra.Command, args []string) {

chgitscanner.Filter = filepathfilter.New(rootedPaths(args), nil, filepathfilter.GitIgnore)

if err := chgitscanner.ScanTree(ref.Sha, nil); err != nil {
if err := chgitscanner.ScanLFSFiles(ref.Sha, nil); err != nil {
ExitWithError(err)
}

Expand Down
2 changes: 1 addition & 1 deletion commands/command_pull.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ func pull(filter *filepathfilter.Filter) {
}()

processQueue := time.Now()
if err := gitscanner.ScanTree(ref.Sha, nil); err != nil {
if err := gitscanner.ScanLFSFiles(ref.Sha, nil); err != nil {
singleCheckout.Close()
ExitWithError(err)
}
Expand Down
12 changes: 12 additions & 0 deletions git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,18 @@ func LsTree(ref string) (*subprocess.BufferedCmd, error) {
)
}

// LsFilesLFS starts a buffered `git ls-files` over the index (`--cached`),
// restricted by pathspec to paths whose `filter` attribute is `lfs`. Paths are
// reported relative to the top of the repository (`--full-name`) and entries
// are NUL-terminated (`-z`).
//
// Each entry is formatted as "<mode> <type> <oid> <size>\t<path>".
//
// This requires Git 2.42.0 for `--format` with `objecttype`.
func LsFilesLFS() (*subprocess.BufferedCmd, error) {
	const (
		// Per-entry output layout: mode, type, object name and size,
		// then a tab and the path.
		entryFormat = "--format=%(objectmode) %(objecttype) %(objectname) %(objectsize)\t%(path)"
		// Pathspec magic: match from the repository top, and only
		// paths carrying the attribute filter=lfs.
		lfsPathspec = ":(top,attr:filter=lfs)"
	)

	return gitNoLFSBuffered("ls-files", "--cached", "--full-name", "-z", entryFormat, lfsPathspec)
}

func ResolveRef(ref string) (*Ref, error) {
outp, err := gitNoLFSSimple("rev-parse", ref, "--symbolic-full-name", ref)
if err != nil {
Expand Down
16 changes: 16 additions & 0 deletions lfs/gitscanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,22 @@ func (s *GitScanner) ScanTree(ref string, cb GitScannerFoundPointer) error {
return err
}

// ScanLFSFiles takes a ref, which points to HEAD, and reports WrappedPointer
// objects found in the index or in the tree at that ref. Unlike ScanRefs,
// every file is reported even when several files in the tree share the same
// content.
//
// The callback is chosen by firstGitScannerCallback from cb and the scanner's
// own foundPointer; if none can be chosen, that error is returned and no scan
// is run. The elapsed scan time is recorded under the "ScanLFSFiles" key.
func (s *GitScanner) ScanLFSFiles(ref string, cb GitScannerFoundPointer) error {
	found, err := firstGitScannerCallback(cb, s.foundPointer)
	if err != nil {
		return err
	}

	began := time.Now()
	scanErr := runScanLFSFiles(found, ref, s.Filter, s.cfg.GitEnv(), s.cfg.OSEnv())
	tracerx.PerformanceSince("ScanLFSFiles", began)

	return scanErr
}

// ScanUnpushed scans history for all LFS pointers which have been added but not
// pushed to the named remote. remote can be left blank to mean 'any remote'.
func (s *GitScanner) ScanUnpushed(remote string, cb GitScannerFoundPointer) error {
Expand Down
51 changes: 50 additions & 1 deletion lfs/gitscanner_tree.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"github.com/git-lfs/git-lfs/v3/filepathfilter"
"github.com/git-lfs/git-lfs/v3/git"
"github.com/git-lfs/git-lfs/v3/git/gitattr"
"github.com/git-lfs/git-lfs/v3/subprocess"
"github.com/git-lfs/git-lfs/v3/tr"
)

Expand Down Expand Up @@ -38,6 +39,39 @@ func runScanTree(cb GitScannerFoundPointer, ref string, filter *filepathfilter.F
return nil
}

// runScanLFSFiles lists candidate blobs, reads them through catFileBatchTree,
// and hands every resulting pointer to cb.
//
// On Git 2.42.0 or newer the candidates come from lsFilesBlobs, which does not
// take ref; on older versions they come from lsTreeBlobs at ref. In both cases
// a blob is a candidate only if it is non-nil, smaller than blobSizeCutoff,
// and its filename is allowed by filter.
//
// Errors from starting the listing or the batch reader are returned. An error
// from waiting on the batch reader is delivered to cb instead, and nil is
// returned.
func runScanLFSFiles(cb GitScannerFoundPointer, ref string, filter *filepathfilter.Filter, gitEnv, osEnv config.Environment) error {
	// The same selection rule applies to both listing backends.
	isCandidate := func(blob *git.TreeBlob) bool {
		if blob == nil || blob.Size >= blobSizeCutoff {
			return false
		}
		return filter.Allows(blob.Filename)
	}

	var (
		candidates *TreeBlobChannelWrapper
		err        error
	)
	if git.IsGitVersionAtLeast("2.42.0") {
		candidates, err = lsFilesBlobs(isCandidate)
	} else {
		candidates, err = lsTreeBlobs(ref, isCandidate)
	}
	// We don't use the nameMap approach here since that's imprecise when >1 file
	// can be using the same content
	if err != nil {
		return err
	}

	pointers, err := catFileBatchTree(candidates, gitEnv, osEnv)
	if err != nil {
		return err
	}

	for ptr := range pointers.Results {
		cb(ptr, nil)
	}

	// A failure while draining is reported through the callback rather
	// than through the return value.
	if waitErr := pointers.Wait(); waitErr != nil {
		cb(nil, waitErr)
	}
	return nil
}

// catFileBatchTree() uses an ObjectDatabase from the
// github.com/git-lfs/gitobj/v2 package to get the contents of Git
// blob objects, given their SHA1s from git.TreeBlob structs, similar
Expand Down Expand Up @@ -98,7 +132,13 @@ func catFileBatchTree(treeblobs *TreeBlobChannelWrapper, gitEnv, osEnv config.En
// The returned channel will be sent these blobs which should be sent to catFileBatchTree
// for final check & conversion to Pointer
func lsTreeBlobs(ref string, predicate func(*git.TreeBlob) bool) (*TreeBlobChannelWrapper, error) {
cmd, err := git.LsTree(ref)
return lsBlobs(func() (*subprocess.BufferedCmd, error) {
return git.LsTree(ref)
}, predicate)
}

func lsBlobs(backend func() (*subprocess.BufferedCmd, error), predicate func(*git.TreeBlob) bool) (*TreeBlobChannelWrapper, error) {
cmd, err := backend()
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -128,6 +168,15 @@ func lsTreeBlobs(ref string, predicate func(*git.TreeBlob) bool) (*TreeBlobChann
return NewTreeBlobChannelWrapper(blobs, errchan), nil
}

// lsFilesBlobs uses git.LsFilesLFS (git ls-files) to find a list of candidate
// blobs which might be lfs files, keeping those accepted by predicate.
// The returned channel will be sent these blobs which should be sent to
// catFileBatchTree for final check & conversion to Pointer
func lsFilesBlobs(predicate func(*git.TreeBlob) bool) (*TreeBlobChannelWrapper, error) {
	// git.LsFilesLFS already has the backend signature lsBlobs expects,
	// so it is passed directly as a function value.
	return lsBlobs(git.LsFilesLFS, predicate)
}

func catFileBatchTreeForPointers(treeblobs *TreeBlobChannelWrapper, gitEnv, osEnv config.Environment) (map[string]*WrappedPointer, *filepathfilter.Filter, error) {
pscanner, err := NewPointerScanner(gitEnv, osEnv)
if err != nil {
Expand Down
4 changes: 4 additions & 0 deletions t/t-clone.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ begin_test "clone"

git lfs track "*.dat" 2>&1 | tee track.log
grep "Tracking \"\*.dat\"" track.log
git add .gitattributes
git commit -m "Track *.dat"

# generate some test data & commits with random LFS data
echo "[
Expand Down Expand Up @@ -732,6 +734,8 @@ begin_test "clone (HTTP server/proxy require cookies)"

git lfs track "*.dat" 2>&1 | tee track.log
grep "Tracking \"\*.dat\"" track.log
git add .gitattributes
git commit -m "Track *.dat"

# generate some test data & commits with random LFS data
echo "[
Expand Down

0 comments on commit 6fc94ca

Please sign in to comment.