Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prefactor package hashing #4816

Merged
merged 14 commits into from May 4, 2023
91 changes: 89 additions & 2 deletions cli/internal/hashing/package_deps_hash.go
Expand Up @@ -10,6 +10,8 @@
"sync"

"github.com/pkg/errors"
gitignore "github.com/sabhiram/go-gitignore"
"github.com/vercel/turbo/cli/internal/doublestar"
"github.com/vercel/turbo/cli/internal/encoding/gitoutput"
"github.com/vercel/turbo/cli/internal/fs"
"github.com/vercel/turbo/cli/internal/globby"
Expand Down Expand Up @@ -68,8 +70,93 @@
return result, nil
}

// func getFileHashesFromProcessingGitIgnore(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath, inputs []string) (map[turbopath.AnchoredUnixPath]string, error) {
// }
func safeCompileIgnoreFile(filepath turbopath.AbsoluteSystemPath) (*gitignore.GitIgnore, error) {
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This helper function came along for the ride and got converted to turbopath, cleaning up a couple of small bits of its invocation.

if filepath.FileExists() {
return gitignore.CompileIgnoreFile(filepath.ToString())
}
// no op
return gitignore.CompileIgnoreLines([]string{}...), nil
}

func GetPackageFileHashesFromProcessingGitIgnore(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath, inputs []string) (map[turbopath.AnchoredUnixPath]string, error) {

Check warning on line 81 in cli/internal/hashing/package_deps_hash.go

View workflow job for this annotation

GitHub Actions / Go linting

exported: exported function GetPackageFileHashesFromProcessingGitIgnore should have comment or be unexported (revive)
result := make(map[turbopath.AnchoredUnixPath]string)
absolutePackagePath := packagePath.RestoreAnchor(rootPath)

// Instead of implementing all gitignore properly, we hack it. We only respect .gitignore in the root and in
// the directory of a package.
ignore, err := safeCompileIgnoreFile(rootPath.UntypedJoin(".gitignore"))
if err != nil {
return nil, err
}

ignorePkg, err := safeCompileIgnoreFile(absolutePackagePath.UntypedJoin(".gitignore"))
if err != nil {
return nil, err
}

includePattern := ""
excludePattern := ""
if len(inputs) > 0 {
var includePatterns []string
var excludePatterns []string
for _, pattern := range inputs {
if len(pattern) > 0 && pattern[0] == '!' {
excludePatterns = append(excludePatterns, absolutePackagePath.UntypedJoin(pattern[1:]).ToString())
} else {
includePatterns = append(includePatterns, absolutePackagePath.UntypedJoin(pattern).ToString())
}
}
if len(includePatterns) > 0 {
includePattern = "{" + strings.Join(includePatterns, ",") + "}"
}
if len(excludePatterns) > 0 {
excludePattern = "{" + strings.Join(excludePatterns, ",") + "}"
}
}

err = fs.Walk(absolutePackagePath.ToStringDuringMigration(), func(name string, isDir bool) error {
convertedName := turbopath.AbsoluteSystemPathFromUpstream(name)
rootMatch := ignore.MatchesPath(convertedName.ToString())
otherMatch := ignorePkg.MatchesPath(convertedName.ToString())
if !rootMatch && !otherMatch {
if !isDir {
if includePattern != "" {
val, err := doublestar.PathMatch(includePattern, convertedName.ToString())
if err != nil {
return err
}
if !val {
return nil
}
}
if excludePattern != "" {
val, err := doublestar.PathMatch(excludePattern, convertedName.ToString())
if err != nil {
return err
}
if val {
return nil
}
}
hash, err := fs.GitLikeHashFile(convertedName.ToString())
if err != nil {
return fmt.Errorf("could not hash file %v. \n%w", convertedName.ToString(), err)
}

relativePath, err := convertedName.RelativeTo(absolutePackagePath)
if err != nil {
return fmt.Errorf("File path cannot be made relative: %w", err)
}
result[relativePath.ToUnixPath()] = hash
}
}
return nil
})
if err != nil {
return nil, err
}
return result, nil
}

func getPackageFileHashesFromInputs(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath, inputs []string) (map[turbopath.AnchoredUnixPath]string, error) {
absolutePackagePath := packagePath.RestoreAnchor(rootPath)
Expand Down Expand Up @@ -141,7 +228,7 @@
func GetPackageDeps(rootPath turbopath.AbsoluteSystemPath, p *PackageDepsOptions) (map[turbopath.AnchoredUnixPath]string, error) {
if len(p.InputPatterns) == 0 {
return getPackageFileHashesFromGitIndex(rootPath, p.PackagePath)
} else {

Check warning on line 231 in cli/internal/hashing/package_deps_hash.go

View workflow job for this annotation

GitHub Actions / Go linting

indent-error-flow: if block ends with a return statement, so drop this else and outdent its block (revive)
return getPackageFileHashesFromInputs(rootPath, p.PackagePath, p.InputPatterns)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this commit the second side of the if statement got moved into its own function.

}
}
Expand Down
87 changes: 1 addition & 86 deletions cli/internal/taskhash/taskhash.go
Expand Up @@ -9,8 +9,6 @@ import (

"github.com/hashicorp/go-hclog"
"github.com/pyr-sh/dag"
gitignore "github.com/sabhiram/go-gitignore"
"github.com/vercel/turbo/cli/internal/doublestar"
"github.com/vercel/turbo/cli/internal/env"
"github.com/vercel/turbo/cli/internal/fs"
"github.com/vercel/turbo/cli/internal/hashing"
Expand Down Expand Up @@ -86,14 +84,6 @@ func (pfs packageFileSpec) ToKey() packageFileHashKey {
return packageFileHashKey(fmt.Sprintf("%v#%v", pfs.pkg, strings.Join(pfs.inputs, "!")))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This probably can't go wrong, but we already have a unique identifier, the taskID. This was to enable multiple tasks in the same package with the same specified inputs to not have to rerun file hashing.

It's an optimization, but not actually the ideal one. Now that everything calls into GetFileHashes that could just maintain a set of all files it has seen and apply it across all packages, all inputs.

I do not add this feature in this PR.

}

func safeCompileIgnoreFile(filepath string) (*gitignore.GitIgnore, error) {
if fs.FileExists(filepath) {
return gitignore.CompileIgnoreFile(filepath)
}
// no op
return gitignore.CompileIgnoreLines([]string{}...), nil
}

func (pfs *packageFileSpec) getHashObject(pkg *fs.PackageJSON, repoRoot turbopath.AbsoluteSystemPath) map[turbopath.AnchoredUnixPath]string {
hashObject, pkgDepsErr := hashing.GetPackageDeps(repoRoot, &hashing.PackageDepsOptions{
PackagePath: pkg.Dir,
Expand All @@ -119,82 +109,7 @@ func (pfs *packageFileSpec) hash(hashObject map[turbopath.AnchoredUnixPath]strin
}

func manuallyHashPackage(pkg *fs.PackageJSON, inputs []string, rootPath turbopath.AbsoluteSystemPath) (map[turbopath.AnchoredUnixPath]string, error) {
hashObject := make(map[turbopath.AnchoredUnixPath]string)
// Instead of implementing all gitignore properly, we hack it. We only respect .gitignore in the root and in
// the directory of a package.
ignore, err := safeCompileIgnoreFile(rootPath.UntypedJoin(".gitignore").ToString())
if err != nil {
return nil, err
}

ignorePkg, err := safeCompileIgnoreFile(rootPath.UntypedJoin(pkg.Dir.ToStringDuringMigration(), ".gitignore").ToString())
if err != nil {
return nil, err
}

pathPrefix := rootPath.UntypedJoin(pkg.Dir.ToStringDuringMigration())
includePattern := ""
excludePattern := ""
if len(inputs) > 0 {
var includePatterns []string
var excludePatterns []string
for _, pattern := range inputs {
if len(pattern) > 0 && pattern[0] == '!' {
excludePatterns = append(excludePatterns, pathPrefix.UntypedJoin(pattern[1:]).ToString())
} else {
includePatterns = append(includePatterns, pathPrefix.UntypedJoin(pattern).ToString())
}
}
if len(includePatterns) > 0 {
includePattern = "{" + strings.Join(includePatterns, ",") + "}"
}
if len(excludePatterns) > 0 {
excludePattern = "{" + strings.Join(excludePatterns, ",") + "}"
}
}

err = fs.Walk(pathPrefix.ToStringDuringMigration(), func(name string, isDir bool) error {
convertedName := turbopath.AbsoluteSystemPathFromUpstream(name)
rootMatch := ignore.MatchesPath(convertedName.ToString())
otherMatch := ignorePkg.MatchesPath(convertedName.ToString())
if !rootMatch && !otherMatch {
if !isDir {
if includePattern != "" {
val, err := doublestar.PathMatch(includePattern, convertedName.ToString())
if err != nil {
return err
}
if !val {
return nil
}
}
if excludePattern != "" {
val, err := doublestar.PathMatch(excludePattern, convertedName.ToString())
if err != nil {
return err
}
if val {
return nil
}
}
hash, err := fs.GitLikeHashFile(convertedName.ToString())
if err != nil {
return fmt.Errorf("could not hash file %v. \n%w", convertedName.ToString(), err)
}

relativePath, err := convertedName.RelativeTo(pathPrefix)
if err != nil {
return fmt.Errorf("File path cannot be made relative: %w", err)
}
hashObject[relativePath.ToUnixPath()] = hash
}
}
return nil
})
if err != nil {
return nil, err
}
return hashObject, nil
return hashing.GetPackageFileHashesFromProcessingGitIgnore(rootPath, pkg.Dir, inputs)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Turns out that we have a third method of doing file hashes for packages, tucked inside of taskhash.go.

This moves the body of the function to an exported place in package-deps-hash.go.

}

// packageFileHashes is a map from a package and optional input globs to the hash of
Expand Down