Skip to content

Commit

Permalink
First pass at file hashing for a package (#4820)
Browse files Browse the repository at this point in the history
Co-authored-by: Greg Soltis <Greg Soltis>
Co-authored-by: Chris Olszewski <chris.olszewski@vercel.com>
Co-authored-by: Nicholas Yang <nicholas.yang@vercel.com>
  • Loading branch information
3 people committed May 10, 2023
1 parent 565a671 commit 78aa333
Show file tree
Hide file tree
Showing 27 changed files with 1,480 additions and 211 deletions.
7 changes: 4 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions cli/internal/ffi/bindings.h
Expand Up @@ -18,6 +18,8 @@ struct Buffer previous_content(struct Buffer buffer);

struct Buffer recursive_copy(struct Buffer buffer);

struct Buffer get_package_file_hashes_from_git_index(struct Buffer buffer);

struct Buffer transitive_closure(struct Buffer buf);

struct Buffer subgraph(struct Buffer buf);
Expand Down
23 changes: 23 additions & 0 deletions cli/internal/ffi/ffi.go
Expand Up @@ -313,3 +313,26 @@ func GlobalChange(packageManager string, prevContents []byte, currContents []byt

return resp.GetGlobalChange()
}

// GetPackageFileHashesFromGitIndex proxies to rust to use git to hash the files in a package.
// It does not support additional files, it just hashes the non-ignored files in the package.
func GetPackageFileHashesFromGitIndex(rootPath string, packagePath string) (map[string]string, error) {
req := ffi_proto.GetPackageFileHashesFromGitIndexRequest{
TurboRoot: rootPath,
PackagePath: packagePath,
}
reqBuf := Marshal(&req)
resBuf := C.get_package_file_hashes_from_git_index(reqBuf)
reqBuf.Free()

resp := ffi_proto.GetPackageFileHashesFromGitIndexResponse{}
if err := Unmarshal(resBuf, resp.ProtoReflect().Interface()); err != nil {
panic(err)
}

if err := resp.GetError(); err != "" {
return nil, errors.New(err)
}
hashes := resp.GetHashes()
return hashes.GetHashes(), nil
}
344 changes: 298 additions & 46 deletions cli/internal/ffi/proto/messages.pb.go

Large diffs are not rendered by default.

100 changes: 0 additions & 100 deletions cli/internal/hashing/package_deps_hash.go
Expand Up @@ -28,48 +28,6 @@ type PackageDepsOptions struct {
InputPatterns []string
}

func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) {
var result map[turbopath.AnchoredUnixPath]string
absolutePackagePath := packagePath.RestoreAnchor(rootPath)

// Get the state of the git index.
gitLsTreeOutput, err := gitLsTree(absolutePackagePath)
if err != nil {
return nil, fmt.Errorf("could not get git hashes for files in package %s: %w", packagePath, err)
}
result = gitLsTreeOutput

// Update the with the state of the working directory.
// The paths returned from this call are anchored at the package directory
gitStatusOutput, err := gitStatus(absolutePackagePath)
if err != nil {
return nil, fmt.Errorf("Could not get git hashes from git status: %v", err)
}

// Review status output to identify the delta.
var filesToHash []turbopath.AnchoredSystemPath
for filePath, status := range gitStatusOutput {
if status.isDelete() {
delete(result, filePath)
} else {
filesToHash = append(filesToHash, filePath.ToSystemPath())
}
}

// Get the hashes for any modified files in the working directory.
hashes, err := GetHashesForFiles(absolutePackagePath, filesToHash)
if err != nil {
return nil, err
}

// Zip up file paths and hashes together
for filePath, hash := range hashes {
result[filePath] = hash
}

return result, nil
}

func safeCompileIgnoreFile(filepath turbopath.AbsoluteSystemPath) (*gitignore.GitIgnore, error) {
if filepath.FileExists() {
return gitignore.CompileIgnoreFile(filepath.ToString())
Expand Down Expand Up @@ -496,61 +454,3 @@ type statusCode struct {
func (s statusCode) isDelete() bool {
return s.x == "D" || s.y == "D"
}

// gitStatus returns a map of paths to their `git` status code. This can be used to identify what should
// be done with files that do not currently match what is in the index.
//
// Note: `git status -z`'s relative path results are relative to the repository's location.
// We need to calculate where the repository's location is in order to determine what the full path is
// before we can return those paths relative to the calling directory, normalizing to the behavior of
// `ls-files` and `ls-tree`.
func gitStatus(rootPath turbopath.AbsoluteSystemPath) (map[turbopath.AnchoredUnixPath]statusCode, error) {
cmd := exec.Command(
"git", // Using `git` from $PATH,
"status", // tell me about the status of the working tree,
"--untracked-files", // including information about untracked files,
"--no-renames", // do not detect renames,
"-z", // with each file path relative to the repository root and \000-terminated,
"--", // and any additional argument you see is a path, promise.
)
cmd.Args = append(cmd.Args, ".") // Operate in the current directory instead of the root of the working tree.
cmd.Dir = rootPath.ToString() // Include files only from this directory.

entries, err := runGitCommand(cmd, "status", gitoutput.NewStatusReader)
if err != nil {
return nil, err
}

output := make(map[turbopath.AnchoredUnixPath]statusCode, len(entries))
convertedRootPath := turbopath.AbsoluteSystemPathFromUpstream(rootPath.ToString())

traversePath, err := memoizedGetTraversePath(convertedRootPath)
if err != nil {
return nil, err
}

for _, entry := range entries {
statusEntry := gitoutput.StatusEntry(entry)
// Anchored at repository.
pathFromStatus := turbopath.AnchoredUnixPathFromUpstream(statusEntry.GetField(gitoutput.Path))
var outputPath turbopath.AnchoredUnixPath

if len(traversePath) > 0 {
repositoryPath := convertedRootPath.Join(traversePath.ToSystemPath())
fileFullPath := pathFromStatus.ToSystemPath().RestoreAnchor(repositoryPath)

relativePath, err := fileFullPath.RelativeTo(convertedRootPath)
if err != nil {
return nil, err
}

outputPath = relativePath.ToUnixPath()
} else {
outputPath = pathFromStatus
}

output[outputPath] = statusCode{x: statusEntry.GetField(gitoutput.StatusX), y: statusEntry.GetField(gitoutput.StatusY)}
}

return output, nil
}
112 changes: 112 additions & 0 deletions cli/internal/hashing/package_deps_hash_go.go
@@ -0,0 +1,112 @@
//go:build go || !rust
// +build go !rust

package hashing

import (
"fmt"
"os/exec"

"github.com/vercel/turbo/cli/internal/encoding/gitoutput"
"github.com/vercel/turbo/cli/internal/turbopath"
)

func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) {
var result map[turbopath.AnchoredUnixPath]string
absolutePackagePath := packagePath.RestoreAnchor(rootPath)

// Get the state of the git index.
gitLsTreeOutput, err := gitLsTree(absolutePackagePath)
if err != nil {
return nil, fmt.Errorf("could not get git hashes for files in package %s: %w", packagePath, err)
}
result = gitLsTreeOutput

// Update the with the state of the working directory.
// The paths returned from this call are anchored at the package directory
gitStatusOutput, err := gitStatus(absolutePackagePath)
if err != nil {
return nil, fmt.Errorf("Could not get git hashes from git status: %v", err)
}

// Review status output to identify the delta.
var filesToHash []turbopath.AnchoredSystemPath
for filePath, status := range gitStatusOutput {
if status.isDelete() {
delete(result, filePath)
} else {
filesToHash = append(filesToHash, filePath.ToSystemPath())
}
}

// Get the hashes for any modified files in the working directory.
hashes, err := GetHashesForFiles(absolutePackagePath, filesToHash)
if err != nil {
return nil, err
}

// Zip up file paths and hashes together
for filePath, hash := range hashes {
result[filePath] = hash
}

return result, nil
}

// gitStatus returns a map of paths to their `git` status code. This can be used to identify what should
// be done with files that do not currently match what is in the index.
//
// Note: `git status -z`'s relative path results are relative to the repository's location.
// We need to calculate where the repository's location is in order to determine what the full path is
// before we can return those paths relative to the calling directory, normalizing to the behavior of
// `ls-files` and `ls-tree`.
func gitStatus(rootPath turbopath.AbsoluteSystemPath) (map[turbopath.AnchoredUnixPath]statusCode, error) {
cmd := exec.Command(
"git", // Using `git` from $PATH,
"status", // tell me about the status of the working tree,
"--untracked-files", // including information about untracked files,
"--no-renames", // do not detect renames,
"-z", // with each file path relative to the repository root and \000-terminated,
"--", // and any additional argument you see is a path, promise.
)
cmd.Args = append(cmd.Args, ".") // Operate in the current directory instead of the root of the working tree.
cmd.Dir = rootPath.ToString() // Include files only from this directory.

entries, err := runGitCommand(cmd, "status", gitoutput.NewStatusReader)
if err != nil {
return nil, err
}

output := make(map[turbopath.AnchoredUnixPath]statusCode, len(entries))
convertedRootPath := turbopath.AbsoluteSystemPathFromUpstream(rootPath.ToString())

traversePath, err := memoizedGetTraversePath(convertedRootPath)
if err != nil {
return nil, err
}

for _, entry := range entries {
statusEntry := gitoutput.StatusEntry(entry)
// Anchored at repository.
pathFromStatus := turbopath.AnchoredUnixPathFromUpstream(statusEntry.GetField(gitoutput.Path))
var outputPath turbopath.AnchoredUnixPath

if len(traversePath) > 0 {
repositoryPath := convertedRootPath.Join(traversePath.ToSystemPath())
fileFullPath := pathFromStatus.ToSystemPath().RestoreAnchor(repositoryPath)

relativePath, err := fileFullPath.RelativeTo(convertedRootPath)
if err != nil {
return nil, err
}

outputPath = relativePath.ToUnixPath()
} else {
outputPath = pathFromStatus
}

output[outputPath] = statusCode{x: statusEntry.GetField(gitoutput.StatusX), y: statusEntry.GetField(gitoutput.StatusY)}
}

return output, nil
}
22 changes: 22 additions & 0 deletions cli/internal/hashing/package_deps_hash_rust.go
@@ -0,0 +1,22 @@
//go:build rust
// +build rust

package hashing

import (
"github.com/vercel/turbo/cli/internal/ffi"
"github.com/vercel/turbo/cli/internal/turbopath"
)

func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) {
rawHashes, err := ffi.GetPackageFileHashesFromGitIndex(rootPath.ToString(), packagePath.ToString())
if err != nil {
return nil, err
}

hashes := make(map[turbopath.AnchoredUnixPath]string, len(rawHashes))
for rawPath, hash := range rawHashes {
hashes[turbopath.AnchoredUnixPathFromUpstream(rawPath)] = hash
}
return hashes, nil
}
1 change: 1 addition & 0 deletions crates/turbopath/Cargo.toml
Expand Up @@ -7,6 +7,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
bstr = "1.4.0"
path-slash = "0.2.1"
# TODO: Make this a crate feature
serde = { workspace = true }
Expand Down
17 changes: 17 additions & 0 deletions crates/turbopath/src/absolute_system_path_buf.rs
Expand Up @@ -163,6 +163,14 @@ impl AbsoluteSystemPathBuf {
AbsoluteSystemPathBuf(self.0.join(Path::new(segment)))
}

pub fn join_unix_path_literal<S: AsRef<str>>(
&self,
unix_path: S,
) -> Result<AbsoluteSystemPathBuf, PathError> {
let tail = Path::new(unix_path.as_ref()).into_system()?;
Ok(AbsoluteSystemPathBuf(self.0.join(tail)))
}

pub fn ensure_dir(&self) -> Result<(), io::Error> {
if let Some(parent) = self.0.parent() {
fs::create_dir_all(parent)
Expand All @@ -171,6 +179,10 @@ impl AbsoluteSystemPathBuf {
}
}

pub fn create_dir_all(&self) -> Result<(), io::Error> {
fs::create_dir_all(self.0.as_path())
}

pub fn remove(&self) -> Result<(), io::Error> {
fs::remove_file(self.0.as_path())
}
Expand Down Expand Up @@ -245,6 +257,11 @@ impl AbsoluteSystemPathBuf {
pub fn open(&self) -> Result<fs::File, PathError> {
Ok(fs::File::open(&self.0)?)
}

pub fn to_realpath(&self) -> Result<Self, PathError> {
let realpath = fs::canonicalize(&self.0)?;
Ok(Self(realpath))
}
}

impl From<AbsoluteSystemPathBuf> for PathBuf {
Expand Down

0 comments on commit 78aa333

Please sign in to comment.