Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

First pass at file hashing for a package #4820

Merged
merged 11 commits into from May 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
7 changes: 4 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions cli/internal/ffi/bindings.h
Expand Up @@ -18,6 +18,8 @@ struct Buffer previous_content(struct Buffer buffer);

struct Buffer recursive_copy(struct Buffer buffer);

struct Buffer get_package_file_hashes_from_git_index(struct Buffer buffer);

struct Buffer transitive_closure(struct Buffer buf);

struct Buffer subgraph(struct Buffer buf);
Expand Down
23 changes: 23 additions & 0 deletions cli/internal/ffi/ffi.go
Expand Up @@ -313,3 +313,26 @@ func GlobalChange(packageManager string, prevContents []byte, currContents []byt

return resp.GetGlobalChange()
}

// GetPackageFileHashesFromGitIndex asks the Rust side (over FFI) to hash the files of a
// package using git. Only the package's non-ignored files are hashed; additional files
// are not supported.
//
// rootPath is sent as the request's TurboRoot and packagePath as its PackagePath. A
// non-empty error string in the response is returned as an error; a response that
// cannot be unmarshalled causes a panic.
func GetPackageFileHashesFromGitIndex(rootPath string, packagePath string) (map[string]string, error) {
	request := ffi_proto.GetPackageFileHashesFromGitIndexRequest{
		TurboRoot:   rootPath,
		PackagePath: packagePath,
	}

	// The request buffer is released as soon as the foreign call has returned.
	requestBuffer := Marshal(&request)
	responseBuffer := C.get_package_file_hashes_from_git_index(requestBuffer)
	requestBuffer.Free()

	response := ffi_proto.GetPackageFileHashesFromGitIndexResponse{}
	unmarshalErr := Unmarshal(responseBuffer, response.ProtoReflect().Interface())
	if unmarshalErr != nil {
		panic(unmarshalErr)
	}

	// The Rust side reports failures as a string inside the response message.
	if message := response.GetError(); message != "" {
		return nil, errors.New(message)
	}
	return response.GetHashes().GetHashes(), nil
}
344 changes: 298 additions & 46 deletions cli/internal/ffi/proto/messages.pb.go

Large diffs are not rendered by default.

100 changes: 0 additions & 100 deletions cli/internal/hashing/package_deps_hash.go
Expand Up @@ -28,48 +28,6 @@ type PackageDepsOptions struct {
InputPatterns []string
}

// getPackageFileHashesFromGitIndex returns a hash for each file of the package at
// packagePath (anchored at rootPath), keyed by file path.
//
// It starts from the hashes reported by gitLsTree for the package directory and then
// reconciles them with gitStatus: entries whose status is a delete are dropped, and
// every other entry reported by gitStatus is hashed with GetHashesForFiles, overriding
// whatever gitLsTree reported for that path.
//
// Errors from gitLsTree and gitStatus are wrapped, so the underlying error stays
// reachable through errors.Is / errors.As; errors from GetHashesForFiles are returned
// unchanged.
func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) {
	absolutePackagePath := packagePath.RestoreAnchor(rootPath)

	// Get the state of the git index.
	result, err := gitLsTree(absolutePackagePath)
	if err != nil {
		return nil, fmt.Errorf("could not get git hashes for files in package %s: %w", packagePath, err)
	}

	// Update the result with the state of the working directory.
	// The paths returned from this call are anchored at the package directory.
	gitStatusOutput, err := gitStatus(absolutePackagePath)
	if err != nil {
		// %w (instead of %v) keeps the underlying error in the chain; the message text is unchanged.
		return nil, fmt.Errorf("Could not get git hashes from git status: %w", err)
	}

	// Review status output to identify the delta.
	var filesToHash []turbopath.AnchoredSystemPath
	for filePath, status := range gitStatusOutput {
		if status.isDelete() {
			delete(result, filePath)
		} else {
			filesToHash = append(filesToHash, filePath.ToSystemPath())
		}
	}

	// Get the hashes for any modified files in the working directory.
	hashes, err := GetHashesForFiles(absolutePackagePath, filesToHash)
	if err != nil {
		return nil, err
	}

	// Zip up file paths and hashes together.
	for filePath, hash := range hashes {
		result[filePath] = hash
	}

	return result, nil
}

func safeCompileIgnoreFile(filepath turbopath.AbsoluteSystemPath) (*gitignore.GitIgnore, error) {
if filepath.FileExists() {
return gitignore.CompileIgnoreFile(filepath.ToString())
Expand Down Expand Up @@ -496,61 +454,3 @@ type statusCode struct {
// isDelete reports whether either of the two status fields (x or y) is "D".
func (s statusCode) isDelete() bool {
	const deleted = "D"
	return s.x == deleted || s.y == deleted
}

// gitStatus runs `git status` in rootPath and returns the status code of every path it
// reports. Callers use the result to decide what to do with files that do not currently
// match what is in the index.
//
// Note: `git status -z` reports paths relative to the repository's location, whereas
// `ls-files` and `ls-tree` report them relative to the calling directory. To normalize to
// the latter, the repository's location is worked out from rootPath and each reported
// path is re-expressed relative to rootPath before it is used as a map key.
func gitStatus(rootPath turbopath.AbsoluteSystemPath) (map[turbopath.AnchoredUnixPath]statusCode, error) {
	statusCmd := exec.Command(
		"git",               // Using `git` from $PATH,
		"status",            // report the status of the working tree,
		"--untracked-files", // including untracked files,
		"--no-renames",      // without detecting renames,
		"-z",                // with each file path relative to the repository root and \000-terminated,
		"--",                // treating everything that follows as a path,
		".",                 // and operating on the current directory instead of the root of the working tree.
	)
	statusCmd.Dir = rootPath.ToString() // Include files only from this directory.

	entries, err := runGitCommand(statusCmd, "status", gitoutput.NewStatusReader)
	if err != nil {
		return nil, err
	}

	anchor := turbopath.AbsoluteSystemPathFromUpstream(rootPath.ToString())
	traversePath, err := memoizedGetTraversePath(anchor)
	if err != nil {
		return nil, err
	}

	statuses := make(map[turbopath.AnchoredUnixPath]statusCode, len(entries))
	for _, rawEntry := range entries {
		entry := gitoutput.StatusEntry(rawEntry)

		// As reported by git: anchored at the repository.
		key := turbopath.AnchoredUnixPathFromUpstream(entry.GetField(gitoutput.Path))

		// With a non-empty traverse path, rebuild the file's full path from the
		// repository location and re-anchor it at rootPath. Otherwise the path
		// reported by git is used unchanged.
		if len(traversePath) > 0 {
			repositoryPath := anchor.Join(traversePath.ToSystemPath())
			absoluteFilePath := key.ToSystemPath().RestoreAnchor(repositoryPath)

			relativeToRoot, err := absoluteFilePath.RelativeTo(anchor)
			if err != nil {
				return nil, err
			}
			key = relativeToRoot.ToUnixPath()
		}

		statuses[key] = statusCode{
			x: entry.GetField(gitoutput.StatusX),
			y: entry.GetField(gitoutput.StatusY),
		}
	}

	return statuses, nil
}
112 changes: 112 additions & 0 deletions cli/internal/hashing/package_deps_hash_go.go
@@ -0,0 +1,112 @@
//go:build go || !rust
// +build go !rust

package hashing

import (
"fmt"
"os/exec"

"github.com/vercel/turbo/cli/internal/encoding/gitoutput"
"github.com/vercel/turbo/cli/internal/turbopath"
)

// getPackageFileHashesFromGitIndex returns a hash for each file of the package at
// packagePath (anchored at rootPath), keyed by file path.
//
// It starts from the hashes reported by gitLsTree for the package directory and then
// reconciles them with gitStatus: entries whose status is a delete are dropped, and
// every other entry reported by gitStatus is hashed with GetHashesForFiles, overriding
// whatever gitLsTree reported for that path.
//
// Errors from gitLsTree and gitStatus are wrapped, so the underlying error stays
// reachable through errors.Is / errors.As; errors from GetHashesForFiles are returned
// unchanged.
func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) {
	absolutePackagePath := packagePath.RestoreAnchor(rootPath)

	// Get the state of the git index.
	result, err := gitLsTree(absolutePackagePath)
	if err != nil {
		return nil, fmt.Errorf("could not get git hashes for files in package %s: %w", packagePath, err)
	}

	// Update the result with the state of the working directory.
	// The paths returned from this call are anchored at the package directory.
	gitStatusOutput, err := gitStatus(absolutePackagePath)
	if err != nil {
		// %w (instead of %v) keeps the underlying error in the chain; the message text is unchanged.
		return nil, fmt.Errorf("Could not get git hashes from git status: %w", err)
	}

	// Review status output to identify the delta.
	var filesToHash []turbopath.AnchoredSystemPath
	for filePath, status := range gitStatusOutput {
		if status.isDelete() {
			delete(result, filePath)
		} else {
			filesToHash = append(filesToHash, filePath.ToSystemPath())
		}
	}

	// Get the hashes for any modified files in the working directory.
	hashes, err := GetHashesForFiles(absolutePackagePath, filesToHash)
	if err != nil {
		return nil, err
	}

	// Zip up file paths and hashes together.
	for filePath, hash := range hashes {
		result[filePath] = hash
	}

	return result, nil
}

// gitStatus runs `git status` in rootPath and returns the status code of every path it
// reports. Callers use the result to decide what to do with files that do not currently
// match what is in the index.
//
// Note: `git status -z` reports paths relative to the repository's location, whereas
// `ls-files` and `ls-tree` report them relative to the calling directory. To normalize to
// the latter, the repository's location is worked out from rootPath and each reported
// path is re-expressed relative to rootPath before it is used as a map key.
func gitStatus(rootPath turbopath.AbsoluteSystemPath) (map[turbopath.AnchoredUnixPath]statusCode, error) {
	statusCmd := exec.Command(
		"git",               // Using `git` from $PATH,
		"status",            // report the status of the working tree,
		"--untracked-files", // including untracked files,
		"--no-renames",      // without detecting renames,
		"-z",                // with each file path relative to the repository root and \000-terminated,
		"--",                // treating everything that follows as a path,
		".",                 // and operating on the current directory instead of the root of the working tree.
	)
	statusCmd.Dir = rootPath.ToString() // Include files only from this directory.

	entries, err := runGitCommand(statusCmd, "status", gitoutput.NewStatusReader)
	if err != nil {
		return nil, err
	}

	anchor := turbopath.AbsoluteSystemPathFromUpstream(rootPath.ToString())
	traversePath, err := memoizedGetTraversePath(anchor)
	if err != nil {
		return nil, err
	}

	statuses := make(map[turbopath.AnchoredUnixPath]statusCode, len(entries))
	for _, rawEntry := range entries {
		entry := gitoutput.StatusEntry(rawEntry)

		// As reported by git: anchored at the repository.
		key := turbopath.AnchoredUnixPathFromUpstream(entry.GetField(gitoutput.Path))

		// With a non-empty traverse path, rebuild the file's full path from the
		// repository location and re-anchor it at rootPath. Otherwise the path
		// reported by git is used unchanged.
		if len(traversePath) > 0 {
			repositoryPath := anchor.Join(traversePath.ToSystemPath())
			absoluteFilePath := key.ToSystemPath().RestoreAnchor(repositoryPath)

			relativeToRoot, err := absoluteFilePath.RelativeTo(anchor)
			if err != nil {
				return nil, err
			}
			key = relativeToRoot.ToUnixPath()
		}

		statuses[key] = statusCode{
			x: entry.GetField(gitoutput.StatusX),
			y: entry.GetField(gitoutput.StatusY),
		}
	}

	return statuses, nil
}
22 changes: 22 additions & 0 deletions cli/internal/hashing/package_deps_hash_rust.go
@@ -0,0 +1,22 @@
//go:build rust
// +build rust

package hashing

import (
"github.com/vercel/turbo/cli/internal/ffi"
"github.com/vercel/turbo/cli/internal/turbopath"
)

// getPackageFileHashesFromGitIndex is the variant compiled under the `rust` build tag.
// It delegates the hashing to ffi.GetPackageFileHashesFromGitIndex, passing both paths as
// plain strings, and converts the string keys of the returned map into
// turbopath.AnchoredUnixPath values. An error from the FFI call is returned unchanged.
func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) {
	hashesByRawPath, err := ffi.GetPackageFileHashesFromGitIndex(rootPath.ToString(), packagePath.ToString())
	if err != nil {
		return nil, err
	}

	converted := make(map[turbopath.AnchoredUnixPath]string, len(hashesByRawPath))
	for rawPath, fileHash := range hashesByRawPath {
		key := turbopath.AnchoredUnixPathFromUpstream(rawPath)
		converted[key] = fileHash
	}
	return converted, nil
}
1 change: 1 addition & 0 deletions crates/turbopath/Cargo.toml
Expand Up @@ -7,6 +7,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
bstr = "1.4.0"
path-slash = "0.2.1"
# TODO: Make this a crate feature
serde = { workspace = true }
Expand Down
17 changes: 17 additions & 0 deletions crates/turbopath/src/absolute_system_path_buf.rs
Expand Up @@ -163,6 +163,14 @@ impl AbsoluteSystemPathBuf {
AbsoluteSystemPathBuf(self.0.join(Path::new(segment)))
}

/// Joins `unix_path` onto this path after converting it with `into_system()`.
///
/// # Errors
///
/// Returns a [`PathError`] if the `into_system()` conversion fails.
///
/// NOTE(review): `Path::join` replaces the base when its argument is absolute, so an
/// absolute `unix_path` would presumably not end up under `self` — confirm that callers
/// only pass relative paths.
pub fn join_unix_path_literal<S: AsRef<str>>(
    &self,
    unix_path: S,
) -> Result<AbsoluteSystemPathBuf, PathError> {
    let literal = Path::new(unix_path.as_ref());
    let system_tail = literal.into_system()?;
    let joined = self.0.join(system_tail);
    Ok(AbsoluteSystemPathBuf(joined))
}

pub fn ensure_dir(&self) -> Result<(), io::Error> {
if let Some(parent) = self.0.parent() {
fs::create_dir_all(parent)
Expand All @@ -171,6 +179,10 @@ impl AbsoluteSystemPathBuf {
}
}

/// Creates the directory at this path, along with any missing parent directories,
/// by delegating to [`fs::create_dir_all`].
///
/// # Errors
///
/// Returns the [`io::Error`] reported by [`fs::create_dir_all`].
pub fn create_dir_all(&self) -> Result<(), io::Error> {
    fs::create_dir_all(&self.0)
}

/// Removes the file at this path by delegating to [`fs::remove_file`].
///
/// # Errors
///
/// Returns the [`io::Error`] reported by [`fs::remove_file`].
pub fn remove(&self) -> Result<(), io::Error> {
    fs::remove_file(&self.0)
}
Expand Down Expand Up @@ -245,6 +257,11 @@ impl AbsoluteSystemPathBuf {
/// Opens the file at this path with [`fs::File::open`].
///
/// # Errors
///
/// Any error from [`fs::File::open`] is converted into a [`PathError`].
pub fn open(&self) -> Result<fs::File, PathError> {
    let file = fs::File::open(self.0.as_path())?;
    Ok(file)
}

pub fn to_realpath(&self) -> Result<Self, PathError> {
NicholasLYang marked this conversation as resolved.
Show resolved Hide resolved
let realpath = fs::canonicalize(&self.0)?;
Ok(Self(realpath))
}
}

impl From<AbsoluteSystemPathBuf> for PathBuf {
Expand Down