diff --git a/Cargo.lock b/Cargo.lock index 87ac86e794789..3948216b4ece7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9292,9 +9292,9 @@ dependencies = [ name = "turborepo-scm" version = "0.1.0" dependencies = [ - "anyhow", "dunce", "git2 0.16.1", + "nom", "tempfile", "thiserror", "turbopath", diff --git a/cli/internal/ffi/bindings.h b/cli/internal/ffi/bindings.h index 923145d3100ef..61010332dceb1 100644 --- a/cli/internal/ffi/bindings.h +++ b/cli/internal/ffi/bindings.h @@ -18,6 +18,8 @@ struct Buffer previous_content(struct Buffer buffer); struct Buffer recursive_copy(struct Buffer buffer); +struct Buffer get_package_file_hashes_from_git_index(struct Buffer buffer); + struct Buffer transitive_closure(struct Buffer buf); struct Buffer subgraph(struct Buffer buf); diff --git a/cli/internal/ffi/ffi.go b/cli/internal/ffi/ffi.go index b59846fa73bb0..0a05db77a7251 100644 --- a/cli/internal/ffi/ffi.go +++ b/cli/internal/ffi/ffi.go @@ -313,3 +313,26 @@ func GlobalChange(packageManager string, prevContents []byte, currContents []byt return resp.GetGlobalChange() } + +// GetPackageFileHashesFromGitIndex proxies to rust to use git to hash the files in a package. +// It does not support additional files, it just hashes the non-ignored files in the package. 
+func GetPackageFileHashesFromGitIndex(rootPath string, packagePath string) (map[string]string, error) { + req := ffi_proto.GetPackageFileHashesFromGitIndexRequest{ + TurboRoot: rootPath, + PackagePath: packagePath, + } + reqBuf := Marshal(&req) + resBuf := C.get_package_file_hashes_from_git_index(reqBuf) + reqBuf.Free() + + resp := ffi_proto.GetPackageFileHashesFromGitIndexResponse{} + if err := Unmarshal(resBuf, resp.ProtoReflect().Interface()); err != nil { + panic(err) + } + + if err := resp.GetError(); err != "" { + return nil, errors.New(err) + } + hashes := resp.GetHashes() + return hashes.GetHashes(), nil +} diff --git a/cli/internal/ffi/proto/messages.pb.go b/cli/internal/ffi/proto/messages.pb.go index 553b205c07904..2232633f114bd 100644 --- a/cli/internal/ffi/proto/messages.pb.go +++ b/cli/internal/ffi/proto/messages.pb.go @@ -1662,6 +1662,190 @@ func (x *RecursiveCopyResponse) GetError() string { return "" } +type GetPackageFileHashesFromGitIndexRequest struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + TurboRoot string `protobuf:"bytes,1,opt,name=turbo_root,json=turboRoot,proto3" json:"turbo_root,omitempty"` + PackagePath string `protobuf:"bytes,2,opt,name=package_path,json=packagePath,proto3" json:"package_path,omitempty"` +} + +func (x *GetPackageFileHashesFromGitIndexRequest) Reset() { + *x = GetPackageFileHashesFromGitIndexRequest{} + if protoimpl.UnsafeEnabled { + mi := &file_turborepo_ffi_messages_proto_msgTypes[26] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetPackageFileHashesFromGitIndexRequest) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetPackageFileHashesFromGitIndexRequest) ProtoMessage() {} + +func (x *GetPackageFileHashesFromGitIndexRequest) ProtoReflect() protoreflect.Message { + mi := &file_turborepo_ffi_messages_proto_msgTypes[26] + if protoimpl.UnsafeEnabled && x != nil { + ms := 
protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetPackageFileHashesFromGitIndexRequest.ProtoReflect.Descriptor instead. +func (*GetPackageFileHashesFromGitIndexRequest) Descriptor() ([]byte, []int) { + return file_turborepo_ffi_messages_proto_rawDescGZIP(), []int{26} +} + +func (x *GetPackageFileHashesFromGitIndexRequest) GetTurboRoot() string { + if x != nil { + return x.TurboRoot + } + return "" +} + +func (x *GetPackageFileHashesFromGitIndexRequest) GetPackagePath() string { + if x != nil { + return x.PackagePath + } + return "" +} + +type FileHashes struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + Hashes map[string]string `protobuf:"bytes,1,rep,name=hashes,proto3" json:"hashes,omitempty" protobuf_key:"bytes,1,opt,name=key,proto3" protobuf_val:"bytes,2,opt,name=value,proto3"` +} + +func (x *FileHashes) Reset() { + *x = FileHashes{} + if protoimpl.UnsafeEnabled { + mi := &file_turborepo_ffi_messages_proto_msgTypes[27] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *FileHashes) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*FileHashes) ProtoMessage() {} + +func (x *FileHashes) ProtoReflect() protoreflect.Message { + mi := &file_turborepo_ffi_messages_proto_msgTypes[27] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use FileHashes.ProtoReflect.Descriptor instead. 
+func (*FileHashes) Descriptor() ([]byte, []int) { + return file_turborepo_ffi_messages_proto_rawDescGZIP(), []int{27} +} + +func (x *FileHashes) GetHashes() map[string]string { + if x != nil { + return x.Hashes + } + return nil +} + +type GetPackageFileHashesFromGitIndexResponse struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // Types that are assignable to Response: + // *GetPackageFileHashesFromGitIndexResponse_Hashes + // *GetPackageFileHashesFromGitIndexResponse_Error + Response isGetPackageFileHashesFromGitIndexResponse_Response `protobuf_oneof:"response"` +} + +func (x *GetPackageFileHashesFromGitIndexResponse) Reset() { + *x = GetPackageFileHashesFromGitIndexResponse{} + if protoimpl.UnsafeEnabled { + mi := &file_turborepo_ffi_messages_proto_msgTypes[28] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *GetPackageFileHashesFromGitIndexResponse) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*GetPackageFileHashesFromGitIndexResponse) ProtoMessage() {} + +func (x *GetPackageFileHashesFromGitIndexResponse) ProtoReflect() protoreflect.Message { + mi := &file_turborepo_ffi_messages_proto_msgTypes[28] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use GetPackageFileHashesFromGitIndexResponse.ProtoReflect.Descriptor instead. 
+func (*GetPackageFileHashesFromGitIndexResponse) Descriptor() ([]byte, []int) { + return file_turborepo_ffi_messages_proto_rawDescGZIP(), []int{28} +} + +func (m *GetPackageFileHashesFromGitIndexResponse) GetResponse() isGetPackageFileHashesFromGitIndexResponse_Response { + if m != nil { + return m.Response + } + return nil +} + +func (x *GetPackageFileHashesFromGitIndexResponse) GetHashes() *FileHashes { + if x, ok := x.GetResponse().(*GetPackageFileHashesFromGitIndexResponse_Hashes); ok { + return x.Hashes + } + return nil +} + +func (x *GetPackageFileHashesFromGitIndexResponse) GetError() string { + if x, ok := x.GetResponse().(*GetPackageFileHashesFromGitIndexResponse_Error); ok { + return x.Error + } + return "" +} + +type isGetPackageFileHashesFromGitIndexResponse_Response interface { + isGetPackageFileHashesFromGitIndexResponse_Response() +} + +type GetPackageFileHashesFromGitIndexResponse_Hashes struct { + Hashes *FileHashes `protobuf:"bytes,1,opt,name=hashes,proto3,oneof"` +} + +type GetPackageFileHashesFromGitIndexResponse_Error struct { + Error string `protobuf:"bytes,2,opt,name=error,proto3,oneof"` +} + +func (*GetPackageFileHashesFromGitIndexResponse_Hashes) isGetPackageFileHashesFromGitIndexResponse_Response() { +} + +func (*GetPackageFileHashesFromGitIndexResponse_Error) isGetPackageFileHashesFromGitIndexResponse_Response() { +} + var File_turborepo_ffi_messages_proto protoreflect.FileDescriptor var file_turborepo_ffi_messages_proto_rawDesc = []byte{ @@ -1843,10 +2027,32 @@ var file_turborepo_ffi_messages_proto_rawDesc = []byte{ 0x76, 0x65, 0x43, 0x6f, 0x70, 0x79, 0x52, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x19, 0x0a, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x88, 0x01, 0x01, 0x42, 0x08, 0x0a, 0x06, 0x5f, 0x65, 0x72, - 0x72, 0x6f, 0x72, 0x2a, 0x24, 0x0a, 0x0e, 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x4d, 0x61, - 0x6e, 0x61, 0x67, 0x65, 0x72, 0x12, 
0x07, 0x0a, 0x03, 0x4e, 0x50, 0x4d, 0x10, 0x00, 0x12, 0x09, - 0x0a, 0x05, 0x42, 0x45, 0x52, 0x52, 0x59, 0x10, 0x01, 0x42, 0x0b, 0x5a, 0x09, 0x66, 0x66, 0x69, - 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x72, 0x6f, 0x72, 0x22, 0x6b, 0x0a, 0x27, 0x47, 0x65, 0x74, 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, + 0x65, 0x46, 0x69, 0x6c, 0x65, 0x48, 0x61, 0x73, 0x68, 0x65, 0x73, 0x46, 0x72, 0x6f, 0x6d, 0x47, + 0x69, 0x74, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x52, 0x65, 0x71, 0x75, 0x65, 0x73, 0x74, 0x12, 0x1d, + 0x0a, 0x0a, 0x74, 0x75, 0x72, 0x62, 0x6f, 0x5f, 0x72, 0x6f, 0x6f, 0x74, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x09, 0x74, 0x75, 0x72, 0x62, 0x6f, 0x52, 0x6f, 0x6f, 0x74, 0x12, 0x21, 0x0a, + 0x0c, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x5f, 0x70, 0x61, 0x74, 0x68, 0x18, 0x02, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x0b, 0x70, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x50, 0x61, 0x74, 0x68, + 0x22, 0x78, 0x0a, 0x0a, 0x46, 0x69, 0x6c, 0x65, 0x48, 0x61, 0x73, 0x68, 0x65, 0x73, 0x12, 0x2f, + 0x0a, 0x06, 0x68, 0x61, 0x73, 0x68, 0x65, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x17, + 0x2e, 0x46, 0x69, 0x6c, 0x65, 0x48, 0x61, 0x73, 0x68, 0x65, 0x73, 0x2e, 0x48, 0x61, 0x73, 0x68, + 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x52, 0x06, 0x68, 0x61, 0x73, 0x68, 0x65, 0x73, 0x1a, + 0x39, 0x0a, 0x0b, 0x48, 0x61, 0x73, 0x68, 0x65, 0x73, 0x45, 0x6e, 0x74, 0x72, 0x79, 0x12, 0x10, + 0x0a, 0x03, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x03, 0x6b, 0x65, 0x79, + 0x12, 0x14, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x75, 0x0a, 0x28, 0x47, 0x65, + 0x74, 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x46, 0x69, 0x6c, 0x65, 0x48, 0x61, 0x73, 0x68, + 0x65, 0x73, 0x46, 0x72, 0x6f, 0x6d, 0x47, 0x69, 0x74, 0x49, 0x6e, 0x64, 0x65, 0x78, 0x52, 0x65, + 0x73, 0x70, 0x6f, 0x6e, 0x73, 0x65, 0x12, 0x25, 0x0a, 0x06, 0x68, 0x61, 0x73, 0x68, 
0x65, 0x73, + 0x18, 0x01, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x0b, 0x2e, 0x46, 0x69, 0x6c, 0x65, 0x48, 0x61, 0x73, + 0x68, 0x65, 0x73, 0x48, 0x00, 0x52, 0x06, 0x68, 0x61, 0x73, 0x68, 0x65, 0x73, 0x12, 0x16, 0x0a, + 0x05, 0x65, 0x72, 0x72, 0x6f, 0x72, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x48, 0x00, 0x52, 0x05, + 0x65, 0x72, 0x72, 0x6f, 0x72, 0x42, 0x0a, 0x0a, 0x08, 0x72, 0x65, 0x73, 0x70, 0x6f, 0x6e, 0x73, + 0x65, 0x2a, 0x24, 0x0a, 0x0e, 0x50, 0x61, 0x63, 0x6b, 0x61, 0x67, 0x65, 0x4d, 0x61, 0x6e, 0x61, + 0x67, 0x65, 0x72, 0x12, 0x07, 0x0a, 0x03, 0x4e, 0x50, 0x4d, 0x10, 0x00, 0x12, 0x09, 0x0a, 0x05, + 0x42, 0x45, 0x52, 0x52, 0x59, 0x10, 0x01, 0x42, 0x0b, 0x5a, 0x09, 0x66, 0x66, 0x69, 0x2f, 0x70, + 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -1862,62 +2068,68 @@ func file_turborepo_ffi_messages_proto_rawDescGZIP() []byte { } var file_turborepo_ffi_messages_proto_enumTypes = make([]protoimpl.EnumInfo, 1) -var file_turborepo_ffi_messages_proto_msgTypes = make([]protoimpl.MessageInfo, 29) +var file_turborepo_ffi_messages_proto_msgTypes = make([]protoimpl.MessageInfo, 33) var file_turborepo_ffi_messages_proto_goTypes = []interface{}{ - (PackageManager)(0), // 0: PackageManager - (*TurboDataDirResp)(nil), // 1: TurboDataDirResp - (*GlobReq)(nil), // 2: GlobReq - (*GlobResp)(nil), // 3: GlobResp - (*GlobRespList)(nil), // 4: GlobRespList - (*ChangedFilesReq)(nil), // 5: ChangedFilesReq - (*ChangedFilesResp)(nil), // 6: ChangedFilesResp - (*ChangedFilesList)(nil), // 7: ChangedFilesList - (*PreviousContentReq)(nil), // 8: PreviousContentReq - (*PreviousContentResp)(nil), // 9: PreviousContentResp - (*PackageDependency)(nil), // 10: PackageDependency - (*PackageDependencyList)(nil), // 11: PackageDependencyList - (*WorkspaceDependencies)(nil), // 12: WorkspaceDependencies - (*TransitiveDepsRequest)(nil), // 13: TransitiveDepsRequest - (*TransitiveDepsResponse)(nil), // 14: TransitiveDepsResponse - (*AdditionalBerryData)(nil), // 15: 
AdditionalBerryData - (*LockfilePackage)(nil), // 16: LockfilePackage - (*LockfilePackageList)(nil), // 17: LockfilePackageList - (*SubgraphRequest)(nil), // 18: SubgraphRequest - (*SubgraphResponse)(nil), // 19: SubgraphResponse - (*PatchesRequest)(nil), // 20: PatchesRequest - (*PatchesResponse)(nil), // 21: PatchesResponse - (*Patches)(nil), // 22: Patches - (*GlobalChangeRequest)(nil), // 23: GlobalChangeRequest - (*GlobalChangeResponse)(nil), // 24: GlobalChangeResponse - (*RecursiveCopyRequest)(nil), // 25: RecursiveCopyRequest - (*RecursiveCopyResponse)(nil), // 26: RecursiveCopyResponse - nil, // 27: WorkspaceDependencies.DependenciesEntry - nil, // 28: TransitiveDepsRequest.WorkspacesEntry - nil, // 29: AdditionalBerryData.ResolutionsEntry + (PackageManager)(0), // 0: PackageManager + (*TurboDataDirResp)(nil), // 1: TurboDataDirResp + (*GlobReq)(nil), // 2: GlobReq + (*GlobResp)(nil), // 3: GlobResp + (*GlobRespList)(nil), // 4: GlobRespList + (*ChangedFilesReq)(nil), // 5: ChangedFilesReq + (*ChangedFilesResp)(nil), // 6: ChangedFilesResp + (*ChangedFilesList)(nil), // 7: ChangedFilesList + (*PreviousContentReq)(nil), // 8: PreviousContentReq + (*PreviousContentResp)(nil), // 9: PreviousContentResp + (*PackageDependency)(nil), // 10: PackageDependency + (*PackageDependencyList)(nil), // 11: PackageDependencyList + (*WorkspaceDependencies)(nil), // 12: WorkspaceDependencies + (*TransitiveDepsRequest)(nil), // 13: TransitiveDepsRequest + (*TransitiveDepsResponse)(nil), // 14: TransitiveDepsResponse + (*AdditionalBerryData)(nil), // 15: AdditionalBerryData + (*LockfilePackage)(nil), // 16: LockfilePackage + (*LockfilePackageList)(nil), // 17: LockfilePackageList + (*SubgraphRequest)(nil), // 18: SubgraphRequest + (*SubgraphResponse)(nil), // 19: SubgraphResponse + (*PatchesRequest)(nil), // 20: PatchesRequest + (*PatchesResponse)(nil), // 21: PatchesResponse + (*Patches)(nil), // 22: Patches + (*GlobalChangeRequest)(nil), // 23: GlobalChangeRequest + 
(*GlobalChangeResponse)(nil), // 24: GlobalChangeResponse + (*RecursiveCopyRequest)(nil), // 25: RecursiveCopyRequest + (*RecursiveCopyResponse)(nil), // 26: RecursiveCopyResponse + (*GetPackageFileHashesFromGitIndexRequest)(nil), // 27: GetPackageFileHashesFromGitIndexRequest + (*FileHashes)(nil), // 28: FileHashes + (*GetPackageFileHashesFromGitIndexResponse)(nil), // 29: GetPackageFileHashesFromGitIndexResponse + nil, // 30: WorkspaceDependencies.DependenciesEntry + nil, // 31: TransitiveDepsRequest.WorkspacesEntry + nil, // 32: AdditionalBerryData.ResolutionsEntry + nil, // 33: FileHashes.HashesEntry } var file_turborepo_ffi_messages_proto_depIdxs = []int32{ 4, // 0: GlobResp.files:type_name -> GlobRespList 7, // 1: ChangedFilesResp.files:type_name -> ChangedFilesList 10, // 2: PackageDependencyList.list:type_name -> PackageDependency - 27, // 3: WorkspaceDependencies.dependencies:type_name -> WorkspaceDependencies.DependenciesEntry + 30, // 3: WorkspaceDependencies.dependencies:type_name -> WorkspaceDependencies.DependenciesEntry 0, // 4: TransitiveDepsRequest.package_manager:type_name -> PackageManager - 28, // 5: TransitiveDepsRequest.workspaces:type_name -> TransitiveDepsRequest.WorkspacesEntry + 31, // 5: TransitiveDepsRequest.workspaces:type_name -> TransitiveDepsRequest.WorkspacesEntry 15, // 6: TransitiveDepsRequest.resolutions:type_name -> AdditionalBerryData 12, // 7: TransitiveDepsResponse.dependencies:type_name -> WorkspaceDependencies - 29, // 8: AdditionalBerryData.resolutions:type_name -> AdditionalBerryData.ResolutionsEntry + 32, // 8: AdditionalBerryData.resolutions:type_name -> AdditionalBerryData.ResolutionsEntry 16, // 9: LockfilePackageList.list:type_name -> LockfilePackage 0, // 10: SubgraphRequest.package_manager:type_name -> PackageManager 15, // 11: SubgraphRequest.resolutions:type_name -> AdditionalBerryData 0, // 12: PatchesRequest.package_manager:type_name -> PackageManager 22, // 13: PatchesResponse.patches:type_name -> Patches 0, 
// 14: GlobalChangeRequest.package_manager:type_name -> PackageManager - 17, // 15: WorkspaceDependencies.DependenciesEntry.value:type_name -> LockfilePackageList - 11, // 16: TransitiveDepsRequest.WorkspacesEntry.value:type_name -> PackageDependencyList - 17, // [17:17] is the sub-list for method output_type - 17, // [17:17] is the sub-list for method input_type - 17, // [17:17] is the sub-list for extension type_name - 17, // [17:17] is the sub-list for extension extendee - 0, // [0:17] is the sub-list for field type_name + 33, // 15: FileHashes.hashes:type_name -> FileHashes.HashesEntry + 28, // 16: GetPackageFileHashesFromGitIndexResponse.hashes:type_name -> FileHashes + 17, // 17: WorkspaceDependencies.DependenciesEntry.value:type_name -> LockfilePackageList + 11, // 18: TransitiveDepsRequest.WorkspacesEntry.value:type_name -> PackageDependencyList + 19, // [19:19] is the sub-list for method output_type + 19, // [19:19] is the sub-list for method input_type + 19, // [19:19] is the sub-list for extension type_name + 19, // [19:19] is the sub-list for extension extendee + 0, // [0:19] is the sub-list for field type_name } func init() { file_turborepo_ffi_messages_proto_init() } @@ -2238,6 +2450,42 @@ func file_turborepo_ffi_messages_proto_init() { return nil } } + file_turborepo_ffi_messages_proto_msgTypes[26].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*GetPackageFileHashesFromGitIndexRequest); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_turborepo_ffi_messages_proto_msgTypes[27].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*FileHashes); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + file_turborepo_ffi_messages_proto_msgTypes[28].Exporter = func(v interface{}, i int) interface{} { + switch v := 
v.(*GetPackageFileHashesFromGitIndexResponse); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } } file_turborepo_ffi_messages_proto_msgTypes[2].OneofWrappers = []interface{}{ (*GlobResp_Files)(nil), @@ -2267,13 +2515,17 @@ func file_turborepo_ffi_messages_proto_init() { (*PatchesResponse_Error)(nil), } file_turborepo_ffi_messages_proto_msgTypes[25].OneofWrappers = []interface{}{} + file_turborepo_ffi_messages_proto_msgTypes[28].OneofWrappers = []interface{}{ + (*GetPackageFileHashesFromGitIndexResponse_Hashes)(nil), + (*GetPackageFileHashesFromGitIndexResponse_Error)(nil), + } type x struct{} out := protoimpl.TypeBuilder{ File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_turborepo_ffi_messages_proto_rawDesc, NumEnums: 1, - NumMessages: 29, + NumMessages: 33, NumExtensions: 0, NumServices: 0, }, diff --git a/cli/internal/hashing/package_deps_hash.go b/cli/internal/hashing/package_deps_hash.go index c51c056b9dcf7..dfa972585729d 100644 --- a/cli/internal/hashing/package_deps_hash.go +++ b/cli/internal/hashing/package_deps_hash.go @@ -28,48 +28,6 @@ type PackageDepsOptions struct { InputPatterns []string } -func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) { - var result map[turbopath.AnchoredUnixPath]string - absolutePackagePath := packagePath.RestoreAnchor(rootPath) - - // Get the state of the git index. - gitLsTreeOutput, err := gitLsTree(absolutePackagePath) - if err != nil { - return nil, fmt.Errorf("could not get git hashes for files in package %s: %w", packagePath, err) - } - result = gitLsTreeOutput - - // Update the with the state of the working directory. 
- // The paths returned from this call are anchored at the package directory - gitStatusOutput, err := gitStatus(absolutePackagePath) - if err != nil { - return nil, fmt.Errorf("Could not get git hashes from git status: %v", err) - } - - // Review status output to identify the delta. - var filesToHash []turbopath.AnchoredSystemPath - for filePath, status := range gitStatusOutput { - if status.isDelete() { - delete(result, filePath) - } else { - filesToHash = append(filesToHash, filePath.ToSystemPath()) - } - } - - // Get the hashes for any modified files in the working directory. - hashes, err := GetHashesForFiles(absolutePackagePath, filesToHash) - if err != nil { - return nil, err - } - - // Zip up file paths and hashes together - for filePath, hash := range hashes { - result[filePath] = hash - } - - return result, nil -} - func safeCompileIgnoreFile(filepath turbopath.AbsoluteSystemPath) (*gitignore.GitIgnore, error) { if filepath.FileExists() { return gitignore.CompileIgnoreFile(filepath.ToString()) @@ -496,61 +454,3 @@ type statusCode struct { func (s statusCode) isDelete() bool { return s.x == "D" || s.y == "D" } - -// gitStatus returns a map of paths to their `git` status code. This can be used to identify what should -// be done with files that do not currently match what is in the index. -// -// Note: `git status -z`'s relative path results are relative to the repository's location. -// We need to calculate where the repository's location is in order to determine what the full path is -// before we can return those paths relative to the calling directory, normalizing to the behavior of -// `ls-files` and `ls-tree`. 
-func gitStatus(rootPath turbopath.AbsoluteSystemPath) (map[turbopath.AnchoredUnixPath]statusCode, error) { - cmd := exec.Command( - "git", // Using `git` from $PATH, - "status", // tell me about the status of the working tree, - "--untracked-files", // including information about untracked files, - "--no-renames", // do not detect renames, - "-z", // with each file path relative to the repository root and \000-terminated, - "--", // and any additional argument you see is a path, promise. - ) - cmd.Args = append(cmd.Args, ".") // Operate in the current directory instead of the root of the working tree. - cmd.Dir = rootPath.ToString() // Include files only from this directory. - - entries, err := runGitCommand(cmd, "status", gitoutput.NewStatusReader) - if err != nil { - return nil, err - } - - output := make(map[turbopath.AnchoredUnixPath]statusCode, len(entries)) - convertedRootPath := turbopath.AbsoluteSystemPathFromUpstream(rootPath.ToString()) - - traversePath, err := memoizedGetTraversePath(convertedRootPath) - if err != nil { - return nil, err - } - - for _, entry := range entries { - statusEntry := gitoutput.StatusEntry(entry) - // Anchored at repository. 
- pathFromStatus := turbopath.AnchoredUnixPathFromUpstream(statusEntry.GetField(gitoutput.Path)) - var outputPath turbopath.AnchoredUnixPath - - if len(traversePath) > 0 { - repositoryPath := convertedRootPath.Join(traversePath.ToSystemPath()) - fileFullPath := pathFromStatus.ToSystemPath().RestoreAnchor(repositoryPath) - - relativePath, err := fileFullPath.RelativeTo(convertedRootPath) - if err != nil { - return nil, err - } - - outputPath = relativePath.ToUnixPath() - } else { - outputPath = pathFromStatus - } - - output[outputPath] = statusCode{x: statusEntry.GetField(gitoutput.StatusX), y: statusEntry.GetField(gitoutput.StatusY)} - } - - return output, nil -} diff --git a/cli/internal/hashing/package_deps_hash_go.go b/cli/internal/hashing/package_deps_hash_go.go new file mode 100644 index 0000000000000..46e5db6a65774 --- /dev/null +++ b/cli/internal/hashing/package_deps_hash_go.go @@ -0,0 +1,112 @@ +//go:build go || !rust +// +build go !rust + +package hashing + +import ( + "fmt" + "os/exec" + + "github.com/vercel/turbo/cli/internal/encoding/gitoutput" + "github.com/vercel/turbo/cli/internal/turbopath" +) + +func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) { + var result map[turbopath.AnchoredUnixPath]string + absolutePackagePath := packagePath.RestoreAnchor(rootPath) + + // Get the state of the git index. + gitLsTreeOutput, err := gitLsTree(absolutePackagePath) + if err != nil { + return nil, fmt.Errorf("could not get git hashes for files in package %s: %w", packagePath, err) + } + result = gitLsTreeOutput + + // Update the with the state of the working directory. + // The paths returned from this call are anchored at the package directory + gitStatusOutput, err := gitStatus(absolutePackagePath) + if err != nil { + return nil, fmt.Errorf("Could not get git hashes from git status: %v", err) + } + + // Review status output to identify the delta. 
+ var filesToHash []turbopath.AnchoredSystemPath + for filePath, status := range gitStatusOutput { + if status.isDelete() { + delete(result, filePath) + } else { + filesToHash = append(filesToHash, filePath.ToSystemPath()) + } + } + + // Get the hashes for any modified files in the working directory. + hashes, err := GetHashesForFiles(absolutePackagePath, filesToHash) + if err != nil { + return nil, err + } + + // Zip up file paths and hashes together + for filePath, hash := range hashes { + result[filePath] = hash + } + + return result, nil +} + +// gitStatus returns a map of paths to their `git` status code. This can be used to identify what should +// be done with files that do not currently match what is in the index. +// +// Note: `git status -z`'s relative path results are relative to the repository's location. +// We need to calculate where the repository's location is in order to determine what the full path is +// before we can return those paths relative to the calling directory, normalizing to the behavior of +// `ls-files` and `ls-tree`. +func gitStatus(rootPath turbopath.AbsoluteSystemPath) (map[turbopath.AnchoredUnixPath]statusCode, error) { + cmd := exec.Command( + "git", // Using `git` from $PATH, + "status", // tell me about the status of the working tree, + "--untracked-files", // including information about untracked files, + "--no-renames", // do not detect renames, + "-z", // with each file path relative to the repository root and \000-terminated, + "--", // and any additional argument you see is a path, promise. + ) + cmd.Args = append(cmd.Args, ".") // Operate in the current directory instead of the root of the working tree. + cmd.Dir = rootPath.ToString() // Include files only from this directory. 
+ + entries, err := runGitCommand(cmd, "status", gitoutput.NewStatusReader) + if err != nil { + return nil, err + } + + output := make(map[turbopath.AnchoredUnixPath]statusCode, len(entries)) + convertedRootPath := turbopath.AbsoluteSystemPathFromUpstream(rootPath.ToString()) + + traversePath, err := memoizedGetTraversePath(convertedRootPath) + if err != nil { + return nil, err + } + + for _, entry := range entries { + statusEntry := gitoutput.StatusEntry(entry) + // Anchored at repository. + pathFromStatus := turbopath.AnchoredUnixPathFromUpstream(statusEntry.GetField(gitoutput.Path)) + var outputPath turbopath.AnchoredUnixPath + + if len(traversePath) > 0 { + repositoryPath := convertedRootPath.Join(traversePath.ToSystemPath()) + fileFullPath := pathFromStatus.ToSystemPath().RestoreAnchor(repositoryPath) + + relativePath, err := fileFullPath.RelativeTo(convertedRootPath) + if err != nil { + return nil, err + } + + outputPath = relativePath.ToUnixPath() + } else { + outputPath = pathFromStatus + } + + output[outputPath] = statusCode{x: statusEntry.GetField(gitoutput.StatusX), y: statusEntry.GetField(gitoutput.StatusY)} + } + + return output, nil +} diff --git a/cli/internal/hashing/package_deps_hash_rust.go b/cli/internal/hashing/package_deps_hash_rust.go new file mode 100644 index 0000000000000..4f5aa1dd13ae9 --- /dev/null +++ b/cli/internal/hashing/package_deps_hash_rust.go @@ -0,0 +1,22 @@ +//go:build rust +// +build rust + +package hashing + +import ( + "github.com/vercel/turbo/cli/internal/ffi" + "github.com/vercel/turbo/cli/internal/turbopath" +) + +func getPackageFileHashesFromGitIndex(rootPath turbopath.AbsoluteSystemPath, packagePath turbopath.AnchoredSystemPath) (map[turbopath.AnchoredUnixPath]string, error) { + rawHashes, err := ffi.GetPackageFileHashesFromGitIndex(rootPath.ToString(), packagePath.ToString()) + if err != nil { + return nil, err + } + + hashes := make(map[turbopath.AnchoredUnixPath]string, len(rawHashes)) + for rawPath, hash := range 
rawHashes { + hashes[turbopath.AnchoredUnixPathFromUpstream(rawPath)] = hash + } + return hashes, nil +} diff --git a/crates/turborepo-ffi/messages.proto b/crates/turborepo-ffi/messages.proto index ce7b189a1f3a2..d4408d21daf5f 100644 --- a/crates/turborepo-ffi/messages.proto +++ b/crates/turborepo-ffi/messages.proto @@ -149,3 +149,19 @@ message RecursiveCopyRequest { message RecursiveCopyResponse { optional string error = 1; } + +message GetPackageFileHashesFromGitIndexRequest { + string turbo_root = 1; + string package_path = 2; +} + +message FileHashes { + map hashes = 1; +} + +message GetPackageFileHashesFromGitIndexResponse { + oneof response { + FileHashes hashes = 1; + string error = 2; + } +} diff --git a/crates/turborepo-ffi/src/lib.rs b/crates/turborepo-ffi/src/lib.rs index f1d34380a0be8..5b788ba6334b3 100644 --- a/crates/turborepo-ffi/src/lib.rs +++ b/crates/turborepo-ffi/src/lib.rs @@ -4,10 +4,10 @@ //! and in ffi.go before modifying this file. mod lockfile; -use std::{mem::ManuallyDrop, path::PathBuf}; +use std::{collections::HashMap, mem::ManuallyDrop, path::PathBuf}; pub use lockfile::{patches, subgraph, transitive_closure}; -use turbopath::AbsoluteSystemPathBuf; +use turbopath::{AbsoluteSystemPathBuf, AnchoredSystemPathBuf}; mod proto { include!(concat!(env!("OUT_DIR"), "/_.rs")); @@ -163,3 +163,87 @@ pub extern "C" fn recursive_copy(buffer: Buffer) -> Buffer { }; response.into() } + +#[no_mangle] +pub extern "C" fn get_package_file_hashes_from_git_index(buffer: Buffer) -> Buffer { + let req: proto::GetPackageFileHashesFromGitIndexRequest = match buffer.into_proto() { + Ok(req) => req, + Err(err) => { + let resp = proto::GetPackageFileHashesFromGitIndexResponse { + response: Some( + proto::get_package_file_hashes_from_git_index_response::Response::Error( + err.to_string(), + ), + ), + }; + return resp.into(); + } + }; + + let turbo_root = match AbsoluteSystemPathBuf::new(req.turbo_root) { + Ok(turbo_root) => turbo_root, + Err(err) => { + let resp = 
proto::GetPackageFileHashesFromGitIndexResponse { + response: Some( + proto::get_package_file_hashes_from_git_index_response::Response::Error( + err.to_string(), + ), + ), + }; + return resp.into(); + } + }; + let package_path = match AnchoredSystemPathBuf::from_raw(req.package_path) { + Ok(package_path) => package_path, + Err(err) => { + let resp = proto::GetPackageFileHashesFromGitIndexResponse { + response: Some( + proto::get_package_file_hashes_from_git_index_response::Response::Error( + err.to_string(), + ), + ), + }; + return resp.into(); + } + }; + let response = match turborepo_scm::package_deps::get_package_file_hashes_from_git_index( + &turbo_root, + &package_path, + ) { + Ok(hashes) => { + let mut to_return = HashMap::new(); + for (filename, hash) in hashes { + let filename = match filename.as_str() { + Ok(s) => s.to_owned(), + Err(err) => { + let resp = proto::GetPackageFileHashesFromGitIndexResponse { + response: Some(proto::get_package_file_hashes_from_git_index_response::Response::Error(err.to_string())) + }; + return resp.into(); + } + }; + to_return.insert(filename, hash); + } + let file_hashes = proto::FileHashes { hashes: to_return }; + let resp = proto::GetPackageFileHashesFromGitIndexResponse { + response: Some( + proto::get_package_file_hashes_from_git_index_response::Response::Hashes( + file_hashes, + ), + ), + }; + resp + } + Err(err) => { + let resp = proto::GetPackageFileHashesFromGitIndexResponse { + response: Some( + proto::get_package_file_hashes_from_git_index_response::Response::Error( + err.to_string(), + ), + ), + }; + return resp.into(); + } + }; + response.into() +} diff --git a/crates/turborepo-lib/.gitignore b/crates/turborepo-lib/.gitignore new file mode 100644 index 0000000000000..1a7387072f811 --- /dev/null +++ b/crates/turborepo-lib/.gitignore @@ -0,0 +1 @@ +file_descriptor_set.bin diff --git a/crates/turborepo-scm/Cargo.toml b/crates/turborepo-scm/Cargo.toml index 9c40f507ce844..466e6e11f5025 100644 --- 
a/crates/turborepo-scm/Cargo.toml +++ b/crates/turborepo-scm/Cargo.toml @@ -7,9 +7,9 @@ license = "MPL-2.0" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -anyhow = { workspace = true } dunce = { workspace = true } git2 = { version = "0.16.1", default-features = false } +nom = "7.1.3" thiserror = { workspace = true } turbopath = { workspace = true } diff --git a/crates/turborepo-scm/fixtures/01-git-hash-object/.gitignore b/crates/turborepo-scm/fixtures/01-git-hash-object/.gitignore new file mode 100644 index 0000000000000..d8e19507425ea --- /dev/null +++ b/crates/turborepo-scm/fixtures/01-git-hash-object/.gitignore @@ -0,0 +1,2 @@ +"quote" +new*line diff --git a/crates/turborepo-scm/fixtures/01-git-hash-object/child/child.json b/crates/turborepo-scm/fixtures/01-git-hash-object/child/child.json new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/crates/turborepo-scm/fixtures/01-git-hash-object/child/grandchild/grandchild.json b/crates/turborepo-scm/fixtures/01-git-hash-object/child/grandchild/grandchild.json new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/crates/turborepo-scm/fixtures/01-git-hash-object/root.json b/crates/turborepo-scm/fixtures/01-git-hash-object/root.json new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/crates/turborepo-scm/src/hash_object.rs b/crates/turborepo-scm/src/hash_object.rs new file mode 100644 index 0000000000000..f0797a731c38d --- /dev/null +++ b/crates/turborepo-scm/src/hash_object.rs @@ -0,0 +1,192 @@ +use std::{ + io::{BufWriter, Read, Write}, + panic, + process::{Command, Stdio}, + thread, +}; + +use nom::{Finish, IResult}; +use turbopath::{AbsoluteSystemPathBuf, RelativeUnixPathBuf}; + +use crate::{package_deps::GitHashes, Error}; + +pub(crate) fn hash_objects( + pkg_path: &AbsoluteSystemPathBuf, + to_hash: Vec<RelativeUnixPathBuf>, + pkg_prefix: &RelativeUnixPathBuf, + hashes: &mut GitHashes, +) -> Result<(), Error> { + if 
to_hash.is_empty() { + return Ok(()); + } + let mut git = Command::new("git") + .args(["hash-object", "--stdin-paths"]) + .current_dir(pkg_path) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .stdin(Stdio::piped()) + .spawn()?; + { + let stdout = git + .stdout + .as_mut() + .ok_or_else(|| Error::git_error("failed to get stdout for git hash-object"))?; + // We take, rather than borrow, stdin so that we can drop it and force the + // underlying file descriptor to close, signalling the end of input. + let stdin: std::process::ChildStdin = git + .stdin + .take() + .ok_or_else(|| Error::git_error("failed to get stdin for git hash-object"))?; + let mut stderr = git + .stderr + .take() + .ok_or_else(|| Error::git_error("failed to get stderr for git hash-object"))?; + let result = read_object_hashes(stdout, stdin, &to_hash, pkg_prefix, hashes); + if let Err(err) = result { + let mut buf = String::new(); + let bytes_read = stderr.read_to_string(&mut buf)?; + if bytes_read > 0 { + // something failed with git, report that error + return Err(Error::git_error(buf)); + } + return Err(err); + } + } + git.wait()?; + Ok(()) +} + +const HASH_LEN: usize = 40; + +fn read_object_hashes<R: Read, W: Write + Send>( + mut reader: R, + writer: W, + to_hash: &Vec<RelativeUnixPathBuf>, + pkg_prefix: &RelativeUnixPathBuf, + hashes: &mut GitHashes, +) -> Result<(), Error> { + thread::scope(move |scope| -> Result<(), Error> { + let write_thread = scope.spawn(move || -> Result<(), Error> { + let mut writer = BufWriter::new(writer); + for path in to_hash { + path.write_escaped_bytes(&mut writer)?; + writer.write_all(&[b'\n'])?; + writer.flush()?; + } + // writer is dropped here, closing stdin + Ok(()) + }); + // Buffer size is HASH_LEN + 1 to account for the trailing \n + let mut buffer: [u8; HASH_LEN + 1] = [0; HASH_LEN + 1]; + for (i, filename) in to_hash.iter().enumerate() { + if i == to_hash.len() { + break; + } + reader.read_exact(&mut buffer)?; + { + let hash = parse_hash_object(&buffer)?; + let hash = 
String::from_utf8(hash.to_vec())?; + let path = filename.strip_prefix(pkg_prefix)?; + hashes.insert(path, hash); + } + } + match write_thread.join() { + // the error case is if the thread panic'd. In that case, we propagate + // the panic, since we aren't going to handle it. + Err(e) => panic::resume_unwind(e), + Ok(result) => result, + } + })?; + Ok(()) +} + +fn parse_hash_object(i: &[u8]) -> Result<&[u8], Error> { + match nom_parse_hash_object(i).finish() { + Ok((_, hash)) => Ok(hash), + Err(e) => Err(Error::git_error(format!( + "failed to parse git-hash-object {}", + String::from_utf8_lossy(e.input) + ))), + } +} + +fn nom_parse_hash_object(i: &[u8]) -> IResult<&[u8], &[u8]> { + let (i, hash) = nom::bytes::complete::take(HASH_LEN)(i)?; + let (i, _) = nom::bytes::complete::tag(&[b'\n'])(i)?; + Ok((i, hash)) +} + +#[cfg(test)] +mod test { + use turbopath::{AbsoluteSystemPathBuf, RelativeUnixPathBuf}; + + use super::hash_objects; + use crate::package_deps::{find_git_root, GitHashes}; + + #[test] + fn test_read_object_hashes() { + // Note that cwd can be different based on where the test suite is running from + // or if the test is launched in debug mode from VSCode + let cwd = std::env::current_dir().unwrap(); + let cwd = AbsoluteSystemPathBuf::new(cwd).unwrap(); + let git_root = find_git_root(&cwd).unwrap(); + let fixture_path = git_root + .join_unix_path_literal("crates/turborepo-scm/fixtures/01-git-hash-object") + .unwrap(); + + let fixture_child_path = fixture_path.join_literal("child"); + let git_root = find_git_root(&fixture_path).unwrap(); + + // paths for files here are relative to the package path. 
+ let tests: Vec<(Vec<(&str, &str)>, &AbsoluteSystemPathBuf)> = vec![ + (vec![], &fixture_path), + ( + vec![ + ("../root.json", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + ("child.json", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + ( + "grandchild/grandchild.json", + "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", + ), + ], + &fixture_child_path, + ), + ]; + + for (to_hash, pkg_path) in tests { + let file_hashes: Vec<(RelativeUnixPathBuf, String)> = to_hash + .into_iter() + .map(|(raw, hash)| (RelativeUnixPathBuf::new(raw).unwrap(), String::from(hash))) + .collect(); + + let git_to_pkg_path = git_root.anchor(pkg_path).unwrap(); + let pkg_prefix = git_to_pkg_path.to_unix().unwrap(); + + let expected_hashes = GitHashes::from_iter(file_hashes.into_iter()); + let mut hashes = GitHashes::new(); + let to_hash = expected_hashes.keys().map(|k| pkg_prefix.join(k)).collect(); + hash_objects(&pkg_path, to_hash, &pkg_prefix, &mut hashes).unwrap(); + assert_eq!(hashes, expected_hashes); + } + + // paths for files here are relative to the package path. 
+ let error_tests: Vec<(Vec<&str>, &AbsoluteSystemPathBuf)> = vec![ + // skipping test for outside of git repo, we now error earlier in the process + (vec!["nonexistent.json"], &fixture_path), + ]; + + for (to_hash, pkg_path) in error_tests { + let git_to_pkg_path = git_root.anchor(pkg_path).unwrap(); + let pkg_prefix = git_to_pkg_path.to_unix().unwrap(); + + let to_hash = to_hash + .into_iter() + .map(|k| pkg_prefix.join(&RelativeUnixPathBuf::new(k).unwrap())) + .collect(); + + let mut hashes = GitHashes::new(); + let result = hash_objects(&pkg_path, to_hash, &pkg_prefix, &mut hashes); + assert_eq!(result.is_err(), true); + } + } +} diff --git a/crates/turborepo-scm/src/lib.rs b/crates/turborepo-scm/src/lib.rs index 86abdfc38d4e5..d0b11553269df 100644 --- a/crates/turborepo-scm/src/lib.rs +++ b/crates/turborepo-scm/src/lib.rs @@ -2,12 +2,16 @@ #![feature(provide_any)] #![feature(assert_matches)] -use std::backtrace; +use std::backtrace::{self, Backtrace}; use thiserror::Error; use turbopath::PathError; pub mod git; +mod hash_object; +mod ls_tree; +pub mod package_deps; +mod status; #[derive(Debug, Error)] pub enum Error { @@ -19,4 +23,15 @@ pub enum Error { Io(#[from] std::io::Error, #[backtrace] backtrace::Backtrace), #[error("path error: {0}")] Path(#[from] PathError, #[backtrace] backtrace::Backtrace), + #[error("encoding error: {0}")] + Encoding( + #[from] std::string::FromUtf8Error, + #[backtrace] backtrace::Backtrace, + ), +} + +impl Error { + pub(crate) fn git_error(s: impl Into<String>) -> Self { + Error::Git(s.into(), Backtrace::capture()) + } } diff --git a/crates/turborepo-scm/src/ls_tree.rs b/crates/turborepo-scm/src/ls_tree.rs new file mode 100644 index 0000000000000..51fb9626e3e3a --- /dev/null +++ b/crates/turborepo-scm/src/ls_tree.rs @@ -0,0 +1,150 @@ +use std::{ + io::{BufRead, BufReader, Read}, + process::{Command, Stdio}, +}; + +use nom::Finish; +use turbopath::{AbsoluteSystemPathBuf, RelativeUnixPathBuf}; + +use crate::{package_deps::GitHashes, Error}; 
+ +pub fn git_ls_tree(root_path: &AbsoluteSystemPathBuf) -> Result<GitHashes, Error> { + let mut hashes = GitHashes::new(); + let mut git = Command::new("git") + .args(["ls-tree", "-r", "-z", "HEAD"]) + .current_dir(root_path) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn()?; + { + let stdout = git + .stdout + .as_mut() + .ok_or_else(|| Error::git_error("failed to get stdout for git ls-tree"))?; + let mut stderr = git + .stderr + .take() + .ok_or_else(|| Error::git_error("failed to get stderr for git ls-tree"))?; + let result = read_ls_tree(stdout, &mut hashes); + if result.is_err() { + let mut buf = String::new(); + let bytes_read = stderr.read_to_string(&mut buf)?; + if bytes_read > 0 { + // something failed with git, report that error + return Err(Error::git_error(buf)); + } + } + result?; + } + git.wait()?; + Ok(hashes) +} + +fn read_ls_tree<R: Read>(reader: R, hashes: &mut GitHashes) -> Result<(), Error> { + let mut reader = BufReader::new(reader); + let mut buffer = Vec::new(); + loop { + buffer.clear(); + { + let bytes_read = reader.read_until(b'\0', &mut buffer)?; + if bytes_read == 0 { + break; + } + { + let entry = parse_ls_tree(&buffer)?; + let hash = String::from_utf8(entry.hash.to_vec())?; + let path = RelativeUnixPathBuf::new(entry.filename)?; + hashes.insert(path, hash); + } + } + } + Ok(()) +} + +struct LsTreeEntry<'a> { + filename: &'a [u8], + hash: &'a [u8], +} + +fn parse_ls_tree(i: &[u8]) -> Result<LsTreeEntry<'_>, Error> { + match nom_parse_ls_tree(i).finish() { + Ok((_, entry)) => Ok(entry), + Err(e) => Err(Error::git_error(format!( + "failed to parse git-ls-tree: {}", + String::from_utf8_lossy(e.input) + ))), + } +} + +fn nom_parse_ls_tree(i: &[u8]) -> nom::IResult<&[u8], LsTreeEntry<'_>> { + let (i, _) = nom::bytes::complete::is_not(" ")(i)?; + let (i, _) = nom::character::complete::space1(i)?; + let (i, _) = nom::bytes::complete::is_not(" ")(i)?; + let (i, _) = nom::character::complete::space1(i)?; + let (i, hash) = nom::bytes::complete::take(40usize)(i)?; + let (i, 
_) = nom::bytes::complete::take(1usize)(i)?; + let (i, filename) = nom::bytes::complete::is_not(" \0")(i)?; + Ok((i, LsTreeEntry { filename, hash })) +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use turbopath::RelativeUnixPathBuf; + + use crate::{ls_tree::read_ls_tree, package_deps::GitHashes}; + + fn to_hash_map(pairs: &[(&str, &str)]) -> GitHashes { + HashMap::from_iter(pairs.into_iter().map(|(path, hash)| { + ( + RelativeUnixPathBuf::new(path.as_bytes()).unwrap(), + hash.to_string(), + ) + })) + } + + #[test] + fn test_ls_tree() { + let tests: &[(&str, &[(&str, &str)])] = &[ + ( + "100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\tpackage.json\0", + &[("package.json", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")], + ), + ( + // missing nul byte + "100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\tpackage.json", + &[("package.json", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391")], + ), + ( + "100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\t\t\000100644 blob \ + e69de29bb2d1d6434b8b29ae775ad8c2e48c5391\t\"\000100644 blob \ + 5b999efa470b056e329b4c23a73904e0794bdc2f\t\n\000100644 blob \ + f44f57fff95196c5f7139dfa0b96875f1e9650a9\t.gitignore\000100644 blob \ + 33dbaf21275ca2a5f460249d941cbc27d5da3121\tREADME.md\000040000 tree \ + 7360f2d292aec95907cebdcbb412a6bf2bd10f8a\tapps\000100644 blob \ + 9ec2879b24ce2c817296eebe2cb3846f8e4751ea\tpackage.json\000040000 tree \ + 5759aadaea2cde55468a61e7104eb0a9d86c1d30\tpackages\000100644 blob \ + 33d0621ee2f4da4a2f6f6bdd51a42618d181e337\tturbo.json\0", + &[ + ("\t", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + ("\"", "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391"), + ("\n", "5b999efa470b056e329b4c23a73904e0794bdc2f"), + (".gitignore", "f44f57fff95196c5f7139dfa0b96875f1e9650a9"), + ("README.md", "33dbaf21275ca2a5f460249d941cbc27d5da3121"), + ("apps", "7360f2d292aec95907cebdcbb412a6bf2bd10f8a"), + ("package.json", "9ec2879b24ce2c817296eebe2cb3846f8e4751ea"), + ("packages", 
"5759aadaea2cde55468a61e7104eb0a9d86c1d30"), + ("turbo.json", "33d0621ee2f4da4a2f6f6bdd51a42618d181e337"), + ], + ), + ]; + for (input, expected) in tests { + let input_bytes = input.as_bytes(); + let mut hashes = GitHashes::new(); + let expected = to_hash_map(expected); + read_ls_tree(input_bytes, &mut hashes).unwrap(); + assert_eq!(hashes, expected); + } + } +} diff --git a/crates/turborepo-scm/src/package_deps.rs b/crates/turborepo-scm/src/package_deps.rs new file mode 100644 index 0000000000000..27002cfef8eee --- /dev/null +++ b/crates/turborepo-scm/src/package_deps.rs @@ -0,0 +1,148 @@ +use std::{collections::HashMap, process::Command}; + +use turbopath::{AbsoluteSystemPathBuf, AnchoredSystemPathBuf, RelativeUnixPathBuf}; + +use crate::{hash_object::hash_objects, ls_tree::git_ls_tree, status::append_git_status, Error}; + +pub type GitHashes = HashMap<RelativeUnixPathBuf, String>; + +pub fn get_package_file_hashes_from_git_index( + turbo_root: &AbsoluteSystemPathBuf, + package_path: &AnchoredSystemPathBuf, +) -> Result<GitHashes, Error> { + // TODO: memoize git root -> turbo root calculation once we aren't crossing ffi + let git_root = find_git_root(turbo_root)?; + let full_pkg_path = turbo_root.resolve(package_path); + let git_to_pkg_path = git_root.anchor(&full_pkg_path)?; + let pkg_prefix = git_to_pkg_path.to_unix()?; + let mut hashes = git_ls_tree(&full_pkg_path)?; + // Note: to_hash is *git repo relative* + let to_hash = append_git_status(&full_pkg_path, &pkg_prefix, &mut hashes)?; + hash_objects(&full_pkg_path, to_hash, &pkg_prefix, &mut hashes)?; + Ok(hashes) +} + +pub(crate) fn find_git_root( + turbo_root: &AbsoluteSystemPathBuf, +) -> Result<AbsoluteSystemPathBuf, Error> { + let rev_parse = Command::new("git") + .args(["rev-parse", "--show-cdup"]) + .current_dir(turbo_root) + .output()?; + let root = String::from_utf8(rev_parse.stdout)?; + Ok(turbo_root.join_literal(root.trim_end()).to_realpath()?) 
+} + +#[cfg(test)] +mod tests { + use std::process::Command; + + use super::*; + + fn tmp_dir() -> (tempfile::TempDir, AbsoluteSystemPathBuf) { + let tmp_dir = tempfile::tempdir().unwrap(); + let dir = AbsoluteSystemPathBuf::new(tmp_dir.path().to_path_buf()) + .unwrap() + .to_realpath() + .unwrap(); + (tmp_dir, dir) + } + + fn require_git_cmd(repo_root: &AbsoluteSystemPathBuf, args: &[&str]) { + let mut cmd = Command::new("git"); + cmd.args(args).current_dir(repo_root); + assert_eq!(cmd.output().unwrap().status.success(), true); + } + + fn setup_repository(repo_root: &AbsoluteSystemPathBuf) { + let cmds: &[&[&str]] = &[ + &["init", "."], + &["config", "--local", "user.name", "test"], + &["config", "--local", "user.email", "test@example.com"], + ]; + for cmd in cmds { + require_git_cmd(repo_root, cmd); + } + } + + fn commit_all(repo_root: &AbsoluteSystemPathBuf) { + let cmds: &[&[&str]] = &[&["add", "."], &["commit", "-m", "foo"]]; + for cmd in cmds { + require_git_cmd(repo_root, cmd); + } + } + + #[test] + fn test_get_package_deps() -> Result<(), Error> { + // Directory structure: + // / + // new-root-file <- new file not added to git + // my-pkg/ + // committed-file + // deleted-file + // uncommitted-file <- new file not added to git + // dir/ + // nested-file + let (_repo_root_tmp, repo_root) = tmp_dir(); + let my_pkg_dir = repo_root.join_literal("my-pkg"); + my_pkg_dir.create_dir_all()?; + + // create file 1 + let committed_file_path = my_pkg_dir.join_literal("committed-file"); + committed_file_path.create_with_contents("committed bytes")?; + + // create file 2 + let deleted_file_path = my_pkg_dir.join_literal("deleted-file"); + deleted_file_path.create_with_contents("delete-me")?; + + // create file 3 + let nested_file_path = my_pkg_dir.join_literal("dir/nested-file"); + nested_file_path.ensure_dir()?; + nested_file_path.create_with_contents("nested")?; + + // create a package.json + let pkg_json_path = my_pkg_dir.join_literal("package.json"); + 
pkg_json_path.create_with_contents("{}")?; + + setup_repository(&repo_root); + commit_all(&repo_root); + + // remove a file + deleted_file_path.remove()?; + + // create another untracked file in git + let uncommitted_file_path = my_pkg_dir.join_literal("uncommitted-file"); + uncommitted_file_path.create_with_contents("uncommitted bytes")?; + + // create an untracked file in git up a level + let root_file_path = repo_root.join_literal("new-root-file"); + root_file_path.create_with_contents("new-root bytes")?; + + let package_path = AnchoredSystemPathBuf::from_raw("my-pkg")?; + + let expected = to_hash_map(&[ + ("committed-file", "3a29e62ea9ba15c4a4009d1f605d391cdd262033"), + ( + "uncommitted-file", + "4e56ad89387e6379e4e91ddfe9872cf6a72c9976", + ), + ("package.json", "9e26dfeeb6e641a33dae4961196235bdb965b21b"), + ( + "dir/nested-file", + "bfe53d766e64d78f80050b73cd1c88095bc70abb", + ), + ]); + let hashes = get_package_file_hashes_from_git_index(&repo_root, &package_path)?; + assert_eq!(hashes, expected); + Ok(()) + } + + fn to_hash_map(pairs: &[(&str, &str)]) -> GitHashes { + HashMap::from_iter(pairs.into_iter().map(|(path, hash)| { + ( + RelativeUnixPathBuf::new(path.as_bytes()).unwrap(), + hash.to_string(), + ) + })) + } +} diff --git a/crates/turborepo-scm/src/status.rs b/crates/turborepo-scm/src/status.rs new file mode 100644 index 0000000000000..f7110c90e0cea --- /dev/null +++ b/crates/turborepo-scm/src/status.rs @@ -0,0 +1,156 @@ +use std::{ + io::{BufRead, BufReader, Read}, + process::{Command, Stdio}, +}; + +use nom::Finish; + +use turbopath::{AbsoluteSystemPathBuf, RelativeUnixPathBuf}; + +use crate::{package_deps::GitHashes, Error}; + +pub(crate) fn append_git_status( + root_path: &AbsoluteSystemPathBuf, + pkg_prefix: &RelativeUnixPathBuf, + hashes: &mut GitHashes, +) -> Result<Vec<RelativeUnixPathBuf>, Error> { + let mut git = Command::new("git") + .args([ + "status", + "--untracked-files", + "--no-renames", + "-z", + "--", + ".", + ]) + .current_dir(root_path) + 
.stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn()?; + let to_hash = { + let stdout = git + .stdout + .as_mut() + .ok_or_else(|| Error::git_error("failed to get stdout for git status"))?; + let mut stderr = git + .stderr + .take() + .ok_or_else(|| Error::git_error("failed to get stderr for git status"))?; + let result = read_status(stdout, pkg_prefix, hashes); + if result.is_err() { + let mut buf = String::new(); + let bytes_read = stderr.read_to_string(&mut buf)?; + if bytes_read > 0 { + // something failed with git, report that error + return Err(Error::git_error(buf)); + } + } + result? + }; + git.wait()?; + Ok(to_hash) +} + +fn read_status<R: Read>( + reader: R, + pkg_prefix: &RelativeUnixPathBuf, + hashes: &mut GitHashes, +) -> Result<Vec<RelativeUnixPathBuf>, Error> { + let mut to_hash = Vec::new(); + let mut reader = BufReader::new(reader); + let mut buffer = Vec::new(); + loop { + buffer.clear(); + { + let bytes_read = reader.read_until(b'\0', &mut buffer)?; + if bytes_read == 0 { + break; + } + { + let entry = parse_status(&buffer)?; + let path = RelativeUnixPathBuf::new(entry.filename)?; + if entry.is_delete { + let path = path.strip_prefix(pkg_prefix)?; + hashes.remove(&path); + } else { + to_hash.push(path); + } + } + } + } + Ok(to_hash) +} + +struct StatusEntry<'a> { + filename: &'a [u8], + is_delete: bool, +} + +fn parse_status(i: &[u8]) -> Result<StatusEntry<'_>, Error> { + match nom_parse_status(i).finish() { + Ok((_, tup)) => Ok(tup), + Err(e) => Err(Error::git_error(format!( + "failed to parse git-status: {}", + String::from_utf8_lossy(e.input) + ))), + } +} + +fn nom_parse_status(i: &[u8]) -> nom::IResult<&[u8], StatusEntry<'_>> { + let (i, x) = nom::bytes::complete::take(1usize)(i)?; + let (i, y) = nom::bytes::complete::take(1usize)(i)?; + let (i, _) = nom::character::complete::space1(i)?; + let (i, filename) = nom::bytes::complete::is_not(" \0")(i)?; + Ok(( + i, + StatusEntry { + filename, + is_delete: x[0] == b'D' || y[0] == b'D', + }, + )) +} + +#[cfg(test)] +mod tests { + use 
std::collections::HashMap; + + use turbopath::RelativeUnixPathBuf; + + use super::read_status; + use crate::package_deps::GitHashes; + + #[test] + fn test_status() { + let tests: &[(&str, &str, (&str, bool))] = &[ + ("AD my-pkg/package.json\0", "my-pkg", ("package.json", true)), + ( + // no trailing NUL + "AD some-pkg/package.json", + "some-pkg", + ("package.json", true), + ), + ("M package.json\0", "", ("package.json", false)), + ("A some-pkg/some-file\0", "some-pkg", ("some-file", false)), + ]; + for (input, prefix, (expected_filename, expect_delete)) in tests { + let prefix = RelativeUnixPathBuf::new(prefix.as_bytes()).unwrap(); + let mut hashes = to_hash_map(&[(expected_filename, "some-hash")]); + let to_hash = read_status(input.as_bytes(), &prefix, &mut hashes).unwrap(); + if *expect_delete { + assert_eq!(hashes.len(), 0, "input: {}", input); + } else { + assert_eq!(to_hash.len(), 1, "input: {}", input); + let expected = prefix.join(&RelativeUnixPathBuf::new(*expected_filename).unwrap()); + assert_eq!(to_hash[0], expected); + } + } + } + + fn to_hash_map(pairs: &[(&str, &str)]) -> GitHashes { + HashMap::from_iter(pairs.into_iter().map(|(path, hash)| { + ( + RelativeUnixPathBuf::new(path.as_bytes()).unwrap(), + hash.to_string(), + ) + })) + } +}