Skip to content

Commit

Permalink
worker: embeddings: replace usage of old gitserver.DiffSymbols client…
Browse files Browse the repository at this point in the history
… method with new gitserver.ChangedFiles method (#62359)

Part of #60654

This PR replaces the worker embedding job's use of the old gitserver.DiffSymbols endpoint with the gitserver.ChangedFiles gRPC endpoint introduced in #62354.

## Test plan

Existing CI
  • Loading branch information
ggilmore committed May 7, 2024
1 parent f821f23 commit 47d82c7
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 19 deletions.
3 changes: 2 additions & 1 deletion cmd/worker/internal/embeddings/repo/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ go_library(
importpath = "github.com/sourcegraph/sourcegraph/cmd/worker/internal/embeddings/repo",
visibility = ["//cmd/worker:__subpackages__"],
deps = [
"//cmd/searcher/diff",
"//cmd/worker/job",
"//cmd/worker/shared/init/codeintel",
"//cmd/worker/shared/init/db",
Expand All @@ -29,6 +28,7 @@ go_library(
"//internal/env",
"//internal/featureflag",
"//internal/gitserver",
"//internal/gitserver/gitdomain",
"//internal/goroutine",
"//internal/observation",
"//internal/paths",
Expand All @@ -50,6 +50,7 @@ go_test(
"//internal/conf/conftypes",
"//internal/embeddings/embed",
"//internal/gitserver",
"//internal/gitserver/gitdomain",
"@com_github_google_go_cmp//cmp",
],
)
37 changes: 30 additions & 7 deletions cmd/worker/internal/embeddings/repo/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ import (

"github.com/sourcegraph/log"

"github.com/sourcegraph/sourcegraph/cmd/searcher/diff"
"github.com/sourcegraph/sourcegraph/internal/gitserver/gitdomain"

"github.com/sourcegraph/sourcegraph/internal/actor"
"github.com/sourcegraph/sourcegraph/internal/api"
codeintelContext "github.com/sourcegraph/sourcegraph/internal/codeintel/context"
Expand Down Expand Up @@ -228,18 +229,40 @@ func (r *revisionFetcher) List(ctx context.Context) ([]embed.FileEntry, error) {

func (r *revisionFetcher) Diff(ctx context.Context, oldCommit api.CommitID) (
toIndex []embed.FileEntry,
toRemove []string,
filesToRemove []string,
err error,
) {
ctx = actor.WithInternalActor(ctx)
b, err := r.gitserver.DiffSymbols(ctx, r.repo, oldCommit, r.revision)
changedFilesIterator, err := r.gitserver.ChangedFiles(ctx, r.repo, string(oldCommit), string(r.revision))
if err != nil {
return nil, nil, err
}
defer changedFilesIterator.Close()

toRemove, changedNew, err := diff.ParseGitDiffNameStatus(b)
if err != nil {
return nil, nil, err
var toRemove []string
var changedNew []string

for {
f, err := changedFilesIterator.Next()
if err == io.EOF {
break
}
if err != nil {
return nil, nil, errors.Wrap(err, "iterating over changed files in git diff")
}

switch f.Status {
case gitdomain.DeletedAMD:
// Deleted since "oldCommit"
toRemove = append(toRemove, f.Path)
case gitdomain.ModifiedAMD:
// Modified in "r.revision"
toRemove = append(toRemove, f.Path)
changedNew = append(changedNew, f.Path)
case gitdomain.AddedAMD:
// Added in "r.revision"
changedNew = append(changedNew, f.Path)
}
}

// toRemove only contains file names, but we also need the file sizes. We could
Expand All @@ -262,7 +285,7 @@ func (r *revisionFetcher) Diff(ctx context.Context, oldCommit api.CommitID) (
}
}

return
return toIndex, toRemove, nil
}

// validateRevision returns an error if the revision provided to this job is empty.
Expand Down
22 changes: 11 additions & 11 deletions cmd/worker/internal/embeddings/repo/handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import (

"github.com/google/go-cmp/cmp"

"github.com/sourcegraph/sourcegraph/internal/gitserver/gitdomain"

"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/conf/conftypes"
"github.com/sourcegraph/sourcegraph/internal/embeddings/embed"
Expand All @@ -20,15 +22,13 @@ import (
func TestDiff(t *testing.T) {
ctx := context.Background()

diffSymbolsFunc := &gitserver.ClientDiffSymbolsFunc{}
diffSymbolsFunc.SetDefaultHook(func(ctx context.Context, name api.RepoName, id api.CommitID, id2 api.CommitID) ([]byte, error) {
// This is a fake diff output that contains a modified, added and deleted file.
// The output assumes a specific order of "old commit" and "new commit" in
// the call to git diff.
//
// git diff -z --name-status --no-renames <old commit> <new commit>
//
return []byte("M\x00modifiedFile\x00A\x00addedFile\x00D\x00deletedFile\x00"), nil
changedFilesFunc := &gitserver.ClientChangedFilesFunc{}
changedFilesFunc.SetDefaultHook(func(ctx context.Context, name api.RepoName, id string, id2 string) (gitserver.ChangedFilesIterator, error) {
return gitserver.NewChangedFilesIteratorFromSlice([]gitdomain.PathStatus{
{Path: "modifiedFile", Status: gitdomain.ModifiedAMD},
{Path: "addedFile", Status: gitdomain.AddedAMD},
{Path: "deletedFile", Status: gitdomain.DeletedAMD},
}), nil
})

readDirFunc := &gitserver.ClientReadDirFunc{}
Expand All @@ -54,8 +54,8 @@ func TestDiff(t *testing.T) {
})

mockGitServer := &gitserver.MockClient{
DiffSymbolsFunc: diffSymbolsFunc,
ReadDirFunc: readDirFunc,
ChangedFilesFunc: changedFilesFunc,
ReadDirFunc: readDirFunc,
}

rf := revisionFetcher{
Expand Down

0 comments on commit 47d82c7

Please sign in to comment.