Skip to content

Commit

Permalink
searcher: replace usage of gitserver.DiffSymbols with new gitserver.C…
Browse files Browse the repository at this point in the history
…hangedFiles client method (#62358)

Part of #60654

This PR replaces hybrid's search use of the gitserver.DiffSymbols endpoint with the new gitserver.ChangedFiles gRPC endpoint introduced in #62354.

## Test plan

Existing CI pipeline
  • Loading branch information
ggilmore committed May 7, 2024
1 parent fc03ac9 commit f821f23
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 36 deletions.
3 changes: 2 additions & 1 deletion cmd/searcher/internal/search/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@ go_library(
importpath = "github.com/sourcegraph/sourcegraph/cmd/searcher/internal/search",
visibility = ["//cmd/searcher:__subpackages__"],
deps = [
"//cmd/searcher/diff",
"//cmd/searcher/protocol",
"//internal/api",
"//internal/comby",
"//internal/conf",
"//internal/diskcache",
"//internal/errcode",
"//internal/gitserver",
"//internal/gitserver/gitdomain",
"//internal/lazyregexp",
"//internal/limiter",
"//internal/metrics",
Expand Down Expand Up @@ -141,6 +141,7 @@ go_test(
"//internal/conf",
"//internal/errcode",
"//internal/gitserver",
"//internal/gitserver/gitdomain",
"//internal/grpc",
"//internal/grpc/defaults",
"//internal/observation",
Expand Down
69 changes: 50 additions & 19 deletions cmd/searcher/internal/search/hybrid.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package search
import (
"bytes"
"context"
"io"
"regexp/syntax" //nolint:depguard // using the grafana fork of regexp clashes with zoekt, which uses the std regexp/syntax.
"sort"
"time"

"github.com/prometheus/client_golang/prometheus"
Expand All @@ -12,10 +14,10 @@ import (
"github.com/sourcegraph/zoekt"
zoektquery "github.com/sourcegraph/zoekt/query"

"github.com/sourcegraph/sourcegraph/cmd/searcher/diff"
"github.com/sourcegraph/sourcegraph/cmd/searcher/protocol"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/errcode"
"github.com/sourcegraph/sourcegraph/internal/gitserver/gitdomain"
"github.com/sourcegraph/sourcegraph/internal/search"
"github.com/sourcegraph/sourcegraph/internal/trace"
"github.com/sourcegraph/sourcegraph/lib/errors"
Expand Down Expand Up @@ -102,26 +104,55 @@ func (s *Service) hybrid(ctx context.Context, rootLogger log.Logger, p *protocol
}
logger = logger.With(log.String("indexed", string(indexed)))

// TODO if our store was more flexible we could cache just based on
// indexed and p.Commit and avoid the need of running diff for each
// search.
out, err := s.GitDiffSymbols(ctx, p.Repo, indexed, p.Commit)
if errcode.IsNotFound(err) {
recordHybridFinalState("git-diff-not-found")
logger.Debug("not doing hybrid search due to likely missing indexed commit on gitserver", log.Error(err))
return nil, false, nil
} else if err != nil {
recordHybridFinalState("git-diff-error")
return nil, false, errors.Wrapf(err, "failed to find changed files in %s between %s and %s", p.Repo, indexed, p.Commit)
}
indexedIgnore, unindexedSearch, err := func() (indexedIgnore []string, unindexedSearch []string, err error) {
// TODO if our store was more flexible we could cache just based on
// indexed and p.Commit and avoid the need of running diff for each
// search.
changedFiles, err := s.GitChangedFiles(ctx, p.Repo, indexed, p.Commit)
if err != nil {
return nil, nil, errors.Wrap(err, "failed to get changed files")
}
defer changedFiles.Close()

for {
c, err := changedFiles.Next()
if err == io.EOF {
break
}

if err != nil {
err = errors.Wrap(err, "iterating over changed files in git diff")
return nil, nil, err
}

switch c.Status {
case gitdomain.DeletedAMD:
// no longer appears in "p.Commit"
indexedIgnore = append(indexedIgnore, c.Path)
case gitdomain.ModifiedAMD:
// changed in both "indexed" and "p.Commit"
indexedIgnore = append(indexedIgnore, c.Path)
unindexedSearch = append(unindexedSearch, c.Path)
case gitdomain.AddedAMD:
// doesn't exist in "indexed"
unindexedSearch = append(unindexedSearch, c.Path)
}
}

indexedIgnore, unindexedSearch, err := diff.ParseGitDiffNameStatus(out)
sort.Strings(indexedIgnore)
sort.Strings(unindexedSearch)

return indexedIgnore, unindexedSearch, nil
}()
if err != nil {
logger.Debug("parseGitDiffNameStatus failed",
log.Binary("out", out),
log.Error(err))
recordHybridFinalState("git-diff-parse-error")
return nil, false, errors.Wrapf(err, "failed to parse git diff output of changed files in %s between %s and %s", p.Repo, indexed, p.Commit)
if errcode.IsNotFound(err) {
recordHybridFinalState("git-diff-not-found")
logger.Debug("not doing hybrid search due to likely missing indexed commit on gitserver", log.Error(err))
} else if err != nil {
recordHybridFinalState("git-diff-error")
}

return nil, false, err
}

totalLenIndexedIgnore := totalStringsLen(indexedIgnore)
Expand Down
19 changes: 10 additions & 9 deletions cmd/searcher/internal/search/hybrid_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ import (
zoektgrpc "github.com/sourcegraph/zoekt/cmd/zoekt-webserver/grpc/server"
"google.golang.org/grpc"

"github.com/sourcegraph/sourcegraph/internal/gitserver"
"github.com/sourcegraph/sourcegraph/internal/gitserver/gitdomain"

internalgrpc "github.com/sourcegraph/sourcegraph/internal/grpc"
"github.com/sourcegraph/sourcegraph/internal/grpc/defaults"
proto "github.com/sourcegraph/sourcegraph/internal/searcher/v1"
Expand Down Expand Up @@ -81,13 +84,6 @@ Hello world example in go`, typeFile},
delete(files, unchanged)
}

gitDiffOutput := strings.Join([]string{
"M", "changed.go",
"A", "added.md",
"D", "removed.md",
"", // trailing null
}, "\x00")

s := newStore(t, files)

// explictly remove FetchTar since we should only be using FetchTarByPath
Expand Down Expand Up @@ -115,14 +111,19 @@ Hello world example in go`, typeFile},

// we expect one command against git, lets just fake it.
service := &search.Service{
GitDiffSymbols: func(ctx context.Context, repo api.RepoName, commitA, commitB api.CommitID) ([]byte, error) {
GitChangedFiles: func(ctx context.Context, repo api.RepoName, commitA, commitB api.CommitID) (gitserver.ChangedFilesIterator, error) {
if commitA != "indexedfdeadbeefdeadbeefdeadbeefdeadbeef" {
return nil, errors.Errorf("expected first commit to be indexed, got: %s", commitA)
}
if commitB != "deadbeefdeadbeefdeadbeefdeadbeefdeadbeef" {
return nil, errors.Errorf("expected first commit to be unindexed, got: %s", commitB)
}
return []byte(gitDiffOutput), nil

return gitserver.NewChangedFilesIteratorFromSlice([]gitdomain.PathStatus{
{Status: gitdomain.ModifiedAMD, Path: "changed.go"},
{Status: gitdomain.AddedAMD, Path: "added.md"},
{Status: gitdomain.DeletedAMD, Path: "removed.md"},
}), nil
},
MaxTotalPathsLength: 100_000,

Expand Down
10 changes: 5 additions & 5 deletions cmd/searcher/internal/search/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"github.com/sourcegraph/log"
"github.com/sourcegraph/zoekt"

"github.com/sourcegraph/sourcegraph/internal/gitserver"

"github.com/sourcegraph/sourcegraph/cmd/searcher/protocol"
"github.com/sourcegraph/sourcegraph/internal/api"
"github.com/sourcegraph/sourcegraph/internal/errcode"
Expand All @@ -43,11 +45,9 @@ type Service struct {

Indexed zoekt.Streamer

// GitDiffSymbols returns the stdout of running "git diff -z --name-status
// --no-renames commitA commitB" against repo.
//
// TODO Git client should be exposing a better API here.
GitDiffSymbols func(ctx context.Context, repo api.RepoName, commitA, commitB api.CommitID) ([]byte, error)
// GitChangedFiles returns an iterator that yields list of changed files that have changed in between commitA and commitB in the given
// repo.
GitChangedFiles func(ctx context.Context, repo api.RepoName, commitA, commitB api.CommitID) (gitserver.ChangedFilesIterator, error)

// MaxTotalPathsLength is the maximum sum of lengths of all paths in a
// single call to git archive. This mainly needs to be less than ARG_MAX
Expand Down
4 changes: 2 additions & 2 deletions cmd/searcher/shared/shared.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,10 @@ func Start(ctx context.Context, observationCtx *observation.Context, ready servi

Indexed: sharedsearch.Indexed(),

GitDiffSymbols: func(ctx context.Context, repo api.RepoName, commitA, commitB api.CommitID) ([]byte, error) {
GitChangedFiles: func(ctx context.Context, repo api.RepoName, commitA, commitB api.CommitID) (gitserver.ChangedFilesIterator, error) {
// As this is an internal service call, we need an internal actor.
ctx = actor.WithInternalActor(ctx)
return git.DiffSymbols(ctx, repo, commitA, commitB)
return git.ChangedFiles(ctx, repo, string(commitA), string(commitB))
},
MaxTotalPathsLength: maxTotalPathsLength,

Expand Down

0 comments on commit f821f23

Please sign in to comment.