Skip to content

Commit

Permalink
stats: support stats for all retry attempts; support transparent retry (
Browse files Browse the repository at this point in the history
  • Loading branch information
dfawley committed Sep 14, 2021
1 parent 5d8e5aa commit d41f21c
Show file tree
Hide file tree
Showing 4 changed files with 284 additions and 73 deletions.
7 changes: 5 additions & 2 deletions stats/stats.go
Expand Up @@ -36,19 +36,22 @@ type RPCStats interface {
IsClient() bool
}

// Begin contains stats when an RPC begins.
// Begin contains stats when an RPC attempt begins.
// FailFast is only valid if this Begin is from client side.
type Begin struct {
// Client is true if this Begin is from client side.
Client bool
// BeginTime is the time when the RPC begins.
// BeginTime is the time when the RPC attempt begins.
BeginTime time.Time
// FailFast indicates if this RPC is failfast.
FailFast bool
// IsClientStream indicates whether the RPC is a client streaming RPC.
IsClientStream bool
// IsServerStream indicates whether the RPC is a server streaming RPC.
IsServerStream bool
// IsTransparentRetryAttempt indicates whether this attempt was initiated
// due to transparently retrying a previous attempt.
IsTransparentRetryAttempt bool
}

// IsClient indicates if the stats information is from client side.
Expand Down
129 changes: 67 additions & 62 deletions stream.go
Expand Up @@ -274,35 +274,6 @@ func newClientStreamWithParams(ctx context.Context, desc *StreamDesc, cc *Client
if c.creds != nil {
callHdr.Creds = c.creds
}
var trInfo *traceInfo
if EnableTracing {
trInfo = &traceInfo{
tr: trace.New("grpc.Sent."+methodFamily(method), method),
firstLine: firstLine{
client: true,
},
}
if deadline, ok := ctx.Deadline(); ok {
trInfo.firstLine.deadline = time.Until(deadline)
}
trInfo.tr.LazyLog(&trInfo.firstLine, false)
ctx = trace.NewContext(ctx, trInfo.tr)
}
ctx = newContextWithRPCInfo(ctx, c.failFast, c.codec, cp, comp)
sh := cc.dopts.copts.StatsHandler
var beginTime time.Time
if sh != nil {
ctx = sh.TagRPC(ctx, &stats.RPCTagInfo{FullMethodName: method, FailFast: c.failFast})
beginTime = time.Now()
begin := &stats.Begin{
Client: true,
BeginTime: beginTime,
FailFast: c.failFast,
IsClientStream: desc.ClientStreams,
IsServerStream: desc.ServerStreams,
}
sh.HandleRPC(ctx, begin)
}

cs := &clientStream{
callHdr: callHdr,
Expand All @@ -316,7 +287,6 @@ func newClientStreamWithParams(ctx context.Context, desc *StreamDesc, cc *Client
cp: cp,
comp: comp,
cancel: cancel,
beginTime: beginTime,
firstAttempt: true,
onCommit: onCommit,
}
Expand All @@ -325,9 +295,7 @@ func newClientStreamWithParams(ctx context.Context, desc *StreamDesc, cc *Client
}
cs.binlog = binarylog.GetMethodLogger(method)

// Only this initial attempt has stats/tracing.
// TODO(dfawley): move to newAttempt when per-attempt stats are implemented.
if err := cs.newAttemptLocked(sh, trInfo); err != nil {
if err := cs.newAttemptLocked(false /* isTransparent */); err != nil {
cs.finish(err)
return nil, err
}
Expand Down Expand Up @@ -375,8 +343,43 @@ func newClientStreamWithParams(ctx context.Context, desc *StreamDesc, cc *Client

// newAttemptLocked creates a new attempt with a transport.
// If it succeeds, then it replaces clientStream's attempt with this new attempt.
func (cs *clientStream) newAttemptLocked(sh stats.Handler, trInfo *traceInfo) (retErr error) {
func (cs *clientStream) newAttemptLocked(isTransparent bool) (retErr error) {
ctx := newContextWithRPCInfo(cs.ctx, cs.callInfo.failFast, cs.callInfo.codec, cs.cp, cs.comp)
method := cs.callHdr.Method
sh := cs.cc.dopts.copts.StatsHandler
var beginTime time.Time
if sh != nil {
ctx = sh.TagRPC(ctx, &stats.RPCTagInfo{FullMethodName: method, FailFast: cs.callInfo.failFast})
beginTime = time.Now()
begin := &stats.Begin{
Client: true,
BeginTime: beginTime,
FailFast: cs.callInfo.failFast,
IsClientStream: cs.desc.ClientStreams,
IsServerStream: cs.desc.ServerStreams,
IsTransparentRetryAttempt: isTransparent,
}
sh.HandleRPC(ctx, begin)
}

var trInfo *traceInfo
if EnableTracing {
trInfo = &traceInfo{
tr: trace.New("grpc.Sent."+methodFamily(method), method),
firstLine: firstLine{
client: true,
},
}
if deadline, ok := ctx.Deadline(); ok {
trInfo.firstLine.deadline = time.Until(deadline)
}
trInfo.tr.LazyLog(&trInfo.firstLine, false)
ctx = trace.NewContext(ctx, trInfo.tr)
}

newAttempt := &csAttempt{
ctx: ctx,
beginTime: beginTime,
cs: cs,
dc: cs.cc.dopts.dc,
statsHandler: sh,
Expand All @@ -391,15 +394,14 @@ func (cs *clientStream) newAttemptLocked(sh stats.Handler, trInfo *traceInfo) (r
}
}()

if err := cs.ctx.Err(); err != nil {
if err := ctx.Err(); err != nil {
return toRPCErr(err)
}

ctx := cs.ctx
if cs.cc.parsedTarget.Scheme == "xds" {
// Add extra metadata (metadata that will be added by transport) to context
// so the balancer can see them.
ctx = grpcutil.WithExtraMetadata(cs.ctx, metadata.Pairs(
ctx = grpcutil.WithExtraMetadata(ctx, metadata.Pairs(
"content-type", grpcutil.ContentType(cs.callHdr.ContentSubtype),
))
}
Expand All @@ -419,7 +421,7 @@ func (cs *clientStream) newAttemptLocked(sh stats.Handler, trInfo *traceInfo) (r
func (a *csAttempt) newStream() error {
cs := a.cs
cs.callHdr.PreviousAttempts = cs.numRetries
s, err := a.t.NewStream(cs.ctx, cs.callHdr)
s, err := a.t.NewStream(a.ctx, cs.callHdr)
if err != nil {
// Return without converting to an RPC error so retry code can
// inspect.
Expand All @@ -444,8 +446,7 @@ type clientStream struct {

cancel context.CancelFunc // cancels all attempts

sentLast bool // sent an end stream
beginTime time.Time
sentLast bool // sent an end stream

methodConfig *MethodConfig

Expand Down Expand Up @@ -485,6 +486,7 @@ type clientStream struct {
// csAttempt implements a single transport stream attempt within a
// clientStream.
type csAttempt struct {
ctx context.Context
cs *clientStream
t transport.ClientTransport
s *transport.Stream
Expand All @@ -503,6 +505,7 @@ type csAttempt struct {
trInfo *traceInfo

statsHandler stats.Handler
beginTime time.Time
}

func (cs *clientStream) commitAttemptLocked() {
Expand All @@ -520,15 +523,16 @@ func (cs *clientStream) commitAttempt() {
}

// shouldRetry returns nil if the RPC should be retried; otherwise it returns
// the error that should be returned by the operation.
func (cs *clientStream) shouldRetry(err error) error {
// the error that should be returned by the operation. If the RPC should be
// retried, the bool indicates whether it is being retried transparently.
func (cs *clientStream) shouldRetry(err error) (bool, error) {
if cs.attempt.s == nil {
// Error from NewClientStream.
nse, ok := err.(*transport.NewStreamError)
if !ok {
// Unexpected, but assume no I/O was performed and the RPC is not
// fatal, so retry indefinitely.
return nil
return true, nil
}

// Unwrap and convert error.
Expand All @@ -537,19 +541,19 @@ func (cs *clientStream) shouldRetry(err error) error {
// Never retry DoNotRetry errors, which indicate the RPC should not be
// retried due to max header list size violation, etc.
if nse.DoNotRetry {
return err
return false, err
}

// In the event of a non-IO operation error from NewStream, we never
// attempted to write anything to the wire, so we can retry
// indefinitely.
if !nse.PerformedIO {
return nil
return true, nil
}
}
if cs.finished || cs.committed {
// RPC is finished or committed; cannot retry.
return err
return false, err
}
// Wait for the trailers.
unprocessed := false
Expand All @@ -559,17 +563,17 @@ func (cs *clientStream) shouldRetry(err error) error {
}
if cs.firstAttempt && unprocessed {
// First attempt, stream unprocessed: transparently retry.
return nil
return true, nil
}
if cs.cc.dopts.disableRetry {
return err
return false, err
}

pushback := 0
hasPushback := false
if cs.attempt.s != nil {
if !cs.attempt.s.TrailersOnly() {
return err
return false, err
}

// TODO(retry): Move down if the spec changes to not check server pushback
Expand All @@ -580,13 +584,13 @@ func (cs *clientStream) shouldRetry(err error) error {
if pushback, e = strconv.Atoi(sps[0]); e != nil || pushback < 0 {
channelz.Infof(logger, cs.cc.channelzID, "Server retry pushback specified to abort (%q).", sps[0])
cs.retryThrottler.throttle() // This counts as a failure for throttling.
return err
return false, err
}
hasPushback = true
} else if len(sps) > 1 {
channelz.Warningf(logger, cs.cc.channelzID, "Server retry pushback specified multiple values (%q); not retrying.", sps)
cs.retryThrottler.throttle() // This counts as a failure for throttling.
return err
return false, err
}
}

Expand All @@ -599,16 +603,16 @@ func (cs *clientStream) shouldRetry(err error) error {

rp := cs.methodConfig.RetryPolicy
if rp == nil || !rp.RetryableStatusCodes[code] {
return err
return false, err
}

// Note: the ordering here is important; we count this as a failure
// only if the code matched a retryable code.
if cs.retryThrottler.throttle() {
return err
return false, err
}
if cs.numRetries+1 >= rp.MaxAttempts {
return err
return false, err
}

var dur time.Duration
Expand All @@ -631,23 +635,24 @@ func (cs *clientStream) shouldRetry(err error) error {
select {
case <-t.C:
cs.numRetries++
return nil
return false, nil
case <-cs.ctx.Done():
t.Stop()
return status.FromContextError(cs.ctx.Err()).Err()
return false, status.FromContextError(cs.ctx.Err()).Err()
}
}

// Returns nil if a retry was performed and succeeded; error otherwise.
func (cs *clientStream) retryLocked(lastErr error) error {
for {
cs.attempt.finish(toRPCErr(lastErr))
if err := cs.shouldRetry(lastErr); err != nil {
isTransparent, err := cs.shouldRetry(lastErr)
if err != nil {
cs.commitAttemptLocked()
return err
}
cs.firstAttempt = false
if err := cs.newAttemptLocked(nil, nil); err != nil {
if err := cs.newAttemptLocked(isTransparent); err != nil {
return err
}
if lastErr = cs.replayBufferLocked(); lastErr == nil {
Expand Down Expand Up @@ -937,7 +942,7 @@ func (a *csAttempt) sendMsg(m interface{}, hdr, payld, data []byte) error {
return io.EOF
}
if a.statsHandler != nil {
a.statsHandler.HandleRPC(cs.ctx, outPayload(true, m, data, payld, time.Now()))
a.statsHandler.HandleRPC(a.ctx, outPayload(true, m, data, payld, time.Now()))
}
if channelz.IsOn() {
a.t.IncrMsgSent()
Expand Down Expand Up @@ -985,7 +990,7 @@ func (a *csAttempt) recvMsg(m interface{}, payInfo *payloadInfo) (err error) {
a.mu.Unlock()
}
if a.statsHandler != nil {
a.statsHandler.HandleRPC(cs.ctx, &stats.InPayload{
a.statsHandler.HandleRPC(a.ctx, &stats.InPayload{
Client: true,
RecvTime: time.Now(),
Payload: m,
Expand Down Expand Up @@ -1047,12 +1052,12 @@ func (a *csAttempt) finish(err error) {
if a.statsHandler != nil {
end := &stats.End{
Client: true,
BeginTime: a.cs.beginTime,
BeginTime: a.beginTime,
EndTime: time.Now(),
Trailer: tr,
Error: err,
}
a.statsHandler.HandleRPC(a.cs.ctx, end)
a.statsHandler.HandleRPC(a.ctx, end)
}
if a.trInfo != nil && a.trInfo.tr != nil {
if err == nil {
Expand Down

0 comments on commit d41f21c

Please sign in to comment.