/
producer.go
219 lines (195 loc) · 6.47 KB
/
producer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
/*
* Copyright 2022 gRPC authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package orca
import (
"context"
"sync"
"time"
"google.golang.org/grpc"
"google.golang.org/grpc/balancer"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/internal/grpcsync"
"google.golang.org/grpc/orca/internal"
"google.golang.org/grpc/status"
v3orcapb "github.com/cncf/xds/go/xds/data/orca/v3"
v3orcaservicegrpc "github.com/cncf/xds/go/xds/service/orca/v3"
v3orcaservicepb "github.com/cncf/xds/go/xds/service/orca/v3"
"google.golang.org/protobuf/types/known/durationpb"
)
type producerBuilder struct{}
// Build constructs and returns a producer and its cleanup function
func (*producerBuilder) Build(cci interface{}) (balancer.Producer, func()) {
ctx, cancel := context.WithCancel(context.Background())
p := &producer{
client: v3orcaservicegrpc.NewOpenRcaServiceClient(cci.(grpc.ClientConnInterface)),
closed: grpcsync.NewEvent(),
intervals: make(map[time.Duration]int),
listeners: make(map[OOBListener]struct{}),
backoff: internal.DefaultBackoffFunc,
}
go p.run(ctx)
return p, func() {
cancel()
<-p.closed.Done() // Block until stream stopped.
}
}
var producerBuilderSingleton = &producerBuilder{}
// OOBListener is used to receive out-of-band load reports as they arrive.
type OOBListener interface {
// OnLoadReport is called when a load report is received.
OnLoadReport(*v3orcapb.OrcaLoadReport)
}
// OOBListenerOptions contains options to control how an OOBListener is called.
type OOBListenerOptions struct {
// How often to request the server to provide a load report. May be
// provided less frequently if the server requires a longer interval, or
// may be provided more frequently if another subscriber requests a shorter
// interval.
ReportInterval time.Duration
}
// RegisterOOBListener registers an out-of-band load report listener on sc.
// Any OOBListener may only be registered once per subchannel at a time. The
// returned stop function must be called when no longer needed.
func RegisterOOBListener(sc balancer.SubConn, l OOBListener, opts OOBListenerOptions) (stop func()) {
pr, close := sc.GetOrBuildProducer(producerBuilderSingleton)
p := pr.(*producer)
p.registerListener(l, opts.ReportInterval)
// TODO: When we can register for SubConn state updates, don't call run()
// until READY and automatically call stop() on SHUTDOWN.
// If stop is called multiple times, prevent it from having any effect on
// subsequent calls.
var once sync.Once
return func() {
once.Do(func() {
p.unregisterListener(l, opts.ReportInterval)
close()
})
}
}
type producer struct {
client v3orcaservicegrpc.OpenRcaServiceClient
ctx context.Context
cancel func()
closed *grpcsync.Event // fired when closure completes
backoff func(int) time.Duration
mu sync.Mutex
intervals map[time.Duration]int // map from interval time to count of listeners requesting that time
listeners map[OOBListener]struct{} // set of registered listeners
}
// registerListener adds the listener and its requested report interval to the
// producer.
func (p *producer) registerListener(l OOBListener, interval time.Duration) {
p.mu.Lock()
defer p.mu.Unlock()
p.listeners[l] = struct{}{}
p.intervals[interval]++
}
// registerListener removes the listener and its requested report interval to
// the producer.
func (p *producer) unregisterListener(l OOBListener, interval time.Duration) {
p.mu.Lock()
defer p.mu.Unlock()
delete(p.listeners, l)
p.intervals[interval]--
if p.intervals[interval] == 0 {
delete(p.intervals, interval)
}
}
// minInterval returns the smallest key in p.intervals.
func (p *producer) minInterval() time.Duration {
p.mu.Lock()
defer p.mu.Unlock()
min := time.Duration(-1)
for t := range p.intervals {
if min == time.Duration(-1) || t < min {
min = t
}
}
return min
}
// run manages the ORCA OOB stream on the subchannel.
func (p *producer) run(ctx context.Context) {
defer p.closed.Fire()
backoffAttempt := 0
backoffTimer := time.NewTimer(0)
for ctx.Err() == nil {
select {
case <-backoffTimer.C:
case <-ctx.Done():
return
}
resetBackoff, err := p.runStream(ctx)
if resetBackoff {
backoffTimer.Reset(0)
backoffAttempt = 0
} else {
backoffTimer.Reset(p.backoff(backoffAttempt))
backoffAttempt++
}
switch {
case err == nil:
// No error was encountered; restart the stream.
case ctx.Err() != nil:
// Producer was stopped; exit immediately and without logging an
// error.
return
case status.Code(err) == codes.Unimplemented:
// Unimplemented; do not retry and log at ERROR level.
logger.Error("Server doesn't support ORCA OOB load reporting protocol; not listening for load reports.")
return
case status.Code(err) == codes.Unavailable:
// The SubConn is not currently ready; backoff silently.
//
// TODO: don't attempt the stream until the state is READY to
// minimize the chances of this case and to avoid using the
// exponential backoff mechanism, as we should know it's safe to
// retry when the state is READY again.
default:
// Log all other stream errors.
logger.Info("Received unexpected stream error:", err, ctx.Err())
}
}
}
// runStream runs a single stream on the subchannel and returns the resulting
// error, if any, and whether or not the run loop should reset the backoff
// timer to zero or advance it.
func (p *producer) runStream(ctx context.Context) (resetBackoff bool, err error) {
interval := p.minInterval()
streamCtx, cancel := context.WithCancel(ctx)
defer cancel()
stream, err := p.client.StreamCoreMetrics(streamCtx, &v3orcaservicepb.OrcaLoadReportRequest{
ReportInterval: durationpb.New(interval),
})
if err != nil {
return resetBackoff, err
}
for {
report, err := stream.Recv()
if err != nil {
return resetBackoff, err
}
resetBackoff = true
p.mu.Lock()
for l := range p.listeners {
l.OnLoadReport(report)
}
p.mu.Unlock()
if interval != p.minInterval() {
// restart stream to use new interval
return true, nil
}
}
}