diff --git a/CHANGELOG.md b/CHANGELOG.md index b7ce27ee2c..e413dc7ac0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ * [FEATURE] Query Frontend/Scheduler: Add a new counter metric `cortex_request_queue_requests_total` for total requests going to queue. #5030 * [FEATURE] Build ARM docker images. #5041 * [BUGFIX] Updated `golang.org/x/net` dependency to fix CVE-2022-27664. #5008 +* [BUGFIX] Fix panic when otel and xray tracing is enabled. #5044 ## 1.14.0 2022-12-02 diff --git a/pkg/tracing/sampler/sampling.go b/pkg/tracing/sampler/sampling.go index 44bc60c612..068f48a1e8 100644 --- a/pkg/tracing/sampler/sampling.go +++ b/pkg/tracing/sampler/sampling.go @@ -1,39 +1,43 @@ package sampler import ( + "encoding/binary" "fmt" + "math" sdktrace "go.opentelemetry.io/otel/sdk/trace" "go.opentelemetry.io/otel/trace" ) -type randGenerator interface { - Float64() float64 -} - -type RandomRatioBased struct { +type xrayTraceIDRatioBased struct { sdktrace.Sampler - rnd randGenerator - fraction float64 + max uint64 } -// NewRandomRatioBased creates a sampler based on random number. +// NewXrayTraceIDRatioBased creates a sampler based on random number. // fraction parameter should be between 0 and 1 where: // fraction >= 1 it will always sample // fraction <= 0 it will never sample -func NewRandomRatioBased(fraction float64, rnd randGenerator) sdktrace.Sampler { +func NewXrayTraceIDRatioBased(fraction float64) sdktrace.Sampler { if fraction >= 1 { return sdktrace.AlwaysSample() } else if fraction <= 0 { return sdktrace.NeverSample() } - return &RandomRatioBased{rnd: rnd, fraction: fraction} + return &xrayTraceIDRatioBased{max: uint64(fraction * math.MaxUint64)} } -func (s *RandomRatioBased) ShouldSample(p sdktrace.SamplingParameters) sdktrace.SamplingResult { +func (s *xrayTraceIDRatioBased) ShouldSample(p sdktrace.SamplingParameters) sdktrace.SamplingResult { + // The default otel sampler pick the first 8 bytes to make the sampling decision and this is a problem to + // xray case as the first 4 bytes on the xray traceId is the time of the original request and the random part are + // the 12 last bytes, and so, this sampler pick the last 8 bytes to make the sampling decision. + // Xray Trace format: https://docs.aws.amazon.com/xray/latest/devguide/xray-api-sendingdata.html + // Xray Id Generator: https://github.com/open-telemetry/opentelemetry-go-contrib/blob/54f0bc5c0fd347cd6db9b7bc14c9f0c00dfcb36b/propagators/aws/xray/idgenerator.go#L58-L63 + // Ref: https://github.com/open-telemetry/opentelemetry-go/blob/7a60bc785d669fa6ad26ba70e88151d4df631d90/sdk/trace/sampling.go#L82-L95 + val := binary.BigEndian.Uint64(p.TraceID[8:16]) psc := trace.SpanContextFromContext(p.ParentContext) - shouldSample := s.rnd.Float64() < s.fraction + shouldSample := val < s.max if shouldSample { return sdktrace.SamplingResult{ Decision: sdktrace.RecordAndSample, @@ -46,6 +50,6 @@ func (s *RandomRatioBased) ShouldSample(p sdktrace.SamplingParameters) sdktrace. } } -func (s *RandomRatioBased) Description() string { - return fmt.Sprintf("RandomRatioBased{%g}", s.fraction) +func (s *xrayTraceIDRatioBased) Description() string { + return fmt.Sprintf("xrayTraceIDRatioBased{%v}", s.max) } diff --git a/pkg/tracing/sampler/sampling_test.go b/pkg/tracing/sampler/sampling_test.go index ff283f55cb..15e27704a0 100644 --- a/pkg/tracing/sampler/sampling_test.go +++ b/pkg/tracing/sampler/sampling_test.go @@ -2,24 +2,15 @@ package sampler import ( "context" - "math/rand" "testing" "github.com/stretchr/testify/require" + "go.opentelemetry.io/contrib/propagators/aws/xray" sdktrace "go.opentelemetry.io/otel/sdk/trace" "go.opentelemetry.io/otel/trace" ) -type mockGenerator struct { - mockedValue float64 -} - -func (g *mockGenerator) Float64() float64 { - return g.mockedValue -} - func Test_ShouldSample(t *testing.T) { - traceID, _ := trace.TraceIDFromHex("4bf92f3577b34da6a3ce929d0e0e4736") parentCtx := trace.ContextWithSpanContext( context.Background(), trace.NewSpanContext(trace.SpanContextConfig{ @@ -27,43 +18,41 @@ func Test_ShouldSample(t *testing.T) { }), ) + generator := xray.NewIDGenerator() + tests := []struct { - name string - samplingDecision sdktrace.SamplingDecision - fraction float64 - generator randGenerator + name string + fraction float64 }{ { - name: "should always sample", - samplingDecision: sdktrace.RecordAndSample, - fraction: 1, - generator: rand.New(rand.NewSource(rand.Int63())), + name: "should always sample", + fraction: 1, }, { - name: "should nerver sample", - samplingDecision: sdktrace.Drop, - fraction: 0, - generator: rand.New(rand.NewSource(rand.Int63())), + name: "should nerver sample", + fraction: 0, }, { - name: "should sample when fraction is above generated", - samplingDecision: sdktrace.RecordAndSample, - fraction: 0.5, - generator: &mockGenerator{0.2}, + name: "should sample 50%", + fraction: 0.5, }, { - name: "should not sample when fraction is not above generated", - samplingDecision: sdktrace.Drop, - fraction: 0.5, - generator: &mockGenerator{0.8}, + name: "should sample 10%", + fraction: 0.1, + }, + { + name: "should sample 1%", + fraction: 0.01, }, } + totalIterations := 10000 for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - s := NewRandomRatioBased(tt.fraction, tt.generator) - for i := 0; i < 100; i++ { - + totalSampled := 0 + s := NewXrayTraceIDRatioBased(tt.fraction) + for i := 0; i < totalIterations; i++ { + traceID, _ := generator.NewIDs(context.Background()) r := s.ShouldSample( sdktrace.SamplingParameters{ ParentContext: parentCtx, @@ -71,9 +60,14 @@ func Test_ShouldSample(t *testing.T) { Name: "test", Kind: trace.SpanKindServer, }) - - require.Equal(t, tt.samplingDecision, r.Decision) + if r.Decision == sdktrace.RecordAndSample { + totalSampled++ + } } + + tolerance := 0.1 + expected := tt.fraction * float64(totalIterations) + require.InDelta(t, expected, totalSampled, expected*tolerance) }) } } diff --git a/pkg/tracing/tracing.go b/pkg/tracing/tracing.go index bdd34d45ea..c562d771cf 100644 --- a/pkg/tracing/tracing.go +++ b/pkg/tracing/tracing.go @@ -4,17 +4,15 @@ import ( "context" "flag" "fmt" - "math/rand" "strings" - "time" "github.com/go-kit/log/level" "github.com/pkg/errors" "github.com/weaveworks/common/tracing" + "go.opentelemetry.io/contrib/propagators/aws/xray" "google.golang.org/grpc/credentials" "github.com/opentracing/opentracing-go" - "go.opentelemetry.io/contrib/propagators/aws/xray" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/exporters/otlp/otlptrace" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" @@ -110,10 +108,10 @@ func SetupTracing(ctx context.Context, name string, c Config) (func(context.Cont return nil, fmt.Errorf("creating tracing resource: %w", err) } - tracerProvider := newTraceProvider(r, c, exporter) + propagator, tracerProvider := newTraceProvider(r, c, exporter) bridge, wrappedProvider := migration.NewCortexBridgeTracerWrapper(tracerProvider.Tracer("github.com/cortexproject/cortex/cmd/cortex")) - bridge.SetTextMapPropagator(propagation.TraceContext{}) + bridge.SetTextMapPropagator(propagator) opentracing.SetGlobalTracer(bridge) otel.SetTracerProvider(wrappedProvider) @@ -125,21 +123,22 @@ func SetupTracing(ctx context.Context, name string, c Config) (func(context.Cont }, nil } -func newTraceProvider(r *resource.Resource, c Config, exporter *otlptrace.Exporter) *sdktrace.TracerProvider { +func newTraceProvider(r *resource.Resource, c Config, exporter *otlptrace.Exporter) (propagation.TextMapPropagator, *sdktrace.TracerProvider) { options := []sdktrace.TracerProviderOption{ sdktrace.WithBatcher(exporter), sdktrace.WithResource(r), } - + var propagator propagation.TextMapPropagator = propagation.TraceContext{} switch strings.ToLower(c.Otel.ExporterType) { case "awsxray": options = append(options, sdktrace.WithIDGenerator(xray.NewIDGenerator())) - options = append(options, sdktrace.WithSampler(sdktrace.ParentBased(sampler.NewRandomRatioBased(c.Otel.SampleRatio, rand.New(rand.NewSource(time.Now().Unix())))))) + options = append(options, sdktrace.WithSampler(sdktrace.ParentBased(sampler.NewXrayTraceIDRatioBased(c.Otel.SampleRatio)))) + propagator = xray.Propagator{} default: options = append(options, sdktrace.WithSampler(sdktrace.ParentBased(sdktrace.TraceIDRatioBased(c.Otel.SampleRatio)))) } - return sdktrace.NewTracerProvider(options...) + return propagator, sdktrace.NewTracerProvider(options...) } func newResource(ctx context.Context, target string, detectors []resource.Detector) (*resource.Resource, error) {