From 323f3c81613895eef78996b01f5d26c8190281dc Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Thu, 15 Dec 2022 13:28:42 -0800 Subject: [PATCH 1/5] fix NewRandomRatioBased panic Signed-off-by: Alan Protasio --- pkg/tracing/sampler/sampling.go | 18 ++++---- pkg/tracing/sampler/sampling_test.go | 66 +++++++++++++--------------- pkg/tracing/tracing.go | 4 +- 3 files changed, 39 insertions(+), 49 deletions(-) diff --git a/pkg/tracing/sampler/sampling.go b/pkg/tracing/sampler/sampling.go index 44bc60c612..71a616fb8e 100644 --- a/pkg/tracing/sampler/sampling.go +++ b/pkg/tracing/sampler/sampling.go @@ -1,39 +1,37 @@ package sampler import ( + "encoding/binary" "fmt" + "math" sdktrace "go.opentelemetry.io/otel/sdk/trace" "go.opentelemetry.io/otel/trace" ) -type randGenerator interface { - Float64() float64 -} - type RandomRatioBased struct { sdktrace.Sampler - rnd randGenerator - fraction float64 + max uint64 } // NewRandomRatioBased creates a sampler based on random number. // fraction parameter should be between 0 and 1 where: // fraction >= 1 it will always sample // fraction <= 0 it will never sample -func NewRandomRatioBased(fraction float64, rnd randGenerator) sdktrace.Sampler { +func NewRandomRatioBased(fraction float64) sdktrace.Sampler { if fraction >= 1 { return sdktrace.AlwaysSample() } else if fraction <= 0 { return sdktrace.NeverSample() } - return &RandomRatioBased{rnd: rnd, fraction: fraction} + return &RandomRatioBased{max: uint64(fraction * math.MaxUint64)} } func (s *RandomRatioBased) ShouldSample(p sdktrace.SamplingParameters) sdktrace.SamplingResult { + val := binary.BigEndian.Uint64(p.TraceID[8:16]) psc := trace.SpanContextFromContext(p.ParentContext) - shouldSample := s.rnd.Float64() < s.fraction + shouldSample := val < s.max if shouldSample { return sdktrace.SamplingResult{ Decision: sdktrace.RecordAndSample, @@ -47,5 +45,5 @@ func (s *RandomRatioBased) ShouldSample(p sdktrace.SamplingParameters) sdktrace. } func (s *RandomRatioBased) Description() string { - return fmt.Sprintf("RandomRatioBased{%g}", s.fraction) + return fmt.Sprintf("RandomRatioBased{%v}", s.max) } diff --git a/pkg/tracing/sampler/sampling_test.go b/pkg/tracing/sampler/sampling_test.go index ff283f55cb..22135616c8 100644 --- a/pkg/tracing/sampler/sampling_test.go +++ b/pkg/tracing/sampler/sampling_test.go @@ -2,24 +2,15 @@ package sampler import ( "context" - "math/rand" "testing" "github.com/stretchr/testify/require" + "go.opentelemetry.io/contrib/propagators/aws/xray" sdktrace "go.opentelemetry.io/otel/sdk/trace" "go.opentelemetry.io/otel/trace" ) -type mockGenerator struct { - mockedValue float64 -} - -func (g *mockGenerator) Float64() float64 { - return g.mockedValue -} - func Test_ShouldSample(t *testing.T) { - traceID, _ := trace.TraceIDFromHex("4bf92f3577b34da6a3ce929d0e0e4736") parentCtx := trace.ContextWithSpanContext( context.Background(), trace.NewSpanContext(trace.SpanContextConfig{ @@ -27,53 +18,56 @@ func Test_ShouldSample(t *testing.T) { }), ) + generator := xray.NewIDGenerator() + tests := []struct { - name string - samplingDecision sdktrace.SamplingDecision - fraction float64 - generator randGenerator + name string + fraction float64 }{ { - name: "should always sample", - samplingDecision: sdktrace.RecordAndSample, - fraction: 1, - generator: rand.New(rand.NewSource(rand.Int63())), + name: "should always sample", + fraction: 1, }, { - name: "should nerver sample", - samplingDecision: sdktrace.Drop, - fraction: 0, - generator: rand.New(rand.NewSource(rand.Int63())), + name: "should nerver sample", + fraction: 0, }, { - name: "should sample when fraction is above generated", - samplingDecision: sdktrace.RecordAndSample, - fraction: 0.5, - generator: &mockGenerator{0.2}, + name: "should sample 50%", + fraction: 0.5, }, { - name: "should not sample when fraction is not above generated", - samplingDecision: sdktrace.Drop, - fraction: 0.5, - generator: &mockGenerator{0.8}, + name: "should sample 10%", + fraction: 0.1, + }, + { + name: "should sample 1%", + fraction: 0.01, }, } + totalIterations := 10000 for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - s := NewRandomRatioBased(tt.fraction, tt.generator) - for i := 0; i < 100; i++ { - + totalSampled := 0 + s := NewRandomRatioBased(tt.fraction) + for i := 0; i < totalIterations; i++ { + traceId, _ := generator.NewIDs(context.Background()) r := s.ShouldSample( sdktrace.SamplingParameters{ ParentContext: parentCtx, - TraceID: traceID, + TraceID: traceId, Name: "test", Kind: trace.SpanKindServer, }) - - require.Equal(t, tt.samplingDecision, r.Decision) + if r.Decision == sdktrace.RecordAndSample { + totalSampled += 1 + } } + + tolerance := 0.1 + expected := tt.fraction * float64(totalIterations) + require.InDelta(t, expected, totalSampled, expected*tolerance) }) } } diff --git a/pkg/tracing/tracing.go b/pkg/tracing/tracing.go index bdd34d45ea..7619344089 100644 --- a/pkg/tracing/tracing.go +++ b/pkg/tracing/tracing.go @@ -4,9 +4,7 @@ import ( "context" "flag" "fmt" - "math/rand" "strings" - "time" "github.com/go-kit/log/level" "github.com/pkg/errors" @@ -134,7 +132,7 @@ func newTraceProvider(r *resource.Resource, c Config, exporter *otlptrace.Export switch strings.ToLower(c.Otel.ExporterType) { case "awsxray": options = append(options, sdktrace.WithIDGenerator(xray.NewIDGenerator())) - options = append(options, sdktrace.WithSampler(sdktrace.ParentBased(sampler.NewRandomRatioBased(c.Otel.SampleRatio, rand.New(rand.NewSource(time.Now().Unix())))))) + options = append(options, sdktrace.WithSampler(sdktrace.ParentBased(sampler.NewRandomRatioBased(c.Otel.SampleRatio)))) default: options = append(options, sdktrace.WithSampler(sdktrace.ParentBased(sdktrace.TraceIDRatioBased(c.Otel.SampleRatio)))) } From 9f791372c8b1deb1a75cf8ac734ca15dddc5b655 Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Thu, 15 Dec 2022 13:32:47 -0800 Subject: [PATCH 2/5] changelog Signed-off-by: Alan Protasio --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b7ce27ee2c..e413dc7ac0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ * [FEATURE] Query Frontend/Scheduler: Add a new counter metric `cortex_request_queue_requests_total` for total requests going to queue. #5030 * [FEATURE] Build ARM docker images. #5041 * [BUGFIX] Updated `golang.org/x/net` dependency to fix CVE-2022-27664. #5008 +* [BUGFIX] Fix panic when otel and xray tracing is enabled. #5044 ## 1.14.0 2022-12-02 From b709a8f13a07b5f0c0631be0db8998d79130558b Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Thu, 15 Dec 2022 13:54:50 -0800 Subject: [PATCH 3/5] make lint happy Signed-off-by: Alan Protasio --- pkg/tracing/sampler/sampling_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pkg/tracing/sampler/sampling_test.go b/pkg/tracing/sampler/sampling_test.go index 22135616c8..17f5ca53c1 100644 --- a/pkg/tracing/sampler/sampling_test.go +++ b/pkg/tracing/sampler/sampling_test.go @@ -52,16 +52,16 @@ func Test_ShouldSample(t *testing.T) { totalSampled := 0 s := NewRandomRatioBased(tt.fraction) for i := 0; i < totalIterations; i++ { - traceId, _ := generator.NewIDs(context.Background()) + traceID, _ := generator.NewIDs(context.Background()) r := s.ShouldSample( sdktrace.SamplingParameters{ ParentContext: parentCtx, - TraceID: traceId, + TraceID: traceID, Name: "test", Kind: trace.SpanKindServer, }) if r.Decision == sdktrace.RecordAndSample { - totalSampled += 1 + totalSampled++ } } From fbfbd4b3185c858e27cb8143b54164b773ff2136 Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Thu, 15 Dec 2022 14:32:17 -0800 Subject: [PATCH 4/5] adding back xray propagator Signed-off-by: Alan Protasio --- pkg/tracing/tracing.go | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pkg/tracing/tracing.go b/pkg/tracing/tracing.go index 7619344089..1b5394a529 100644 --- a/pkg/tracing/tracing.go +++ b/pkg/tracing/tracing.go @@ -9,10 +9,10 @@ import ( "github.com/go-kit/log/level" "github.com/pkg/errors" "github.com/weaveworks/common/tracing" + "go.opentelemetry.io/contrib/propagators/aws/xray" "google.golang.org/grpc/credentials" "github.com/opentracing/opentracing-go" - "go.opentelemetry.io/contrib/propagators/aws/xray" "go.opentelemetry.io/otel" "go.opentelemetry.io/otel/exporters/otlp/otlptrace" "go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc" @@ -108,10 +108,10 @@ func SetupTracing(ctx context.Context, name string, c Config) (func(context.Cont return nil, fmt.Errorf("creating tracing resource: %w", err) } - tracerProvider := newTraceProvider(r, c, exporter) + propagator, tracerProvider := newTraceProvider(r, c, exporter) bridge, wrappedProvider := migration.NewCortexBridgeTracerWrapper(tracerProvider.Tracer("github.com/cortexproject/cortex/cmd/cortex")) - bridge.SetTextMapPropagator(propagation.TraceContext{}) + bridge.SetTextMapPropagator(propagator) opentracing.SetGlobalTracer(bridge) otel.SetTracerProvider(wrappedProvider) @@ -123,21 +123,22 @@ func SetupTracing(ctx context.Context, name string, c Config) (func(context.Cont }, nil } -func newTraceProvider(r *resource.Resource, c Config, exporter *otlptrace.Exporter) *sdktrace.TracerProvider { +func newTraceProvider(r *resource.Resource, c Config, exporter *otlptrace.Exporter) (propagation.TextMapPropagator, *sdktrace.TracerProvider) { options := []sdktrace.TracerProviderOption{ sdktrace.WithBatcher(exporter), sdktrace.WithResource(r), } - + var propagator propagation.TextMapPropagator = propagation.TraceContext{} switch strings.ToLower(c.Otel.ExporterType) { case "awsxray": options = append(options, sdktrace.WithIDGenerator(xray.NewIDGenerator())) options = append(options, sdktrace.WithSampler(sdktrace.ParentBased(sampler.NewRandomRatioBased(c.Otel.SampleRatio)))) + propagator = xray.Propagator{} default: options = append(options, sdktrace.WithSampler(sdktrace.ParentBased(sdktrace.TraceIDRatioBased(c.Otel.SampleRatio)))) } - return sdktrace.NewTracerProvider(options...) + return propagator, sdktrace.NewTracerProvider(options...) } func newResource(ctx context.Context, target string, detectors []resource.Detector) (*resource.Resource, error) { From 84eae7ecc863c705cd628772fae88c8d69bc0e08 Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Thu, 15 Dec 2022 14:46:50 -0800 Subject: [PATCH 5/5] rename the sampler Signed-off-by: Alan Protasio --- pkg/tracing/sampler/sampling.go | 20 +++++++++++++------- pkg/tracing/sampler/sampling_test.go | 2 +- pkg/tracing/tracing.go | 2 +- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/pkg/tracing/sampler/sampling.go b/pkg/tracing/sampler/sampling.go index 71a616fb8e..068f48a1e8 100644 --- a/pkg/tracing/sampler/sampling.go +++ b/pkg/tracing/sampler/sampling.go @@ -9,26 +9,32 @@ import ( "go.opentelemetry.io/otel/trace" ) -type RandomRatioBased struct { +type xrayTraceIDRatioBased struct { sdktrace.Sampler max uint64 } -// NewRandomRatioBased creates a sampler based on random number. +// NewXrayTraceIDRatioBased creates a sampler based on random number. // fraction parameter should be between 0 and 1 where: // fraction >= 1 it will always sample // fraction <= 0 it will never sample -func NewRandomRatioBased(fraction float64) sdktrace.Sampler { +func NewXrayTraceIDRatioBased(fraction float64) sdktrace.Sampler { if fraction >= 1 { return sdktrace.AlwaysSample() } else if fraction <= 0 { return sdktrace.NeverSample() } - return &RandomRatioBased{max: uint64(fraction * math.MaxUint64)} + return &xrayTraceIDRatioBased{max: uint64(fraction * math.MaxUint64)} } -func (s *RandomRatioBased) ShouldSample(p sdktrace.SamplingParameters) sdktrace.SamplingResult { +func (s *xrayTraceIDRatioBased) ShouldSample(p sdktrace.SamplingParameters) sdktrace.SamplingResult { + // The default otel sampler pick the first 8 bytes to make the sampling decision and this is a problem to + // xray case as the first 4 bytes on the xray traceId is the time of the original request and the random part are + // the 12 last bytes, and so, this sampler pick the last 8 bytes to make the sampling decision. + // Xray Trace format: https://docs.aws.amazon.com/xray/latest/devguide/xray-api-sendingdata.html + // Xray Id Generator: https://github.com/open-telemetry/opentelemetry-go-contrib/blob/54f0bc5c0fd347cd6db9b7bc14c9f0c00dfcb36b/propagators/aws/xray/idgenerator.go#L58-L63 + // Ref: https://github.com/open-telemetry/opentelemetry-go/blob/7a60bc785d669fa6ad26ba70e88151d4df631d90/sdk/trace/sampling.go#L82-L95 val := binary.BigEndian.Uint64(p.TraceID[8:16]) psc := trace.SpanContextFromContext(p.ParentContext) shouldSample := val < s.max @@ -44,6 +50,6 @@ func (s *RandomRatioBased) ShouldSample(p sdktrace.SamplingParameters) sdktrace. } } -func (s *RandomRatioBased) Description() string { - return fmt.Sprintf("RandomRatioBased{%v}", s.max) +func (s *xrayTraceIDRatioBased) Description() string { + return fmt.Sprintf("xrayTraceIDRatioBased{%v}", s.max) } diff --git a/pkg/tracing/sampler/sampling_test.go b/pkg/tracing/sampler/sampling_test.go index 17f5ca53c1..15e27704a0 100644 --- a/pkg/tracing/sampler/sampling_test.go +++ b/pkg/tracing/sampler/sampling_test.go @@ -50,7 +50,7 @@ func Test_ShouldSample(t *testing.T) { for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { totalSampled := 0 - s := NewRandomRatioBased(tt.fraction) + s := NewXrayTraceIDRatioBased(tt.fraction) for i := 0; i < totalIterations; i++ { traceID, _ := generator.NewIDs(context.Background()) r := s.ShouldSample( diff --git a/pkg/tracing/tracing.go b/pkg/tracing/tracing.go index 1b5394a529..c562d771cf 100644 --- a/pkg/tracing/tracing.go +++ b/pkg/tracing/tracing.go @@ -132,7 +132,7 @@ func newTraceProvider(r *resource.Resource, c Config, exporter *otlptrace.Export switch strings.ToLower(c.Otel.ExporterType) { case "awsxray": options = append(options, sdktrace.WithIDGenerator(xray.NewIDGenerator())) - options = append(options, sdktrace.WithSampler(sdktrace.ParentBased(sampler.NewRandomRatioBased(c.Otel.SampleRatio)))) + options = append(options, sdktrace.WithSampler(sdktrace.ParentBased(sampler.NewXrayTraceIDRatioBased(c.Otel.SampleRatio)))) propagator = xray.Propagator{} default: options = append(options, sdktrace.WithSampler(sdktrace.ParentBased(sdktrace.TraceIDRatioBased(c.Otel.SampleRatio))))