diff --git a/CHANGELOG.md b/CHANGELOG.md index c32557b888c..9c3e7fab311 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm exact upper-inclusive boundary support following the [corresponding specification change](https://github.com/open-telemetry/opentelemetry-specification/pull/2633). (#2982) - Attempting to start a span with a nil `context` will no longer cause a panic. (#3110) +- Ensure valid UTF-8 when truncating over-length attribute values. (#3156) ## [1.9.0/0.0.3] - 2022-08-01 diff --git a/sdk/trace/span.go b/sdk/trace/span.go index 14d0aabfe69..449cf6c2552 100644 --- a/sdk/trace/span.go +++ b/sdk/trace/span.go @@ -20,8 +20,10 @@ import ( "reflect" "runtime" rt "runtime/trace" + "strings" "sync" "time" + "unicode/utf8" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/codes" @@ -294,7 +296,7 @@ func (s *recordingSpan) addOverCapAttrs(limit int, attrs []attribute.KeyValue) { // truncateAttr returns a truncated version of attr. Only string and string // slice attribute values are truncated. String values are truncated to at -// most a length of limit. Each string slice value is truncated in this fasion +// most a length of limit. Each string slice value is truncated in this fashion // (the slice length itself is unaffected). // // No truncation is perfromed for a negative limit. @@ -305,7 +307,7 @@ func truncateAttr(limit int, attr attribute.KeyValue) attribute.KeyValue { switch attr.Value.Type() { case attribute.STRING: if v := attr.Value.AsString(); len(v) > limit { - return attr.Key.String(v[:limit]) + return attr.Key.String(safeTruncate(v, limit)) } case attribute.STRINGSLICE: // Do no mutate the original, make a copy. @@ -324,7 +326,7 @@ func truncateAttr(limit int, attr attribute.KeyValue) attribute.KeyValue { v := trucated.Value.AsStringSlice() for i := range v { if len(v[i]) > limit { - v[i] = v[i][:limit] + v[i] = safeTruncate(v[i], limit) } } return trucated @@ -332,6 +334,34 @@ func truncateAttr(limit int, attr attribute.KeyValue) attribute.KeyValue { return attr } +// safeTruncate truncates the string and guarantees valid UTF-8 is returned. +func safeTruncate(input string, limit int) string { + if trunc, ok := safeTruncateValidUTF8(input, limit); ok { + return trunc + } + trunc, _ := safeTruncateValidUTF8(strings.ToValidUTF8(input, ""), limit) + return trunc +} + +// safeTruncateValidUTF8 returns a copy of the input string safely truncated to +// limit. The truncation is ensured to occur at the bounds of complete UTF-8 +// characters. If invalid encoding of UTF-8 is encountered, input is returned +// with false, otherwise, the truncated input will be returned with true. +func safeTruncateValidUTF8(input string, limit int) (string, bool) { + for cnt := 0; cnt <= limit; { + r, size := utf8.DecodeRuneInString(input[cnt:]) + if r == utf8.RuneError { + return input, false + } + + if cnt+size > limit { + return input[:cnt], true + } + cnt += size + } + return input, true +} + // End ends the span. This method does nothing if the span is already ended or // is not being recorded. // diff --git a/sdk/trace/span_limits_test.go b/sdk/trace/span_limits_test.go index 91199516c56..5e88770ae0f 100644 --- a/sdk/trace/span_limits_test.go +++ b/sdk/trace/span_limits_test.go @@ -168,6 +168,7 @@ func testSpanLimits(t *testing.T, limits SpanLimits) ReadOnlySpan { span.SetAttributes( attribute.String("string", "abc"), attribute.StringSlice("stringSlice", []string{"abc", "def"}), + attribute.String("euro", "€"), // this is a 3-byte rune ) span.AddEvent("event 1", trace.WithAttributes(a...)) span.AddEvent("event 2", trace.WithAttributes(a...)) @@ -186,24 +187,27 @@ func TestSpanLimits(t *testing.T) { attrs := testSpanLimits(t, limits).Attributes() assert.Contains(t, attrs, attribute.String("string", "abc")) assert.Contains(t, attrs, attribute.StringSlice("stringSlice", []string{"abc", "def"})) + assert.Contains(t, attrs, attribute.String("euro", "€")) limits.AttributeValueLengthLimit = 2 attrs = testSpanLimits(t, limits).Attributes() // Ensure string and string slice attributes are truncated. assert.Contains(t, attrs, attribute.String("string", "ab")) assert.Contains(t, attrs, attribute.StringSlice("stringSlice", []string{"ab", "de"})) + assert.Contains(t, attrs, attribute.String("euro", "")) limits.AttributeValueLengthLimit = 0 attrs = testSpanLimits(t, limits).Attributes() assert.Contains(t, attrs, attribute.String("string", "")) assert.Contains(t, attrs, attribute.StringSlice("stringSlice", []string{"", ""})) + assert.Contains(t, attrs, attribute.String("euro", "")) }) t.Run("AttributeCountLimit", func(t *testing.T) { limits := NewSpanLimits() // Unlimited. limits.AttributeCountLimit = -1 - assert.Len(t, testSpanLimits(t, limits).Attributes(), 2) + assert.Len(t, testSpanLimits(t, limits).Attributes(), 3) limits.AttributeCountLimit = 1 assert.Len(t, testSpanLimits(t, limits).Attributes(), 1) diff --git a/sdk/trace/span_test.go b/sdk/trace/span_test.go index 2c7992e457e..2441defae71 100644 --- a/sdk/trace/span_test.go +++ b/sdk/trace/span_test.go @@ -134,6 +134,30 @@ func TestTruncateAttr(t *testing.T) { attr: strSliceAttr, want: strSliceAttr, }, + { + // This tests the ordinary safeTruncate(). + limit: 10, + attr: attribute.String(key, "€€€€"), // 3 bytes each + want: attribute.String(key, "€€€"), + }, + { + // This tests truncation with an invalid UTF-8 input. + // + // Note that after removing the invalid rune, + // the string is over length and still has to + // be truncated on a code point boundary. + limit: 10, + attr: attribute.String(key, "€"[0:2]+"hello€€"), // corrupted first rune, then over limit + want: attribute.String(key, "hello€"), + }, + { + // This tests the fallback to invalidTruncate() + // where after validation the string does not require + // truncation. + limit: 6, + attr: attribute.String(key, "€"[0:2]+"hello"), // corrupted first rune, then not over limit + want: attribute.String(key, "hello"), + }, } for _, test := range tests {