From fba8c8e201db2692c369298c276bdf476e0f1abe Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 9 Sep 2022 05:49:04 -0700 Subject: [PATCH 1/4] Safely truncate over-length string attributes --- sdk/trace/span.go | 35 ++++++++++++++++++++++++++++++++--- sdk/trace/span_limits_test.go | 6 +++++- sdk/trace/span_test.go | 16 ++++++++++++++++ 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/sdk/trace/span.go b/sdk/trace/span.go index 14d0aabfe69..dff26e55528 100644 --- a/sdk/trace/span.go +++ b/sdk/trace/span.go @@ -20,8 +20,10 @@ import ( "reflect" "runtime" rt "runtime/trace" + "strings" "sync" "time" + "unicode/utf8" "go.opentelemetry.io/otel/attribute" "go.opentelemetry.io/otel/codes" @@ -294,7 +296,7 @@ func (s *recordingSpan) addOverCapAttrs(limit int, attrs []attribute.KeyValue) { // truncateAttr returns a truncated version of attr. Only string and string // slice attribute values are truncated. String values are truncated to at -// most a length of limit. Each string slice value is truncated in this fasion +// most a length of limit. Each string slice value is truncated in this fashion // (the slice length itself is unaffected). // // No truncation is perfromed for a negative limit. @@ -305,7 +307,7 @@ func truncateAttr(limit int, attr attribute.KeyValue) attribute.KeyValue { switch attr.Value.Type() { case attribute.STRING: if v := attr.Value.AsString(); len(v) > limit { - return attr.Key.String(v[:limit]) + return attr.Key.String(safeTruncate(v, limit)) } case attribute.STRINGSLICE: // Do no mutate the original, make a copy. @@ -324,7 +326,7 @@ func truncateAttr(limit int, attr attribute.KeyValue) attribute.KeyValue { v := trucated.Value.AsStringSlice() for i := range v { if len(v[i]) > limit { - v[i] = v[i][:limit] + v[i] = safeTruncate(v[i], limit) } } return trucated @@ -332,6 +334,33 @@ func truncateAttr(limit int, attr attribute.KeyValue) attribute.KeyValue { return attr } +// safeTruncate truncates the string and guarantees valid UTF-8 is returned. +func safeTruncate(input string, limit int) string { + cnt := 0 + for cnt <= limit { + r, size := utf8.DecodeRuneInString(input[cnt:]) + if r == utf8.RuneError { + break + } + + if cnt+size > limit { + return input[:cnt] + } + cnt += size + } + return invalidTruncate(input, limit) +} + +// invalidTruncate truncates the string and then removes invalid UTF-8 points. +// this is a fallback from safeTruncate() in case the input is invalid. +func invalidTruncate(input string, limit int) string { + valid := strings.ToValidUTF8(input[:limit], "") + if len(valid) <= limit { + return valid + } + return valid[:limit] +} + // End ends the span. This method does nothing if the span is already ended or // is not being recorded. // diff --git a/sdk/trace/span_limits_test.go b/sdk/trace/span_limits_test.go index 91199516c56..5e88770ae0f 100644 --- a/sdk/trace/span_limits_test.go +++ b/sdk/trace/span_limits_test.go @@ -168,6 +168,7 @@ func testSpanLimits(t *testing.T, limits SpanLimits) ReadOnlySpan { span.SetAttributes( attribute.String("string", "abc"), attribute.StringSlice("stringSlice", []string{"abc", "def"}), + attribute.String("euro", "€"), // this is a 3-byte rune ) span.AddEvent("event 1", trace.WithAttributes(a...)) span.AddEvent("event 2", trace.WithAttributes(a...)) @@ -186,24 +187,27 @@ func TestSpanLimits(t *testing.T) { attrs := testSpanLimits(t, limits).Attributes() assert.Contains(t, attrs, attribute.String("string", "abc")) assert.Contains(t, attrs, attribute.StringSlice("stringSlice", []string{"abc", "def"})) + assert.Contains(t, attrs, attribute.String("euro", "€")) limits.AttributeValueLengthLimit = 2 attrs = testSpanLimits(t, limits).Attributes() // Ensure string and string slice attributes are truncated. assert.Contains(t, attrs, attribute.String("string", "ab")) assert.Contains(t, attrs, attribute.StringSlice("stringSlice", []string{"ab", "de"})) + assert.Contains(t, attrs, attribute.String("euro", "")) limits.AttributeValueLengthLimit = 0 attrs = testSpanLimits(t, limits).Attributes() assert.Contains(t, attrs, attribute.String("string", "")) assert.Contains(t, attrs, attribute.StringSlice("stringSlice", []string{"", ""})) + assert.Contains(t, attrs, attribute.String("euro", "")) }) t.Run("AttributeCountLimit", func(t *testing.T) { limits := NewSpanLimits() // Unlimited. limits.AttributeCountLimit = -1 - assert.Len(t, testSpanLimits(t, limits).Attributes(), 2) + assert.Len(t, testSpanLimits(t, limits).Attributes(), 3) limits.AttributeCountLimit = 1 assert.Len(t, testSpanLimits(t, limits).Attributes(), 1) diff --git a/sdk/trace/span_test.go b/sdk/trace/span_test.go index 2c7992e457e..adfadbd5bec 100644 --- a/sdk/trace/span_test.go +++ b/sdk/trace/span_test.go @@ -134,6 +134,22 @@ func TestTruncateAttr(t *testing.T) { attr: strSliceAttr, want: strSliceAttr, }, + { + // This tests the ordinary safeTruncate(). + limit: 10, + attr: attribute.String(key, "€€€€"), // 3 bytes each + want: attribute.String(key, "€€€"), + }, + { + // This tests the fallback to invalidTruncate(). + // + // Note that after removing the invalid rune, + // the string is over length and still has to + // be truncated on a code point boundary. + limit: 10, + attr: attribute.String(key, "€"[0:2]+"hello€€"), // corrupted first rune, over limit + want: attribute.String(key, "hello€"), + }, } for _, test := range tests { From 697194bc3599d1ca79447b6aa648e6a372584227 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 9 Sep 2022 06:09:21 -0700 Subject: [PATCH 2/4] more better test --- CHANGELOG.md | 1 + sdk/trace/span.go | 27 ++++++++++++--------------- sdk/trace/span_test.go | 12 ++++++++++-- 3 files changed, 23 insertions(+), 17 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4dec154c4e8..1a860e7f780 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -26,6 +26,7 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm specification change](https://github.com/open-telemetry/opentelemetry-specification/pull/2633). (#2982) - Attempting to start a span with a nil `context` will no longer cause a panic. (#3110) - Export scope attributes for all exporters provided by `go.opentelemetry.io/otel/exporters/otlp/otlptrace`. (#3131) +- Ensure valid UTF-8 when truncating over-length attribute values. (#3156) ## [1.9.0/0.0.3] - 2022-08-01 diff --git a/sdk/trace/span.go b/sdk/trace/span.go index dff26e55528..d18de9ce477 100644 --- a/sdk/trace/span.go +++ b/sdk/trace/span.go @@ -336,29 +336,26 @@ func truncateAttr(limit int, attr attribute.KeyValue) attribute.KeyValue { // safeTruncate truncates the string and guarantees valid UTF-8 is returned. func safeTruncate(input string, limit int) string { - cnt := 0 - for cnt <= limit { + if trunc, ok := safeTruncateValidUTF8(input, limit); ok { + return trunc + } + trunc, _ := safeTruncateValidUTF8(strings.ToValidUTF8(input, ""), limit) + return trunc +} + +func safeTruncateValidUTF8(input string, limit int) (string, bool) { + for cnt := 0; cnt <= limit; { r, size := utf8.DecodeRuneInString(input[cnt:]) if r == utf8.RuneError { - break + return input, false } if cnt+size > limit { - return input[:cnt] + return input[:cnt], true } cnt += size } - return invalidTruncate(input, limit) -} - -// invalidTruncate truncates the string and then removes invalid UTF-8 points. -// this is a fallback from safeTruncate() in case the input is invalid. -func invalidTruncate(input string, limit int) string { - valid := strings.ToValidUTF8(input[:limit], "") - if len(valid) <= limit { - return valid - } - return valid[:limit] + return input, true } // End ends the span. This method does nothing if the span is already ended or diff --git a/sdk/trace/span_test.go b/sdk/trace/span_test.go index adfadbd5bec..2441defae71 100644 --- a/sdk/trace/span_test.go +++ b/sdk/trace/span_test.go @@ -141,15 +141,23 @@ func TestTruncateAttr(t *testing.T) { want: attribute.String(key, "€€€"), }, { - // This tests the fallback to invalidTruncate(). + // This tests truncation with an invalid UTF-8 input. // // Note that after removing the invalid rune, // the string is over length and still has to // be truncated on a code point boundary. limit: 10, - attr: attribute.String(key, "€"[0:2]+"hello€€"), // corrupted first rune, over limit + attr: attribute.String(key, "€"[0:2]+"hello€€"), // corrupted first rune, then over limit want: attribute.String(key, "hello€"), }, + { + // This tests the fallback to invalidTruncate() + // where after validation the string does not require + // truncation. + limit: 6, + attr: attribute.String(key, "€"[0:2]+"hello"), // corrupted first rune, then not over limit + want: attribute.String(key, "hello"), + }, } for _, test := range tests { From 0b7c4ca759b08ed585e01fb8608e1900290fd745 Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 9 Sep 2022 08:40:09 -0700 Subject: [PATCH 3/4] Update sdk/trace/span.go Co-authored-by: Tyler Yahn --- sdk/trace/span.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sdk/trace/span.go b/sdk/trace/span.go index d18de9ce477..449cf6c2552 100644 --- a/sdk/trace/span.go +++ b/sdk/trace/span.go @@ -343,6 +343,10 @@ func safeTruncate(input string, limit int) string { return trunc } +// safeTruncateValidUTF8 returns a copy of the input string safely truncated to +// limit. The truncation is ensured to occur at the bounds of complete UTF-8 +// characters. If invalid encoding of UTF-8 is encountered, input is returned +// with false, otherwise, the truncated input will be returned with true. func safeTruncateValidUTF8(input string, limit int) (string, bool) { for cnt := 0; cnt <= limit; { r, size := utf8.DecodeRuneInString(input[cnt:]) From bd38c9f01e3c012ae898a40f0e306e7fabe2856d Mon Sep 17 00:00:00 2001 From: Joshua MacDonald Date: Fri, 9 Sep 2022 08:57:05 -0700 Subject: [PATCH 4/4] Update --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c3bbe470749..9c3e7fab311 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,7 +22,6 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm exact upper-inclusive boundary support following the [corresponding specification change](https://github.com/open-telemetry/opentelemetry-specification/pull/2633). (#2982) - Attempting to start a span with a nil `context` will no longer cause a panic. (#3110) -- Export scope attributes for all exporters provided by `go.opentelemetry.io/otel/exporters/otlp/otlptrace`. (#3131) - Ensure valid UTF-8 when truncating over-length attribute values. (#3156) ## [1.9.0/0.0.3] - 2022-08-01