Skip to content

Commit

Permalink
fix(baggage): Update baggage parsing and encoding in vendored otel pa…
Browse files Browse the repository at this point in the history
…ckage (#568)
  • Loading branch information
tonyo committed Feb 6, 2023
1 parent ef3a838 commit 3964ece
Show file tree
Hide file tree
Showing 4 changed files with 966 additions and 16 deletions.
3 changes: 3 additions & 0 deletions go.mod
Expand Up @@ -12,6 +12,7 @@ require (
github.com/pingcap/errors v0.11.4
github.com/pkg/errors v0.9.1
github.com/sirupsen/logrus v1.9.0
github.com/stretchr/testify v1.8.0
github.com/urfave/negroni v1.0.0
github.com/valyala/fasthttp v1.40.0
golang.org/x/sys v0.0.0-20220928140112-f11e5e49a4ec
Expand All @@ -26,6 +27,7 @@ require (
github.com/andybalholm/brotli v1.0.4 // indirect
github.com/aymerick/douceur v0.2.0 // indirect
github.com/codegangsta/inject v0.0.0-20150114235600-33e0aa1cb7c0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/eknkc/amber v0.0.0-20171010120322-cdade1c07385 // indirect
github.com/fatih/structs v1.1.0 // indirect
github.com/flosch/pongo2/v4 v4.0.2 // indirect
Expand Down Expand Up @@ -57,6 +59,7 @@ require (
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/pelletier/go-toml/v2 v2.0.5 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/schollz/closestmatch v2.1.0+incompatible // indirect
github.com/tdewolff/minify/v2 v2.12.4 // indirect
Expand Down
12 changes: 12 additions & 0 deletions internal/otel/baggage/README.md
@@ -0,0 +1,12 @@
## Why do we have this "otel/baggage" folder?

The root sentry-go SDK (namely, the Dynamic Sampling functionality) needs an implementation of the [baggage spec](https://www.w3.org/TR/baggage/).
For that reason, we've taken the existing baggage implementation from the [opentelemetry-go](https://github.com/open-telemetry/opentelemetry-go/) repository, and fixed a few things that in our opinion were violating the specification.

These issues are:
1. Baggage string value `one%20two` should be properly parsed as "one two"
1. Baggage string value `one+two` should be parsed as "one+two"
1. Go string value "one two" should be encoded as `one%20two` (percent encoding), and NOT as `one+two` (URL query encoding).
1. Go string value "1=1" might be encoded as `1=1`, because the spec says: "Note, value MAY contain any number of the equal sign (=) characters. Parsers MUST NOT assume that the equal sign is only used to separate key and value.". `1%3D1` is also valid, but to simplify the implementation we're not doing it.

Changes were made in this PR: https://github.com/getsentry/sentry-go/pull/568
61 changes: 45 additions & 16 deletions internal/otel/baggage/baggage.go
@@ -1,6 +1,3 @@
// This file was vendored in unmodified from
// https://github.com/open-telemetry/opentelemetry-go/blob/c21b6b6bb31a2f74edd06e262f1690f3f6ea3d5c/baggage/baggage.go
//
// # Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
Expand All @@ -23,6 +20,7 @@ import (
"net/url"
"regexp"
"strings"
"unicode/utf8"

"github.com/getsentry/sentry-go/internal/otel/baggage/internal/baggage"
)
Expand Down Expand Up @@ -267,11 +265,12 @@ func NewMember(key, value string, props ...Property) (Member, error) {
if err := m.validate(); err != nil {
return newInvalidMember(), err
}
decodedValue, err := url.QueryUnescape(value)
if err != nil {
return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidValue, value)
}
m.value = decodedValue
//// NOTE(anton): I don't think we need to unescape here
// decodedValue, err := url.PathUnescape(value)
// if err != nil {
// return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidValue, value)
// }
// m.value = decodedValue
return m, nil
}

Expand Down Expand Up @@ -315,17 +314,19 @@ func parseMember(member string) (Member, error) {
// "Leading and trailing whitespaces are allowed but MUST be trimmed
// when converting the header into a data structure."
key = strings.TrimSpace(kv[0])
value = strings.TrimSpace(kv[1])
var err error
value, err = url.QueryUnescape(strings.TrimSpace(kv[1]))
if err != nil {
return newInvalidMember(), fmt.Errorf("%w: %q", err, value)
}
if !keyRe.MatchString(key) {
return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidKey, key)
}
if !valueRe.MatchString(value) {
return newInvalidMember(), fmt.Errorf("%w: %q", errInvalidValue, value)
}
decodedValue, err := url.PathUnescape(value)
if err != nil {
return newInvalidMember(), fmt.Errorf("%w: %q", err, value)
}
value = decodedValue
default:
// This should never happen unless a developer has changed the string
// splitting somehow. Panic instead of failing silently and allowing
Expand All @@ -347,9 +348,10 @@ func (m Member) validate() error {
if !keyRe.MatchString(m.key) {
return fmt.Errorf("%w: %q", errInvalidKey, m.key)
}
if !valueRe.MatchString(m.value) {
return fmt.Errorf("%w: %q", errInvalidValue, m.value)
}
//// NOTE(anton): IMO it's too early to validate the value here.
// if !valueRe.MatchString(m.value) {
// return fmt.Errorf("%w: %q", errInvalidValue, m.value)
// }
return m.properties.validate()
}

Expand All @@ -366,13 +368,40 @@ func (m Member) Properties() []Property { return m.properties.Copy() }
// specification.
func (m Member) String() string {
// A key is just an ASCII string, but a value is URL encoded UTF-8.
s := fmt.Sprintf("%s%s%s", m.key, keyValueDelimiter, url.QueryEscape(m.value))
s := fmt.Sprintf("%s%s%s", m.key, keyValueDelimiter, percentEncodeValue(m.value))
if len(m.properties) > 0 {
s = fmt.Sprintf("%s%s%s", s, propertyDelimiter, m.properties.String())
}
return s
}

// percentEncodeValue encodes the baggage value, using percent-encoding for
// disallowed octets.
func percentEncodeValue(s string) string {
const upperhex = "0123456789ABCDEF"
var sb strings.Builder

for byteIndex, width := 0, 0; byteIndex < len(s); byteIndex += width {
runeValue, w := utf8.DecodeRuneInString(s[byteIndex:])
width = w
char := string(runeValue)
if valueRe.MatchString(char) && char != "%" {
// The character is returned as is, no need to percent-encode
sb.WriteString(char)
} else {
// We need to percent-encode each byte of the multi-octet character
for j := 0; j < width; j++ {
b := s[byteIndex+j]
sb.WriteByte('%')
// Bitwise operations are inspired by "net/url"
sb.WriteByte(upperhex[b>>4])
sb.WriteByte(upperhex[b&15])
}
}
}
return sb.String()
}

// Baggage is a list of baggage members representing the baggage-string as
// defined by the W3C Baggage specification.
type Baggage struct { //nolint:golint
Expand Down

0 comments on commit 3964ece

Please sign in to comment.