Skip to content

Commit

Permalink
Merge pull request #4889 from jsternberg/universal-telemetry-client
Browse files Browse the repository at this point in the history
cli: add otel sdk tracing and metric providers to the core cli
  • Loading branch information
neersighted committed Mar 25, 2024
2 parents b4d0328 + 89db01e commit b39bbb4
Show file tree
Hide file tree
Showing 236 changed files with 42,128 additions and 1,505 deletions.
2 changes: 2 additions & 0 deletions cli/command/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ type Cli interface {
ContextStore() store.Store
CurrentContext() string
DockerEndpoint() docker.Endpoint
TelemetryClient
}

// DockerCli is an instance the docker command line client.
Expand All @@ -85,6 +86,7 @@ type DockerCli struct {
dockerEndpoint docker.Endpoint
contextStoreConfig store.Config
initTimeout time.Duration
res telemetryResource

// baseCtx is the base context used for internal operations. In the future
// this may be replaced by explicitly passing a context to functions that
Expand Down
202 changes: 202 additions & 0 deletions cli/command/telemetry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
package command

import (
"context"
"os"
"path/filepath"
"sync"
"time"

"github.com/docker/distribution/uuid"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/metric"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/metric/metricdata"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
"go.opentelemetry.io/otel/trace"
)

const exportTimeout = 50 * time.Millisecond

// TracerProvider is an extension of the trace.TracerProvider interface for CLI programs.
type TracerProvider interface {
trace.TracerProvider
ForceFlush(ctx context.Context) error
Shutdown(ctx context.Context) error
}

// MeterProvider is an extension of the metric.MeterProvider interface for CLI programs.
type MeterProvider interface {
metric.MeterProvider
ForceFlush(ctx context.Context) error
Shutdown(ctx context.Context) error
}

// TelemetryClient provides the methods for using OTEL tracing or metrics.
type TelemetryClient interface {
// Resource returns the OTEL Resource configured with this TelemetryClient.
// This resource may be created lazily, but the resource should be the same
// each time this function is invoked.
Resource() *resource.Resource

// TracerProvider returns a TracerProvider. This TracerProvider will be configured
// with the default tracing components for a CLI program along with any options given
// for the SDK.
TracerProvider(ctx context.Context, opts ...sdktrace.TracerProviderOption) TracerProvider

// MeterProvider returns a MeterProvider. This MeterProvider will be configured
// with the default metric components for a CLI program along with any options given
// for the SDK.
MeterProvider(ctx context.Context, opts ...sdkmetric.Option) MeterProvider
}

func (cli *DockerCli) Resource() *resource.Resource {
return cli.res.Get()
}

func (cli *DockerCli) TracerProvider(ctx context.Context, opts ...sdktrace.TracerProviderOption) TracerProvider {
allOpts := make([]sdktrace.TracerProviderOption, 0, len(opts)+2)
allOpts = append(allOpts, sdktrace.WithResource(cli.Resource()))
allOpts = append(allOpts, dockerSpanExporter(ctx, cli)...)
allOpts = append(allOpts, opts...)
return sdktrace.NewTracerProvider(allOpts...)
}

func (cli *DockerCli) MeterProvider(ctx context.Context, opts ...sdkmetric.Option) MeterProvider {
allOpts := make([]sdkmetric.Option, 0, len(opts)+2)
allOpts = append(allOpts, sdkmetric.WithResource(cli.Resource()))
allOpts = append(allOpts, dockerMetricExporter(ctx, cli)...)
allOpts = append(allOpts, opts...)
return sdkmetric.NewMeterProvider(allOpts...)
}

// WithResourceOptions configures additional options for the default resource. The default
// resource will continue to include its default options.
func WithResourceOptions(opts ...resource.Option) CLIOption {
return func(cli *DockerCli) error {
cli.res.AppendOptions(opts...)
return nil
}
}

// WithResource overwrites the default resource and prevents its creation.
func WithResource(res *resource.Resource) CLIOption {
return func(cli *DockerCli) error {
cli.res.Set(res)
return nil
}
}

type telemetryResource struct {
res *resource.Resource
opts []resource.Option
once sync.Once
}

func (r *telemetryResource) Set(res *resource.Resource) {
r.res = res
}

func (r *telemetryResource) Get() *resource.Resource {
r.once.Do(r.init)
return r.res
}

func (r *telemetryResource) init() {
if r.res != nil {
r.opts = nil
return
}

opts := append(r.defaultOptions(), r.opts...)
res, err := resource.New(context.Background(), opts...)
if err != nil {
otel.Handle(err)
}
r.res = res

// Clear the resource options since they'll never be used again and to allow
// the garbage collector to retrieve that memory.
r.opts = nil
}

func (r *telemetryResource) defaultOptions() []resource.Option {
return []resource.Option{
resource.WithDetectors(serviceNameDetector{}),
resource.WithAttributes(
// Use a unique instance id so OTEL knows that each invocation
// of the CLI is its own instance. Without this, downstream
// OTEL processors may think the same process is restarting
// continuously.
semconv.ServiceInstanceID(uuid.Generate().String()),
),
resource.WithFromEnv(),
resource.WithTelemetrySDK(),
}
}

func (r *telemetryResource) AppendOptions(opts ...resource.Option) {
if r.res != nil {
return
}
r.opts = append(r.opts, opts...)
}

type serviceNameDetector struct{}

func (serviceNameDetector) Detect(ctx context.Context) (*resource.Resource, error) {
return resource.StringDetector(
semconv.SchemaURL,
semconv.ServiceNameKey,
func() (string, error) {
return filepath.Base(os.Args[0]), nil
},
).Detect(ctx)
}

// cliReader is an implementation of Reader that will automatically
// report to a designated Exporter when Shutdown is called.
type cliReader struct {
sdkmetric.Reader
exporter sdkmetric.Exporter
}

func newCLIReader(exp sdkmetric.Exporter) sdkmetric.Reader {
reader := sdkmetric.NewManualReader(
sdkmetric.WithTemporalitySelector(deltaTemporality),
)
return &cliReader{
Reader: reader,
exporter: exp,
}
}

func (r *cliReader) Shutdown(ctx context.Context) error {
var rm metricdata.ResourceMetrics
if err := r.Reader.Collect(ctx, &rm); err != nil {
return err
}

// Place a pretty tight constraint on the actual reporting.
// We don't want CLI metrics to prevent the CLI from exiting
// so if there's some kind of issue we need to abort pretty
// quickly.
ctx, cancel := context.WithTimeout(ctx, exportTimeout)
defer cancel()

return r.exporter.Export(ctx, &rm)
}

// deltaTemporality sets the Temporality of every instrument to delta.
//
// This isn't really needed since we create a unique resource on each invocation,
// but it can help with cardinality concerns for downstream processors since they can
// perform aggregation for a time interval and then discard the data once that time
// period has passed. Cumulative temporality would imply to the downstream processor
// that they might receive a successive point and they may unnecessarily keep state
// they really shouldn't.
func deltaTemporality(_ sdkmetric.InstrumentKind) metricdata.Temporality {
return metricdata.DeltaTemporality
}
127 changes: 127 additions & 0 deletions cli/command/telemetry_docker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,127 @@
package command

import (
"context"
"fmt"
"net/url"
"path"

"github.com/pkg/errors"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
)

const otelContextFieldName = "otel"

// dockerExporterOTLPEndpoint retrieves the OTLP endpoint used for the docker reporter
// from the current context.
func dockerExporterOTLPEndpoint(cli Cli) (endpoint string, secure bool) {
meta, err := cli.ContextStore().GetMetadata(cli.CurrentContext())
if err != nil {
otel.Handle(err)
return "", false
}

var otelCfg any
switch m := meta.Metadata.(type) {
case DockerContext:
otelCfg = m.AdditionalFields[otelContextFieldName]
case map[string]any:
otelCfg = m[otelContextFieldName]
}

if otelCfg == nil {
return "", false
}

otelMap, ok := otelCfg.(map[string]any)
if !ok {
otel.Handle(errors.Errorf(
"unexpected type for field %q: %T (expected: %T)",
otelContextFieldName,
otelCfg,
otelMap,
))
return "", false
}

// keys from https://opentelemetry.io/docs/concepts/sdk-configuration/otlp-exporter-configuration/
endpoint, ok = otelMap["OTEL_EXPORTER_OTLP_ENDPOINT"].(string)
if !ok {
return "", false
}

// Parse the endpoint. The docker config expects the endpoint to be
// in the form of a URL to match the environment variable, but this
// option doesn't correspond directly to WithEndpoint.
//
// We pretend we're the same as the environment reader.
u, err := url.Parse(endpoint)
if err != nil {
otel.Handle(errors.Errorf("docker otel endpoint is invalid: %s", err))
return "", false
}

switch u.Scheme {
case "unix":
// Unix sockets are a bit weird. OTEL seems to imply they
// can be used as an environment variable and are handled properly,
// but they don't seem to be as the behavior of the environment variable
// is to strip the scheme from the endpoint, but the underlying implementation
// needs the scheme to use the correct resolver.
//
// We'll just handle this in a special way and add the unix:// back to the endpoint.
endpoint = fmt.Sprintf("unix://%s", path.Join(u.Host, u.Path))
case "https":
secure = true
fallthrough
case "http":
endpoint = path.Join(u.Host, u.Path)
}
return endpoint, secure
}

func dockerSpanExporter(ctx context.Context, cli Cli) []sdktrace.TracerProviderOption {
endpoint, secure := dockerExporterOTLPEndpoint(cli)
if endpoint == "" {
return nil
}

opts := []otlptracegrpc.Option{
otlptracegrpc.WithEndpoint(endpoint),
}
if !secure {
opts = append(opts, otlptracegrpc.WithInsecure())
}

exp, err := otlptracegrpc.New(ctx, opts...)
if err != nil {
otel.Handle(err)
return nil
}
return []sdktrace.TracerProviderOption{sdktrace.WithBatcher(exp, sdktrace.WithExportTimeout(exportTimeout))}
}

func dockerMetricExporter(ctx context.Context, cli Cli) []sdkmetric.Option {
endpoint, secure := dockerExporterOTLPEndpoint(cli)
if endpoint == "" {
return nil
}

opts := []otlpmetricgrpc.Option{
otlpmetricgrpc.WithEndpoint(endpoint),
}
if !secure {
opts = append(opts, otlpmetricgrpc.WithInsecure())
}

exp, err := otlpmetricgrpc.New(ctx, opts...)
if err != nil {
otel.Handle(err)
return nil
}
return []sdkmetric.Option{sdkmetric.WithReader(newCLIReader(exp))}
}
17 changes: 13 additions & 4 deletions vendor.mod
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ require (
github.com/tonistiigi/go-rosetta v0.0.0-20200727161949-f79598599c5d
github.com/xeipuuv/gojsonschema v1.2.0
go.opentelemetry.io/otel v1.21.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.44.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.21.0
go.opentelemetry.io/otel/metric v1.21.0
go.opentelemetry.io/otel/sdk v1.21.0
go.opentelemetry.io/otel/sdk/metric v1.21.0
go.opentelemetry.io/otel/trace v1.21.0
golang.org/x/sync v0.6.0
golang.org/x/sys v0.16.0
golang.org/x/term v0.15.0
Expand All @@ -52,16 +58,18 @@ require (
github.com/Microsoft/go-winio v0.6.1 // indirect
github.com/Microsoft/hcsshim v0.11.4 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/containerd/log v0.1.0 // indirect
github.com/docker/go v1.5.1-1.0.20160303222718-d30aec9fd63c // indirect
github.com/docker/go-events v0.0.0-20190806004212-e31b211e4f1c // indirect
github.com/docker/go-metrics v0.0.1 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-logr/logr v1.3.0 // indirect
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/gorilla/mux v1.8.1 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/klauspost/compress v1.17.4 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
Expand All @@ -78,14 +86,15 @@ require (
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
go.etcd.io/etcd/raft/v3 v3.5.6 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.46.1 // indirect
go.opentelemetry.io/otel/metric v1.21.0 // indirect
go.opentelemetry.io/otel/trace v1.21.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.21.0 // indirect
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/mod v0.14.0 // indirect
golang.org/x/net v0.19.0 // indirect
golang.org/x/time v0.3.0 // indirect
golang.org/x/tools v0.16.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20231002182017-d307bd883b97 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20231016165738-49dd2c1f3d0b // indirect
google.golang.org/grpc v1.59.0 // indirect
google.golang.org/grpc v1.60.1 // indirect
google.golang.org/protobuf v1.33.0 // indirect
)

0 comments on commit b39bbb4

Please sign in to comment.