Skip to content

Commit

Permalink
cli: add otel sdk tracing and metric providers to the core cli
Browse files Browse the repository at this point in the history
This adds the code used by buildx and compose into the default CLI
program to help normalize the usage of these APIs and allow code reuse
between projects. It also allows these projects to benefit from
improvements or changes that may be made by another team.

At the moment, these APIs are a pretty thin layer on the OTEL SDK. It
configures an additional exporter to a docker endpoint that's used for
usage collection and is only active if the option is configured in
docker desktop.

This also upgrades the OTEL version to v1.19 which is the one being used
by buildkit, buildx, compose, etc.

Signed-off-by: Jonathan A. Sternberg <jonathan.sternberg@docker.com>
  • Loading branch information
jsternberg committed Feb 21, 2024
1 parent 99eb502 commit f5bc6da
Show file tree
Hide file tree
Showing 192 changed files with 39,642 additions and 3 deletions.
5 changes: 5 additions & 0 deletions cli/command/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import (
"github.com/pkg/errors"
"github.com/spf13/cobra"
notaryclient "github.com/theupdateframework/notary/client"
"go.opentelemetry.io/otel/sdk/resource"
)

const defaultInitTimeout = 2 * time.Second
Expand Down Expand Up @@ -65,6 +66,7 @@ type Cli interface {
ContextStore() store.Store
CurrentContext() string
DockerEndpoint() docker.Endpoint
TelemetryClient
}

// DockerCli is an instance the docker command line client.
Expand All @@ -85,6 +87,9 @@ type DockerCli struct {
dockerEndpoint docker.Endpoint
contextStoreConfig store.Config
initTimeout time.Duration
res *resource.Resource
resOpts []resource.Option
resOnce sync.Once

// baseCtx is the base context used for internal operations. In the future
// this may be replaced by explicitly passing a context to functions that
Expand Down
182 changes: 182 additions & 0 deletions cli/command/telemetry.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
package command

import (
"context"
"os"
"path/filepath"
"time"

"github.com/docker/distribution/uuid"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/metric"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/metric/metricdata"
"go.opentelemetry.io/otel/sdk/resource"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.21.0"
"go.opentelemetry.io/otel/trace"
)

// TracerProvider is an extension of the trace.TracerProvider interface for CLI programs.
type TracerProvider interface {
trace.TracerProvider
ForceFlush(ctx context.Context) error
Shutdown(ctx context.Context) error
}

// MeterProvider is an extension of the metric.MeterProvider interface for CLI programs.
type MeterProvider interface {
metric.MeterProvider
ForceFlush(ctx context.Context) error
Shutdown(ctx context.Context) error
}

// TelemetryClient provides the methods for using OTEL tracing or metrics.
type TelemetryClient interface {
// Resource returns the OTEL Resource configured with this TelemetryClient.
// This resource may be created lazily, but the resource should be the same
// each time this function is invoked.
Resource() *resource.Resource

// TracerProvider returns a TracerProvider. This TracerProvider will be configured
// with the default tracing components for a CLI program along with any options given
// for the SDK.
TracerProvider(opts ...sdktrace.TracerProviderOption) TracerProvider

// MeterProvider returns a MeterProvider. This MeterProvider will be configured
// with the default metric components for a CLI program along with any options given
// for the SDK.
MeterProvider(opts ...sdkmetric.Option) MeterProvider
}

func (cli *DockerCli) Resource() *resource.Resource {
cli.resOnce.Do(func() {
if cli.res == nil {
opts := []resource.Option{
resource.WithDetectors(serviceNameDetector{}),
resource.WithAttributes(
// Use a unique instance id so OTEL knows that each invocation
// of the CLI is its own instance. Without this, downstream
// OTEL processors may think the same process is restarting
// continuously.
semconv.ServiceInstanceID(uuid.Generate().String()),
),
resource.WithFromEnv(),
resource.WithTelemetrySDK(),
}
if len(cli.resOpts) > 0 {
opts = append(opts, cli.resOpts...)
}

res, err := resource.New(context.Background(), opts...)
if err != nil {
otel.Handle(err)
}
cli.res = res
}
// Clear the resource options since they'll never be used again and to allow
// the garbage collector to retrieve that memory.
cli.resOpts = nil
})
return cli.res
}

func (cli *DockerCli) TracerProvider(opts ...sdktrace.TracerProviderOption) TracerProvider {
exp, err := dockerSpanExporter(cli)
if err != nil {
otel.Handle(err)
}
defaultOpts := []sdktrace.TracerProviderOption{
sdktrace.WithResource(cli.Resource()),
sdktrace.WithBatcher(exp),
}
return sdktrace.NewTracerProvider(append(defaultOpts, opts...)...)
}

func (cli *DockerCli) MeterProvider(opts ...sdkmetric.Option) MeterProvider {
exp, err := dockerMetricExporter(cli)
if err != nil {
otel.Handle(err)
}
defaultOpts := []sdkmetric.Option{
sdkmetric.WithResource(cli.Resource()),
sdkmetric.WithReader(newCLIReader(exp)),
}
return sdkmetric.NewMeterProvider(append(defaultOpts, opts...)...)
}

// WithResourceOptions configures additional options for the default resource. The default
// resource will continue to include its default options.
func WithResourceOptions(opts ...resource.Option) CLIOption {
return func(cli *DockerCli) error {
cli.resOpts = append(cli.resOpts, opts...)
return nil
}
}

// WithResource overwrites the default resource and prevents its creation.
func WithResource(res *resource.Resource) CLIOption {
return func(cli *DockerCli) error {
cli.res = res
return nil
}
}

type serviceNameDetector struct{}

func (serviceNameDetector) Detect(ctx context.Context) (*resource.Resource, error) {
return resource.StringDetector(
semconv.SchemaURL,
semconv.ServiceNameKey,
func() (string, error) {
return filepath.Base(os.Args[0]), nil
},
).Detect(ctx)
}

// cliReader is an implementation of Reader that will automatically
// report to a designated Exporter when Shutdown is called.
type cliReader struct {
sdkmetric.Reader
exporter sdkmetric.Exporter
}

func newCLIReader(exp sdkmetric.Exporter) sdkmetric.Reader {
reader := sdkmetric.NewManualReader(
sdkmetric.WithTemporalitySelector(deltaTemporality),
)
return &cliReader{
Reader: reader,
exporter: exp,
}
}

const shutdownTimeout = 2 * time.Second

func (r *cliReader) Shutdown(ctx context.Context) error {
var rm metricdata.ResourceMetrics
if err := r.Reader.Collect(ctx, &rm); err != nil {
return err
}

// Place a pretty tight constraint on the actual reporting.
// We don't want CLI metrics to prevent the CLI from exiting
// so if there's some kind of issue we need to abort pretty
// quickly.
ctx, cancel := context.WithTimeout(ctx, shutdownTimeout)
defer cancel()

return r.exporter.Export(ctx, &rm)
}

// deltaTemporality sets the Temporality of every instrument to delta.
//
// This isn't really needed since we create a unique resource on each invocation,
// but it can help with cardinality concerns for downstream processors since they can
// perform aggregation for a time interval and then discard the data once that time
// period has passed. Cumulative temporality would imply to the downstream processor
// that they might receive a successive point and they may unnecessarily keep state
// they really shouldn't.
func deltaTemporality(_ sdkmetric.InstrumentKind) metricdata.Temporality {
return metricdata.DeltaTemporality
}
121 changes: 121 additions & 0 deletions cli/command/telemetry_docker.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
package command

import (
"context"
"fmt"
"net/url"
"path"

"github.com/pkg/errors"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
sdktrace "go.opentelemetry.io/otel/sdk/trace"
)

const otelConfigFieldName = "otel"

// dockerExporterOtlpEndpoint retrieves the OTLP endpoint used for the docker reporter
// from the current context.
func dockerExporterOtlpEndpoint(cli Cli) (endpoint string, secure bool, err error) {
meta, err := cli.ContextStore().GetMetadata(cli.CurrentContext())
if err != nil {
return "", false, err
}

var otelCfg interface{}
switch m := meta.Metadata.(type) {
case DockerContext:
otelCfg = m.AdditionalFields[otelConfigFieldName]
case map[string]interface{}:
otelCfg = m[otelConfigFieldName]
}

if otelCfg == nil {
return "", false, nil
}

otelMap, ok := otelCfg.(map[string]interface{})
if !ok {
return "", false, errors.Errorf(
"unexpected type for field %q: %T (expected: %T)",
otelConfigFieldName,
otelCfg,
otelMap,
)
}

// keys from https://opentelemetry.io/docs/concepts/sdk-configuration/otlp-exporter-configuration/
endpoint, ok = otelMap["OTEL_EXPORTER_OTLP_ENDPOINT"].(string)
if !ok {
return "", false, nil
}

// Parse the endpoint. The docker config expects the endpoint to be
// in the form of a URL to match the environment variable, but this
// option doesn't correspond directly to WithEndpoint.
//
// We pretend we're the same as the environment reader.
u, err := url.Parse(endpoint)
if err != nil {
return "", false, errors.Errorf("docker otel endpoint is invalid: %s", err)
}

switch u.Scheme {
case "unix":
// Unix sockets are a bit weird. OTEL seems to imply they
// can be used as an environment variable and are handled properly,
// but they don't seem to be as the behavior of the environment variable
// is to strip the scheme from the endpoint, but the underlying implementation
// needs the scheme to use the correct resolver.
//
// We'll just handle this in a special way and add the unix:// back to the endpoint.
endpoint = fmt.Sprintf("unix://%s", path.Join(u.Host, u.Path))
case "https":
secure = true
fallthrough
case "http":
endpoint = path.Join(u.Host, u.Path)
}
return endpoint, secure, nil
}

func dockerSpanExporter(cli Cli) (sdktrace.SpanExporter, error) {
endpoint, secure, err := dockerExporterOtlpEndpoint(cli)
if err != nil {
return nil, err
}

opts := []otlptracegrpc.Option{
otlptracegrpc.WithEndpoint(endpoint),
}
if !secure {
opts = append(opts, otlptracegrpc.WithInsecure())
}

exp, err := otlptracegrpc.New(context.Background(), opts...)
if err != nil {
return nil, err
}
return exp, nil
}

func dockerMetricExporter(cli Cli) (sdkmetric.Exporter, error) {
endpoint, secure, err := dockerExporterOtlpEndpoint(cli)
if err != nil {
return nil, err
}

opts := []otlpmetricgrpc.Option{
otlpmetricgrpc.WithEndpoint(endpoint),
}
if !secure {
opts = append(opts, otlpmetricgrpc.WithInsecure())
}

exp, err := otlpmetricgrpc.New(context.Background(), opts...)
if err != nil {
return nil, err
}
return exp, nil
}
14 changes: 12 additions & 2 deletions vendor.mod
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ require (
github.com/tonistiigi/go-rosetta v0.0.0-20200727161949-f79598599c5d
github.com/xeipuuv/gojsonschema v1.2.0
go.opentelemetry.io/otel v1.19.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.19.0
go.opentelemetry.io/otel/metric v1.19.0
go.opentelemetry.io/otel/sdk v1.19.0
go.opentelemetry.io/otel/sdk/metric v1.19.0
go.opentelemetry.io/otel/trace v1.19.0
golang.org/x/sync v0.6.0
golang.org/x/sys v0.16.0
golang.org/x/term v0.15.0
Expand All @@ -52,6 +58,7 @@ require (
github.com/Microsoft/go-winio v0.6.1 // indirect
github.com/Microsoft/hcsshim v0.11.4 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v4 v4.2.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/containerd/log v0.1.0 // indirect
github.com/docker/go v1.5.1-1.0.20160303222718-d30aec9fd63c // indirect
Expand All @@ -62,6 +69,7 @@ require (
github.com/go-logr/stdr v1.2.2 // indirect
github.com/golang/protobuf v1.5.3 // indirect
github.com/gorilla/mux v1.8.1 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/klauspost/compress v1.17.4 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
Expand All @@ -78,13 +86,15 @@ require (
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
go.etcd.io/etcd/raft/v3 v3.5.6 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.45.0 // indirect
go.opentelemetry.io/otel/metric v1.19.0 // indirect
go.opentelemetry.io/otel/trace v1.19.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.19.0 // indirect
go.opentelemetry.io/proto/otlp v1.0.0 // indirect
golang.org/x/crypto v0.17.0 // indirect
golang.org/x/mod v0.14.0 // indirect
golang.org/x/net v0.19.0 // indirect
golang.org/x/time v0.3.0 // indirect
golang.org/x/tools v0.16.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20230711160842-782d3b101e98 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20230711160842-782d3b101e98 // indirect
google.golang.org/grpc v1.58.3 // indirect
google.golang.org/protobuf v1.31.0 // indirect
Expand Down

0 comments on commit f5bc6da

Please sign in to comment.