diff --git a/changelog/12802.txt b/changelog/12802.txt new file mode 100644 index 0000000000000..9c49bf1466a73 --- /dev/null +++ b/changelog/12802.txt @@ -0,0 +1,3 @@ +```release-note:improvement +cli: Operator diagnose now tests for missing or partial telemetry configurations. +``` \ No newline at end of file diff --git a/command/operator_diagnose.go b/command/operator_diagnose.go index 7e95134dc643a..d793280158703 100644 --- a/command/operator_diagnose.go +++ b/command/operator_diagnose.go @@ -3,6 +3,7 @@ package command import ( "context" "encoding/json" + "errors" "fmt" "io" "os" @@ -249,6 +250,42 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error return fmt.Errorf("No vault server configuration found.") } + diagnose.Test(ctx, "Check Telemetry", func(ctx context.Context) (err error) { + if config.Telemetry == nil { + diagnose.Warn(ctx, "Telemetry is using default configuration") + diagnose.Advise(ctx, "By default only Prometheus and JSON metrics are available. Ignore this warning if you are using telemetry or are using these metrics and are satisfied with the default retention time and gauge period.") + } else { + t := config.Telemetry + // If any Circonus setting is present but we're missing the basic fields... 
// coalesce returns the first element of values that is considered "set", or
// nil when no element is.
//
// An element is treated as unset when it is a nil interface, an empty string,
// or the boolean false. Every other value, including zero numbers, is
// returned unchanged.
//
// The boolean case matters because a false stored in an interface{} is
// neither nil nor equal to "", so a plain `val != nil && val != ""` test would
// report it as set. Callers that use `coalesce(...) != nil` to ask "was any
// of these settings supplied?" would then always get a yes as soon as one
// argument is a bool.
// NOTE(review): the Stackdriver check passes t.StackdriverDebugLogs, which is
// presumably a bool; confirm against the Telemetry struct.
func coalesce(values ...interface{}) interface{} {
	for _, val := range values {
		switch v := val.(type) {
		case nil:
			// Untyped nil: nothing was supplied in this position.
			continue
		case string:
			if v == "" {
				continue
			}
		case bool:
			// An unset flag must not count as a configured value.
			if !v {
				continue
			}
		}
		return val
	}
	return nil
}
"diagnose_telemetry_partial_circonus", + []string{ + "-config", "./server/test-fixtures/diagnose_bad_telemetry1.hcl", + }, + []*diagnose.Result{ + { + Name: "Check Telemetry", + Status: diagnose.ErrorStatus, + Message: "incomplete Circonus telemetry configuration, missing circonus_api_url", + }, + }, + }, + { + "diagnose_telemetry_partial_dogstats", + []string{ + "-config", "./server/test-fixtures/diagnose_bad_telemetry2.hcl", + }, + []*diagnose.Result{ + { + Name: "Check Telemetry", + Status: diagnose.ErrorStatus, + Message: "incomplete DogStatsD telemetry configuration, missing dogstatsd_addr, while dogstatsd_tags specified", + }, + }, + }, + { + "diagnose_telemetry_partial_stackdriver", + []string{ + "-config", "./server/test-fixtures/diagnose_bad_telemetry3.hcl", + }, + []*diagnose.Result{ + { + Name: "Check Telemetry", + Status: diagnose.ErrorStatus, + Message: "incomplete Stackdriver telemetry configuration, missing stackdriver_project_id", + }, + }, + }, + { + "diagnose_telemetry_default", + []string{ + "-config", "./server/test-fixtures/config4.hcl", + }, + []*diagnose.Result{ + { + Name: "Check Telemetry", + Status: diagnose.WarningStatus, + Warnings: []string{"Telemetry is using default configuration"}, + }, + }, + }, } t.Run("validations", func(t *testing.T) { diff --git a/command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl b/command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl index 49d1de056e81b..6faecaab73fbf 100644 --- a/command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl +++ b/command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl @@ -28,17 +28,6 @@ service_registration "consul" { tls_key_file = "./../vault/diagnose/test-fixtures/expiredprivatekey.pem" } -telemetry { - statsd_address = "bar" - usage_gauge_period = "5m" - maximum_gauge_cardinality = 100 - - statsite_address = "foo" - dogstatsd_addr = "127.0.0.1:7254" - dogstatsd_tags = ["tag_1:val_1", "tag_2:val_2"] - metrics_prefix = "myprefix" -} - sentinel { 
additional_enabled_modules = [] } diff --git a/command/server/test-fixtures/diagnose_bad_telemetry1.hcl b/command/server/test-fixtures/diagnose_bad_telemetry1.hcl new file mode 100644 index 0000000000000..f7629bdd02d26 --- /dev/null +++ b/command/server/test-fixtures/diagnose_bad_telemetry1.hcl @@ -0,0 +1,18 @@ +disable_cache = true +disable_mlock = true +ui = true + +listener "tcp" { + address = "127.0.0.1:8200" +} + +backend "consul" { + advertise_addr = "foo" + token = "foo" +} + +telemetry { + circonus_check_id = "bar" +} + +cluster_addr = "127.0.0.1:8201" diff --git a/command/server/test-fixtures/diagnose_bad_telemetry2.hcl b/command/server/test-fixtures/diagnose_bad_telemetry2.hcl new file mode 100644 index 0000000000000..5c967e3ef926d --- /dev/null +++ b/command/server/test-fixtures/diagnose_bad_telemetry2.hcl @@ -0,0 +1,18 @@ +disable_cache = true +disable_mlock = true +ui = true + +listener "tcp" { + address = "127.0.0.1:8200" +} + +backend "consul" { + advertise_addr = "foo" + token = "foo" +} + +telemetry { + dogstatsd_tags = ["bar"] +} + +cluster_addr = "127.0.0.1:8201" diff --git a/command/server/test-fixtures/diagnose_bad_telemetry3.hcl b/command/server/test-fixtures/diagnose_bad_telemetry3.hcl new file mode 100644 index 0000000000000..f9669258460b3 --- /dev/null +++ b/command/server/test-fixtures/diagnose_bad_telemetry3.hcl @@ -0,0 +1,18 @@ +disable_cache = true +disable_mlock = true +ui = true + +listener "tcp" { + address = "127.0.0.1:8200" +} + +backend "consul" { + advertise_addr = "foo" + token = "foo" +} + +telemetry { + stackdriver_namespace = "bar" +} + +cluster_addr = "127.0.0.1:8201"