From 4a3fb6512985870e739aa59fa0a5b5b8651b189d Mon Sep 17 00:00:00 2001 From: Scott Miller Date: Wed, 20 Oct 2021 16:47:59 -0500 Subject: [PATCH] Diagnose partial/missing telemetry configuration (#12802) * Diagnose partial/missing telemetry configuration * changelog * fixup * not sure which component? --- changelog/12802.txt | 3 ++ command/operator_diagnose.go | 46 ++++++++++++++++ command/operator_diagnose_test.go | 52 +++++++++++++++++++ .../diagnose_bad_https_consul_sr.hcl | 11 ---- .../test-fixtures/diagnose_bad_telemetry1.hcl | 18 +++++++ .../test-fixtures/diagnose_bad_telemetry2.hcl | 18 +++++++ .../test-fixtures/diagnose_bad_telemetry3.hcl | 18 +++++++ 7 files changed, 155 insertions(+), 11 deletions(-) create mode 100644 changelog/12802.txt create mode 100644 command/server/test-fixtures/diagnose_bad_telemetry1.hcl create mode 100644 command/server/test-fixtures/diagnose_bad_telemetry2.hcl create mode 100644 command/server/test-fixtures/diagnose_bad_telemetry3.hcl diff --git a/changelog/12802.txt b/changelog/12802.txt new file mode 100644 index 0000000000000..9c49bf1466a73 --- /dev/null +++ b/changelog/12802.txt @@ -0,0 +1,3 @@ +```release-note:improvement +cli: Operator diagnose now tests for missing or partial telemetry configurations. +``` \ No newline at end of file diff --git a/command/operator_diagnose.go b/command/operator_diagnose.go index 7e95134dc643a..d793280158703 100644 --- a/command/operator_diagnose.go +++ b/command/operator_diagnose.go @@ -3,6 +3,7 @@ package command import ( "context" "encoding/json" + "errors" "fmt" "io" "os" @@ -249,6 +250,42 @@ func (c *OperatorDiagnoseCommand) offlineDiagnostics(ctx context.Context) error return fmt.Errorf("No vault server configuration found.") } + diagnose.Test(ctx, "Check Telemetry", func(ctx context.Context) (err error) { + if config.Telemetry == nil { + diagnose.Warn(ctx, "Telemetry is using default configuration") + diagnose.Advise(ctx, "By default only Prometheus and JSON metrics are available. 
Ignore this warning if you are not using telemetry, or are using these metrics and are satisfied with the default retention time and gauge period.") + } else { + t := config.Telemetry + // If any Circonus setting is present, circonus_api_url and circonus_api_token must both be set. + if coalesce(t.CirconusAPIURL, t.CirconusAPIToken, t.CirconusCheckID, t.CirconusCheckTags, t.CirconusCheckSearchTag, + t.CirconusBrokerID, t.CirconusBrokerSelectTag, t.CirconusCheckForceMetricActivation, t.CirconusCheckInstanceID, + t.CirconusCheckSubmissionURL, t.CirconusCheckDisplayName) != nil { + if t.CirconusAPIURL == "" { + return errors.New("incomplete Circonus telemetry configuration, missing circonus_api_url") + } else if t.CirconusAPIToken == "" { + return errors.New("incomplete Circonus telemetry configuration, missing circonus_api_token") + } + } + if len(t.DogStatsDTags) > 0 && t.DogStatsDAddr == "" { + return errors.New("incomplete DogStatsD telemetry configuration, missing dogstatsd_addr, while dogstatsd_tags specified") + } + + // If any Stackdriver setting is present, the project ID, location and namespace must all be set. 
+ if coalesce(t.StackdriverProjectID, t.StackdriverLocation, t.StackdriverDebugLogs, t.StackdriverNamespace) != nil { + if t.StackdriverProjectID == "" { + return errors.New("incomplete Stackdriver telemetry configuration, missing stackdriver_project_id") + } + if t.StackdriverLocation == "" { + return errors.New("incomplete Stackdriver telemetry configuration, missing stackdriver_location") + } + if t.StackdriverNamespace == "" { + return errors.New("incomplete Stackdriver telemetry configuration, missing stackdriver_namespace") + } + } + } + return nil + }) + var metricSink *metricsutil.ClusterMetricSink var metricsHelper *metricsutil.MetricsHelper @@ -676,3 +713,12 @@ SEALFAIL: }) return nil } + +func coalesce(values ...interface{}) interface{} { + for _, val := range values { + if val != nil && val != "" && val != false { // also skip false so an unset boolean setting does not count as present + return val + } + } + return nil +} diff --git a/command/operator_diagnose_test.go b/command/operator_diagnose_test.go index 5768c95e1345d..d99740ff65a2d 100644 --- a/command/operator_diagnose_test.go +++ b/command/operator_diagnose_test.go @@ -415,6 +415,58 @@ func TestOperatorDiagnoseCommand_Run(t *testing.T) { }, }, }, + { + "diagnose_telemetry_partial_circonus", + []string{ + "-config", "./server/test-fixtures/diagnose_bad_telemetry1.hcl", + }, + []*diagnose.Result{ + { + Name: "Check Telemetry", + Status: diagnose.ErrorStatus, + Message: "incomplete Circonus telemetry configuration, missing circonus_api_url", + }, + }, + }, + { + "diagnose_telemetry_partial_dogstats", + []string{ + "-config", "./server/test-fixtures/diagnose_bad_telemetry2.hcl", + }, + []*diagnose.Result{ + { + Name: "Check Telemetry", + Status: diagnose.ErrorStatus, + Message: "incomplete DogStatsD telemetry configuration, missing dogstatsd_addr, while dogstatsd_tags specified", + }, + }, + }, + { + "diagnose_telemetry_partial_stackdriver", + []string{ + "-config", "./server/test-fixtures/diagnose_bad_telemetry3.hcl", + }, + []*diagnose.Result{ + { + Name: "Check Telemetry", + Status: 
diagnose.ErrorStatus, + Message: "incomplete Stackdriver telemetry configuration, missing stackdriver_project_id", + }, + }, + }, + { + "diagnose_telemetry_default", + []string{ + "-config", "./server/test-fixtures/config4.hcl", + }, + []*diagnose.Result{ + { + Name: "Check Telemetry", + Status: diagnose.WarningStatus, + Warnings: []string{"Telemetry is using default configuration"}, + }, + }, + }, } t.Run("validations", func(t *testing.T) { diff --git a/command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl b/command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl index 49d1de056e81b..6faecaab73fbf 100644 --- a/command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl +++ b/command/server/test-fixtures/diagnose_bad_https_consul_sr.hcl @@ -28,17 +28,6 @@ service_registration "consul" { tls_key_file = "./../vault/diagnose/test-fixtures/expiredprivatekey.pem" } -telemetry { - statsd_address = "bar" - usage_gauge_period = "5m" - maximum_gauge_cardinality = 100 - - statsite_address = "foo" - dogstatsd_addr = "127.0.0.1:7254" - dogstatsd_tags = ["tag_1:val_1", "tag_2:val_2"] - metrics_prefix = "myprefix" -} - sentinel { additional_enabled_modules = [] } diff --git a/command/server/test-fixtures/diagnose_bad_telemetry1.hcl b/command/server/test-fixtures/diagnose_bad_telemetry1.hcl new file mode 100644 index 0000000000000..f7629bdd02d26 --- /dev/null +++ b/command/server/test-fixtures/diagnose_bad_telemetry1.hcl @@ -0,0 +1,18 @@ +disable_cache = true +disable_mlock = true +ui = true + +listener "tcp" { + address = "127.0.0.1:8200" +} + +backend "consul" { + advertise_addr = "foo" + token = "foo" +} + +telemetry { + circonus_check_id = "bar" +} + +cluster_addr = "127.0.0.1:8201" diff --git a/command/server/test-fixtures/diagnose_bad_telemetry2.hcl b/command/server/test-fixtures/diagnose_bad_telemetry2.hcl new file mode 100644 index 0000000000000..5c967e3ef926d --- /dev/null +++ b/command/server/test-fixtures/diagnose_bad_telemetry2.hcl @@ -0,0 +1,18 @@ 
+disable_cache = true +disable_mlock = true +ui = true + +listener "tcp" { + address = "127.0.0.1:8200" +} + +backend "consul" { + advertise_addr = "foo" + token = "foo" +} + +telemetry { + dogstatsd_tags = ["bar"] +} + +cluster_addr = "127.0.0.1:8201" diff --git a/command/server/test-fixtures/diagnose_bad_telemetry3.hcl b/command/server/test-fixtures/diagnose_bad_telemetry3.hcl new file mode 100644 index 0000000000000..f9669258460b3 --- /dev/null +++ b/command/server/test-fixtures/diagnose_bad_telemetry3.hcl @@ -0,0 +1,18 @@ +disable_cache = true +disable_mlock = true +ui = true + +listener "tcp" { + address = "127.0.0.1:8200" +} + +backend "consul" { + advertise_addr = "foo" + token = "foo" +} + +telemetry { + stackdriver_namespace = "bar" +} + +cluster_addr = "127.0.0.1:8201"