Skip to content

Commit

Permalink
Remove finalizers (#115)
Browse files Browse the repository at this point in the history
* Try untagged prom client

* Use partial match deletion

* Remove unused clear functions

* Debug why report name isn't added

* Make linter happy

* Print full report

* Remove finalizers

* Fix some syntax

* More logging

* More debugging

* Some newlines

* Moar logs

* Check report after first publish

* Always enable report_name when exposing detail metrics

* Add log when clearing metrics

* Remove debug logs

* Add log when updating endpoints

* Fix shard endpoint call

* Use namespacedname as report name

* Stringify namespacedname

* Cleanup

* WIP basic shard tests

* Refactor endpoint updates and add first basic test

* More tests

* Tidy go mod

* Cleanup

* More cleanup

* Use new client in configauditreport controller

* Cleanup

* Set report_name for configauditreports

* Typo
  • Loading branch information
stone-z committed Jun 10, 2022
1 parent 47ff8d4 commit 82845f0
Show file tree
Hide file tree
Showing 8 changed files with 322 additions and 172 deletions.
85 changes: 28 additions & 57 deletions controllers/configauditreport/configauditreport_controller.go
Expand Up @@ -23,8 +23,10 @@ import (
aqua "github.com/aquasecurity/starboard/pkg/apis/aquasecurity/v1alpha1"
"github.com/go-logr/logr"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
apitypes "k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
ctrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
Expand Down Expand Up @@ -55,33 +57,22 @@ func (r *ConfigAuditReportReconciler) Reconcile(ctx context.Context, req ctrl.Re
_ = log.FromContext(ctx)
_ = r.Log.WithValues("configauditreport", req.NamespacedName)

report := &aqua.ConfigAuditReport{}
if err := r.Client.Get(ctx, req.NamespacedName, report); err != nil {
if apierrors.IsNotFound(err) {
// Most likely the report was deleted.
return ctrl.Result{}, nil
}

// Error reading the object.
r.Log.Error(err, "Unable to read configauditreport")
return ctrl.Result{}, err
}

// We have fetched the report and have four possibilities:
// - Not deleting + belongs to our shard --> give it our label (to trigger reconciliation by any previous owner) and expose the metric.
// - Not deleting + does not belong to our shard --> remove it from our metrics. Keep the finalizer.
// - Deleting + belongs to our shard --> remove it from our metrics. Remove the finalizer.
// - Deleting + does not belong to our shard --> remove it from our metrics. Keep the finalizer.
deletedSummaries := ConfigAuditSummary.DeletePartialMatch(prometheus.Labels{"report_name": req.NamespacedName.String()})

shouldOwn := r.ShardHelper.ShouldOwn(req.NamespacedName.String())
if shouldOwn {

if report.DeletionTimestamp.IsZero() && shouldOwn {
// Give the report our finalizer if it doesn't have one.
if !utils.SliceContains(report.GetFinalizers(), ConfigAuditReportFinalizer) {
ctrlutil.AddFinalizer(report, ConfigAuditReportFinalizer)
if err := r.Update(ctx, report); err != nil {
return ctrl.Result{}, err
// Try to get the report. It might not exist anymore, in which case we don't need to do anything.
report := &aqua.ConfigAuditReport{}
if err := r.Client.Get(ctx, req.NamespacedName, report); err != nil {
if apierrors.IsNotFound(err) {
// Most likely the report was deleted.
return ctrl.Result{}, nil
}

// Error reading the object.
r.Log.Error(err, "Unable to read report")
return ctrl.Result{}, err
}

r.Log.Info(fmt.Sprintf("Reconciled %s || Found (C/H/M/L): %d/%d/%d/%d",
Expand All @@ -92,32 +83,26 @@ func (r *ConfigAuditReportReconciler) Reconcile(ctx context.Context, req ctrl.Re
report.Report.Summary.LowCount,
))

// Publish summary metrics for this report.
// Publish summary and CVE metrics for this report.
publishSummaryMetrics(report)

if utils.SliceContains(report.GetFinalizers(), ConfigAuditReportFinalizer) {
// Remove the finalizer if we're the shard owner.
ctrlutil.RemoveFinalizer(report, ConfigAuditReportFinalizer)
if err := r.Update(ctx, report); err != nil {
return ctrl.Result{}, err
}
}

// Add a label to this report so any previous owners will reconcile and drop the metric.
report.Labels[controllers.ShardOwnerLabel] = r.ShardHelper.PodIP
err := r.Client.Update(ctx, report, &client.UpdateOptions{})
if err != nil {
r.Log.Error(err, "unable to add shard owner label")
}

} else {
// Unfortunately, we can't yet clear the series based on one label value,
// we have to reconstruct all of the label values to delete the series.
// That's the only reason the finalizer is needed at all.
// So we first clear our metrics for the report, and then remove the finalizer
// if we're the shard which owns this report.

// Drop the report from our metrics.
r.clearImageMetrics(report)

if shouldOwn && utils.SliceContains(report.GetFinalizers(), ConfigAuditReportFinalizer) {
// Remove the finalizer if we're the shard owner.
ctrlutil.RemoveFinalizer(report, ConfigAuditReportFinalizer)
if err := r.Update(ctx, report); err != nil {
return ctrl.Result{}, err
}
if deletedSummaries > 0 {
r.Log.Info(fmt.Sprintf("cleared %d summary metrics", deletedSummaries))
}
}

Expand All @@ -136,22 +121,6 @@ func (r *ConfigAuditReportReconciler) SetupWithManager(mgr ctrl.Manager) error {
return nil
}

func (r *ConfigAuditReportReconciler) clearImageMetrics(report *aqua.ConfigAuditReport) {
// clear summary metrics
summaryValues := valuesForReport(report, metricLabels)

// Delete the series for each severity.
for severity := range getCountPerSeverity(report) {
v := summaryValues
v["severity"] = severity

// Delete the metric.
ConfigAuditSummary.Delete(
v,
)
}
}

func RequeueReportsForPod(c client.Client, log logr.Logger, podIP string) {
reportList := &aqua.ConfigAuditReportList{}
opts := []client.ListOption{
Expand Down Expand Up @@ -219,12 +188,14 @@ func valuesForReport(report *aqua.ConfigAuditReport, labels []string) map[string

func reportValueFor(field string, report *aqua.ConfigAuditReport) string {
switch field {
case "report_name":
return apitypes.NamespacedName{Name: report.Name, Namespace: report.Namespace}.String()
case "resource_name":
return report.Name
case "resource_namespace":
return report.Namespace
case "severity":
return "" // this value will be overwritten on publishSummaryMetrics
return "" // this value will be overwritten in publishSummaryMetrics
default:
// Error?
return ""
Expand Down
1 change: 1 addition & 0 deletions controllers/configauditreport/configauditreport_metrics.go
Expand Up @@ -11,6 +11,7 @@ const (
)

var metricLabels = []string{
"report_name",
"resource_name",
"resource_namespace",
"severity",
Expand Down
118 changes: 27 additions & 91 deletions controllers/vulnerabilityreport/vulnerabilityreport_controller.go
Expand Up @@ -27,6 +27,7 @@ import (
"github.com/prometheus/client_golang/prometheus"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/runtime"
apitypes "k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
ctrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
Expand Down Expand Up @@ -64,37 +65,24 @@ func (r *VulnerabilityReportReconciler) Reconcile(ctx context.Context, req ctrl.

registerMetricsOnce.Do(r.registerMetrics)

// Once deleting metrics based on partial matches is supported, the logic here will change so we clear the metric for all names not belonging to our shard.
// Then we can only fetch reports for our shard. For now, we'll try to get the report once even if it isn't our shard so that we can later clear the metric.

report := &aqua.VulnerabilityReport{}
if err := r.Client.Get(ctx, req.NamespacedName, report); err != nil {
if apierrors.IsNotFound(err) {
// Most likely the report was deleted.
return ctrl.Result{}, nil
}

// Error reading the object.
r.Log.Error(err, "Unable to read report")
return ctrl.Result{}, err
}

// We have fetched the report and have four possibilities:
// - Not deleting + belongs to our shard --> give it our label (to trigger reconciliation by any previous owner) and expose the metric.
// - Not deleting + does not belong to our shard --> remove it from our metrics. Keep the finalizer.
// - Deleting + belongs to our shard --> remove it from our metrics. Remove the finalizer.
// - Deleting + does not belong to our shard --> remove it from our metrics. Keep the finalizer.
// The report has changed, meaning our metrics are out of date for this report. Clear them.
deletedSummaries := VulnerabilitySummary.DeletePartialMatch(prometheus.Labels{"report_name": req.NamespacedName.String()})
deletedDetails := VulnerabilityInfo.DeletePartialMatch(prometheus.Labels{"report_name": req.NamespacedName.String()})

shouldOwn := r.ShardHelper.ShouldOwn(req.NamespacedName.String())
if shouldOwn {

if report.DeletionTimestamp.IsZero() && shouldOwn {

// Give the report our finalizer if it doesn't have one.
if !utils.SliceContains(report.GetFinalizers(), VulnerabilityReportFinalizer) {
ctrlutil.AddFinalizer(report, VulnerabilityReportFinalizer)
if err := r.Update(ctx, report); err != nil {
return ctrl.Result{}, err
// Try to get the report. It might not exist anymore, in which case we don't need to do anything.
report := &aqua.VulnerabilityReport{}
if err := r.Client.Get(ctx, req.NamespacedName, report); err != nil {
if apierrors.IsNotFound(err) {
// Most likely the report was deleted.
return ctrl.Result{}, nil
}

// Error reading the object.
r.Log.Error(err, "Unable to read report")
return ctrl.Result{}, err
}

r.Log.Info(fmt.Sprintf("Reconciled %s || Found (C/H/M/L/N/U): %d/%d/%d/%d/%d/%d",
Expand All @@ -110,29 +98,23 @@ func (r *VulnerabilityReportReconciler) Reconcile(ctx context.Context, req ctrl.
// Publish summary and CVE metrics for this report.
r.publishImageMetrics(report)

if utils.SliceContains(report.GetFinalizers(), VulnerabilityReportFinalizer) {
// Remove the finalizer if we're the shard owner.
ctrlutil.RemoveFinalizer(report, VulnerabilityReportFinalizer)
if err := r.Update(ctx, report); err != nil {
return ctrl.Result{}, err
}
}

// Add a label to this report so any previous owners will reconcile and drop the metric.
report.Labels[controllers.ShardOwnerLabel] = r.ShardHelper.PodIP
err := r.Client.Update(ctx, report, &client.UpdateOptions{})
if err != nil {
r.Log.Error(err, "unable to add shard owner label")
}

} else {
// Unfortunately, we can't yet clear the series based on one label value,
// we have to reconstruct all of the label values to delete the series.
// That's the only reason the finalizer is needed at all.
// So we first clear our metrics for the report, and then remove the finalizer
// if we're the shard which owns this report.

// Drop the report from our metrics.
r.clearImageMetrics(report)

if shouldOwn && utils.SliceContains(report.GetFinalizers(), VulnerabilityReportFinalizer) {
// Remove the finalizer if we're the shard owner.
ctrlutil.RemoveFinalizer(report, VulnerabilityReportFinalizer)
if err := r.Update(ctx, report); err != nil {
return ctrl.Result{}, err
}
if deletedSummaries > 0 || deletedDetails > 0 {
r.Log.Info(fmt.Sprintf("cleared %d summary and %d detail metrics", deletedSummaries, deletedDetails))
}
}

Expand Down Expand Up @@ -166,20 +148,8 @@ func (r *VulnerabilityReportReconciler) SetupWithManager(mgr ctrl.Manager) error
return nil
}

func (r *VulnerabilityReportReconciler) clearImageMetrics(report *aqua.VulnerabilityReport) {

clearSummaryMetrics(report)

// If we have custom metrics to delete, do it.
if len(r.TargetLabels) > 0 {
clearCustomMetrics(report, r.TargetLabels)
}
}

func (r *VulnerabilityReportReconciler) publishImageMetrics(report *aqua.VulnerabilityReport) {

publishSummaryMetrics(report)

// If we have custom metrics to expose, do it.
if len(r.TargetLabels) > 0 {
publishCustomMetrics(report, r.TargetLabels)
Expand Down Expand Up @@ -229,24 +199,8 @@ func getCountPerSeverity(report *aqua.VulnerabilityReport) map[string]float64 {
}
}

func clearSummaryMetrics(report *aqua.VulnerabilityReport) {
summaryValues := valuesForReport(report, LabelsForGroup(labelGroupSummary))

// Delete the series for each severity.
for severity := range getCountPerSeverity(report) {
v := summaryValues
v["severity"] = severity

// Expose the metric.
VulnerabilitySummary.Delete(
v,
)
}
}

func publishSummaryMetrics(report *aqua.VulnerabilityReport) {
summaryValues := valuesForReport(report, LabelsForGroup(labelGroupSummary))

// Add the severity label after the standard labels and expose each severity metric.
for severity, count := range getCountPerSeverity(report) {
v := summaryValues
Expand All @@ -259,27 +213,8 @@ func publishSummaryMetrics(report *aqua.VulnerabilityReport) {
}
}

func clearCustomMetrics(report *aqua.VulnerabilityReport, targetLabels []VulnerabilityLabel) {
reportValues := valuesForReport(report, targetLabels)

for _, v := range report.Report.Vulnerabilities {
vulnValues := valuesForVulnerability(v, targetLabels)

// Include the Report-level values.
for label, value := range reportValues {
vulnValues[label] = value
}

// Delete the metric
VulnerabilityInfo.Delete(
vulnValues,
)
}
}

func publishCustomMetrics(report *aqua.VulnerabilityReport, targetLabels []VulnerabilityLabel) {
reportValues := valuesForReport(report, targetLabels)

for _, v := range report.Report.Vulnerabilities {
vulnValues := valuesForVulnerability(v, targetLabels)

Expand Down Expand Up @@ -324,7 +259,8 @@ func valuesForVulnerability(vuln aqua.Vulnerability, labels []VulnerabilityLabel
func reportValueFor(field string, report *aqua.VulnerabilityReport) string {
switch field {
case "report_name":
return report.Name
// Construct the namespacedname which we'll later be given at reconciliation.
return apitypes.NamespacedName{Name: report.Name, Namespace: report.Namespace}.String()
case "image_namespace":
return report.Namespace
case "image_registry":
Expand Down
9 changes: 7 additions & 2 deletions go.mod
Expand Up @@ -8,13 +8,19 @@ require (
github.com/cespare/xxhash/v2 v2.1.2
github.com/go-logr/logr v1.2.3
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.12.2
github.com/prometheus/client_golang v1.12.2-0.20220421062905-4dcf02ec7b3c
// github.com/prometheus/client_golang v1.12.1
k8s.io/api v0.24.0
k8s.io/apimachinery v0.24.0
k8s.io/client-go v0.24.0
sigs.k8s.io/controller-runtime v0.12.1
)

require (
github.com/google/go-cmp v0.5.8
gotest.tools v2.2.0+incompatible
)

require (
cloud.google.com/go/compute v1.6.1 // indirect
github.com/Azure/go-autorest v14.2.0+incompatible // indirect
Expand All @@ -37,7 +43,6 @@ require (
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/google/gnostic v0.6.9 // indirect
github.com/google/go-cmp v0.5.8 // indirect
github.com/google/gofuzz v1.2.0 // indirect
github.com/google/uuid v1.3.0 // indirect
github.com/imdario/mergo v0.3.12 // indirect
Expand Down
7 changes: 5 additions & 2 deletions go.sum
Expand Up @@ -443,8 +443,8 @@ github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5Fsn
github.com/prometheus/client_golang v1.7.1/go.mod h1:PY5Wy2awLA44sXw4AOSfFBetzPP4j5+D6mVACh+pe2M=
github.com/prometheus/client_golang v1.11.0/go.mod h1:Z6t4BnS23TR94PD6BsDNk8yVqroYurpAkEiz0P2BEV0=
github.com/prometheus/client_golang v1.12.1/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY=
github.com/prometheus/client_golang v1.12.2 h1:51L9cDoUHVrXx4zWYlcLQIZ+d+VXHgqnYKkIuq4g/34=
github.com/prometheus/client_golang v1.12.2/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrbd179Rt297l4aS6nDY=
github.com/prometheus/client_golang v1.12.2-0.20220421062905-4dcf02ec7b3c h1:UPm1o0MgQzLM9Vv2RB3xAFgAOi3hIHvpb4fUHSGVxJo=
github.com/prometheus/client_golang v1.12.2-0.20220421062905-4dcf02ec7b3c/go.mod h1:hnQ3yqQt3g4fD/UXIaxxYrafyiYfxYUS9zsKetoOmXQ=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
Expand All @@ -456,6 +456,7 @@ github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y8
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc=
github.com/prometheus/common v0.32.1/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+M/gUGO4Hls=
github.com/prometheus/common v0.33.0/go.mod h1:gB3sOl7P0TvJabZpLY5uQMpUqRCPPCyRLCZYc7JZTNE=
github.com/prometheus/common v0.34.0 h1:RBmGO9d/FVjqHT0yUGQwBJhkwKV+wPCn7KGpvfab0uE=
github.com/prometheus/common v0.34.0/go.mod h1:gB3sOl7P0TvJabZpLY5uQMpUqRCPPCyRLCZYc7JZTNE=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
Expand Down Expand Up @@ -1065,6 +1066,8 @@ gopkg.in/yaml.v3 v3.0.0-20200615113413-eeeca48fe776/go.mod h1:K4uyk7z7BCEPqu6E+C
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.0 h1:hjy8E9ON/egN1tAYqKb61G10WtihqetD4sz2H+8nIeA=
gopkg.in/yaml.v3 v3.0.0/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gotest.tools v2.2.0+incompatible h1:VsBPFP1AI068pPrMxtb/S8Zkgf9xEmTLJjfM+P5UIEo=
gotest.tools v2.2.0+incompatible/go.mod h1:DsYFclhRJ6vuDpmuTbkuFWG+y2sxOXAzmJt81HFBacw=
gotest.tools/v3 v3.0.2/go.mod h1:3SzNCllyD9/Y+b5r9JIKQ474KzkZyqLqEfYqMsX94Bk=
gotest.tools/v3 v3.0.3/go.mod h1:Z7Lb0S5l+klDB31fvDQX8ss/FlKDxtlFlw3Oa8Ymbl8=
honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4=
Expand Down
4 changes: 4 additions & 0 deletions main.go
Expand Up @@ -116,6 +116,10 @@ func main() {
targetLabels = appendIfNotExists(targetLabels, []vulnerabilityreport.VulnerabilityLabel{label})
}

// If exposing detail metrics, we must always include the report name in order to delete them by name later.
reportNameLabel, _ := vulnerabilityreport.LabelWithName("report_name")
targetLabels = appendIfNotExists(targetLabels, []vulnerabilityreport.VulnerabilityLabel{reportNameLabel})

return nil
})

Expand Down

0 comments on commit 82845f0

Please sign in to comment.