Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[alerting] Add severity and refactor code #569

Merged
merged 6 commits into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
94 changes: 94 additions & 0 deletions probes/alerting/alertinfo/alertinfo.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Copyright 2023 The Cloudprober Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package alertinfo implements AlertInfo struct for sharing alert data
// across modules.
package alertinfo

import (
"sort"
"strconv"
"strings"
"time"

"github.com/cloudprober/cloudprober/common/strtemplate"
"github.com/cloudprober/cloudprober/targets/endpoint"
)

// AlertInfo contains information about an alert. The Fields method converts
// it into the string map used for template substitution.
type AlertInfo struct {
// Name is the alert's name; Fields exposes it under the "alert" key.
Name string
// ProbeName is exposed by Fields under the "probe" key.
ProbeName string
// ConditionID is exposed by Fields under the "condition_id" key.
ConditionID string
// Target is the endpoint the alert is about. Fields exposes Target.Dst()
// as "target", each label as "target.label.<key>", and the IP (when
// non-nil) as "target_ip".
Target endpoint.Endpoint
// Failures is exposed by Fields as a decimal string under "failures".
// NOTE(review): presumably the number of failed probes out of Total --
// confirm against the alert handler that populates it.
Failures int
// Total is exposed by Fields as a decimal string under "total".
Total int
// FailingSince is exposed by Fields under "since", formatted as RFC 3339.
FailingSince time.Time
}

// Fields returns the alert as a flat map of string fields suitable for
// template substitution and notifications.
//
// The map always contains "alert", "probe", "target", "condition_id",
// "failures", "total" and "since" (RFC 3339). Every target label is added as
// "target.label.<key>", and "target_ip" is added when the target has an IP.
//
// Each entry of templateDetails is then expanded against the fields collected
// so far and stored under its own key (replacing a built-in field of the same
// name, if any). Entries are expanded in sorted key order so that a template
// referring to another template-defined field gives the same result on every
// call instead of depending on Go's randomized map iteration order. The
// "details" entry is always expanded last, and only if it is non-empty, so it
// can refer to any other field.
func (ai *AlertInfo) Fields(templateDetails map[string]string) map[string]string {
	fields := map[string]string{
		"alert":        ai.Name,
		"probe":        ai.ProbeName,
		"target":       ai.Target.Dst(),
		"condition_id": ai.ConditionID,
		"failures":     strconv.Itoa(ai.Failures),
		"total":        strconv.Itoa(ai.Total),
		"since":        ai.FailingSince.Format(time.RFC3339),
	}

	for k, v := range ai.Target.Labels {
		fields["target.label."+k] = v
	}

	if ai.Target.IP != nil {
		fields["target_ip"] = ai.Target.IP.String()
	}

	// Expanded values are written back into fields and are therefore visible
	// to templates expanded later; a fixed (sorted) order keeps the outcome
	// deterministic.
	keys := make([]string, 0, len(templateDetails))
	for k := range templateDetails {
		if k != "details" {
			keys = append(keys, k)
		}
	}
	sort.Strings(keys)

	for _, k := range keys {
		// The second return value is deliberately discarded: expansion is
		// best-effort here. NOTE(review): presumably it reports whether all
		// labels were resolved -- confirm in strtemplate.
		fields[k], _ = strtemplate.SubstituteLabels(templateDetails[k], fields)
	}

	// Note that we parse details in the end, that's because details template
	// may use other parsed fields like dashboard_url, playbook_url, etc.
	if details := templateDetails["details"]; details != "" {
		fields["details"], _ = strtemplate.SubstituteLabels(details, fields)
	}

	return fields
}

// FieldsToString renders fields as newline-separated "key: value" lines,
// ordered by key. Keys listed in skipKeys are left out. The result has no
// trailing newline and is empty when no fields remain.
func FieldsToString(fields map[string]string, skipKeys ...string) string {
	skipped := make(map[string]struct{}, len(skipKeys))
	for _, key := range skipKeys {
		skipped[key] = struct{}{}
	}

	names := make([]string, 0, len(fields))
	for name := range fields {
		if _, skip := skipped[name]; skip {
			continue
		}
		names = append(names, name)
	}
	// Map iteration order is random; sort for stable output.
	sort.Strings(names)

	var sb strings.Builder
	for i, name := range names {
		if i > 0 {
			sb.WriteByte('\n')
		}
		sb.WriteString(name)
		sb.WriteString(": ")
		sb.WriteString(fields[name])
	}
	return sb.String()
}
135 changes: 135 additions & 0 deletions probes/alerting/alertinfo/alertinfo_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
// Copyright 2023 The Cloudprober Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// Package alertinfo implements AlertInfo struct for sharing alert data
// across modules.
package alertinfo

import (
"net"
"testing"
"time"

"github.com/cloudprober/cloudprober/targets/endpoint"
"github.com/stretchr/testify/assert"
)

// TestAlertInfoFields checks the map produced by AlertInfo.Fields, both
// without any templates and with templates that refer to built-in fields and
// to each other ("details" uses "dashboard_url").
func TestAlertInfoFields(t *testing.T) {
	target := endpoint.Endpoint{
		Name: "test-target",
		IP:   net.ParseIP("10.11.12.13"),
		Labels: map[string]string{
			"apptype":  "backend",
			"language": "go",
		},
	}

	// Every case uses the same alert; only the templates differ.
	newAlert := func() *AlertInfo {
		return &AlertInfo{
			Name:         "test-alert",
			ProbeName:    "test-probe",
			ConditionID:  "122333444",
			Target:       target,
			Failures:     8,
			Total:        12,
			FailingSince: time.Time{}.Add(time.Second),
		}
	}

	// wantWith returns the fields expected for the alert above, extended
	// with the given template-derived entries.
	wantWith := func(extra map[string]string) map[string]string {
		want := map[string]string{
			"alert":                 "test-alert",
			"probe":                 "test-probe",
			"condition_id":          "122333444",
			"target":                "test-target",
			"target_ip":             "10.11.12.13",
			"failures":              "8",
			"total":                 "12",
			"since":                 "0001-01-01T00:00:01Z",
			"target.label.apptype":  "backend",
			"target.label.language": "go",
		}
		for k, v := range extra {
			want[k] = v
		}
		return want
	}

	cases := []struct {
		name      string
		templates map[string]string
		want      map[string]string
	}{
		{
			name: "no_template_details",
			want: wantWith(nil),
		},
		{
			name: "with_template_details",
			templates: map[string]string{
				"summary":       "Cloudprober alert \"@alert@\" for \"@target@\"",
				"dashboard_url": "https://my-dashboard.com/probe=@probe@&target=@target@",
				"details":       "Dashboard: @dashboard_url@",
			},
			want: wantWith(map[string]string{
				"summary":       "Cloudprober alert \"test-alert\" for \"test-target\"",
				"dashboard_url": "https://my-dashboard.com/probe=test-probe&target=test-target",
				"details":       "Dashboard: https://my-dashboard.com/probe=test-probe&target=test-target",
			}),
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			got := newAlert().Fields(tc.templates)
			assert.Equal(t, tc.want, got, "Fields don't match")
		})
	}
}

// TestFieldsToString checks that FieldsToString emits "key: value" lines
// sorted by key and leaves out the keys passed as skipKeys.
func TestFieldsToString(t *testing.T) {
	fields := map[string]string{
		"alert":        "test-alert",
		"probe":        "test-probe",
		"condition_id": "122333444",
	}

	tests := []struct {
		name     string
		skipKeys []string
		want     string
	}{
		{
			name: "skip_none",
			want: "alert: test-alert\ncondition_id: 122333444\nprobe: test-probe",
		},
		{
			// Named after the key that is actually skipped, so a failure
			// report points at the right case.
			name:     "skip_condition_id",
			skipKeys: []string{"condition_id"},
			want:     "alert: test-alert\nprobe: test-probe",
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			assert.Equal(t, tt.want, FieldsToString(fields, tt.skipKeys...), "Fields don't match")
		})
	}
}
5 changes: 3 additions & 2 deletions probes/alerting/alerting.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

"github.com/cloudprober/cloudprober/logger"
"github.com/cloudprober/cloudprober/metrics"
"github.com/cloudprober/cloudprober/probes/alerting/alertinfo"
"github.com/cloudprober/cloudprober/probes/alerting/notifier"
configpb "github.com/cloudprober/cloudprober/probes/alerting/proto"
"github.com/cloudprober/cloudprober/targets/endpoint"
Expand All @@ -50,7 +51,7 @@ type AlertHandler struct {
probeName string
condition *configpb.Condition
notifyConfig *configpb.NotifyConfig
notifyCh chan *notifier.AlertInfo // Used only for testing for now.
notifyCh chan *alertinfo.AlertInfo // Used only for testing for now.
notifier *notifier.Notifier

mu sync.Mutex
Expand Down Expand Up @@ -125,7 +126,7 @@ func (ah *AlertHandler) notify(ep endpoint.Endpoint, ts *targetState, totalFailu
ah.l.Warningf("ALERT (%s): target (%s), failures (%d) higher than (%d) since (%v)", ah.name, ep.Name, totalFailures, ah.condition.Failures, ts.failingSince)

ts.alerted = true
alertInfo := &notifier.AlertInfo{
alertInfo := &alertinfo.AlertInfo{
Name: ah.name,
ProbeName: ah.probeName,
ConditionID: ts.conditionID,
Expand Down
20 changes: 10 additions & 10 deletions probes/alerting/alerting_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import (
"sync"
"time"

"github.com/cloudprober/cloudprober/probes/alerting/notifier"
"github.com/cloudprober/cloudprober/probes/alerting/alertinfo"
)

var statusTmpl = template.Must(template.New("status").Parse(`
Expand Down Expand Up @@ -78,29 +78,29 @@ var statusTmpl = template.Must(template.New("status").Parse(`
// resolvedAlert is used to keep track of resolved alerts, to be able to show
// alerts history on the alerts dashboard.
type resolvedAlert struct {
AlertInfo *notifier.AlertInfo
AlertInfo *alertinfo.AlertInfo
ResolvedAt time.Time
}

var maxAlertsHistory = 20

type state struct {
mu sync.RWMutex
currentAlerts map[string]*notifier.AlertInfo
currentAlerts map[string]*alertinfo.AlertInfo
resolvedAlerts []resolvedAlert
}

func (st *state) get(key string) *notifier.AlertInfo {
func (st *state) get(key string) *alertinfo.AlertInfo {
st.mu.RLock()
defer st.mu.RUnlock()
return st.currentAlerts[key]
}

func (st *state) add(key string, ai *notifier.AlertInfo) {
func (st *state) add(key string, ai *alertinfo.AlertInfo) {
st.mu.Lock()
defer st.mu.Unlock()
if st.currentAlerts == nil {
st.currentAlerts = make(map[string]*notifier.AlertInfo)
st.currentAlerts = make(map[string]*alertinfo.AlertInfo)
}
st.currentAlerts[key] = ai
}
Expand All @@ -117,11 +117,11 @@ func (st *state) resolve(key string) {
delete(st.currentAlerts, key)
}

func (st *state) list() ([]*notifier.AlertInfo, []resolvedAlert) {
func (st *state) list() ([]*alertinfo.AlertInfo, []resolvedAlert) {
st.mu.RLock()
defer st.mu.RUnlock()

var currentAlerts []*notifier.AlertInfo
var currentAlerts []*alertinfo.AlertInfo
for _, ai := range st.currentAlerts {
currentAlerts = append(currentAlerts, ai)
}
Expand All @@ -145,7 +145,7 @@ func (st *state) statusHTML() (string, error) {
}

err := statusTmpl.Execute(&statusBuf, struct {
CurrentAlerts []*notifier.AlertInfo
CurrentAlerts []*alertinfo.AlertInfo
PreviousAlerts []resolvedAlert
}{
CurrentAlerts: currentAlerts, PreviousAlerts: previousAlerts,
Expand All @@ -155,7 +155,7 @@ func (st *state) statusHTML() (string, error) {
}

var globalState = state{
currentAlerts: make(map[string]*notifier.AlertInfo),
currentAlerts: make(map[string]*alertinfo.AlertInfo),
}

func StatusHTML() (string, error) {
Expand Down