Skip to content

Commit

Permalink
Alerting: Fetch alerts from a remote Alertmanager (#75844)
Browse files Browse the repository at this point in the history
* Alerting: post alerts to the remote Alertmanager and fetch them

* fix broken tests

* Alerting: Add Mimir Backend image to devenv (blocks)

* add alerting as code owner for mimir_backend block

* Alerting: Use Mimir image to run integration tests for the remote Alertmanager

* skip integration test when running all tests

* skipping integration test when no Alertmanager URL is provided

* fix bad host for mimir_backend

* remove basic auth testing until we have an nginx image in our CI

* add integration tests for alerts

* fix tests

* change SendCtx -> Send, add context.Context to Send, fix CI

* add recover() for functions from the Prometheus Alertmanager HTTP client that could panic

* add TODO to implement PutAlerts in a way that mimics what Prometheus does

* fix log format
  • Loading branch information
santihernandezc committed Oct 19, 2023
1 parent 241996b commit 61cb267
Show file tree
Hide file tree
Showing 11 changed files with 234 additions and 57 deletions.
2 changes: 2 additions & 0 deletions pkg/services/ngalert/api/api_alertmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ func (srv AlertmanagerSrv) RouteGetAMAlertGroups(c *contextmodel.ReqContext) res
}

groups, err := am.GetAlertGroups(
c.Req.Context(),
c.QueryBoolWithDefault("active", true),
c.QueryBoolWithDefault("silenced", true),
c.QueryBoolWithDefault("inhibited", true),
Expand All @@ -173,6 +174,7 @@ func (srv AlertmanagerSrv) RouteGetAMAlerts(c *contextmodel.ReqContext) response
}

alerts, err := am.GetAlerts(
c.Req.Context(),
c.QueryBoolWithDefault("active", true),
c.QueryBoolWithDefault("silenced", true),
c.QueryBoolWithDefault("inhibited", true),
Expand Down
2 changes: 1 addition & 1 deletion pkg/services/ngalert/notifier/alertmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ func (am *alertmanager) buildReceiverIntegrations(receiver *alertingNotify.APIRe
}

// PutAlerts receives the alerts and then sends them through the corresponding route based on whether the alert has a receiver embedded or not
func (am *alertmanager) PutAlerts(postableAlerts apimodels.PostableAlerts) error {
func (am *alertmanager) PutAlerts(_ context.Context, postableAlerts apimodels.PostableAlerts) error {
alerts := make(alertingNotify.PostableAlerts, 0, len(postableAlerts.PostableAlerts))
for _, pa := range postableAlerts.PostableAlerts {
alerts = append(alerts, &alertingNotify.PostableAlert{
Expand Down
6 changes: 4 additions & 2 deletions pkg/services/ngalert/notifier/alerts.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
package notifier

import (
"context"

alertingNotify "github.com/grafana/alerting/notify"
)

func (am *alertmanager) GetAlerts(active, silenced, inhibited bool, filter []string, receivers string) (alertingNotify.GettableAlerts, error) {
func (am *alertmanager) GetAlerts(_ context.Context, active, silenced, inhibited bool, filter []string, receivers string) (alertingNotify.GettableAlerts, error) {
return am.Base.GetAlerts(active, silenced, inhibited, filter, receivers)
}

func (am *alertmanager) GetAlertGroups(active, silenced, inhibited bool, filter []string, receivers string) (alertingNotify.AlertGroups, error) {
func (am *alertmanager) GetAlertGroups(_ context.Context, active, silenced, inhibited bool, filter []string, receivers string) (alertingNotify.AlertGroups, error) {
return am.Base.GetAlertGroups(active, silenced, inhibited, filter, receivers)
}
115 changes: 97 additions & 18 deletions pkg/services/ngalert/notifier/external_alertmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,13 @@ import (

httptransport "github.com/go-openapi/runtime/client"
"github.com/go-openapi/strfmt"
alertingNotify "github.com/grafana/alerting/notify"
"github.com/grafana/grafana/pkg/infra/log"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/models"
amclient "github.com/prometheus/alertmanager/api/v2/client"
amalert "github.com/prometheus/alertmanager/api/v2/client/alert"
amalertgroup "github.com/prometheus/alertmanager/api/v2/client/alertgroup"
amsilence "github.com/prometheus/alertmanager/api/v2/client/silence"
)

Expand Down Expand Up @@ -69,6 +72,10 @@ func newExternalAlertmanager(cfg externalAlertmanagerConfig, orgID int64) (*exte
}, nil
}

// ApplyConfig is a no-op on the external Alertmanager: both arguments are
// ignored and nil is always returned.
func (am *externalAlertmanager) ApplyConfig(_ context.Context, _ *models.AlertConfiguration) error {
	var err error
	return err
}

// SaveAndApplyConfig is a no-op on the external Alertmanager: the given
// configuration is ignored and nil is always returned.
func (am *externalAlertmanager) SaveAndApplyConfig(_ context.Context, _ *apimodels.PostableUserConfig) error {
	var err error
	return err
}
Expand All @@ -78,6 +85,12 @@ func (am *externalAlertmanager) SaveAndApplyDefaultConfig(ctx context.Context) e
}

func (am *externalAlertmanager) CreateSilence(ctx context.Context, silence *apimodels.PostableSilence) (string, error) {
defer func() {
if r := recover(); r != nil {
am.log.Error("Panic while creating silence", "err", r)
}
}()

params := amsilence.NewPostSilencesParamsWithContext(ctx).WithSilence(silence)
res, err := am.amClient.Silence.PostSilences(params)
if err != nil {
Expand All @@ -88,6 +101,12 @@ func (am *externalAlertmanager) CreateSilence(ctx context.Context, silence *apim
}

func (am *externalAlertmanager) DeleteSilence(ctx context.Context, silenceID string) error {
defer func() {
if r := recover(); r != nil {
am.log.Error("Panic while deleting silence", "err", r)
}
}()

params := amsilence.NewDeleteSilenceParamsWithContext(ctx).WithSilenceID(strfmt.UUID(silenceID))
_, err := am.amClient.Silence.DeleteSilence(params)
if err != nil {
Expand All @@ -97,21 +116,28 @@ func (am *externalAlertmanager) DeleteSilence(ctx context.Context, silenceID str
}

// GetSilence fetches the silence identified by silenceID from the remote
// Alertmanager.
//
// It returns the zero-value silence and a non-nil error when the request
// fails, when the client answers without a payload, or when the generated
// Alertmanager client panics. A recovered panic is logged and then surfaced
// through the named error result so callers never receive an empty silence
// together with a nil error.
func (am *externalAlertmanager) GetSilence(ctx context.Context, silenceID string) (silence apimodels.GettableSilence, err error) {
	defer func() {
		if r := recover(); r != nil {
			am.log.Error("Panic while getting silence", "err", r)
			// Overwrite the results: without this the caller would see success.
			silence = apimodels.GettableSilence{}
			err = fmt.Errorf("panic while getting silence %s: %v", silenceID, r)
		}
	}()

	params := amsilence.NewGetSilenceParamsWithContext(ctx).WithSilenceID(strfmt.UUID(silenceID))
	res, err := am.amClient.Silence.GetSilence(params)
	if err != nil {
		return apimodels.GettableSilence{}, err
	}

	// In theory, this should never happen as it is not possible for GetSilence
	// to return an empty payload but no error.
	if res == nil || res.Payload == nil {
		return apimodels.GettableSilence{}, fmt.Errorf("unexpected error while trying to fetch silence: %s", silenceID)
	}

	return *res.Payload, nil
}

func (am *externalAlertmanager) ListSilences(ctx context.Context, filter []string) (apimodels.GettableSilences, error) {
defer func() {
if r := recover(); r != nil {
am.log.Error("Panic while listing silences", "err", r)
}
}()

params := amsilence.NewGetSilencesParamsWithContext(ctx).WithFilter(filter)
res, err := am.amClient.Silence.GetSilences(params)
if err != nil {
Expand All @@ -121,30 +147,83 @@ func (am *externalAlertmanager) ListSilences(ctx context.Context, filter []strin
return res.Payload, nil
}

func (am *externalAlertmanager) GetStatus() apimodels.GettableStatus {
return apimodels.GettableStatus{}
// GetAlerts fetches alerts from the remote Alertmanager. The active, silenced
// and inhibited flags, the filter list and the receiver are forwarded as
// request parameters.
//
// A panic raised inside the generated Alertmanager client is recovered,
// logged, and reported through the named error result, so a failed call is
// never mistaken for "no alerts".
func (am *externalAlertmanager) GetAlerts(ctx context.Context, active, silenced, inhibited bool, filter []string, receiver string) (alerts apimodels.GettableAlerts, err error) {
	defer func() {
		if r := recover(); r != nil {
			am.log.Error("Panic while getting alerts", "err", r)
			// Overwrite the results: without this the caller would get (nil, nil).
			alerts = apimodels.GettableAlerts{}
			err = fmt.Errorf("panic while getting alerts: %v", r)
		}
	}()

	params := amalert.NewGetAlertsParamsWithContext(ctx).
		WithActive(&active).
		WithSilenced(&silenced).
		WithInhibited(&inhibited).
		WithFilter(filter).
		WithReceiver(&receiver)

	res, err := am.amClient.Alert.GetAlerts(params)
	if err != nil {
		return apimodels.GettableAlerts{}, err
	}

	return res.Payload, nil
}

func (am *externalAlertmanager) GetAlerts(active, silenced, inhibited bool, filter []string, receiver string) (apimodels.GettableAlerts, error) {
return apimodels.GettableAlerts{}, nil
// GetAlertGroups fetches alert groups from the remote Alertmanager. The
// active, silenced and inhibited flags, the filter list and the receiver are
// forwarded as request parameters.
//
// A panic raised inside the generated Alertmanager client is recovered,
// logged, and reported through the named error result, so a failed call is
// never mistaken for "no alert groups".
func (am *externalAlertmanager) GetAlertGroups(ctx context.Context, active, silenced, inhibited bool, filter []string, receiver string) (groups apimodels.AlertGroups, err error) {
	defer func() {
		if r := recover(); r != nil {
			am.log.Error("Panic while getting alert groups", "err", r)
			// Overwrite the results: without this the caller would get (nil, nil).
			groups = apimodels.AlertGroups{}
			err = fmt.Errorf("panic while getting alert groups: %v", r)
		}
	}()

	params := amalertgroup.NewGetAlertGroupsParamsWithContext(ctx).
		WithActive(&active).
		WithSilenced(&silenced).
		WithInhibited(&inhibited).
		WithFilter(filter).
		WithReceiver(&receiver)

	res, err := am.amClient.Alertgroup.GetAlertGroups(params)
	if err != nil {
		return apimodels.AlertGroups{}, err
	}

	return res.Payload, nil
}

func (am *externalAlertmanager) GetAlertGroups(active, silenced, inhibited bool, filter []string, receiver string) (apimodels.AlertGroups, error) {
return apimodels.AlertGroups{}, nil
// PutAlerts posts the given alerts to the remote Alertmanager.
//
// Each alert is copied field by field (annotations, start/end times and the
// embedded Alert) into the payload type expected by the Alertmanager client
// before being sent in a single request.
//
// A panic raised inside the generated Alertmanager client is recovered,
// logged, and reported through the named error result, so a failed post is
// never reported as success.
//
// TODO: implement PutAlerts in a way that is similar to what Prometheus does.
// This current implementation is only good for testing methods that retrieve alerts from the remote Alertmanager.
// More details in issue https://github.com/grafana/grafana/issues/76692
func (am *externalAlertmanager) PutAlerts(ctx context.Context, postableAlerts apimodels.PostableAlerts) (err error) {
	defer func() {
		if r := recover(); r != nil {
			am.log.Error("Panic while putting alerts", "err", r)
			// Overwrite the result: without this the caller would see a nil error.
			err = fmt.Errorf("panic while putting alerts: %v", r)
		}
	}()

	alerts := make(alertingNotify.PostableAlerts, 0, len(postableAlerts.PostableAlerts))
	for _, pa := range postableAlerts.PostableAlerts {
		alerts = append(alerts, &alertingNotify.PostableAlert{
			Annotations: pa.Annotations,
			EndsAt:      pa.EndsAt,
			StartsAt:    pa.StartsAt,
			Alert:       pa.Alert,
		})
	}

	params := amalert.NewPostAlertsParamsWithContext(ctx).WithAlerts(alerts)
	_, err = am.amClient.Alert.PostAlerts(params)
	return err
}

func (am *externalAlertmanager) PutAlerts(postableAlerts apimodels.PostableAlerts) error {
return nil
// GetStatus always returns the zero-value status; nothing is fetched from the
// remote Alertmanager here.
func (am *externalAlertmanager) GetStatus() apimodels.GettableStatus {
	var status apimodels.GettableStatus
	return status
}

// GetReceivers always returns an empty, non-nil slice of receivers; the
// context is ignored.
func (am *externalAlertmanager) GetReceivers(_ context.Context) []apimodels.Receiver {
	receivers := make([]apimodels.Receiver, 0)
	return receivers
}

func (am *externalAlertmanager) ApplyConfig(ctx context.Context, config *models.AlertConfiguration) error {
return nil
}

// TestReceivers ignores its arguments and always returns a pointer to an empty
// TestReceiversResult together with a nil error.
func (am *externalAlertmanager) TestReceivers(_ context.Context, _ apimodels.TestReceiversConfigBodyParams) (*TestReceiversResult, error) {
	result := new(TestReceiversResult)
	return result, nil
}
Expand Down
72 changes: 72 additions & 0 deletions pkg/services/ngalert/notifier/external_alertmanager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,61 @@ func TestIntegrationRemoteAlertmanagerSilences(t *testing.T) {
require.Equal(t, *silences[1].Status.State, "expired")
}

func TestIntegrationRemoteAlertmanagerAlerts(t *testing.T) {
if testing.Short() {
t.Skip("skipping integration test")
}

amURL, ok := os.LookupEnv("AM_URL")
if !ok {
t.Skip("No Alertmanager URL provided")
}
tenantID := os.Getenv("AM_TENANT_ID")
password := os.Getenv("AM_PASSWORD")

cfg := externalAlertmanagerConfig{
URL: amURL + "/alertmanager",
TenantID: tenantID,
BasicAuthPassword: password,
DefaultConfig: validConfig,
}
am, err := newExternalAlertmanager(cfg, 1)
require.NoError(t, err)

// We should have no alerts and no groups at first.
alerts, err := am.GetAlerts(context.Background(), true, true, true, []string{}, "")
require.NoError(t, err)
require.Equal(t, 0, len(alerts))

alertGroups, err := am.GetAlertGroups(context.Background(), true, true, true, []string{}, "")
require.NoError(t, err)
require.Equal(t, 0, len(alertGroups))

// Let's create two active alerts and one expired one.
alert1 := genAlert(true, map[string]string{"test_1": "test_1"})
alert2 := genAlert(true, map[string]string{"test_2": "test_2"})
alert3 := genAlert(false, map[string]string{"test_3": "test_3"})
postableAlerts := apimodels.PostableAlerts{
PostableAlerts: []amv2.PostableAlert{alert1, alert2, alert3},
}
err = am.PutAlerts(context.Background(), postableAlerts)
require.NoError(t, err)

// We should have two alerts and one group now.
alerts, err = am.GetAlerts(context.Background(), true, true, true, []string{}, "")
require.NoError(t, err)
require.Equal(t, 2, len(alerts))

alertGroups, err = am.GetAlertGroups(context.Background(), true, true, true, []string{}, "")
require.NoError(t, err)
require.Equal(t, 1, len(alertGroups))

// Filtering by `test_1=test_1` should return one alert.
alerts, err = am.GetAlerts(context.Background(), true, true, true, []string{"test_1=test_1"}, "")
require.NoError(t, err)
require.Equal(t, 1, len(alerts))
}

func genSilence(createdBy string) apimodels.PostableSilence {
starts := strfmt.DateTime(time.Now().Add(time.Duration(rand.Int63n(9)+1) * time.Second))
ends := strfmt.DateTime(time.Now().Add(time.Duration(rand.Int63n(9)+10) * time.Second))
Expand All @@ -188,3 +243,20 @@ func genSilence(createdBy string) apimodels.PostableSilence {
},
}
}

// genAlert builds a PostableAlert carrying the given labels, a fixed test
// annotation and a fixed generator URL.
//
// When active is true the alert ends one minute after it starts; otherwise it
// ends at the very instant it starts. Both timestamps are derived from a single
// clock reading so that EndsAt can never fall before StartsAt, which separate
// time.Now() calls could otherwise produce for the expired alert.
func genAlert(active bool, labels map[string]string) amv2.PostableAlert {
	now := time.Now()

	endsAt := now
	if active {
		endsAt = now.Add(1 * time.Minute)
	}

	return amv2.PostableAlert{
		Annotations: amv2.LabelSet(map[string]string{"test_annotation": "test_annotation_value"}),
		StartsAt:    strfmt.DateTime(now),
		EndsAt:      strfmt.DateTime(endsAt),
		Alert: amv2.Alert{
			GeneratorURL: strfmt.URI("http://localhost:8080"),
			Labels:       amv2.LabelSet(labels),
		},
	}
}
8 changes: 4 additions & 4 deletions pkg/services/ngalert/notifier/multiorg_alertmanager.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,10 @@ var (

type Alertmanager interface {
// Configuration
ApplyConfig(context.Context, *models.AlertConfiguration) error
SaveAndApplyConfig(ctx context.Context, config *apimodels.PostableUserConfig) error
SaveAndApplyDefaultConfig(ctx context.Context) error
GetStatus() apimodels.GettableStatus
ApplyConfig(context.Context, *models.AlertConfiguration) error

// Silences
CreateSilence(context.Context, *apimodels.PostableSilence) (string, error)
Expand All @@ -44,9 +44,9 @@ type Alertmanager interface {
ListSilences(context.Context, []string) (apimodels.GettableSilences, error)

// Alerts
GetAlerts(active, silenced, inhibited bool, filter []string, receiver string) (apimodels.GettableAlerts, error)
GetAlertGroups(active, silenced, inhibited bool, filter []string, receiver string) (apimodels.AlertGroups, error)
PutAlerts(postableAlerts apimodels.PostableAlerts) error
GetAlerts(ctx context.Context, active, silenced, inhibited bool, filter []string, receiver string) (apimodels.GettableAlerts, error)
GetAlertGroups(ctx context.Context, active, silenced, inhibited bool, filter []string, receiver string) (apimodels.AlertGroups, error)
PutAlerts(context.Context, apimodels.PostableAlerts) error

// Receivers
GetReceivers(ctx context.Context) []apimodels.Receiver
Expand Down

0 comments on commit 61cb267

Please sign in to comment.