Alerting: Expose info about notification delivery errors in a new /receivers endpoint (#55429) (#56899)
* (WIP) switch to fork AM, first implementation of the API, generate spec
* get receivers avoiding race conditions
* use latest version of our forked AM, tests
* make linter happy, delete TODO comment
* update number of expected paths to += 2
* delete unused endpoint code, code review comments, tests
* Update pkg/services/ngalert/notifier/alertmanager.go
Co-authored-by: Matthew Jacobson <matthew.jacobson@grafana.com>
* remove call to fmt.Println
* clear naming for fields
* shorter variable names in GetReceivers
Co-authored-by: Matthew Jacobson <matthew.jacobson@grafana.com>
(cherry picked from commit 09f8e026a1)
This commit is contained in:
@@ -728,7 +728,7 @@ func TestNotificationChannels(t *testing.T) {
|
||||
channels.GetBoundary = func() string { return "abcd" }
|
||||
|
||||
env.NotificationService.EmailHandlerSync = mockEmail.sendEmailCommandHandlerSync
|
||||
// As we are using a NotificationService mock here, but he test expects real NotificationService -
|
||||
// As we are using a NotificationService mock here, but the test expects real NotificationService -
|
||||
// we try to issue a real POST request here
|
||||
env.NotificationService.WebhookHandler = func(_ context.Context, cmd *models.SendWebhookSync) error {
|
||||
if res, err := http.Post(cmd.Url, "", strings.NewReader(cmd.Body)); err == nil {
|
||||
@@ -770,11 +770,31 @@ func TestNotificationChannels(t *testing.T) {
|
||||
re := regexp.MustCompile(`"uid":"([\w|-]*)"`)
|
||||
e := getExpAlertmanagerConfigFromAPI(mockChannel.server.Addr)
|
||||
require.JSONEq(t, e, string(re.ReplaceAll([]byte(b), []byte(`"uid":""`))))
|
||||
|
||||
// Check the receivers API. Neither errors nor attempts to notify should be registered.
|
||||
receiversURL := fmt.Sprintf("http://grafana:password@%s/api/alertmanager/grafana/config/api/v1/receivers", grafanaListedAddr)
|
||||
resp = getRequest(t, receiversURL, http.StatusOK) // nolint
|
||||
b = getBody(t, resp.Body)
|
||||
|
||||
var receivers apimodels.Receivers
|
||||
err := json.Unmarshal([]byte(b), &receivers)
|
||||
require.NoError(t, err)
|
||||
for _, rcv := range receivers {
|
||||
require.NotNil(t, rcv.Name)
|
||||
require.NotNil(t, rcv.Active)
|
||||
require.NotEmpty(t, rcv.Integrations)
|
||||
for _, integration := range rcv.Integrations {
|
||||
require.NotNil(t, integration.Name)
|
||||
require.NotNil(t, integration.SendResolved)
|
||||
require.Equal(t, "", integration.LastNotifyAttemptError)
|
||||
require.Zero(t, integration.LastNotifyAttempt)
|
||||
require.Equal(t, "0s", integration.LastNotifyAttemptDuration)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// Create rules that will fire as quickly as possible
|
||||
|
||||
originalFunction := store.GenerateNewAlertRuleUID
|
||||
t.Cleanup(func() {
|
||||
store.GenerateNewAlertRuleUID = originalFunction
|
||||
@@ -791,6 +811,7 @@ func TestNotificationChannels(t *testing.T) {
|
||||
// Eventually, we'll get all the desired alerts.
|
||||
// nolint:gosec
|
||||
require.Eventually(t, func() bool {
|
||||
// TODO: not waiting for the failed notifications, flaky test?
|
||||
return mockChannel.totalNotifications() >= len(nonEmailAlertNames) && len(mockEmail.emails) >= 1
|
||||
}, 30*time.Second, 1*time.Second)
|
||||
|
||||
@@ -798,6 +819,60 @@ func TestNotificationChannels(t *testing.T) {
|
||||
require.Equal(t, expEmailNotifications, mockEmail.emails)
|
||||
require.NoError(t, mockChannel.Close())
|
||||
|
||||
// Check the receivers API. Errors and inactive receivers are expected, attempts to deliver notifications should be registered.
|
||||
receiversURL := fmt.Sprintf("http://grafana:password@%s/api/alertmanager/grafana/config/api/v1/receivers", grafanaListedAddr)
|
||||
resp := getRequest(t, receiversURL, http.StatusOK) // nolint
|
||||
b := getBody(t, resp.Body)
|
||||
|
||||
var receivers apimodels.Receivers
|
||||
err := json.Unmarshal([]byte(b), &receivers)
|
||||
require.NoError(t, err)
|
||||
for _, rcv := range receivers {
|
||||
var expActive bool
|
||||
if _, ok := expInactiveReceivers[*rcv.Name]; !ok {
|
||||
expActive = true
|
||||
}
|
||||
var expErr bool
|
||||
if _, ok := expNotificationErrors[*rcv.Name]; ok {
|
||||
expErr = true
|
||||
}
|
||||
|
||||
require.NotNil(t, rcv.Name)
|
||||
require.NotNil(t, rcv.Active)
|
||||
require.NotEmpty(t, rcv.Integrations)
|
||||
if expActive {
|
||||
require.True(t, *rcv.Active)
|
||||
}
|
||||
|
||||
// We don't have test alerts for the default notifier, continue iterating.
|
||||
if *rcv.Name == "grafana-default-email" {
|
||||
continue
|
||||
}
|
||||
|
||||
for _, integration := range rcv.Integrations {
|
||||
require.NotNil(t, integration.Name)
|
||||
require.NotNil(t, integration.SendResolved)
|
||||
|
||||
// If the receiver is not active, no attempts to send notifications should be registered.
|
||||
if expActive {
|
||||
require.NotZero(t, integration.LastNotifyAttempt)
|
||||
require.NotEqual(t, "0s", integration.LastNotifyAttemptDuration)
|
||||
} else {
|
||||
require.Zero(t, integration.LastNotifyAttempt)
|
||||
require.Equal(t, "0s", integration.LastNotifyAttemptDuration)
|
||||
}
|
||||
|
||||
// Check whether we're expecting an error on this integration.
|
||||
if expErr {
|
||||
for _, integration := range rcv.Integrations {
|
||||
require.Equal(t, expNotificationErrors[*rcv.Name], integration.LastNotifyAttemptError)
|
||||
}
|
||||
} else {
|
||||
require.Equal(t, "", integration.LastNotifyAttemptError)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
// Delete the configuration; so it returns the default configuration.
|
||||
u := fmt.Sprintf("http://grafana:password@%s/api/alertmanager/grafana/config/api/v1/alerts", grafanaListedAddr)
|
||||
@@ -859,6 +934,10 @@ var emailAlertNames = []string{
|
||||
"EmailAlert",
|
||||
}
|
||||
|
||||
var failedAlertNames = []string{
|
||||
"SlackFailedAlert",
|
||||
}
|
||||
|
||||
func getRulesConfig(t *testing.T) string {
|
||||
t.Helper()
|
||||
interval, err := model.ParseDuration("10s")
|
||||
@@ -869,7 +948,10 @@ func getRulesConfig(t *testing.T) string {
|
||||
}
|
||||
|
||||
// Create rules that will fire as quickly as possible for all the routes.
|
||||
for _, alertName := range append(nonEmailAlertNames, emailAlertNames...) {
|
||||
rulesToCreate := append(nonEmailAlertNames, emailAlertNames...)
|
||||
rulesToCreate = append(rulesToCreate, failedAlertNames...)
|
||||
|
||||
for _, alertName := range rulesToCreate {
|
||||
rules.Rules = append(rules.Rules, apimodels.PostableExtendedRuleNode{
|
||||
GrafanaManagedAlert: &apimodels.PostableGrafanaRule{
|
||||
Title: alertName,
|
||||
@@ -1124,6 +1206,26 @@ const alertmanagerConfig = `
|
||||
"alertname=\"SlackAlert2\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"receiver": "slack_failed_recv",
|
||||
"group_wait": "0s",
|
||||
"group_by": [
|
||||
"alertname"
|
||||
],
|
||||
"matchers": [
|
||||
"alertname=\"SlackFailedAlert\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"receiver": "slack_inactive_recv",
|
||||
"group_wait": "0s",
|
||||
"group_by": [
|
||||
"alertname"
|
||||
],
|
||||
"matchers": [
|
||||
"alertname=\"Inactive\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"receiver": "pagerduty_recv",
|
||||
"group_wait": "0s",
|
||||
@@ -1273,7 +1375,7 @@ const alertmanagerConfig = `
|
||||
"matchers": [
|
||||
"alertname=\"TelegramAlert\""
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"receivers": [
|
||||
@@ -1483,7 +1585,7 @@ const alertmanagerConfig = `
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
{
|
||||
"name": "slack_recv1",
|
||||
"grafana_managed_receiver_configs": [
|
||||
{
|
||||
@@ -1506,8 +1608,8 @@ const alertmanagerConfig = `
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
},
|
||||
{
|
||||
"name": "slack_recv2",
|
||||
"grafana_managed_receiver_configs": [
|
||||
{
|
||||
@@ -1523,6 +1625,39 @@ const alertmanagerConfig = `
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "slack_failed_recv",
|
||||
"grafana_managed_receiver_configs": [
|
||||
{
|
||||
"name": "slack_failed_test",
|
||||
"type": "slack",
|
||||
"settings": {
|
||||
"recipient": "#test-channel",
|
||||
"username": "test",
|
||||
"text": "Integration Test"
|
||||
},
|
||||
"secureSettings": {
|
||||
"url": "htt://127.0.0.1:8080/slack_failed_recv/slack_failed_test"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "slack_inactive_recv",
|
||||
"grafana_managed_receiver_configs": [
|
||||
{
|
||||
"name": "inactive",
|
||||
"type": "slack",
|
||||
"settings": {
|
||||
"recipient": "#inactive-channel",
|
||||
"username": "Integration Test"
|
||||
},
|
||||
"secureSettings": {
|
||||
"token": "myfullysecrettoken"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "pagerduty_recv",
|
||||
@@ -1589,7 +1724,26 @@ var expAlertmanagerConfigFromAPI = `
|
||||
"alertname=\"SlackAlert2\""
|
||||
]
|
||||
},
|
||||
{
|
||||
{
|
||||
"receiver": "slack_failed_recv",
|
||||
"group_wait": "0s",
|
||||
"group_by": [
|
||||
"alertname"
|
||||
],
|
||||
"matchers": [
|
||||
"alertname=\"SlackFailedAlert\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"receiver": "slack_inactive_recv",
|
||||
"group_wait": "0s",
|
||||
"group_by": [
|
||||
"alertname"
|
||||
],
|
||||
"matchers": [
|
||||
"alertname=\"Inactive\""
|
||||
]
|
||||
}, {
|
||||
"receiver": "pagerduty_recv",
|
||||
"group_wait": "0s",
|
||||
"group_by": [
|
||||
@@ -1738,7 +1892,7 @@ var expAlertmanagerConfigFromAPI = `
|
||||
"matchers": [
|
||||
"alertname=\"TelegramAlert\""
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
"templates": null,
|
||||
@@ -1986,7 +2140,7 @@ var expAlertmanagerConfigFromAPI = `
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
{
|
||||
"name": "slack_recv1",
|
||||
"grafana_managed_receiver_configs": [
|
||||
{
|
||||
@@ -2031,6 +2185,43 @@ var expAlertmanagerConfigFromAPI = `
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "slack_failed_recv",
|
||||
"grafana_managed_receiver_configs": [
|
||||
{
|
||||
"uid": "",
|
||||
"name": "slack_failed_test",
|
||||
"type": "slack",
|
||||
"disableResolveMessage": false,
|
||||
"settings": {
|
||||
"recipient": "#test-channel",
|
||||
"username": "test",
|
||||
"text": "Integration Test"
|
||||
},
|
||||
"secureFields": {
|
||||
"url": true
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "slack_inactive_recv",
|
||||
"grafana_managed_receiver_configs": [
|
||||
{
|
||||
"uid": "",
|
||||
"name": "inactive",
|
||||
"type": "slack",
|
||||
"disableResolveMessage": false,
|
||||
"settings": {
|
||||
"recipient": "#inactive-channel",
|
||||
"username": "Integration Test"
|
||||
},
|
||||
"secureFields": {
|
||||
"token": true
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"name": "pagerduty_recv",
|
||||
"grafana_managed_receiver_configs": [
|
||||
@@ -2435,3 +2626,13 @@ var expNonEmailNotifications = map[string][]string{
|
||||
]`,
|
||||
},
|
||||
}
|
||||
|
||||
// expNotificationErrors maps a receiver name to its expected error string.
|
||||
var expNotificationErrors = map[string]string{
|
||||
"slack_failed_recv": `Post "htt://127.0.0.1:8080/slack_failed_recv/slack_failed_test": unsupported protocol scheme "htt"`,
|
||||
}
|
||||
|
||||
// expInactiveReceivers is the set of receiver names that are expected to be reported as inactive.
|
||||
var expInactiveReceivers = map[string]struct{}{
|
||||
"slack_inactive_recv": {},
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user