Alerting: Repurpose rule testing endpoint to return potential alerts (#69755)

* Alerting: Repurpose rule testing endpoint to return potential alerts

This feature replaces the existing Grafana ruler testing API endpoint /api/v1/rule/test/grafana, which is no longer in use. The new endpoint returns a list of potential alerts created by the given alert rule, including both built-in and interpolated labels and annotations.

The key priority of this endpoint is that it is intended to be as true as possible to what would be generated by the ruler except that the resulting alerts are not filtered to only Resolved / Firing and ready to be sent.

This means that the endpoint will, among other things:

- Attach static annotations and labels from the rule configuration to the alert instances.
- Attach dynamic annotations from the datasource to the alert instances.
- Attach built-in labels and annotations created by the Grafana Ruler (such as alertname and grafana_folder) to the alert instances.
- Interpolate templated annotations / labels and accept allowed template functions.
This commit is contained in:
Matthew Jacobson
2023-06-08 18:59:54 -04:00
committed by GitHub
parent 0c688190f7
commit ba3994d338
19 changed files with 1246 additions and 361 deletions
+178
View File
@@ -0,0 +1,178 @@
package state
import (
"encoding/json"
"fmt"
"net/url"
"path"
"strconv"
"time"
"github.com/benbjohnson/clock"
"github.com/go-openapi/strfmt"
alertingModels "github.com/grafana/alerting/models"
"github.com/grafana/grafana-plugin-sdk-go/data"
"github.com/prometheus/alertmanager/api/v2/models"
"github.com/prometheus/common/model"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
)
const (
	// NoDataAlertName is the value written to the alert name label
	// (model.AlertNameLabel) by noDataAlert.
	NoDataAlertName = "DatasourceNoData"
	// ErrorAlertName is the value written to the alert name label
	// (model.AlertNameLabel) by errorAlert.
	ErrorAlertName = "DatasourceError"
	// Rulename is the label key under which noDataAlert and errorAlert keep the
	// original alert name before overwriting the alert name label.
	Rulename = "rulename"
)
// StateToPostableAlert builds the Alertmanager model for the given state.
// Labels and annotations are copied from the state, so the state's own maps
// are never modified. On top of the copied annotations it adds:
//   - the JSON-encoded evaluation values, when the state has any;
//   - the last evaluation string, when it is not empty;
//   - an image URI, when the state carries an image;
//   - the state reason, when it is not empty;
//   - the organisation ID, when it is not zero.
//
// GeneratorURL points to the rule's view page when both appURL and the rule
// UID label are available, falls back to appURL itself when only appURL is
// available, and is empty when appURL is nil.
//
// For the NoData and Error evaluation states the alert is produced by
// noDataAlert and errorAlert respectively, which back up the original alert
// name in the Rulename label and overwrite model.AlertNameLabel.
func StateToPostableAlert(alertState *State, appURL *url.URL) *models.PostableAlert {
	labels := alertState.Labels.Copy()
	annotations := data.Labels(alertState.Annotations).Copy()

	// The values are stored as JSON and expanded later; a marshalling failure
	// just leaves the annotation out.
	if len(alertState.Values) > 0 {
		encoded, err := json.Marshal(alertState.Values)
		if err == nil {
			annotations[alertingModels.ValuesAnnotation] = string(encoded)
		}
	}

	if valueString := alertState.LastEvaluationString; valueString != "" {
		annotations[alertingModels.ValueStringAnnotation] = valueString
	}

	if image := alertState.Image; image != nil {
		annotations[alertingModels.ImageTokenAnnotation] = generateImageURI(image)
	}

	if reason := alertState.StateReason; reason != "" {
		annotations[alertingModels.StateReasonAnnotation] = reason
	}

	if orgID := alertState.OrgID; orgID != 0 {
		annotations[alertingModels.OrgIDAnnotation] = strconv.FormatInt(orgID, 10)
	}

	generatorURL := ""
	if appURL != nil {
		ruleUID := labels[alertingModels.RuleUIDLabel]
		if ruleUID == "" {
			generatorURL = appURL.String()
		} else {
			// Work on a copy so the caller's URL is left untouched.
			ruleURL := *appURL
			ruleURL.Path = path.Join(ruleURL.Path, fmt.Sprintf("/alerting/grafana/%s/view", ruleUID))
			generatorURL = ruleURL.String()
		}
	}

	switch alertState.State {
	case eval.NoData:
		return noDataAlert(labels, annotations, alertState, generatorURL)
	case eval.Error:
		return errorAlert(labels, annotations, alertState, generatorURL)
	}

	alert := &models.PostableAlert{
		Annotations: models.LabelSet(annotations),
		StartsAt:    strfmt.DateTime(alertState.StartsAt),
		EndsAt:      strfmt.DateTime(alertState.EndsAt),
	}
	alert.Labels = models.LabelSet(labels)
	alert.GeneratorURL = strfmt.URI(generatorURL)
	return alert
}
// noDataAlert builds the special alert Grafana sends to the Alertmanager to
// indicate that no data was received from the datasource. It replaces the
// legacy "Keep Last State" behavior by moving the no-data scenario out of the
// regular alerting flow and into a separate alert.
// The alert is defined as:
// { alertname=DatasourceNoData rulename=original_alertname } + { rule labelset } + { rule annotations }
//
// The labels map is modified in place.
func noDataAlert(labels data.Labels, annotations data.Labels, alertState *State, urlStr string) *models.PostableAlert {
	// Keep the original alert name around before it is overwritten below.
	original, found := labels[model.AlertNameLabel]
	if found {
		labels[Rulename] = original
	}
	labels[model.AlertNameLabel] = NoDataAlertName

	alert := &models.PostableAlert{
		Annotations: models.LabelSet(annotations),
		StartsAt:    strfmt.DateTime(alertState.StartsAt),
		EndsAt:      strfmt.DateTime(alertState.EndsAt),
	}
	alert.Labels = models.LabelSet(labels)
	alert.GeneratorURL = strfmt.URI(urlStr)
	return alert
}
// errorAlert builds the special alert sent when evaluating an alert rule
// failed with an error. Like noDataAlert, it replaces the old "Keep Last
// State" behaviour with a separate alert, here named DatasourceError.
// The original alert name, if any, is preserved in the Rulename label.
//
// The labels map is modified in place.
func errorAlert(labels, annotations data.Labels, alertState *State, urlStr string) *models.PostableAlert {
	// Keep the original alert name around before it is overwritten below.
	original, found := labels[model.AlertNameLabel]
	if found {
		labels[Rulename] = original
	}
	labels[model.AlertNameLabel] = ErrorAlertName

	alert := &models.PostableAlert{
		Annotations: models.LabelSet(annotations),
		StartsAt:    strfmt.DateTime(alertState.StartsAt),
		EndsAt:      strfmt.DateTime(alertState.EndsAt),
	}
	alert.Labels = models.LabelSet(labels)
	alert.GeneratorURL = strfmt.URI(urlStr)
	return alert
}
// FromStateTransitionToPostableAlerts converts every transition that needs
// sending (according to the state manager's ResendDelay) into a postable
// alert. Each converted state gets its LastSentAt set to the current time and
// is handed back to the state manager, except states whose reason is
// StateReasonMissingSeries: those are still returned as alerts but are not
// put back.
func FromStateTransitionToPostableAlerts(firingStates []StateTransition, stateManager *Manager, appURL *url.URL) apimodels.PostableAlerts {
	result := apimodels.PostableAlerts{PostableAlerts: make([]models.PostableAlert, 0, len(firingStates))}
	var toStore []*State
	sentAt := time.Now()

	for _, transition := range firingStates {
		if transition.NeedsSending(stateManager.ResendDelay) {
			postable := StateToPostableAlert(transition.State, appURL)
			result.PostableAlerts = append(result.PostableAlerts, *postable)

			// Stale states must not be put back into the state manager.
			if transition.StateReason != ngModels.StateReasonMissingSeries {
				transition.LastSentAt = sentAt
				toStore = append(toStore, transition.State)
			}
		}
	}

	stateManager.Put(toStore)
	return result
}
// FromAlertsStateToStoppedAlert converts transitions whose previous state was
// firing (anything other than eval.Normal or eval.Pending) into
// models.PostableAlert values with EndsAt set to the current time of the
// given clock.
func FromAlertsStateToStoppedAlert(firingStates []StateTransition, appURL *url.URL, clock clock.Clock) apimodels.PostableAlerts {
	stoppedAt := strfmt.DateTime(clock.Now())
	result := apimodels.PostableAlerts{PostableAlerts: make([]models.PostableAlert, 0, len(firingStates))}

	for _, st := range firingStates {
		switch st.PreviousState {
		case eval.Normal, eval.Pending:
			// Was not firing before, so there is nothing to stop.
			continue
		}
		stopped := StateToPostableAlert(st.State, appURL)
		stopped.EndsAt = stoppedAt
		result.PostableAlerts = append(result.PostableAlerts, *stopped)
	}

	return result
}
// generateImageURI returns the identifier to use for the image: the image URL
// when one is set, otherwise the image token prefixed with `token://`.
// The image must not be nil.
func generateImageURI(image *ngModels.Image) string {
	if image.URL == "" {
		return "token://" + image.Token
	}
	return image.URL
}
+286
View File
@@ -0,0 +1,286 @@
package state
import (
"fmt"
"math/rand"
"net/url"
"testing"
"time"
"github.com/benbjohnson/clock"
"github.com/go-openapi/strfmt"
alertingModels "github.com/grafana/alerting/models"
"github.com/prometheus/alertmanager/api/v2/models"
"github.com/prometheus/common/model"
"github.com/stretchr/testify/require"
"github.com/grafana/grafana/pkg/services/ngalert/eval"
ngModels "github.com/grafana/grafana/pkg/services/ngalert/models"
"github.com/grafana/grafana/pkg/util"
)
// Test_StateToPostableAlert checks, for every evaluation state, how
// StateToPostableAlert derives the generator URL, timestamps, annotations and
// labels of the resulting alert.
func Test_StateToPostableAlert(t *testing.T) {
	appURL := &url.URL{
		// url.URL.Scheme holds the bare scheme; String() adds the colon itself.
		Scheme: "http",
		Host:   fmt.Sprintf("host-%d", rand.Int()),
		Path:   fmt.Sprintf("path-%d", rand.Int()),
	}

	testCases := []struct {
		name  string
		state eval.State
	}{
		{
			name:  "when state is Normal",
			state: eval.Normal,
		},
		{
			name:  "when state is Alerting",
			state: eval.Alerting,
		},
		{
			name:  "when state is Pending",
			state: eval.Pending,
		},
		{
			name:  "when state is NoData",
			state: eval.NoData,
		},
		{
			name:  "when state is Error",
			state: eval.Error,
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			t.Run("it generates proper URL", func(t *testing.T) {
				t.Run("to alert rule", func(t *testing.T) {
					alertState := randomState(tc.state)
					alertState.Labels[alertingModels.RuleUIDLabel] = alertState.AlertRuleUID
					result := StateToPostableAlert(alertState, appURL)
					u := *appURL
					u.Path = u.Path + "/alerting/grafana/" + alertState.AlertRuleUID + "/view"
					require.Equal(t, u.String(), result.Alert.GeneratorURL.String())
				})

				t.Run("app URL as is if rule UID is not specified", func(t *testing.T) {
					alertState := randomState(tc.state)
					// An empty UID label behaves the same as a missing one.
					alertState.Labels[alertingModels.RuleUIDLabel] = ""
					result := StateToPostableAlert(alertState, appURL)
					require.Equal(t, appURL.String(), result.Alert.GeneratorURL.String())

					delete(alertState.Labels, alertingModels.RuleUIDLabel)
					result = StateToPostableAlert(alertState, appURL)
					require.Equal(t, appURL.String(), result.Alert.GeneratorURL.String())
				})

				t.Run("empty string if app URL is not provided", func(t *testing.T) {
					alertState := randomState(tc.state)
					alertState.Labels[alertingModels.RuleUIDLabel] = alertState.AlertRuleUID
					result := StateToPostableAlert(alertState, nil)
					require.Equal(t, "", result.Alert.GeneratorURL.String())
				})
			})

			t.Run("Start and End timestamps should be the same", func(t *testing.T) {
				alertState := randomState(tc.state)
				result := StateToPostableAlert(alertState, appURL)
				require.Equal(t, strfmt.DateTime(alertState.StartsAt), result.StartsAt)
				require.Equal(t, strfmt.DateTime(alertState.EndsAt), result.EndsAt)
			})

			t.Run("should copy annotations", func(t *testing.T) {
				alertState := randomState(tc.state)
				alertState.Annotations = randomMapOfStrings()
				result := StateToPostableAlert(alertState, appURL)
				require.Equal(t, models.LabelSet(alertState.Annotations), result.Annotations)

				t.Run("add __value_string__ if it has results", func(t *testing.T) {
					alertState := randomState(tc.state)
					alertState.Annotations = randomMapOfStrings()
					expectedValueString := util.GenerateShortUID()
					alertState.LastEvaluationString = expectedValueString

					result := StateToPostableAlert(alertState, appURL)

					expected := make(models.LabelSet, len(alertState.Annotations)+1)
					for k, v := range alertState.Annotations {
						expected[k] = v
					}
					expected["__value_string__"] = expectedValueString

					require.Equal(t, expected, result.Annotations)

					// even overwrites
					alertState.Annotations["__value_string__"] = util.GenerateShortUID()
					result = StateToPostableAlert(alertState, appURL)
					require.Equal(t, expected, result.Annotations)
				})

				t.Run("add __alertImageToken__ if there is an image token", func(t *testing.T) {
					alertState := randomState(tc.state)
					alertState.Annotations = randomMapOfStrings()
					alertState.Image = &ngModels.Image{Token: "test_token"}

					result := StateToPostableAlert(alertState, appURL)

					expected := make(models.LabelSet, len(alertState.Annotations)+1)
					for k, v := range alertState.Annotations {
						expected[k] = v
					}
					expected["__alertImageToken__"] = "token://" + alertState.Image.Token

					require.Equal(t, expected, result.Annotations)
				})
			})

			t.Run("should add state reason annotation if not empty", func(t *testing.T) {
				alertState := randomState(tc.state)
				alertState.StateReason = "TEST_STATE_REASON"
				result := StateToPostableAlert(alertState, appURL)
				require.Equal(t, alertState.StateReason, result.Annotations[ngModels.StateReasonAnnotation])
			})

			switch tc.state {
			case eval.NoData:
				t.Run("should keep existing labels and change name", func(t *testing.T) {
					alertState := randomState(tc.state)
					alertState.Labels = randomMapOfStrings()
					alertName := util.GenerateShortUID()
					alertState.Labels[model.AlertNameLabel] = alertName

					result := StateToPostableAlert(alertState, appURL)

					expected := make(models.LabelSet, len(alertState.Labels)+1)
					for k, v := range alertState.Labels {
						expected[k] = v
					}
					expected[model.AlertNameLabel] = NoDataAlertName
					expected[Rulename] = alertName

					require.Equal(t, expected, result.Labels)

					t.Run("should not backup original alert name if it does not exist", func(t *testing.T) {
						alertState := randomState(tc.state)
						alertState.Labels = randomMapOfStrings()
						delete(alertState.Labels, model.AlertNameLabel)

						result := StateToPostableAlert(alertState, appURL)

						require.Equal(t, NoDataAlertName, result.Labels[model.AlertNameLabel])
						// Check the label keys, not the alert name string: the
						// backup label must be absent from the label set.
						require.NotContains(t, result.Labels, Rulename)
					})
				})
			case eval.Error:
				t.Run("should keep existing labels and change name", func(t *testing.T) {
					alertState := randomState(tc.state)
					alertState.Labels = randomMapOfStrings()
					alertName := util.GenerateShortUID()
					alertState.Labels[model.AlertNameLabel] = alertName

					result := StateToPostableAlert(alertState, appURL)

					expected := make(models.LabelSet, len(alertState.Labels)+1)
					for k, v := range alertState.Labels {
						expected[k] = v
					}
					expected[model.AlertNameLabel] = ErrorAlertName
					expected[Rulename] = alertName

					require.Equal(t, expected, result.Labels)

					t.Run("should not backup original alert name if it does not exist", func(t *testing.T) {
						alertState := randomState(tc.state)
						alertState.Labels = randomMapOfStrings()
						delete(alertState.Labels, model.AlertNameLabel)

						result := StateToPostableAlert(alertState, appURL)

						require.Equal(t, ErrorAlertName, result.Labels[model.AlertNameLabel])
						// Check the label keys, not the alert name string: the
						// backup label must be absent from the label set.
						require.NotContains(t, result.Labels, Rulename)
					})
				})
			default:
				t.Run("should copy labels as is", func(t *testing.T) {
					alertState := randomState(tc.state)
					alertState.Labels = randomMapOfStrings()

					result := StateToPostableAlert(alertState, appURL)

					require.Equal(t, models.LabelSet(alertState.Labels), result.Labels)
				})
			}
		})
	}
}
// Test_FromAlertsStateToStoppedAlert builds every combination of current and
// previous evaluation state and checks that only transitions coming from
// Alerting, Error or NoData are returned, each with EndsAt set to the mock
// clock's current time.
func Test_FromAlertsStateToStoppedAlert(t *testing.T) {
	appURL := &url.URL{
		// url.URL.Scheme holds the bare scheme; String() adds the colon itself.
		Scheme: "http",
		Host:   fmt.Sprintf("host-%d", rand.Int()),
		Path:   fmt.Sprintf("path-%d", rand.Int()),
	}

	evalStates := [...]eval.State{eval.Normal, eval.Alerting, eval.Pending, eval.Error, eval.NoData}
	states := make([]StateTransition, 0, len(evalStates)*len(evalStates))
	for _, to := range evalStates {
		for _, from := range evalStates {
			states = append(states, StateTransition{
				State:         randomState(to),
				PreviousState: from,
			})
		}
	}

	clk := clock.NewMock()
	clk.Set(time.Now())

	// The expectation is built independently of the function under test by
	// listing the firing previous states explicitly.
	expected := make([]models.PostableAlert, 0, len(states))
	for _, s := range states {
		if !(s.PreviousState == eval.Alerting || s.PreviousState == eval.Error || s.PreviousState == eval.NoData) {
			continue
		}
		alert := StateToPostableAlert(s.State, appURL)
		alert.EndsAt = strfmt.DateTime(clk.Now())
		expected = append(expected, *alert)
	}

	result := FromAlertsStateToStoppedAlert(states, appURL, clk)

	require.Equal(t, expected, result.PostableAlerts)
}
// randomMapOfStrings returns a map populated by five insertions of generated
// short-UID keys, each mapped to a generated short-UID value.
func randomMapOfStrings() map[string]string {
	const entries = 5
	generated := make(map[string]string, entries)
	for n := entries; n > 0; n-- {
		generated[util.GenerateShortUID()] = util.GenerateShortUID()
	}
	return generated
}
// randomDuration returns a random whole number of seconds between 1 and 599
// inclusive.
func randomDuration() time.Duration {
	seconds := rand.Int63n(599) + 1
	return time.Duration(seconds) * time.Second
}
// randomTimeInFuture returns the current time moved forward by a random
// duration.
func randomTimeInFuture() time.Time {
	now := time.Now()
	return now.Add(randomDuration())
}
// randomTimeInPast returns the current time moved back by a random duration.
func randomTimeInPast() time.Time {
	now := time.Now()
	return now.Add(-randomDuration())
}
// randomState returns a State in the given evaluation state with a generated
// rule UID, randomized timestamps and durations, and empty (non-nil)
// annotation, label and value maps.
func randomState(evalState eval.State) *State {
	s := new(State)
	s.State = evalState
	s.AlertRuleUID = util.GenerateShortUID()
	s.StartsAt = time.Now()
	s.EndsAt = randomTimeInFuture()
	s.LastEvaluationTime = randomTimeInPast()
	s.EvaluationDuration = randomDuration()
	s.LastSentAt = randomTimeInPast()
	s.Annotations = map[string]string{}
	s.Labels = map[string]string{}
	s.Values = map[string]float64{}
	return s
}
+16
View File
@@ -8,7 +8,9 @@ import (
"strings"
"time"
alertingModels "github.com/grafana/alerting/models"
"github.com/grafana/grafana-plugin-sdk-go/data"
prometheusModel "github.com/prometheus/common/model"
"github.com/grafana/grafana/pkg/expr"
"github.com/grafana/grafana/pkg/infra/log"
@@ -397,3 +399,17 @@ func FormatStateAndReason(state eval.State, reason string) string {
}
return s
}
// GetRuleExtraLabels returns the built-in labels derived from the rule: its
// namespace UID, its title (as the alert name) and its UID. When includeFolder
// is true the folder title label is added as well. The labels are meant to be
// attached to an alert before it is sent to the Alertmanager or its state is
// cached.
func GetRuleExtraLabels(rule *models.AlertRule, folderTitle string, includeFolder bool) map[string]string {
	labels := map[string]string{
		alertingModels.NamespaceUIDLabel: rule.NamespaceUID,
		prometheusModel.AlertNameLabel:   rule.Title,
		alertingModels.RuleUIDLabel:      rule.UID,
	}
	if !includeFolder {
		return labels
	}
	labels[models.FolderTitleLabel] = folderTitle
	return labels
}