Alerting: Ensure errors cleared when Alerting after error (#105246)

When a rule is configured with an `ExecErrState` of `Alerting`, and one of its instances is Alerting, then hits a data source error, and then evaluates successfully and remains Alerting, the cached instance keeps the stale error until it is no longer firing.

This is unexpected and leads to misleading results.
This commit is contained in:
Moustafa Baiou
2025-06-04 06:16:14 -04:00
committed by GitHub
parent 5bfcbc1f47
commit 0ce086bd2e
3 changed files with 202 additions and 0 deletions
@@ -3779,6 +3779,205 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
},
},
},
{
desc: "t1[1:alerting] t2[QueryError] t3[1:alerting] and 'for'=1 at t2,t3",
ruleMutators: []ngmodels.AlertRuleMutator{ngmodels.RuleMuts.WithForNTimes(1)},
results: map[time.Time]eval.Results{
t1: {
newResult(eval.WithState(eval.Alerting), eval.WithLabels(labels1), eval.WithValues(map[string]eval.NumberValueCapture{"A": {Var: "A", Value: util.Pointer(1.0)}})),
},
t2: {
newResult(eval.WithError(datasourceError)),
},
t3: {
newResult(eval.WithState(eval.Alerting), eval.WithLabels(labels1), eval.WithValues(map[string]eval.NumberValueCapture{"A": {Var: "A", Value: util.Pointer(1.0)}})),
},
},
expectedTransitions: map[ngmodels.ExecutionErrorState]map[time.Time][]StateTransition{
ngmodels.ErrorErrState: {
t2: {
{
PreviousState: eval.Pending,
State: &State{
Labels: labels["system + rule + labels1"],
State: eval.Pending,
LatestResult: newEvaluationWithValues(t1, eval.Alerting, map[string]float64{"A": 1.0}),
StartsAt: t1,
EndsAt: t1.Add(ResendDelay * 4),
LastEvaluationTime: t1,
Values: map[string]float64{"A": 1.0},
},
},
{
PreviousState: eval.Normal,
State: &State{
CacheID: labels["system + rule"].Fingerprint(),
Labels: labels["system + rule + datasource-error"],
State: eval.Error,
Error: datasourceError,
LatestResult: newEvaluation(t2, eval.Error),
StartsAt: t2,
EndsAt: t2.Add(ResendDelay * 4),
LastEvaluationTime: t2,
LastSentAt: &t2,
Annotations: mergeLabels(baseRule.Annotations, data.Labels{
"Error": datasourceError.Error(),
}),
},
},
},
t3: {
{
PreviousState: eval.Pending,
State: &State{
Labels: labels["system + rule + labels1"],
State: eval.Alerting,
Error: nil,
LatestResult: newEvaluationWithValues(t3, eval.Alerting, map[string]float64{"A": 1.0}),
StartsAt: t3,
FiredAt: &t3,
EndsAt: t3.Add(ResendDelay * 4),
LastEvaluationTime: t3,
LastSentAt: &t3,
Annotations: baseRule.Annotations,
Values: map[string]float64{"A": 1.0},
},
},
{
PreviousState: eval.Error,
State: &State{
CacheID: labels["system + rule"].Fingerprint(),
Labels: labels["system + rule + datasource-error"],
State: eval.Error,
Error: datasourceError,
LatestResult: newEvaluation(t2, eval.Error),
StartsAt: t2,
EndsAt: t2.Add(ResendDelay * 4),
LastEvaluationTime: t2,
LastSentAt: &t2,
Annotations: mergeLabels(baseRule.Annotations, data.Labels{
"Error": datasourceError.Error(),
}),
},
},
},
},
ngmodels.AlertingErrState: {
t2: {
{
PreviousState: eval.Pending,
State: &State{
Labels: labels["system + rule + labels1"],
State: eval.Alerting,
StateReason: eval.Error.String(),
Error: datasourceError,
Annotations: datasourceErrorAnnotations,
LatestResult: newEvaluationWithValues(t2, eval.Error, map[string]float64{"A": -1}),
StartsAt: t2,
EndsAt: t2.Add(ResendDelay * 4),
FiredAt: &t2,
LastEvaluationTime: t2,
LastSentAt: &t2,
Values: map[string]float64{"A": -1},
},
},
},
t3: {
{
PreviousStateReason: eval.Error.String(),
PreviousState: eval.Alerting,
State: &State{
Labels: labels["system + rule + labels1"],
State: eval.Alerting,
Error: nil,
Annotations: baseRule.Annotations,
LatestResult: newEvaluationWithValues(t3, eval.Alerting, map[string]float64{"A": 1.0}),
StartsAt: t2,
EndsAt: t3.Add(ResendDelay * 4),
FiredAt: &t2,
LastEvaluationTime: t3,
LastSentAt: &t2,
Values: map[string]float64{"A": 1.0},
},
},
},
},
ngmodels.OkErrState: {
t2: {
{
PreviousState: eval.Pending,
State: &State{
Labels: labels["system + rule + labels1"],
State: eval.Normal,
StateReason: eval.Error.String(),
Annotations: datasourceErrorAnnotations,
LatestResult: newEvaluationWithValues(t2, eval.Error, map[string]float64{"A": float64(-1)}),
StartsAt: t2,
EndsAt: t2,
LastEvaluationTime: t2,
Values: map[string]float64{"A": float64(-1)},
},
},
},
t3: {
{
PreviousState: eval.Normal,
PreviousStateReason: eval.Error.String(),
State: &State{
Labels: labels["system + rule + labels1"],
State: eval.Pending,
Annotations: baseRule.Annotations,
LatestResult: newEvaluationWithValues(t3, eval.Alerting, map[string]float64{"A": 1.0}),
StartsAt: t3,
EndsAt: t3.Add(ResendDelay * 4),
LastEvaluationTime: t3,
Values: map[string]float64{"A": 1.0},
},
},
},
},
ngmodels.KeepLastErrState: {
t2: {
{
PreviousState: eval.Pending,
State: &State{
Labels: labels["system + rule + labels1"],
State: eval.Alerting,
StateReason: ngmodels.ConcatReasons(eval.Error.String(), ngmodels.StateReasonKeepLast),
Annotations: datasourceErrorAnnotations,
LatestResult: newEvaluationWithValues(t2, eval.Error, map[string]float64{"A": float64(-1)}),
StartsAt: t2,
EndsAt: t2.Add(ResendDelay * 4),
FiredAt: &t2,
LastEvaluationTime: t2,
LastSentAt: &t2,
Values: map[string]float64{"A": float64(-1)},
},
},
},
t3: {
{
PreviousState: eval.Alerting,
PreviousStateReason: ngmodels.ConcatReasons(eval.Error.String(), ngmodels.StateReasonKeepLast),
State: &State{
Labels: labels["system + rule + labels1"],
State: eval.Alerting,
StateReason: "",
Error: nil,
LatestResult: newEvaluationWithValues(t3, eval.Alerting, map[string]float64{"A": 1.0}),
StartsAt: t2,
FiredAt: &t2,
EndsAt: t3.Add(ResendDelay * 4),
LastEvaluationTime: t3,
LastSentAt: &t2,
Annotations: baseRule.Annotations,
Values: map[string]float64{"A": 1.0},
},
},
},
},
},
},
{
desc: "t1[1:normal] t2[QueryError] at t2",
results: map[time.Time]eval.Results{
@@ -1013,6 +1013,7 @@ func TestProcessEvalResults(t *testing.T) {
},
},
{
// TODO(@moustafab): figure out why this test doesn't fail as is
desc: "classic condition, execution Error as Error (alerting -> query error -> alerting)",
alertRule: baseRuleWith(m.WithErrorExecAs(models.ErrorErrState)),
expectedAnnotations: 3,
+2
View File
@@ -401,6 +401,8 @@ func resultAlerting(state *State, rule *models.AlertRule, result eval.Result, lo
case eval.Alerting:
prevEndsAt := state.EndsAt
state.Maintain(rule.IntervalSeconds, result.EvaluatedAt)
// explicitly clear errors
state.Error = nil
logger.Debug("Keeping state",
"state",
state.State,