Alerting: Ensure errors cleared when Alerting after error (#105246)
When a rule is configured with an `ExecErrState` of `Alerting`, and one of its instances is Alerting, then encounters a data source error, and then evaluates successfully and continues to be Alerting, the cached instance retains the error until it is no longer firing. This is unexpected and leads to misleading results.
This commit is contained in:
@@ -3779,6 +3779,205 @@ func TestProcessEvalResults_StateTransitions(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "t1[1:alerting] t2[QueryError] t3[1:alerting] and 'for'=1 at t2,t3",
|
||||
ruleMutators: []ngmodels.AlertRuleMutator{ngmodels.RuleMuts.WithForNTimes(1)},
|
||||
results: map[time.Time]eval.Results{
|
||||
t1: {
|
||||
newResult(eval.WithState(eval.Alerting), eval.WithLabels(labels1), eval.WithValues(map[string]eval.NumberValueCapture{"A": {Var: "A", Value: util.Pointer(1.0)}})),
|
||||
},
|
||||
t2: {
|
||||
newResult(eval.WithError(datasourceError)),
|
||||
},
|
||||
t3: {
|
||||
newResult(eval.WithState(eval.Alerting), eval.WithLabels(labels1), eval.WithValues(map[string]eval.NumberValueCapture{"A": {Var: "A", Value: util.Pointer(1.0)}})),
|
||||
},
|
||||
},
|
||||
expectedTransitions: map[ngmodels.ExecutionErrorState]map[time.Time][]StateTransition{
|
||||
ngmodels.ErrorErrState: {
|
||||
t2: {
|
||||
{
|
||||
PreviousState: eval.Pending,
|
||||
State: &State{
|
||||
Labels: labels["system + rule + labels1"],
|
||||
State: eval.Pending,
|
||||
LatestResult: newEvaluationWithValues(t1, eval.Alerting, map[string]float64{"A": 1.0}),
|
||||
StartsAt: t1,
|
||||
EndsAt: t1.Add(ResendDelay * 4),
|
||||
LastEvaluationTime: t1,
|
||||
Values: map[string]float64{"A": 1.0},
|
||||
},
|
||||
},
|
||||
{
|
||||
PreviousState: eval.Normal,
|
||||
State: &State{
|
||||
CacheID: labels["system + rule"].Fingerprint(),
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
Error: datasourceError,
|
||||
LatestResult: newEvaluation(t2, eval.Error),
|
||||
StartsAt: t2,
|
||||
EndsAt: t2.Add(ResendDelay * 4),
|
||||
LastEvaluationTime: t2,
|
||||
LastSentAt: &t2,
|
||||
Annotations: mergeLabels(baseRule.Annotations, data.Labels{
|
||||
"Error": datasourceError.Error(),
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
t3: {
|
||||
{
|
||||
PreviousState: eval.Pending,
|
||||
State: &State{
|
||||
Labels: labels["system + rule + labels1"],
|
||||
State: eval.Alerting,
|
||||
Error: nil,
|
||||
LatestResult: newEvaluationWithValues(t3, eval.Alerting, map[string]float64{"A": 1.0}),
|
||||
StartsAt: t3,
|
||||
FiredAt: &t3,
|
||||
EndsAt: t3.Add(ResendDelay * 4),
|
||||
LastEvaluationTime: t3,
|
||||
LastSentAt: &t3,
|
||||
Annotations: baseRule.Annotations,
|
||||
Values: map[string]float64{"A": 1.0},
|
||||
},
|
||||
},
|
||||
{
|
||||
PreviousState: eval.Error,
|
||||
State: &State{
|
||||
CacheID: labels["system + rule"].Fingerprint(),
|
||||
Labels: labels["system + rule + datasource-error"],
|
||||
State: eval.Error,
|
||||
Error: datasourceError,
|
||||
LatestResult: newEvaluation(t2, eval.Error),
|
||||
StartsAt: t2,
|
||||
EndsAt: t2.Add(ResendDelay * 4),
|
||||
LastEvaluationTime: t2,
|
||||
LastSentAt: &t2,
|
||||
Annotations: mergeLabels(baseRule.Annotations, data.Labels{
|
||||
"Error": datasourceError.Error(),
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
ngmodels.AlertingErrState: {
|
||||
t2: {
|
||||
{
|
||||
PreviousState: eval.Pending,
|
||||
State: &State{
|
||||
Labels: labels["system + rule + labels1"],
|
||||
State: eval.Alerting,
|
||||
StateReason: eval.Error.String(),
|
||||
Error: datasourceError,
|
||||
Annotations: datasourceErrorAnnotations,
|
||||
LatestResult: newEvaluationWithValues(t2, eval.Error, map[string]float64{"A": -1}),
|
||||
StartsAt: t2,
|
||||
EndsAt: t2.Add(ResendDelay * 4),
|
||||
FiredAt: &t2,
|
||||
LastEvaluationTime: t2,
|
||||
LastSentAt: &t2,
|
||||
Values: map[string]float64{"A": -1},
|
||||
},
|
||||
},
|
||||
},
|
||||
t3: {
|
||||
{
|
||||
PreviousStateReason: eval.Error.String(),
|
||||
PreviousState: eval.Alerting,
|
||||
State: &State{
|
||||
Labels: labels["system + rule + labels1"],
|
||||
State: eval.Alerting,
|
||||
Error: nil,
|
||||
Annotations: baseRule.Annotations,
|
||||
LatestResult: newEvaluationWithValues(t3, eval.Alerting, map[string]float64{"A": 1.0}),
|
||||
StartsAt: t2,
|
||||
EndsAt: t3.Add(ResendDelay * 4),
|
||||
FiredAt: &t2,
|
||||
LastEvaluationTime: t3,
|
||||
LastSentAt: &t2,
|
||||
Values: map[string]float64{"A": 1.0},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
ngmodels.OkErrState: {
|
||||
t2: {
|
||||
{
|
||||
PreviousState: eval.Pending,
|
||||
State: &State{
|
||||
Labels: labels["system + rule + labels1"],
|
||||
State: eval.Normal,
|
||||
StateReason: eval.Error.String(),
|
||||
Annotations: datasourceErrorAnnotations,
|
||||
LatestResult: newEvaluationWithValues(t2, eval.Error, map[string]float64{"A": float64(-1)}),
|
||||
StartsAt: t2,
|
||||
EndsAt: t2,
|
||||
LastEvaluationTime: t2,
|
||||
Values: map[string]float64{"A": float64(-1)},
|
||||
},
|
||||
},
|
||||
},
|
||||
t3: {
|
||||
{
|
||||
PreviousState: eval.Normal,
|
||||
PreviousStateReason: eval.Error.String(),
|
||||
State: &State{
|
||||
Labels: labels["system + rule + labels1"],
|
||||
State: eval.Pending,
|
||||
Annotations: baseRule.Annotations,
|
||||
LatestResult: newEvaluationWithValues(t3, eval.Alerting, map[string]float64{"A": 1.0}),
|
||||
StartsAt: t3,
|
||||
EndsAt: t3.Add(ResendDelay * 4),
|
||||
LastEvaluationTime: t3,
|
||||
Values: map[string]float64{"A": 1.0},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
ngmodels.KeepLastErrState: {
|
||||
t2: {
|
||||
{
|
||||
PreviousState: eval.Pending,
|
||||
State: &State{
|
||||
Labels: labels["system + rule + labels1"],
|
||||
State: eval.Alerting,
|
||||
StateReason: ngmodels.ConcatReasons(eval.Error.String(), ngmodels.StateReasonKeepLast),
|
||||
Annotations: datasourceErrorAnnotations,
|
||||
LatestResult: newEvaluationWithValues(t2, eval.Error, map[string]float64{"A": float64(-1)}),
|
||||
StartsAt: t2,
|
||||
EndsAt: t2.Add(ResendDelay * 4),
|
||||
FiredAt: &t2,
|
||||
LastEvaluationTime: t2,
|
||||
LastSentAt: &t2,
|
||||
Values: map[string]float64{"A": float64(-1)},
|
||||
},
|
||||
},
|
||||
},
|
||||
t3: {
|
||||
{
|
||||
PreviousState: eval.Alerting,
|
||||
PreviousStateReason: ngmodels.ConcatReasons(eval.Error.String(), ngmodels.StateReasonKeepLast),
|
||||
State: &State{
|
||||
Labels: labels["system + rule + labels1"],
|
||||
State: eval.Alerting,
|
||||
StateReason: "",
|
||||
Error: nil,
|
||||
LatestResult: newEvaluationWithValues(t3, eval.Alerting, map[string]float64{"A": 1.0}),
|
||||
StartsAt: t2,
|
||||
FiredAt: &t2,
|
||||
EndsAt: t3.Add(ResendDelay * 4),
|
||||
LastEvaluationTime: t3,
|
||||
LastSentAt: &t2,
|
||||
Annotations: baseRule.Annotations,
|
||||
Values: map[string]float64{"A": 1.0},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
desc: "t1[1:normal] t2[QueryError] at t2",
|
||||
results: map[time.Time]eval.Results{
|
||||
|
||||
@@ -1013,6 +1013,7 @@ func TestProcessEvalResults(t *testing.T) {
|
||||
},
|
||||
},
|
||||
{
|
||||
// TODO(@moustafab): figure out why this test doesn't fail as is
|
||||
desc: "classic condition, execution Error as Error (alerting -> query error -> alerting)",
|
||||
alertRule: baseRuleWith(m.WithErrorExecAs(models.ErrorErrState)),
|
||||
expectedAnnotations: 3,
|
||||
|
||||
@@ -401,6 +401,8 @@ func resultAlerting(state *State, rule *models.AlertRule, result eval.Result, lo
|
||||
case eval.Alerting:
|
||||
prevEndsAt := state.EndsAt
|
||||
state.Maintain(rule.IntervalSeconds, result.EvaluatedAt)
|
||||
// explicitly clear errors
|
||||
state.Error = nil
|
||||
logger.Debug("Keeping state",
|
||||
"state",
|
||||
state.State,
|
||||
|
||||
Reference in New Issue
Block a user