Alerting: Add backend support for keep_firing_for (#100750)

What is this feature?

This PR introduces a new alert rule configuration option, keep_firing_for (see the Prometheus alerting rules documentation).

keep_firing_for prevents alerts from resolving immediately after the alert condition returns to normal. Instead, they transition into a "Recovering" state, during which the Alertmanager does not consider them resolved. Once the recovery period ends (or at the next evaluation, if the evaluation interval is longer than keep_firing_for), the alert transitions to "Normal", provided it has not started alerting again:

Before                                          

+----------+     +----------+                    
| Alerting |---->|  Normal  |                    
+----------+     +----------+                    

-----
After

+----------+      +------------+     +----------+
| Alerting |----->| Recovering |---->|  Normal  |
+----------+      +------------+     +----------+                                                 

Why do we need this feature?

This feature prevents flapping alerts by adding a recovery period. This helps avoid false resolutions caused by brief alert condition recoveries.
This commit is contained in:
Alexander Akhmetov
2025-03-18 11:24:48 +01:00
committed by GitHub
parent 9491fa1895
commit 695ac91290
31 changed files with 1280 additions and 53 deletions
@@ -379,6 +379,9 @@ func TestIntegrationPrometheusRulesFilterByDashboard(t *testing.T) {
interval, err := model.ParseDuration("10s")
require.NoError(t, err)
keepFiringFor, err := model.ParseDuration("15s")
require.NoError(t, err)
// Now, let's create some rules
{
rules := apimodels.PostableRuleGroupConfig{
@@ -386,8 +389,9 @@ func TestIntegrationPrometheusRulesFilterByDashboard(t *testing.T) {
Rules: []apimodels.PostableExtendedRuleNode{
{
ApiRuleNode: &apimodels.ApiRuleNode{
For: &interval,
Labels: map[string]string{},
For: &interval,
KeepFiringFor: &keepFiringFor,
Labels: map[string]string{},
Annotations: map[string]string{
"__dashboardUid__": dashboardUID,
"__panelId__": "1",
@@ -472,6 +476,7 @@ func TestIntegrationPrometheusRulesFilterByDashboard(t *testing.T) {
"folderUid": "default",
"query": "[{\"refId\":\"A\",\"queryType\":\"\",\"relativeTimeRange\":{\"from\":18000,\"to\":10800},\"datasourceUid\":\"__expr__\",\"model\":{\"expression\":\"2 + 3 \\u003e 1\",\"intervalMs\":1000,\"maxDataPoints\":43200,\"type\":\"math\"}}]",
"duration": 10,
"keepFiringFor": 15,
"annotations": {
"__dashboardUid__": "%s",
"__panelId__": "1"
@@ -518,6 +523,7 @@ func TestIntegrationPrometheusRulesFilterByDashboard(t *testing.T) {
"folderUid": "default",
"query": "[{\"refId\":\"A\",\"queryType\":\"\",\"relativeTimeRange\":{\"from\":18000,\"to\":10800},\"datasourceUid\":\"__expr__\",\"model\":{\"expression\":\"2 + 3 \\u003e 1\",\"intervalMs\":1000,\"maxDataPoints\":43200,\"type\":\"math\"}}]",
"duration": 10,
"keepFiringFor": 15,
"annotations": {
"__dashboardUid__": "%s",
"__panelId__": "1"