Alerting: Change default for max_attempts to 3. (#97461)

Currently the default is 1, which means that by default users will see transient
query errors reflected as alert evaluation failures, even though an immediate
retry is often sufficient to evaluate the rule successfully.

Enabling retries by default leads to a better experience out of the box.
This commit is contained in:
Steve Simpson
2024-12-05 21:48:24 +01:00
committed by GitHub
parent 6a1685ab5e
commit c440bd2bda
4 changed files with 9 additions and 9 deletions

View File

@@ -49,7 +49,7 @@ const (
evaluatorDefaultEvaluationTimeout = 30 * time.Second
schedulerDefaultAdminConfigPollInterval = time.Minute
schedulerDefaultExecuteAlerts = true
schedulerDefaultMaxAttempts = 1
schedulerDefaultMaxAttempts = 3
schedulerDefaultLegacyMinInterval = 1
screenshotsDefaultCapture = false
screenshotsDefaultCaptureTimeout = 10 * time.Second

View File

@@ -120,14 +120,14 @@ func TestUnifiedAlertingSettings(t *testing.T) {
"evaluation_timeout": evaluatorDefaultEvaluationTimeout.String(),
},
alertingOptions: map[string]string{
"max_attempts": "1",
"max_attempts": "1", // Note: Ignored, setting does not exist.
"min_interval_seconds": "120",
"execute_alerts": "true",
"evaluation_timeout_seconds": "160",
},
verifyCfg: func(t *testing.T, cfg Cfg) {
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.AdminConfigPollInterval)
require.Equal(t, int64(1), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, int64(3), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.MinInterval)
require.Equal(t, true, cfg.UnifiedAlerting.ExecuteAlerts)
require.Equal(t, 160*time.Second, cfg.UnifiedAlerting.EvaluationTimeout)
@@ -168,14 +168,14 @@ func TestUnifiedAlertingSettings(t *testing.T) {
"evaluation_timeout": "invalid",
},
alertingOptions: map[string]string{
"max_attempts": "1",
"max_attempts": "1", // Note: Ignored, setting does not exist.
"min_interval_seconds": "120",
"execute_alerts": "false",
"evaluation_timeout_seconds": "160",
},
verifyCfg: func(t *testing.T, cfg Cfg) {
require.Equal(t, alertmanagerDefaultConfigPollInterval, cfg.UnifiedAlerting.AdminConfigPollInterval)
require.Equal(t, int64(1), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, int64(3), cfg.UnifiedAlerting.MaxAttempts)
require.Equal(t, 120*time.Second, cfg.UnifiedAlerting.MinInterval)
require.Equal(t, false, cfg.UnifiedAlerting.ExecuteAlerts)
require.Equal(t, 160*time.Second, cfg.UnifiedAlerting.EvaluationTimeout)