Alerting: Write and Delete multiple alert instances. (#55350)

Prior to this change, all alert instance writes and deletes happened
individually, in their own database transaction. This change batches up
writes or deletes for a given rule's evaluation loop into a single
transaction before applying it.

These new transactions are off by default, guarded by the feature toggle "alertingBigTransactions".

Before:

```
goos: darwin
goarch: arm64
pkg: github.com/grafana/grafana/pkg/services/ngalert/store
BenchmarkAlertInstanceOperations-8           398           2991381 ns/op         1133537 B/op      27703 allocs/op
--- BENCH: BenchmarkAlertInstanceOperations-8
    util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created
    util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created
    util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created
PASS
ok      github.com/grafana/grafana/pkg/services/ngalert/store   1.619s
```

After:

```
goos: darwin
goarch: arm64
pkg: github.com/grafana/grafana/pkg/services/ngalert/store
BenchmarkAlertInstanceOperations-8          1440            816484 ns/op          352297 B/op       6529 allocs/op
--- BENCH: BenchmarkAlertInstanceOperations-8
    util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created
    util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created
    util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created
PASS
ok      github.com/grafana/grafana/pkg/services/ngalert/store   1.383s
```

So we cut time per operation by about 73%, bytes allocated by about 69%,
and the number of allocations by about 76% when storing and deleting 100 instances.
This commit is contained in:
Joe Blubaugh
2022-10-06 14:22:58 +08:00
committed by GitHub
parent b4e23e5d32
commit b476ae62fb
21 changed files with 606 additions and 193 deletions
+23 -10
View File
@@ -38,16 +38,28 @@ import (
"github.com/stretchr/testify/require"
)
// FakeFeatures is a minimal feature-toggle stub. The only toggle it can
// report as enabled is featuremgmt.FlagAlertingBigTransactions.
type FakeFeatures struct {
	// BigTransactions is the value reported for
	// featuremgmt.FlagAlertingBigTransactions.
	BigTransactions bool
}

// IsEnabled returns the BigTransactions field when feature is
// featuremgmt.FlagAlertingBigTransactions, and false for any other feature.
func (f *FakeFeatures) IsEnabled(feature string) bool {
	// Short-circuit evaluation: f is only dereferenced when the name matches.
	return feature == featuremgmt.FlagAlertingBigTransactions && f.BigTransactions
}
// SetupTestEnv initializes a store to be used by the tests.
func SetupTestEnv(t *testing.T, baseInterval time.Duration) (*ngalert.AlertNG, *store.DBstore) {
t.Helper()
func SetupTestEnv(tb testing.TB, baseInterval time.Duration) (*ngalert.AlertNG, *store.DBstore) {
tb.Helper()
origNewGuardian := guardian.New
guardian.MockDashboardGuardian(&guardian.FakeDashboardGuardian{
CanSaveValue: true,
CanViewValue: true,
CanAdminValue: true,
})
t.Cleanup(func() {
tb.Cleanup(func() {
guardian.New = origNewGuardian
})
@@ -60,8 +72,8 @@ func SetupTestEnv(t *testing.T, baseInterval time.Duration) (*ngalert.AlertNG, *
*cfg.UnifiedAlerting.Enabled = true
m := metrics.NewNGAlert(prometheus.NewRegistry())
sqlStore := sqlstore.InitTestDB(t)
secretsService := secretsManager.SetupTestService(t, database.ProvideSecretsStore(sqlStore))
sqlStore := sqlstore.InitTestDB(tb)
secretsService := secretsManager.SetupTestService(tb, database.ProvideSecretsStore(sqlStore))
dashboardStore := databasestore.ProvideDashboardStore(sqlStore, featuremgmt.WithFeatures(), tagimpl.ProvideService(sqlStore, sqlStore.Cfg))
ac := acmock.New()
@@ -82,12 +94,13 @@ func SetupTestEnv(t *testing.T, baseInterval time.Duration) (*ngalert.AlertNG, *
)
ng, err := ngalert.ProvideService(
cfg, nil, nil, routing.NewRouteRegister(), sqlStore, nil, nil, nil, nil,
cfg, &FakeFeatures{}, nil, nil, routing.NewRouteRegister(), sqlStore, nil, nil, nil, nil,
secretsService, nil, m, folderService, ac, &dashboards.FakeDashboardService{}, nil, bus, ac, annotationstest.NewFakeAnnotationsRepo(),
)
require.NoError(t, err)
require.NoError(tb, err)
return ng, &store.DBstore{
SQLStore: ng.SQLStore,
FeatureToggles: ng.FeatureToggles,
SQLStore: ng.SQLStore,
Cfg: setting.UnifiedAlertingSettings{
BaseInterval: baseInterval * time.Second,
},
@@ -98,11 +111,11 @@ func SetupTestEnv(t *testing.T, baseInterval time.Duration) (*ngalert.AlertNG, *
}
// CreateTestAlertRule creates a dummy alert definition to be used by the tests.
func CreateTestAlertRule(t *testing.T, ctx context.Context, dbstore *store.DBstore, intervalSeconds int64, orgID int64) *models.AlertRule {
func CreateTestAlertRule(t testing.TB, ctx context.Context, dbstore *store.DBstore, intervalSeconds int64, orgID int64) *models.AlertRule {
return CreateTestAlertRuleWithLabels(t, ctx, dbstore, intervalSeconds, orgID, nil)
}
func CreateTestAlertRuleWithLabels(t *testing.T, ctx context.Context, dbstore *store.DBstore, intervalSeconds int64, orgID int64, labels map[string]string) *models.AlertRule {
func CreateTestAlertRuleWithLabels(t testing.TB, ctx context.Context, dbstore *store.DBstore, intervalSeconds int64, orgID int64, labels map[string]string) *models.AlertRule {
ruleGroup := fmt.Sprintf("ruleGroup-%s", util.GenerateShortUID())
folderUID := "namespace"
user := &user.SignedInUser{