Alerting: Write and Delete multiple alert instances. (#54072)

Prior to this change, all alert instance writes and deletes happened
individually, in their own database transaction. This change batches up
writes or deletes for a given rule's evaluation loop into a single
transaction before applying it.

Before:
```
goos: darwin
goarch: arm64
pkg: github.com/grafana/grafana/pkg/services/ngalert/store
BenchmarkAlertInstanceOperations-8           398           2991381 ns/op         1133537 B/op      27703 allocs/op
--- BENCH: BenchmarkAlertInstanceOperations-8
    util.go:127: alert definition: {orgID: 1, UID: FovKXiRVzm} with title: "an alert definition FTvFXmRVkz" interval: 60 created
    util.go:127: alert definition: {orgID: 1, UID: foDFXmRVkm} with title: "an alert definition fovFXmRVkz" interval: 60 created
    util.go:127: alert definition: {orgID: 1, UID: VQvFuigVkm} with title: "an alert definition VwDKXmR4kz" interval: 60 created
PASS
ok      github.com/grafana/grafana/pkg/services/ngalert/store   1.619s
```

After:
```
goos: darwin
goarch: arm64
pkg: github.com/grafana/grafana/pkg/services/ngalert/store
BenchmarkAlertInstanceOperations-8          1440            816484 ns/op          352297 B/op       6529 allocs/op
--- BENCH: BenchmarkAlertInstanceOperations-8
    util.go:127: alert definition: {orgID: 1, UID: 302r_igVzm} with title: "an alert definition q0h9lmR4zz" interval: 60 created
    util.go:127: alert definition: {orgID: 1, UID: 71hrlmR4km} with title: "an alert definition nJ29_mR4zz" interval: 60 created
    util.go:127: alert definition: {orgID: 1, UID: Cahr_mR4zm} with title: "an alert definition ja2rlmg4zz" interval: 60 created
PASS
ok      github.com/grafana/grafana/pkg/services/ngalert/store   1.383s
```

So we cut time by about 75% and memory allocations by about 60% when
storing and deleting 100 instances.

This change also updates some of our tests so that they run successfully against postgreSQL - we were using random Int64s, but postgres integers, which our tables use, max out at 2^31-1
This commit is contained in:
Joe Blubaugh
2022-09-02 11:17:20 +08:00
committed by GitHub
parent d706320d0a
commit 5e4fd94413
15 changed files with 559 additions and 210 deletions
+8 -8
View File
@@ -37,15 +37,15 @@ import (
)
// SetupTestEnv initializes a store to used by the tests.
func SetupTestEnv(t *testing.T, baseInterval time.Duration) (*ngalert.AlertNG, *store.DBstore) {
t.Helper()
func SetupTestEnv(tb testing.TB, baseInterval time.Duration) (*ngalert.AlertNG, *store.DBstore) {
tb.Helper()
origNewGuardian := guardian.New
guardian.MockDashboardGuardian(&guardian.FakeDashboardGuardian{
CanSaveValue: true,
CanViewValue: true,
CanAdminValue: true,
})
t.Cleanup(func() {
tb.Cleanup(func() {
guardian.New = origNewGuardian
})
@@ -58,8 +58,8 @@ func SetupTestEnv(t *testing.T, baseInterval time.Duration) (*ngalert.AlertNG, *
*cfg.UnifiedAlerting.Enabled = true
m := metrics.NewNGAlert(prometheus.NewRegistry())
sqlStore := sqlstore.InitTestDB(t)
secretsService := secretsManager.SetupTestService(t, database.ProvideSecretsStore(sqlStore))
sqlStore := sqlstore.InitTestDB(tb)
secretsService := secretsManager.SetupTestService(tb, database.ProvideSecretsStore(sqlStore))
dashboardStore := databasestore.ProvideDashboardStore(sqlStore, featuremgmt.WithFeatures())
ac := acmock.New()
@@ -83,7 +83,7 @@ func SetupTestEnv(t *testing.T, baseInterval time.Duration) (*ngalert.AlertNG, *
cfg, nil, nil, routing.NewRouteRegister(), sqlStore, nil, nil, nil, nil,
secretsService, nil, m, folderService, ac, &dashboards.FakeDashboardService{}, nil, bus, ac,
)
require.NoError(t, err)
require.NoError(tb, err)
return ng, &store.DBstore{
SQLStore: ng.SQLStore,
Cfg: setting.UnifiedAlertingSettings{
@@ -96,11 +96,11 @@ func SetupTestEnv(t *testing.T, baseInterval time.Duration) (*ngalert.AlertNG, *
}
// CreateTestAlertRule creates a dummy alert definition to be used by the tests.
func CreateTestAlertRule(t *testing.T, ctx context.Context, dbstore *store.DBstore, intervalSeconds int64, orgID int64) *models.AlertRule {
func CreateTestAlertRule(t testing.TB, ctx context.Context, dbstore *store.DBstore, intervalSeconds int64, orgID int64) *models.AlertRule {
return CreateTestAlertRuleWithLabels(t, ctx, dbstore, intervalSeconds, orgID, nil)
}
func CreateTestAlertRuleWithLabels(t *testing.T, ctx context.Context, dbstore *store.DBstore, intervalSeconds int64, orgID int64, labels map[string]string) *models.AlertRule {
func CreateTestAlertRuleWithLabels(t testing.TB, ctx context.Context, dbstore *store.DBstore, intervalSeconds int64, orgID int64, labels map[string]string) *models.AlertRule {
ruleGroup := fmt.Sprintf("ruleGroup-%s", util.GenerateShortUID())
folderUID := "namespace"
user := &user.SignedInUser{