diff --git a/pkg/services/sqlstore/migrations/ualert/alert_rule.go b/pkg/services/sqlstore/migrations/ualert/alert_rule.go
index 58e47f302c0..5c53806d7bd 100644
--- a/pkg/services/sqlstore/migrations/ualert/alert_rule.go
+++ b/pkg/services/sqlstore/migrations/ualert/alert_rule.go
@@ -7,9 +7,11 @@ import (
 
 	"github.com/grafana/grafana/pkg/components/simplejson"
 	"github.com/grafana/grafana/pkg/expr"
+	"github.com/grafana/grafana/pkg/infra/log"
 	legacymodels "github.com/grafana/grafana/pkg/services/alerting/models"
+	"github.com/grafana/grafana/pkg/services/dashboards"
+	"github.com/grafana/grafana/pkg/services/datasources"
 	ngmodels "github.com/grafana/grafana/pkg/services/ngalert/models"
-	"github.com/grafana/grafana/pkg/tsdb/graphite"
 )
 
 const (
@@ -111,7 +113,17 @@ func (m *migration) makeAlertRule(cond condition, da dashAlert, folderUID string
 	annotations["message"] = da.Message
 	var err error
 
-	data, err := migrateAlertRuleQueries(cond.Data)
+	// Getting the dashboard is important, as we might want to extract some data out of it. Especially for Graphite queries.
+	d, err := m.fetchDashboard(da.OrgId, da.DashboardUID)
+	if err != nil {
+		return nil, fmt.Errorf("failed to migrate alert rule queries: related dashboard could not be loaded: %w", err)
+	}
+	dsTypeMap, err := m.fetchDsTypes(cond.Data)
+	if err != nil {
+		return nil, fmt.Errorf("failed to migrate alert rule queries: datasources could not be loaded: %w", err)
+	}
+	l := log.New("ngalert.migration.queries")
+	data, err := migrateAlertRuleQueries(l, da.Id, cond.Data, da.PanelId, d, dsTypeMap)
 	if err != nil {
 		return nil, fmt.Errorf("failed to migrate alert rule queries: %w", err)
 	}
@@ -173,8 +185,76 @@ func (m *migration) makeAlertRule(cond condition, da dashAlert, folderUID string
 	return ar, nil
 }
 
+// fetchDashboard loads the dashboard with the given UID in the given organization.
+// It returns dashboards.ErrDashboardNotFound if there is no such dashboard.
+func (m *migration) fetchDashboard(orgID int64, dashboardUID string) (*dashboards.Dashboard, error) {
+	// This is a hack for unit tests in 9.4.
+	if m.sess == nil {
+		return &dashboards.Dashboard{}, nil
+	}
+
+	dashboard := dashboards.Dashboard{OrgID: orgID, UID: dashboardUID}
+	has, err := m.sess.Get(&dashboard)
+	if err != nil {
+		return nil, err
+	}
+	if !has {
+		return nil, dashboards.ErrDashboardNotFound
+	}
+
+	dashboard.SetID(dashboard.ID)
+	dashboard.SetUID(dashboard.UID)
+
+	return &dashboard, nil
+}
+
+// dsType is the subset of a data_source row needed to map a data source UID to its type.
+type dsType struct {
+	UID  string `xorm:"uid"`
+	Type string `xorm:"type"`
+}
+
+// TableName returns the name of the table that dsType is read from.
+func (dsType) TableName() string {
+	return "data_source"
+}
+
+// fetchDsTypes returns the data source type for the data source UID of every given query.
+// UIDs without a matching row in data_source are mapped to an empty type.
+func (m *migration) fetchDsTypes(data []alertQuery) (map[string]string, error) {
+	// This is a hack for unit tests in 9.4.
+	if m.sess == nil {
+		return map[string]string{}, nil
+	}
+
+	result := make(map[string]string, len(data))
+	uids := make([]string, 0, len(data))
+	for _, q := range data {
+		if _, seen := result[q.DatasourceUID]; seen {
+			continue
+		}
+		result[q.DatasourceUID] = ""
+		uids = append(uids, q.DatasourceUID)
+	}
+	if len(uids) == 0 {
+		return result, nil
+	}
+
+	// Only look up the referenced UIDs instead of loading the whole table.
+	// NOTE(review): the lookup is not scoped by org_id; confirm that UIDs cannot collide across organizations.
+	var dsTypes []*dsType
+	if err := m.sess.In("uid", uids).Find(&dsTypes); err != nil {
+		return nil, err
+	}
+	for _, ds := range dsTypes {
+		result[ds.UID] = ds.Type
+	}
+	return result, nil
+}
+
 // migrateAlertRuleQueries attempts to fix alert rule queries so they can work in unified alerting. Queries of some data sources are not compatible with unified alerting.
-func migrateAlertRuleQueries(data []alertQuery) ([]alertQuery, error) {
+// Queries of data sources other than Graphite are kept; only their "hide" flag is removed.
+func migrateAlertRuleQueries(l log.Logger, ruleID int64, data []alertQuery, panelID int64, dashboard *dashboards.Dashboard, dsTypes map[string]string) ([]alertQuery, error) {
 	result := make([]alertQuery, 0, len(data))
 	for _, d := range data {
 		// queries that are expression are not relevant, skip them.
@@ -182,6 +262,11 @@ func migrateAlertRuleQueries(data []alertQuery) ([]alertQuery, error) {
 			result = append(result, d)
 			continue
 		}
+		queryDsType, ok := dsTypes[d.DatasourceUID]
+		if !ok {
+			l.Error("datasource not found", "uid", d.DatasourceUID)
+			return nil, fmt.Errorf("datasource not found")
+		}
 		var fixedData map[string]json.RawMessage
 		err := json.Unmarshal(d.Model, &fixedData)
 		if err != nil {
@@ -189,7 +274,11 @@ func migrateAlertRuleQueries(data []alertQuery) ([]alertQuery, error) {
 		}
 		// remove hidden tag from the query (if exists)
 		delete(fixedData, "hide")
-		fixedData = fixGraphiteReferencedSubQueries(fixedData)
+		// Only Graphite queries need the sub query fix. Queries of every other data source
+		// type are kept as they are (apart from the "hide" flag) instead of being dropped.
+		if queryDsType == datasources.DS_GRAPHITE {
+			fixedData = fixGraphiteReferencedSubQueries(l, fixedData, ruleID, panelID, dashboard)
+		}
 		updatedModel, err := json.Marshal(fixedData)
 		if err != nil {
 			return nil, err
@@ -200,18 +289,6 @@ func migrateAlertRuleQueries(data []alertQuery) ([]alertQuery, error) {
 	return result, nil
 }
 
-// fixGraphiteReferencedSubQueries attempts to fix graphite referenced sub queries, given unified alerting does not support this.
-// targetFull of Graphite data source contains the expanded version of field 'target', so let's copy that.
-func fixGraphiteReferencedSubQueries(queryData map[string]json.RawMessage) map[string]json.RawMessage {
-	fullQuery, ok := queryData[graphite.TargetFullModelField]
-	if ok {
-		delete(queryData, graphite.TargetFullModelField)
-		queryData[graphite.TargetModelField] = fullQuery
-	}
-
-	return queryData
-}
-
 type alertQuery struct {
 	// RefID is the unique identifier of the query, set by the frontend call.
 	RefID string `json:"refId"`
@@ -305,7 +382,10 @@ func normalizeRuleName(daName string, uid string) string {
 		trunc := DefaultFieldMaxLength - 1 - len(uid)
 		daName = daName[:trunc] + "_" + uid
 	}
-
+	// The name can be empty, as this validation did not always exist.
+	if daName == "" {
+		return uid
+	}
 	return daName
 }
 
diff --git a/pkg/services/sqlstore/migrations/ualert/alert_rule_graphite.go b/pkg/services/sqlstore/migrations/ualert/alert_rule_graphite.go
new file mode 100644
index 00000000000..cb70dacce2a
--- /dev/null
+++ b/pkg/services/sqlstore/migrations/ualert/alert_rule_graphite.go
@@ -0,0 +1,232 @@
+package ualert
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"regexp"
+
+	"github.com/grafana/grafana/pkg/infra/log"
+	"github.com/grafana/grafana/pkg/services/dashboards"
+	"github.com/grafana/grafana/pkg/tsdb/graphite"
+)
+
+var (
+	// hasPlaceholdersRe matches references to other queries, such as "#A".
+	hasPlaceholdersRe = regexp.MustCompile(`#([A-Za-z]+)`)
+)
+
+// Counters of the Graphite query migration, reported by logGraphiteMigrationStats.
+// They are plain integers without synchronization.
+var (
+	failedGraphiteMigrations             int64
+	successfulGraphiteMigrationCopy      int64
+	successfulGraphiteMigrationDashboard int64
+)
+
+// panel is the subset of a dashboard panel needed to resolve Graphite query references.
+// Panels can be nested, hence the recursive Panels field.
+type panel struct {
+	ID      int64    `json:"id"`
+	Targets []target `json:"targets"`
+	Panels  []panel  `json:"panels"`
+}
+
+// target is the subset of a panel query needed to resolve Graphite query references.
+type target struct {
+	RefID  string `json:"refId"`
+	Target string `json:"target"`
+}
+
+// fixGraphiteReferencedSubQueries attempts to fix graphite referenced sub queries, given unified alerting does not support this.
+// targetFull of Graphite data source contains the expanded version of field 'target', so let's copy that.
+// If targetFull is missing or still contains placeholders, the query is expanded from the targets of the dashboard panel.
+// On failure the query data is returned unchanged; the failure is logged and counted.
+func fixGraphiteReferencedSubQueries(l log.Logger, queryData map[string]json.RawMessage, ruleID, panelID int64, dashboard *dashboards.Dashboard) map[string]json.RawMessage {
+	if !isFixable(l, queryData) {
+		return queryData
+	}
+	fullQuery, ok := queryData[graphite.TargetFullModelField]
+	// We also need to check for placeholders here, as the data can be very old and suffer from a bug that only
+	// the first placeholder of the same name got replaced.
+	if ok && !hasPlaceholders(string(fullQuery)) {
+		successfulGraphiteMigrationCopy++
+		delete(queryData, graphite.TargetFullModelField)
+		queryData[graphite.TargetModelField] = fullQuery
+		return queryData
+	}
+
+	// Sometimes it can happen that the "targetFull" field is not there. In this case we can
+	// extract this information from the panel of the dashboard.
+	fullQueryRaw, err := unwrapFromDashboard(l, queryData, panelID, dashboard)
+	if err != nil {
+		failedGraphiteMigrations++
+		l.Error("graphite query migration: failed to unwrap query from dashboard", "err", err, "rule_id", ruleID)
+		return queryData
+	}
+	b, err := json.Marshal(fullQueryRaw)
+	if err != nil {
+		failedGraphiteMigrations++
+		l.Error("graphite query migration: failed to marshal the unwrapped query", "query", fullQueryRaw, "err", err, "rule_id", ruleID)
+		return queryData
+	}
+	successfulGraphiteMigrationDashboard++
+	l.Debug("graphite query migration: successfully unwrapped query using the dashboard", "query", fullQueryRaw, "rule_id", ruleID)
+	// A "targetFull" that is still present here contains placeholders; drop it so that only
+	// the expanded target remains, as in the copy path above.
+	delete(queryData, graphite.TargetFullModelField)
+	queryData[graphite.TargetModelField] = b
+
+	return queryData
+}
+
+// isFixable reports whether the query has a "targetFull" field or a "target" field
+// that contains placeholders.
+func isFixable(l log.Logger, queryData map[string]json.RawMessage) bool {
+	if _, ok := queryData[graphite.TargetFullModelField]; ok {
+		return true
+	}
+	// Check if the target field has any placeholders.
+	targetRaw, ok := queryData[graphite.TargetModelField]
+	if !ok {
+		l.Error("query data does not have field 'target'")
+		return false
+	}
+	var targetStr string
+	if err := json.Unmarshal(targetRaw, &targetStr); err != nil {
+		l.Error("failed to unmarshal target", "err", err)
+		return false
+	}
+	return hasPlaceholders(targetStr)
+}
+
+// unwrapFromDashboard expands the query identified by the "refId" field of queryData
+// using the targets of the dashboard panel with the given panelID.
+func unwrapFromDashboard(l log.Logger, queryData map[string]json.RawMessage, panelID int64, dashboard *dashboards.Dashboard) (string, error) {
+	refIDRaw, ok := queryData["refId"]
+	if !ok {
+		return "", fmt.Errorf("query data does not have field 'refId'")
+	}
+	var refID string
+	err := json.Unmarshal(refIDRaw, &refID)
+	if err != nil {
+		return "", fmt.Errorf("failed to unmarshal refId: %w", err)
+	}
+	// Guard against a dashboard without data, e.g. the empty dashboard that is used when there is no session.
+	if dashboard == nil || dashboard.Data == nil {
+		return "", errors.New("dashboard has no data")
+	}
+	panelsRaw := dashboard.Data.Get("panels")
+	// Simplejson doesn't let you unmarshal in a type, so we need to work around this.
+	b, err := panelsRaw.MarshalJSON()
+	if err != nil {
+		return "", fmt.Errorf("failed to marshal panels: %w", err)
+	}
+	var panels []panel
+	err = json.Unmarshal(b, &panels)
+	if err != nil {
+		return "", fmt.Errorf("failed to unmarshal panels: %w", err)
+	}
+	return unwrapFromPanel(l, panels, panelID, refID, 0)
+}
+
+var (
+	errPanelNotFound = errors.New("panel not found")
+)
+
+// unwrapFromPanel searches panels, including nested ones, for the panel with the given panelID
+// and expands the target with the given refID. It returns errPanelNotFound if there is no such panel.
+func unwrapFromPanel(l log.Logger, panels []panel, panelID int64, refID string, parent int64) (string, error) {
+	for _, panel := range panels {
+		if panel.ID == panelID {
+			return unwrapTarget(refID, panel.Targets)
+		}
+		if len(panel.Panels) > 0 {
+			unwrappedTarget, err := unwrapFromPanel(l, panel.Panels, panelID, refID, panel.ID)
+			if err == nil {
+				return unwrappedTarget, nil
+			}
+			if !errors.Is(err, errPanelNotFound) {
+				return "", err
+			}
+		}
+	}
+	return "", errPanelNotFound
+}
+
+// unwrapTarget returns the target with the given refID with every "#<refID>" placeholder
+// replaced, recursively, by the target it refers to. It fails if a referenced refID is
+// missing or if the references are circular.
+func unwrapTarget(refID string, targets []target) (string, error) {
+	m := make(map[string]string, len(targets))
+	visited := make(map[string]bool)
+
+	// Populate the map with the target strings.
+	for _, t := range targets {
+		m[t.RefID] = t.Target
+	}
+
+	_, ok := m[refID]
+	if !ok {
+		return "", fmt.Errorf("refID %s not found in targets", refID)
+	}
+
+	// Check for circular dependencies by keeping track of visited refIDs.
+	// The detection of circular dependencies happens by marking a refID as visited
+	// before we start its replacement process, and unmarking it after we're done.
+	// This way, if we encounter the same refID during its own replacement process,
+	// we know it's a circular dependency.
+	var unwrap func(string) (string, error)
+	unwrap = func(currentRefID string) (string, error) {
+		if visited[currentRefID] {
+			return "", fmt.Errorf("circular dependency on refID %s", currentRefID)
+		}
+
+		visited[currentRefID] = true
+		defer func() { visited[currentRefID] = false }()
+
+		currentResult, ok := m[currentRefID]
+		if !ok {
+			return "", fmt.Errorf("refID %s not found in targets", currentRefID)
+		}
+
+		// Replace every placeholder exactly where it matched. Replacing by plain text search
+		// would corrupt placeholders that share a prefix, e.g. "#A" and "#AB".
+		var unwrapErr error
+		unwrapped := hasPlaceholdersRe.ReplaceAllStringFunc(currentResult, func(placeholder string) string {
+			if unwrapErr != nil {
+				return placeholder
+			}
+			replacement, err := unwrap(placeholder[1:])
+			if err != nil {
+				unwrapErr = err
+				return placeholder
+			}
+			return replacement
+		})
+		if unwrapErr != nil {
+			return "", unwrapErr
+		}
+
+		return unwrapped, nil
+	}
+
+	return unwrap(refID)
+}
+
+// hasPlaceholders reports whether s contains at least one "#<refID>" placeholder.
+func hasPlaceholders(s string) bool {
+	// Use the regular expression to match the string
+	return hasPlaceholdersRe.MatchString(s)
+}
+
+// logGraphiteMigrationStats logs the Graphite migration counters, unless all of them are zero.
+func logGraphiteMigrationStats(l log.Logger) {
+	if successfulGraphiteMigrationDashboard == 0 && successfulGraphiteMigrationCopy == 0 && failedGraphiteMigrations == 0 {
+		return
+	}
+	l.Info("Graphite migration stats",
+		"failed", failedGraphiteMigrations,
+		"success_copy", successfulGraphiteMigrationCopy,
+		"success_dashboard", successfulGraphiteMigrationDashboard)
+}
diff --git a/pkg/services/sqlstore/migrations/ualert/alert_rule_graphite_test.go b/pkg/services/sqlstore/migrations/ualert/alert_rule_graphite_test.go
new file mode 100644
index 00000000000..0159c8b8d01
--- /dev/null
+++ b/pkg/services/sqlstore/migrations/ualert/alert_rule_graphite_test.go
@@ -0,0 +1,121 @@
+package ualert
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+func TestUnwrapTarget(t *testing.T) {
+	// Define the test cases
+	testCases := []struct {
+		name     string
+		refID    string
+		targets  []target
+		expected string
+		wantErr  bool
+	}{
+		{
+			name:  "Valid reference substitution",
+			refID: "C",
+			targets: []target{
+				{
+					RefID:  "A",
+					Target: "first.query.count",
+				},
+				{
+					RefID:  "B",
+					Target: "second.query.count",
+				},
+				{
+					RefID:  "C",
+					Target: "scale(asPercent(diffSeries(#B, #A), #B), 100)",
+				},
+			},
+			expected: "scale(asPercent(diffSeries(second.query.count, first.query.count), second.query.count), 100)",
+			wantErr:  false,
+		},
+		{
+			name:  "No error on multiple matches",
+			refID: "D",
+			targets: []target{
+				{
+					RefID:  "D",
+					Target: "alias(divideSeries(scale(#C, 100.0), #B), 'SLI')",
+				},
+				{
+					RefID:  "C",
+					Target: "integral(aggregations.secondly.chef.api.client.domain.transaction.statusok.count)",
+				},
+				{
+					RefID:  "B",
+					Target: "sumSeries(#A, #C)",
+				},
+				{
+					RefID:  "A",
+					Target: "integral(aggregations.secondly.chef.api.client.domain.transaction.statusnook.count)",
+				},
+			},
+			expected: "alias(divideSeries(scale(integral(aggregations.secondly.chef.api.client.domain.transaction.statusok.count), 100.0), sumSeries(integral(aggregations.secondly.chef.api.client.domain.transaction.statusnook.count), integral(aggregations.secondly.chef.api.client.domain.transaction.statusok.count))), 'SLI')",
+			wantErr:  false,
+		},
+		{
+			name:  "Placeholders sharing a prefix are replaced independently",
+			refID: "C",
+			targets: []target{
+				{
+					RefID:  "A",
+					Target: "first.query.count",
+				},
+				{
+					RefID:  "AB",
+					Target: "second.query.count",
+				},
+				{
+					RefID:  "C",
+					Target: "sumSeries(#A, #AB)",
+				},
+			},
+			expected: "sumSeries(first.query.count, second.query.count)",
+			wantErr:  false,
+		},
+		{
+			name:  "Error on circular references",
+			refID: "D",
+			targets: []target{
+				{
+					RefID:  "D",
+					Target: "alias(divideSeries(scale(#C, 100.0), #B), 'SLI')",
+				},
+				{
+					RefID:  "C",
+					Target: "integral(aggregations.secondly.chef.api.client.domain.transaction.statusok.count)",
+				},
+				{
+					RefID:  "B",
+					Target: "sumSeries(#A, #D)",
+				},
+				{
+					RefID:  "A",
+					Target: "integral(aggregations.secondly.chef.api.client.domain.transaction.statusnook.count)",
+				},
+			},
+			wantErr: true,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			// Act
+			result, err := unwrapTarget(tc.refID, tc.targets)
+
+			// Assert
+			if tc.wantErr {
+				require.Error(t, err, "unwrapTarget should return an error")
+			} else {
+				require.NoError(t, err, "unwrapTarget should not return an error")
+				require.Equal(t, tc.expected, result, "unwrapTarget returned unexpected result")
+			}
+		})
+	}
+}
diff --git a/pkg/services/sqlstore/migrations/ualert/alert_rule_test.go b/pkg/services/sqlstore/migrations/ualert/alert_rule_test.go
index 2132e9fa652..5b5863d1ef3 100644
--- a/pkg/services/sqlstore/migrations/ualert/alert_rule_test.go
+++ b/pkg/services/sqlstore/migrations/ualert/alert_rule_test.go
@@ -8,6 +8,8 @@ import (
 	"github.com/stretchr/testify/require"
 
 	"github.com/grafana/grafana/pkg/components/simplejson"
+	"github.com/grafana/grafana/pkg/infra/log"
+	"github.com/grafana/grafana/pkg/services/dashboards"
 )
 
 func TestMigrateAlertRuleQueries(t *testing.T) {
@@ -18,18 +20,25 @@ func TestMigrateAlertRuleQueries(t *testing.T) {
 		err      error
 	}{
 		{
-			name:     "when a query has a sub query - it is extracted",
-			input:    simplejson.NewFromAny(map[string]interface{}{"targetFull": "thisisafullquery", "target": "ahalfquery"}),
+			name: "when a query has a sub query - it is extracted",
+			input: simplejson.NewFromAny(map[string]interface{}{
+				"targetFull": "thisisafullquery",
+				"target":     "ahalfquery",
+			}),
 			expected: `{"target":"thisisafullquery"}`,
 		},
 		{
-			name:     "when a query does not have a sub query - it no-ops",
-			input:    simplejson.NewFromAny(map[string]interface{}{"target": "ahalfquery"}),
+			name: "when a query does not have a sub query - it no-ops",
+			input: simplejson.NewFromAny(map[string]interface{}{
+				"target": "ahalfquery",
+			}),
 			expected: `{"target":"ahalfquery"}`,
 		},
 		{
-			name:     "when query was hidden, it removes the flag",
-			input:    simplejson.NewFromAny(map[string]interface{}{"hide": true}),
+			name: "when query was hidden, it removes the flag",
+			input: simplejson.NewFromAny(map[string]interface{}{
+				"hide": true,
+			}),
 			expected: `{}`,
 		},
 	}
@@ -38,7 +47,7 @@ func TestMigrateAlertRuleQueries(t *testing.T) {
 		t.Run(tt.name, func(t *testing.T) {
 			model, err := tt.input.Encode()
 			require.NoError(t, err)
-			queries, err := migrateAlertRuleQueries([]alertQuery{{Model: model}})
+			queries, err := migrateAlertRuleQueries(log.NewNopLogger(), 0, []alertQuery{{Model: model, DatasourceUID: "a"}}, 0, &dashboards.Dashboard{}, map[string]string{"a": "graphite"})
 			if tt.err != nil {
 				require.Error(t, err)
 				require.EqualError(t, err, tt.err.Error())
diff --git a/pkg/services/sqlstore/migrations/ualert/ualert.go b/pkg/services/sqlstore/migrations/ualert/ualert.go
index 6c404357709..a695b78c148 100644
--- a/pkg/services/sqlstore/migrations/ualert/ualert.go
+++ b/pkg/services/sqlstore/migrations/ualert/ualert.go
@@ -391,6 +391,8 @@ func (m *migration) Exec(sess *xorm.Session, mg *migrator.Migrator) error {
 		}
 	}
 
+	logGraphiteMigrationStats(m.mg.Logger)
+
 	for orgID := range rulesPerOrg {
 		if err := m.addPauseSilence(orgID); err != nil {
 			m.mg.Logger.Error("alert migration error: failed to create silence for paused alerts")