Files
grafana/pkg/services/ngalert/api/api.go
gotjosh a2f4344bf2 Alerting: Refactor & fix unified alerting metrics structure (#39151)
* Alerting: Refactor & fix unified alerting metrics structure

Fixes and refactors the metrics structure we have for the ngalert service. Now, each component has its own metric struct that includes the JUST the metrics it uses. Additionally, I have fixed the configuration metrics and added new metrics to determine if we have discovered and started all the necessary configurations of an instance.

This allows us to alert on `grafana_alerting_discovered_configurations - grafana_alerting_active_configurations != 0` to know whether an alertmanager instance did not start successfully.
2021-09-14 12:55:01 +01:00

106 lines
3.7 KiB
Go

package api
import (
"context"
"net/url"
"time"
"github.com/grafana/grafana/pkg/api/routing"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/services/datasourceproxy"
"github.com/grafana/grafana/pkg/services/datasources"
apimodels "github.com/grafana/grafana/pkg/services/ngalert/api/tooling/definitions"
"github.com/grafana/grafana/pkg/services/ngalert/metrics"
"github.com/grafana/grafana/pkg/services/ngalert/notifier"
"github.com/grafana/grafana/pkg/services/ngalert/schedule"
"github.com/grafana/grafana/pkg/services/ngalert/state"
"github.com/grafana/grafana/pkg/services/ngalert/store"
"github.com/grafana/grafana/pkg/services/quota"
"github.com/grafana/grafana/pkg/setting"
"github.com/grafana/grafana/pkg/tsdb"
)
// timeNow makes it possible to test usage of time
var timeNow = time.Now
type Scheduler interface {
AlertmanagersFor(orgID int64) []*url.URL
DroppedAlertmanagersFor(orgID int64) []*url.URL
}
type Alertmanager interface {
// Configuration
SaveAndApplyConfig(config *apimodels.PostableUserConfig) error
SaveAndApplyDefaultConfig() error
GetStatus() apimodels.GettableStatus
// Silences
CreateSilence(ps *apimodels.PostableSilence) (string, error)
DeleteSilence(silenceID string) error
GetSilence(silenceID string) (apimodels.GettableSilence, error)
ListSilences(filter []string) (apimodels.GettableSilences, error)
// Alerts
GetAlerts(active, silenced, inhibited bool, filter []string, receiver string) (apimodels.GettableAlerts, error)
GetAlertGroups(active, silenced, inhibited bool, filter []string, receiver string) (apimodels.AlertGroups, error)
// Testing
TestReceivers(ctx context.Context, c apimodels.TestReceiversConfigParams) (*notifier.TestReceiversResult, error)
}
// API handlers.
type API struct {
Cfg *setting.Cfg
DatasourceCache datasources.CacheService
RouteRegister routing.RouteRegister
DataService *tsdb.Service
QuotaService *quota.QuotaService
Schedule schedule.ScheduleService
RuleStore store.RuleStore
InstanceStore store.InstanceStore
AlertingStore store.AlertingStore
AdminConfigStore store.AdminConfigurationStore
DataProxy *datasourceproxy.DataSourceProxyService
MultiOrgAlertmanager *notifier.MultiOrgAlertmanager
StateManager *state.Manager
}
// RegisterAPIEndpoints registers API handlers
func (api *API) RegisterAPIEndpoints(m *metrics.API) {
logger := log.New("ngalert.api")
proxy := &AlertingProxy{
DataProxy: api.DataProxy,
}
// Register endpoints for proxying to Alertmanager-compatible backends.
api.RegisterAlertmanagerApiEndpoints(NewForkedAM(
api.DatasourceCache,
NewLotexAM(proxy, logger),
AlertmanagerSrv{store: api.AlertingStore, mam: api.MultiOrgAlertmanager, log: logger},
), m)
// Register endpoints for proxying to Prometheus-compatible backends.
api.RegisterPrometheusApiEndpoints(NewForkedProm(
api.DatasourceCache,
NewLotexProm(proxy, logger),
PrometheusSrv{log: logger, manager: api.StateManager, store: api.RuleStore},
), m)
// Register endpoints for proxying to Cortex Ruler-compatible backends.
api.RegisterRulerApiEndpoints(NewForkedRuler(
api.DatasourceCache,
NewLotexRuler(proxy, logger),
RulerSrv{DatasourceCache: api.DatasourceCache, QuotaService: api.QuotaService, manager: api.StateManager, store: api.RuleStore, log: logger},
), m)
api.RegisterTestingApiEndpoints(TestingApiSrv{
AlertingProxy: proxy,
Cfg: api.Cfg,
DataService: api.DataService,
DatasourceCache: api.DatasourceCache,
log: logger,
}, m)
api.RegisterConfigurationApiEndpoints(AdminSrv{
store: api.AdminConfigStore,
log: logger,
scheduler: api.Schedule,
}, m)
}