diff --git a/go.mod b/go.mod index 7cd05dbc66a..7e4734a5631 100644 --- a/go.mod +++ b/go.mod @@ -42,7 +42,7 @@ require ( github.com/go-sql-driver/mysql v1.6.0 github.com/go-stack/stack v1.8.0 github.com/gobwas/glob v0.2.3 - github.com/gofrs/uuid v4.0.0+incompatible // indirect + github.com/gofrs/uuid v4.0.0+incompatible github.com/gogo/protobuf v1.3.2 github.com/golang/mock v1.6.0 github.com/golang/snappy v0.0.4 @@ -243,6 +243,7 @@ require ( github.com/armon/go-radix v1.0.0 github.com/blugelabs/bluge v0.1.9 github.com/blugelabs/bluge_segment_api v0.2.0 + github.com/dlmiddlecote/sqlstats v1.0.2 github.com/getkin/kin-openapi v0.94.0 github.com/golang-migrate/migrate/v4 v4.7.0 github.com/google/go-github/v45 v45.2.0 diff --git a/go.sum b/go.sum index 2151dcceef6..28cff2143e2 100644 --- a/go.sum +++ b/go.sum @@ -726,6 +726,8 @@ github.com/dimchansky/utfbom v1.1.0/go.mod h1:rO41eb7gLfo8SF1jd9F8HplJm1Fewwi4mQ github.com/dimchansky/utfbom v1.1.1/go.mod h1:SxdoEBH5qIqFocHMyGOXVAybYJdr71b1Q/j0mACtrfE= github.com/dlclark/regexp2 v1.4.1-0.20201116162257-a2a8dda75c91 h1:Izz0+t1Z5nI16/II7vuEo/nHjodOg0p7+OiDpjX5t1E= github.com/dlclark/regexp2 v1.4.1-0.20201116162257-a2a8dda75c91/go.mod h1:2pZnwuY/m+8K6iRw6wQdMtk+rH5tNGR1i55kozfMjCc= +github.com/dlmiddlecote/sqlstats v1.0.2 h1:gSU11YN23D/iY50A2zVYwgXgy072khatTsIW6UPjUtI= +github.com/dlmiddlecote/sqlstats v1.0.2/go.mod h1:0CWaIh/Th+z2aI6Q9Jpfg/o21zmGxWhbByHgQSCUQvY= github.com/dnaeon/go-vcr v1.0.1/go.mod h1:aBB1+wY4s93YsC3HHjMBMrwTj2R9FHDzUr9KyGc8n1E= github.com/dnaeon/go-vcr v1.1.0 h1:ReYa/UBrRyQdant9B4fNHGoCNKw6qh6P0fsdGmZpR7c= github.com/dnaeon/go-vcr v1.1.0/go.mod h1:M7tiix8f0r6mKKJ3Yq/kqU1OYf3MnfmBWVbPx/yU9ko= @@ -1790,6 +1792,7 @@ github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m github.com/mattn/go-shellwords v1.0.3/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= github.com/mattn/go-sqlite3 v1.10.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= github.com/mattn/go-sqlite3 v1.11.0/go.mod h1:FPy6KqzDD04eiIsT53CuJW3U88zkxoIYsOqkbpncsNc= +github.com/mattn/go-sqlite3 v1.14.6/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= github.com/mattn/go-sqlite3 v1.14.7 h1:fxWBnXkxfM6sRiuH3bqJ4CfzZojMOLVc0UTsTglEghA= github.com/mattn/go-sqlite3 v1.14.7/go.mod h1:NyWgC/yNuGj7Q9rpYnZvas74GogHl5/Z4A/KQRfk6bU= github.com/mattn/go-tty v0.0.0-20180907095812-13ff1204f104/go.mod h1:XPvLUNfbS4fJH25nqRHfWLMa1ONC8Amw+mIA639KxkE= diff --git a/pkg/services/sqlstore/sqlstore.go b/pkg/services/sqlstore/sqlstore.go index 022836826de..f85e4e38a05 100644 --- a/pkg/services/sqlstore/sqlstore.go +++ b/pkg/services/sqlstore/sqlstore.go @@ -11,6 +11,7 @@ import ( "sync" "time" + "github.com/dlmiddlecote/sqlstats" "github.com/go-sql-driver/mysql" _ "github.com/lib/pq" "github.com/prometheus/client_golang/prometheus" @@ -54,17 +55,6 @@ type SQLStore struct { skipEnsureDefaultOrgAndUser bool migrations registry.DatabaseMigrator tracer tracing.Tracer - metrics struct { - maxOpenConnections prometheus.Gauge - openConnections prometheus.Gauge - inUse prometheus.Gauge - idle prometheus.Gauge - waitCount prometheus.Counter - waitDuration prometheus.Counter - maxIdleClosed prometheus.Counter - maxIdleTimeClosed prometheus.Counter - maxLifetimeClosed prometheus.Counter - } } func ProvideService(cfg *setting.Cfg, cacheService *localcache.CacheService, migrations registry.DatabaseMigrator, bus bus.Bus, tracer tracing.Tracer) (*SQLStore, error) { @@ -86,9 +76,13 @@ func ProvideService(cfg *setting.Cfg, cacheService *localcache.CacheService, mig } s.tracer = tracer - s.initMetrics() + // initialize and register metrics wrapper around the *sql.DB + db := s.engine.DB().DB - prometheus.MustRegister(s) + // register the go_sql_stats_connections_* metrics + prometheus.MustRegister(sqlstats.NewStatsCollector("grafana", db)) + // TODO: deprecate/remove these metrics + prometheus.MustRegister(newSQLStoreMetrics(db)) return s, nil } @@ -442,118 +436,6 @@ func (ss *SQLStore) readConfig() error { return nil } -// initMetrics initializes the database connection metrics -func (ss *SQLStore) initMetrics() { - namespace := "grafana" - subsystem := "database" - - ss.metrics.maxOpenConnections = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "conn_max_open", - Help: "Maximum number of open connections to the database", - }) - - ss.metrics.openConnections = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "conn_open", - Help: "The number of established connections both in use and idle", - }) - - ss.metrics.inUse = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "conn_in_use", - Help: "The number of connections currently in use", - }) - - ss.metrics.idle = prometheus.NewGauge(prometheus.GaugeOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "conn_idle", - Help: "The number of idle connections", - }) - - ss.metrics.waitCount = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "conn_wait_count_total", - Help: "The total number of connections waited for", - }) - - ss.metrics.waitDuration = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "conn_wait_duration_seconds", - Help: "The total time blocked waiting for a new connection", - }) - - ss.metrics.maxIdleClosed = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "conn_max_idle_closed_total", - Help: "The total number of connections closed due to SetMaxIdleConns", - }) - - ss.metrics.maxIdleTimeClosed = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "conn_max_idle_closed_seconds", - Help: "The total number of connections closed due to SetConnMaxIdleTime", - }) - - ss.metrics.maxLifetimeClosed = prometheus.NewCounter(prometheus.CounterOpts{ - Namespace: namespace, - Subsystem: subsystem, - Name: "conn_max_lifetime_closed_total", - Help: "The total number of connections closed due to SetConnMaxLifetime", - }) -} - -// collectDBStats instruments connections stats from the database. -func (ss *SQLStore) collectDBstats() { - dbstats := ss.engine.DB().Stats() - ss.metrics.maxOpenConnections.Set(float64(dbstats.MaxOpenConnections)) - ss.metrics.openConnections.Set(float64(dbstats.MaxOpenConnections)) - ss.metrics.inUse.Set(float64(dbstats.InUse)) - ss.metrics.idle.Set(float64(dbstats.Idle)) - - ss.metrics.waitCount.Add(float64(dbstats.WaitCount)) - ss.metrics.waitDuration.Add(float64(dbstats.WaitDuration / time.Second)) - ss.metrics.maxIdleClosed.Add(float64(dbstats.MaxIdleClosed)) - ss.metrics.maxIdleTimeClosed.Add(float64(dbstats.MaxIdleTimeClosed)) - ss.metrics.maxLifetimeClosed.Add(float64(dbstats.MaxLifetimeClosed)) -} - -// Collect implements Prometheus.Collector. -func (ss *SQLStore) Collect(ch chan<- prometheus.Metric) { - ss.collectDBstats() - - ss.metrics.maxOpenConnections.Collect(ch) - ss.metrics.openConnections.Collect(ch) - ss.metrics.inUse.Collect(ch) - ss.metrics.idle.Collect(ch) - ss.metrics.waitCount.Collect(ch) - ss.metrics.waitDuration.Collect(ch) - ss.metrics.maxIdleClosed.Collect(ch) - ss.metrics.maxIdleTimeClosed.Collect(ch) - ss.metrics.maxLifetimeClosed.Collect(ch) -} - -// Describe implements Prometheus.Collector. -func (ss *SQLStore) Describe(ch chan<- *prometheus.Desc) { - ss.metrics.maxOpenConnections.Describe(ch) - ss.metrics.openConnections.Describe(ch) - ss.metrics.inUse.Describe(ch) - ss.metrics.idle.Describe(ch) - ss.metrics.waitCount.Describe(ch) - ss.metrics.waitDuration.Describe(ch) - ss.metrics.maxIdleClosed.Describe(ch) - ss.metrics.maxIdleTimeClosed.Describe(ch) - ss.metrics.maxLifetimeClosed.Describe(ch) -} - // ITestDB is an interface of arguments for testing db type ITestDB interface { Helper() diff --git a/pkg/services/sqlstore/sqlstore_metrics.go b/pkg/services/sqlstore/sqlstore_metrics.go new file mode 100644 index 00000000000..17621189e29 --- /dev/null +++ b/pkg/services/sqlstore/sqlstore_metrics.go @@ -0,0 +1,144 @@ +package sqlstore + +import ( + "github.com/dlmiddlecote/sqlstats" + "github.com/prometheus/client_golang/prometheus" +) + +type sqlStoreMetrics struct { + db sqlstats.StatsGetter + + // gauges + maxOpenConnections *prometheus.Desc + openConnections *prometheus.Desc + inUse *prometheus.Desc + idle *prometheus.Desc + + // counters + waitCount *prometheus.Desc + waitDuration *prometheus.Desc + maxIdleClosed *prometheus.Desc + maxIdleTimeClosed *prometheus.Desc + maxLifetimeClosed *prometheus.Desc +} + +func newSQLStoreMetrics(db sqlstats.StatsGetter) *sqlStoreMetrics { + ns := "grafana" + sub := "database" + + return &sqlStoreMetrics{ + db: db, + maxOpenConnections: prometheus.NewDesc( + prometheus.BuildFQName(ns, sub, "conn_max_open"), + "Maximum number of open connections to the database", + nil, nil, + ), + openConnections: prometheus.NewDesc( + prometheus.BuildFQName(ns, sub, "conn_open"), + "The number of established connections both in use and idle", + nil, nil, + ), + inUse: prometheus.NewDesc( + prometheus.BuildFQName(ns, sub, "conn_in_use"), + "The number of connections currently in use", + nil, nil, + ), + idle: prometheus.NewDesc( + prometheus.BuildFQName(ns, sub, "conn_idle"), + "The number of idle connections", + nil, nil, + ), + + waitCount: prometheus.NewDesc( + prometheus.BuildFQName(ns, sub, "conn_wait_count_total"), + "The total number of connections waited for", + nil, nil, + ), + waitDuration: prometheus.NewDesc( + prometheus.BuildFQName(ns, sub, "conn_wait_duration_seconds"), + "The total time blocked waiting for a new connection", + nil, nil, + ), + maxIdleClosed: prometheus.NewDesc( + prometheus.BuildFQName(ns, sub, "conn_max_idle_closed_total"), + "The total number of connections closed due to SetMaxIdleConns", + nil, nil, + ), + maxIdleTimeClosed: prometheus.NewDesc( + prometheus.BuildFQName(ns, sub, "conn_max_idle_closed_seconds"), + "The total number of connections closed due to SetConnMaxIdleTime", + nil, nil, + ), + maxLifetimeClosed: prometheus.NewDesc( + prometheus.BuildFQName(ns, sub, "conn_max_lifetime_closed_total"), + "The total number of connections closed due to SetConnMaxLifetime", + nil, nil, + ), + } +} + +// Collect implements Prometheus.Collector. +func (m *sqlStoreMetrics) Collect(ch chan<- prometheus.Metric) { + stats := m.db.Stats() + + ch <- prometheus.MustNewConstMetric( + m.maxOpenConnections, + prometheus.GaugeValue, + float64(stats.MaxOpenConnections), + ) + ch <- prometheus.MustNewConstMetric( + m.openConnections, + prometheus.GaugeValue, + float64(stats.OpenConnections), + ) + ch <- prometheus.MustNewConstMetric( + m.inUse, + prometheus.GaugeValue, + float64(stats.InUse), + ) + ch <- prometheus.MustNewConstMetric( + m.idle, + prometheus.GaugeValue, + float64(stats.Idle), + ) + + ch <- prometheus.MustNewConstMetric( + m.waitCount, + prometheus.CounterValue, + float64(stats.WaitCount), + ) + ch <- prometheus.MustNewConstMetric( + m.waitDuration, + prometheus.CounterValue, + stats.WaitDuration.Seconds(), + ) + ch <- prometheus.MustNewConstMetric( + m.maxIdleClosed, + prometheus.CounterValue, + float64(stats.MaxIdleClosed), + ) + ch <- prometheus.MustNewConstMetric( + m.maxIdleTimeClosed, + prometheus.CounterValue, + float64(stats.MaxIdleTimeClosed), + ) + ch <- prometheus.MustNewConstMetric( + m.maxLifetimeClosed, + prometheus.CounterValue, + float64(stats.MaxLifetimeClosed), + ) +} + +// Describe implements Prometheus.Collector. +func (m *sqlStoreMetrics) Describe(ch chan<- *prometheus.Desc) { + ch <- m.maxOpenConnections + ch <- m.openConnections + ch <- m.inUse + ch <- m.idle + + ch <- m.waitCount + ch <- m.waitDuration + ch <- m.maxIdleClosed + ch <- m.maxIdleTimeClosed + ch <- m.maxLifetimeClosed +} diff --git a/pkg/services/sqlstore/sqlstore_metrics_test.go b/pkg/services/sqlstore/sqlstore_metrics_test.go new file mode 100644 index 00000000000..60d604a1d5c --- /dev/null +++ b/pkg/services/sqlstore/sqlstore_metrics_test.go @@ -0,0 +1,68 @@ +package sqlstore + +import ( + "database/sql" + "strings" + "testing" + "time" + + "github.com/dlmiddlecote/sqlstats" + "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/stretchr/testify/require" +) + +func TestSQLStore_Metrics(t *testing.T) { + stats := sql.DBStats{ + MaxOpenConnections: 9, + OpenConnections: 8, + InUse: 4, + Idle: 4, + WaitCount: 5, + WaitDuration: 6 * time.Second, + MaxIdleClosed: 7, + MaxIdleTimeClosed: 8, + MaxLifetimeClosed: 9, + } + + m := newSQLStoreMetrics(&fakeStatsGetter{stats: stats}) + + require.NoError(t, testutil.CollectAndCompare(m, strings.NewReader(` + # HELP grafana_database_conn_idle The number of idle connections + # TYPE grafana_database_conn_idle gauge + grafana_database_conn_idle 4 + # HELP grafana_database_conn_in_use The number of connections currently in use + # TYPE grafana_database_conn_in_use gauge + grafana_database_conn_in_use 4 + # HELP grafana_database_conn_max_idle_closed_seconds The total number of connections closed due to SetConnMaxIdleTime + # TYPE grafana_database_conn_max_idle_closed_seconds counter + grafana_database_conn_max_idle_closed_seconds 8 + # HELP grafana_database_conn_max_idle_closed_total The total number of connections closed due to SetMaxIdleConns + # TYPE grafana_database_conn_max_idle_closed_total counter + grafana_database_conn_max_idle_closed_total 7 + # HELP grafana_database_conn_max_lifetime_closed_total The total number of connections closed due to SetConnMaxLifetime + # TYPE grafana_database_conn_max_lifetime_closed_total counter + grafana_database_conn_max_lifetime_closed_total 9 + # HELP grafana_database_conn_max_open Maximum number of open connections to the database + # TYPE grafana_database_conn_max_open gauge + grafana_database_conn_max_open 9 + # HELP grafana_database_conn_open The number of established connections both in use and idle + # TYPE grafana_database_conn_open gauge + grafana_database_conn_open 8 + # HELP grafana_database_conn_wait_count_total The total number of connections waited for + # TYPE grafana_database_conn_wait_count_total counter + grafana_database_conn_wait_count_total 5 + # HELP grafana_database_conn_wait_duration_seconds The total time blocked waiting for a new connection + # TYPE grafana_database_conn_wait_duration_seconds counter + grafana_database_conn_wait_duration_seconds 6 + `))) +} + +type fakeStatsGetter struct { + stats sql.DBStats +} + +var _ sqlstats.StatsGetter = (*fakeStatsGetter)(nil) + +func (f *fakeStatsGetter) Stats() sql.DBStats { + return f.stats +}