73873f5a8a
* Alerting: Optimize rule status gathering APIs when a limit is applied. The frontend very commonly calls the `/rules` API with `limit_alerts=16`. When there are a very large number of alert instances present, this API is quite slow to respond, and profiling suggests that a big part of the problem is sorting the alerts by importance, in order to select the first 16. This changes the application of the limit to use a more efficient heap-based top-k algorithm. This maintains a slice of only the highest ranked items whilst iterating the full set of alert instances, which substantially reduces the number of comparisons needed. This is particularly effective, as the `AlertsByImportance` comparison is quite complex. I've included a benchmark to compare the new TopK function to the existing Sort/limit strategy. It shows that for small limits, the new approach is much faster, especially at high numbers of alerts, e.g. 100K alerts / limit 16: 1.91s vs 0.02s (-99%) For situations where there is no effective limit, sorting is marginally faster, therefore in the API implementation, if there is either a) no limit or b) no effective limit, then we just sort the alerts as before. There is also a space overhead using a heap which would matter for large limits. * Remove commented test cases * Make linter happy
88 lines
2.1 KiB
Go
package definitions
|
|
|
|
import (
|
|
"flag"
|
|
"fmt"
|
|
"math/rand"
|
|
"testing"
|
|
)
|
|
|
|
// topkStrategy selects which top-k implementation the benchmark exercises:
// "sort" (full sort then truncate) or "heap" (AlertsBy.TopK).
var topkStrategy = flag.String("topk", "heap", "topk strategy to benchmark. choices: sort, heap")

// showComparisons, when set, prints the number of comparator invocations per
// benchmark iteration so the strategies' comparison counts can be compared.
var showComparisons = flag.Bool("show-comparisons", false, "whether to show the number of comparisons made")
|
|
|
|
func makeAlerts(amount int) []Alert {
|
|
// A typical distribution of alert states is that most are Normal
|
|
// and a few are Alerting, so we assume 99% Normal and 1% Alerting.
|
|
percentAlerting := 1
|
|
|
|
// Series will commonly have many labels.
|
|
numLabels := 10
|
|
|
|
alerts := make([]Alert, amount)
|
|
|
|
for i := 0; i < len(alerts); i++ {
|
|
alerts[i].Labels = make(map[string]string)
|
|
for label := 0; label < numLabels; label++ {
|
|
alerts[i].Labels[fmt.Sprintf("label_%d", label)] = fmt.Sprintf("label_%d_value_%d", label, i%100)
|
|
}
|
|
|
|
if i%100 < percentAlerting {
|
|
alerts[i].State = "alerting"
|
|
// Should populate ActiveAt because this prevents needing label comparison
|
|
} else {
|
|
alerts[i].State = "normal"
|
|
}
|
|
}
|
|
|
|
// Shuffle in a repeatable order to avoid any bias from the initial ordering.
|
|
r := rand.New(rand.NewSource(1))
|
|
r.Shuffle(len(alerts), func(i, j int) { alerts[i], alerts[j] = alerts[j], alerts[i] })
|
|
|
|
return alerts
|
|
}
|
|
|
|
func BenchmarkSortAlertsByImportance(b *testing.B) {
|
|
var topkFunc func(AlertsBy, []Alert, int)
|
|
|
|
switch *topkStrategy {
|
|
case "sort":
|
|
topkFunc = func(by AlertsBy, alerts []Alert, limit int) {
|
|
by.Sort(alerts)
|
|
if len(alerts) > limit {
|
|
_ = alerts[0:limit]
|
|
}
|
|
}
|
|
|
|
case "heap":
|
|
topkFunc = func(by AlertsBy, alerts []Alert, limit int) {
|
|
_ = by.TopK(alerts, limit)
|
|
}
|
|
}
|
|
|
|
for _, n := range []int{1000, 10000, 100000} {
|
|
for _, k := range []int{16, 100, 1000, 100000} {
|
|
b.Run(fmt.Sprintf("n_%d_k_%d", n, k), func(b *testing.B) {
|
|
b.StopTimer()
|
|
|
|
for bi := 0; bi < b.N; bi++ {
|
|
alerts := makeAlerts(n)
|
|
|
|
comparisons := 0
|
|
by := func(a1, a2 *Alert) bool {
|
|
comparisons++
|
|
return AlertsByImportance(a1, a2)
|
|
}
|
|
|
|
b.StartTimer()
|
|
topkFunc(by, alerts, k)
|
|
b.StopTimer()
|
|
|
|
if *showComparisons {
|
|
fmt.Printf("Number of comparisons (strategy: %s): %d\n", *topkStrategy, comparisons)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
}
|
|
}
|