9ddc70423b
* Provisioning: Cleanup tester interface * undo accidental change * cleanup * cleanup test
209 lines
6.5 KiB
Go
209 lines
6.5 KiB
Go
package controller
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"time"
|
|
|
|
provisioning "github.com/grafana/grafana/apps/provisioning/pkg/apis/provisioning/v0alpha1"
|
|
"github.com/grafana/grafana/apps/provisioning/pkg/repository"
|
|
)
|
|
|
|
// StatusPatcher defines the interface for updating repository status
|
|
//
|
|
//go:generate mockery --name=StatusPatcher
|
|
type StatusPatcher interface {
|
|
Patch(ctx context.Context, repo *provisioning.Repository, patchOperations ...map[string]interface{}) error
|
|
}
|
|
|
|
// HealthChecker provides unified health checking for repositories
|
|
type HealthChecker struct {
|
|
statusPatcher StatusPatcher
|
|
}
|
|
|
|
// NewHealthChecker creates a new health checker
|
|
func NewHealthChecker(statusPatcher StatusPatcher) *HealthChecker {
|
|
return &HealthChecker{
|
|
statusPatcher: statusPatcher,
|
|
}
|
|
}
|
|
|
|
// ShouldCheckHealth determines if a repository health check should be performed
|
|
func (hc *HealthChecker) ShouldCheckHealth(repo *provisioning.Repository) bool {
|
|
// If the repository has been updated, run the health check
|
|
if repo.Generation != repo.Status.ObservedGeneration {
|
|
return true
|
|
}
|
|
|
|
// If the repository has a hook error, don't run the health check
|
|
if repo.Status.Health.Error == provisioning.HealthFailureHook {
|
|
return false
|
|
}
|
|
|
|
// Check general timing for health checks
|
|
return !hc.hasRecentHealthCheck(repo.Status.Health)
|
|
}
|
|
|
|
// hasRecentHealthCheck checks if a health check was performed recently (for timing purposes)
|
|
func (hc *HealthChecker) hasRecentHealthCheck(healthStatus provisioning.HealthStatus) bool {
|
|
if healthStatus.Checked == 0 {
|
|
return false // Never checked
|
|
}
|
|
|
|
age := time.Since(time.UnixMilli(healthStatus.Checked))
|
|
if healthStatus.Healthy {
|
|
return age <= time.Minute*5 // Recent if checked within 5 minutes when healthy
|
|
}
|
|
return age <= time.Minute // Recent if checked within 1 minute when unhealthy
|
|
}
|
|
|
|
// HasRecentFailure checks if there's a recent failure of a specific type
|
|
func (hc *HealthChecker) HasRecentFailure(healthStatus provisioning.HealthStatus, failureType provisioning.HealthFailureType) bool {
|
|
if healthStatus.Checked == 0 || healthStatus.Healthy || healthStatus.Error != failureType {
|
|
return false // No failure of this type
|
|
}
|
|
|
|
age := time.Since(time.UnixMilli(healthStatus.Checked))
|
|
return age <= time.Minute // Recent if within 1 minute
|
|
}
|
|
|
|
// RecordFailureAndUpdate records a failure and updates the repository status
|
|
func (hc *HealthChecker) RecordFailure(ctx context.Context, failureType provisioning.HealthFailureType, err error, repo *provisioning.Repository) error {
|
|
// Create the health status with the failure
|
|
healthStatus := hc.recordFailure(failureType, err)
|
|
|
|
// Create patch operation
|
|
patchOp := map[string]interface{}{
|
|
"op": "replace",
|
|
"path": "/status/health",
|
|
"value": healthStatus,
|
|
}
|
|
|
|
// Apply the patch
|
|
return hc.statusPatcher.Patch(ctx, repo, patchOp)
|
|
}
|
|
|
|
// recordFailure creates a health status with a specific failure
|
|
func (hc *HealthChecker) recordFailure(failureType provisioning.HealthFailureType, err error) provisioning.HealthStatus {
|
|
return provisioning.HealthStatus{
|
|
Healthy: false,
|
|
Error: failureType,
|
|
Checked: time.Now().UnixMilli(),
|
|
Message: []string{err.Error()},
|
|
}
|
|
}
|
|
|
|
// hasHealthStatusChanged checks if the health status has meaningfully changed
|
|
func (hc *HealthChecker) hasHealthStatusChanged(old, new provisioning.HealthStatus) bool {
|
|
if old.Healthy != new.Healthy {
|
|
return true
|
|
}
|
|
|
|
if len(old.Message) != len(new.Message) {
|
|
return true
|
|
}
|
|
|
|
if old.Checked != new.Checked {
|
|
return true
|
|
}
|
|
|
|
for i, oldMsg := range old.Message {
|
|
if i >= len(new.Message) || oldMsg != new.Message[i] {
|
|
return true
|
|
}
|
|
}
|
|
|
|
return false
|
|
}
|
|
|
|
// RefreshHealth performs a health check on an existing repository,
|
|
// updates its status if needed, and returns the test results
|
|
func (hc *HealthChecker) RefreshHealth(ctx context.Context, repo repository.Repository) (*provisioning.TestResults, provisioning.HealthStatus, error) {
|
|
cfg := repo.Config()
|
|
|
|
// Use health checker to perform comprehensive health check with existing status
|
|
testResults, newHealthStatus, err := hc.refreshHealth(ctx, repo, cfg.Status.Health)
|
|
if err != nil {
|
|
return nil, provisioning.HealthStatus{}, fmt.Errorf("health check failed: %w", err)
|
|
}
|
|
|
|
// Only update if health status actually changed
|
|
if hc.hasHealthStatusChanged(cfg.Status.Health, newHealthStatus) {
|
|
patchOp := map[string]interface{}{
|
|
"op": "replace",
|
|
"path": "/status/health",
|
|
"value": newHealthStatus,
|
|
}
|
|
|
|
if err := hc.statusPatcher.Patch(ctx, cfg, patchOp); err != nil {
|
|
return testResults, newHealthStatus, fmt.Errorf("update health status: %w", err)
|
|
}
|
|
}
|
|
|
|
return testResults, newHealthStatus, nil
|
|
}
|
|
|
|
// RefreshTimestamp updates the health status timestamp without changing other fields
|
|
func (hc *HealthChecker) RefreshTimestamp(ctx context.Context, repo *provisioning.Repository) error {
|
|
// Update the timestamp on the existing health status
|
|
healthStatus := repo.Status.Health
|
|
healthStatus.Checked = time.Now().UnixMilli()
|
|
|
|
// Create patch operation
|
|
patchOp := map[string]interface{}{
|
|
"op": "replace",
|
|
"path": "/status/health",
|
|
"value": healthStatus,
|
|
}
|
|
|
|
// Apply the patch
|
|
return hc.statusPatcher.Patch(ctx, repo, patchOp)
|
|
}
|
|
|
|
// refreshHealth performs a comprehensive health check
|
|
// Returns test results, health status, and any error
|
|
func (hc *HealthChecker) refreshHealth(ctx context.Context, repo repository.Repository, existingStatus provisioning.HealthStatus) (*provisioning.TestResults, provisioning.HealthStatus, error) {
|
|
res, err := repository.TestRepository(ctx, repo)
|
|
if err != nil {
|
|
return nil, existingStatus, fmt.Errorf("failed to test repository: %w", err)
|
|
}
|
|
|
|
if !res.Success {
|
|
// Build error messages
|
|
var errorMsgs []string
|
|
for _, testErr := range res.Errors {
|
|
if testErr.Detail != "" {
|
|
errorMsgs = append(errorMsgs, testErr.Detail)
|
|
}
|
|
}
|
|
|
|
healthStatus := provisioning.HealthStatus{
|
|
Healthy: false,
|
|
Error: provisioning.HealthFailureHealth,
|
|
Checked: time.Now().UnixMilli(),
|
|
Message: errorMsgs,
|
|
}
|
|
|
|
return res, healthStatus, nil
|
|
}
|
|
|
|
// Health check succeeded
|
|
now := time.Now()
|
|
healthStatus := provisioning.HealthStatus{
|
|
Healthy: true,
|
|
Checked: now.UnixMilli(),
|
|
}
|
|
|
|
// If the existing status is already healthy with no error messages and
|
|
// the last check was recent (within 30 seconds), preserve the existing timestamp
|
|
// to avoid unnecessary updates
|
|
if existingStatus.Healthy && existingStatus.Error == "" && len(existingStatus.Message) == 0 {
|
|
lastCheckedTime := time.UnixMilli(existingStatus.Checked)
|
|
if now.Sub(lastCheckedTime) < 30*time.Second {
|
|
healthStatus.Checked = existingStatus.Checked
|
|
}
|
|
}
|
|
|
|
return res, healthStatus, nil
|
|
}
|