cdc6a6114c
* Provisioning: Improve logging and tracing in job processing - Add comprehensive tracing with OpenTelemetry spans across all job operations - Enhance logging with consistent style: lowercase, concise messages, appropriate log levels - Use past tense for completed lifecycle events (e.g., 'stopped' vs 'stop') - Add structured logging with contextual attributes for better searchability - Handle graceful shutdowns without throwing errors on context cancellation - Refactor Cleanup method into listExpiredJobs and cleanUpExpiredJob for better code quality - Avoid double logging by only logging errors when handled locally - Add tracing and logging to historyjob controller cleanup operations Files modified: - pkg/registry/apis/provisioning/jobs/driver.go: Add tracing spans and improve error handling for graceful shutdown - pkg/registry/apis/provisioning/jobs/concurrent_driver.go: Add tracing and consistent logging - pkg/registry/apis/provisioning/jobs/persistentstore.go: Add comprehensive tracing and logging to all public methods, refactor cleanup - apps/provisioning/pkg/controller/historyjob.go: Add tracing and improve logging consistency * Update pkg/registry/apis/provisioning/jobs/persistentstore.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Refactor logging in persistentstore.go - Remove debug log statements at the start of job operations for cleaner output - Maintain structured logging with contextual attributes for improved traceability Files modified: - pkg/registry/apis/provisioning/jobs/persistentstore.go: Clean up logging for job operations * Enhance logging and tracing in provisioning job operations - Introduce OpenTelemetry spans for better observability in job processing and webhook handling - Improve structured logging with contextual attributes for key operations - Remove unnecessary tracing spans in long-running functions to streamline performance - Update error handling to record errors in spans for better traceability Files 
modified: - pkg/registry/apis/provisioning/controller/repository.go: Add tracing and structured logging to sync job operations - pkg/registry/apis/provisioning/jobs/concurrent_driver.go: Remove tracing span from long-running function - pkg/registry/apis/provisioning/jobs/driver.go: Enhance logging and tracing in job processing - pkg/registry/apis/provisioning/webhooks/webhook.go: Implement tracing and structured logging for webhook connections * Update pkg/registry/apis/provisioning/jobs/driver.go Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> * Improve error handling in ConcurrentJobDriver to differentiate between graceful shutdown and unexpected stops * Remove unused import in driver.go to clean up code --------- Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
100 lines
2.8 KiB
Go
100 lines
2.8 KiB
Go
package controller
|
|
|
|
import (
	"context"
	"fmt"
	"time"

	apierrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apiserver/pkg/endpoints/request"
	"k8s.io/client-go/tools/cache"

	"github.com/grafana/grafana-app-sdk/logging"
	provisioning "github.com/grafana/grafana/apps/provisioning/pkg/apis/provisioning/v0alpha1"
	client "github.com/grafana/grafana/apps/provisioning/pkg/generated/clientset/versioned/typed/provisioning/v0alpha1"
	informer "github.com/grafana/grafana/apps/provisioning/pkg/generated/informers/externalversions/provisioning/v0alpha1"
	"github.com/grafana/grafana/pkg/apimachinery/identity"
)
|
|
|
|
// historyJobControllerLoggerName is the value attached under the "logger"
// attribute on every log line emitted by the HistoryJobController.
const historyJobControllerLoggerName = "provisioning-historyjob-controller"
|
|
|
|
// HistoryJobController manages the cleanup of old HistoryJob entries.
|
|
type HistoryJobController struct {
|
|
client client.ProvisioningV0alpha1Interface
|
|
logger logging.Logger
|
|
expirationTime time.Duration
|
|
}
|
|
|
|
// NewHistoryJobController creates a new HistoryJobController.
|
|
func NewHistoryJobController(
|
|
provisioningClient client.ProvisioningV0alpha1Interface,
|
|
historyJobInformer informer.HistoricJobInformer,
|
|
expirationTime time.Duration,
|
|
) (*HistoryJobController, error) {
|
|
c := &HistoryJobController{
|
|
client: provisioningClient,
|
|
logger: logging.DefaultLogger.With("logger", historyJobControllerLoggerName),
|
|
expirationTime: expirationTime,
|
|
}
|
|
|
|
// Use the resync events from the shared informer to trigger cleanup for each job
|
|
_, err := historyJobInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
|
|
AddFunc: func(obj interface{}) {
|
|
c.cleanupJob(obj)
|
|
},
|
|
UpdateFunc: func(oldObj, newObj interface{}) {
|
|
c.cleanupJob(newObj)
|
|
},
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return c, nil
|
|
}
|
|
|
|
func (c *HistoryJobController) cleanupJob(obj interface{}) {
|
|
job, ok := obj.(*provisioning.HistoricJob)
|
|
if !ok {
|
|
c.logger.Error("unexpected object type - expected HistoricJob", "type", obj)
|
|
return
|
|
}
|
|
|
|
age := time.Since(job.CreationTimestamp.Time)
|
|
|
|
// Only cleanup jobs older than expiration time
|
|
if age <= c.expirationTime {
|
|
return
|
|
}
|
|
|
|
logger := c.logger.With(
|
|
"job", job.Name,
|
|
"namespace", job.Namespace,
|
|
"age", age,
|
|
)
|
|
|
|
logger.Debug("start cleanup expired historic job")
|
|
|
|
namespace := job.Namespace
|
|
ctx, _, err := identity.WithProvisioningIdentity(context.Background(), namespace)
|
|
if err != nil {
|
|
logger.Error("failed to set provisioning identity", "error", err)
|
|
return
|
|
}
|
|
|
|
ctx = request.WithNamespace(ctx, namespace)
|
|
err = c.client.HistoricJobs(job.Namespace).Delete(ctx, job.Name, metav1.DeleteOptions{})
|
|
if err != nil {
|
|
if apierrors.IsNotFound(err) {
|
|
logger.Debug("historic job already deleted")
|
|
return
|
|
}
|
|
logger.Error("failed to delete expired historic job", "error", err)
|
|
return
|
|
}
|
|
|
|
logger.Info("deleted expired historic job")
|
|
}
|