CloudMigrations: Send local events to gms during the migration process (#90637)

* add gms client function

* add timeout config for endpoint

* report events to gms

* fix lint error

* clean up report calls and make sure reports all have local ids

* extra validation

* improve error logging and fix url
This commit is contained in:
Michael Mandrus
2024-07-20 00:02:31 -04:00
committed by GitHub
parent 1c5ed0da4d
commit ee90cd3031
9 changed files with 157 additions and 2 deletions
@@ -11,9 +11,11 @@ import (
"sync"
"time"
"github.com/google/uuid"
"github.com/grafana/grafana/pkg/api/response"
"github.com/grafana/grafana/pkg/api/routing"
"github.com/grafana/grafana/pkg/infra/db"
"github.com/grafana/grafana/pkg/infra/kvstore"
"github.com/grafana/grafana/pkg/infra/log"
"github.com/grafana/grafana/pkg/infra/tracing"
"github.com/grafana/grafana/pkg/services/cloudmigration"
@@ -55,6 +57,7 @@ type Service struct {
dashboardService dashboards.DashboardService
folderService folder.Service
secretsService secrets.Service
kvStore *kvstore.NamespacedKVStore
api *api.CloudMigrationAPI
tracer tracing.Tracer
@@ -85,6 +88,7 @@ func ProvideService(
tracer tracing.Tracer,
dashboardService dashboards.DashboardService,
folderService folder.Service,
kvStore kvstore.KVStore,
) (cloudmigration.Service, error) {
if !features.IsEnabledGlobally(featuremgmt.FlagOnPremToCloudMigrations) {
return &NoopServiceImpl{}, nil
@@ -101,6 +105,7 @@ func ProvideService(
secretsService: secretsService,
dashboardService: dashboardService,
folderService: folderService,
kvStore: kvstore.WithNamespace(kvStore, 0, "cloudmigration"),
}
s.api = api.RegisterApi(routeRegister, s, tracer)
@@ -379,6 +384,8 @@ func (s *Service) CreateSession(ctx context.Context, cmd cloudmigration.CloudMig
return nil, fmt.Errorf("error creating migration: %w", err)
}
s.report(ctx, cm, gmsclient.EventConnect, 0, nil)
return &cloudmigration.CloudMigrationSessionResponse{
UID: cm.UID,
Slug: token.Instance.Slug,
@@ -460,6 +467,9 @@ func (s *Service) DeleteSession(ctx context.Context, uid string) (*cloudmigratio
if err != nil {
return c, fmt.Errorf("deleting migration from db: %w", err)
}
s.report(ctx, c, gmsclient.EventDisconnect, 0, nil)
return c, nil
}
@@ -511,7 +521,11 @@ func (s *Service) CreateSnapshot(ctx context.Context, signedInUser *user.SignedI
ctx, cancelFunc := context.WithCancel(context.Background())
s.cancelFunc = cancelFunc
if err := s.buildSnapshot(ctx, signedInUser, initResp.MaxItemsPerPartition, snapshot); err != nil {
s.report(ctx, session, gmsclient.EventStartBuildingSnapshot, 0, nil)
start := time.Now()
err := s.buildSnapshot(ctx, signedInUser, initResp.MaxItemsPerPartition, snapshot)
if err != nil {
s.log.Error("building snapshot", "err", err.Error())
// Update status to error with retries
if err := s.updateSnapshotWithRetries(context.Background(), cloudmigration.UpdateSnapshotCmd{
@@ -521,6 +535,8 @@ func (s *Service) CreateSnapshot(ctx context.Context, signedInUser *user.SignedI
s.log.Error("critical failure during snapshot creation - please report any error logs")
}
}
s.report(ctx, session, gmsclient.EventDoneBuildingSnapshot, time.Since(start), err)
}()
return &snapshot, nil
@@ -637,7 +653,11 @@ func (s *Service) UploadSnapshot(ctx context.Context, sessionUid string, snapsho
ctx, cancelFunc := context.WithCancel(context.Background())
s.cancelFunc = cancelFunc
if err := s.uploadSnapshot(ctx, session, snapshot, uploadUrl); err != nil {
s.report(ctx, session, gmsclient.EventStartUploadingSnapshot, 0, nil)
start := time.Now()
err := s.uploadSnapshot(ctx, session, snapshot, uploadUrl)
if err != nil {
s.log.Error("uploading snapshot", "err", err.Error())
// Update status to error with retries
if err := s.updateSnapshotWithRetries(context.Background(), cloudmigration.UpdateSnapshotCmd{
@@ -647,6 +667,8 @@ func (s *Service) UploadSnapshot(ctx context.Context, sessionUid string, snapsho
s.log.Error("critical failure during snapshot upload - please report any error logs")
}
}
s.report(ctx, session, gmsclient.EventDoneUploadingSnapshot, time.Since(start), err)
}()
return nil
@@ -678,3 +700,52 @@ func (s *Service) CancelSnapshot(ctx context.Context, sessionUid string, snapsho
return nil
}
// report sends a local migration event of type t for session sess through the
// GMS client.
//
// d is the elapsed time for events that mark the end of an operation; callers
// pass 0 for events that carry no duration. When evtErr is non-nil, its message
// is attached to the event.
//
// Reporting is best-effort: nothing is returned to the caller. If sess is nil
// or the local event ID cannot be obtained, the failure is logged and no event
// is sent.
func (s *Service) report(
	ctx context.Context,
	sess *cloudmigration.CloudMigrationSession,
	t gmsclient.LocalEventType,
	d time.Duration,
	evtErr error,
) {
	// sess is dereferenced when handing it to the client below; guard against
	// nil so a telemetry call can never panic the request or goroutine that
	// issued it.
	if sess == nil {
		s.log.Error("failed to report event", "type", t, "error", "session is nil")
		return
	}

	id, err := s.getLocalEventId(ctx)
	if err != nil {
		s.log.Error("failed to report event", "type", t, "error", err.Error())
		return
	}

	e := gmsclient.EventRequestDTO{
		Event:   t,
		LocalID: id,
	}

	// Only populate the optional fields when there is something to report.
	if d != 0 {
		e.DurationIfFinished = d
	}
	if evtErr != nil {
		e.Error = evtErr.Error()
	}

	s.gmsClient.ReportEvent(ctx, *sess, e)
}
// getLocalEventId returns the identifier used as the local ID of reported
// events. It reads the "anonymous_id" key from the service's KV store; when the
// key is not present it generates a new UUID string, stores it under that key,
// and returns it.
//
// A wrapped error is returned when reading from or writing to the KV store
// fails. Errors are not logged here: they are returned to the caller, which is
// responsible for logging them exactly once.
//
// NOTE(review): the read-then-write sequence is not atomic, so concurrent
// first-time calls could presumably each generate and store a different ID —
// confirm whether that matters for event correlation.
func (s *Service) getLocalEventId(ctx context.Context) (string, error) {
	anonId, ok, err := s.kvStore.Get(ctx, "anonymous_id")
	if err != nil {
		return "", fmt.Errorf("failed to get usage stats id: %w", err)
	}
	if ok {
		return anonId, nil
	}

	// No ID stored yet: create one and persist it so later calls reuse it.
	anonId = uuid.NewString()
	if err := s.kvStore.Set(ctx, "anonymous_id", anonId); err != nil {
		return "", fmt.Errorf("failed to store usage stats id: %w", err)
	}

	return anonId, nil
}