CloudMigrations: Send local events to gms during the migration process (#90637)
* add gms client function
* add timeout config for endpoint
* report events to gms
* fix lint error
* clean up report calls and make sure reports all have local ids
* extra validation
* improve error logging and fix url
This commit is contained in:
@@ -11,9 +11,11 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/grafana/grafana/pkg/api/response"
|
||||
"github.com/grafana/grafana/pkg/api/routing"
|
||||
"github.com/grafana/grafana/pkg/infra/db"
|
||||
"github.com/grafana/grafana/pkg/infra/kvstore"
|
||||
"github.com/grafana/grafana/pkg/infra/log"
|
||||
"github.com/grafana/grafana/pkg/infra/tracing"
|
||||
"github.com/grafana/grafana/pkg/services/cloudmigration"
|
||||
@@ -55,6 +57,7 @@ type Service struct {
|
||||
dashboardService dashboards.DashboardService
|
||||
folderService folder.Service
|
||||
secretsService secrets.Service
|
||||
kvStore *kvstore.NamespacedKVStore
|
||||
|
||||
api *api.CloudMigrationAPI
|
||||
tracer tracing.Tracer
|
||||
@@ -85,6 +88,7 @@ func ProvideService(
|
||||
tracer tracing.Tracer,
|
||||
dashboardService dashboards.DashboardService,
|
||||
folderService folder.Service,
|
||||
kvStore kvstore.KVStore,
|
||||
) (cloudmigration.Service, error) {
|
||||
if !features.IsEnabledGlobally(featuremgmt.FlagOnPremToCloudMigrations) {
|
||||
return &NoopServiceImpl{}, nil
|
||||
@@ -101,6 +105,7 @@ func ProvideService(
|
||||
secretsService: secretsService,
|
||||
dashboardService: dashboardService,
|
||||
folderService: folderService,
|
||||
kvStore: kvstore.WithNamespace(kvStore, 0, "cloudmigration"),
|
||||
}
|
||||
s.api = api.RegisterApi(routeRegister, s, tracer)
|
||||
|
||||
@@ -379,6 +384,8 @@ func (s *Service) CreateSession(ctx context.Context, cmd cloudmigration.CloudMig
|
||||
return nil, fmt.Errorf("error creating migration: %w", err)
|
||||
}
|
||||
|
||||
s.report(ctx, cm, gmsclient.EventConnect, 0, nil)
|
||||
|
||||
return &cloudmigration.CloudMigrationSessionResponse{
|
||||
UID: cm.UID,
|
||||
Slug: token.Instance.Slug,
|
||||
@@ -460,6 +467,9 @@ func (s *Service) DeleteSession(ctx context.Context, uid string) (*cloudmigratio
|
||||
if err != nil {
|
||||
return c, fmt.Errorf("deleting migration from db: %w", err)
|
||||
}
|
||||
|
||||
s.report(ctx, c, gmsclient.EventDisconnect, 0, nil)
|
||||
|
||||
return c, nil
|
||||
}
|
||||
|
||||
@@ -511,7 +521,11 @@ func (s *Service) CreateSnapshot(ctx context.Context, signedInUser *user.SignedI
|
||||
ctx, cancelFunc := context.WithCancel(context.Background())
|
||||
s.cancelFunc = cancelFunc
|
||||
|
||||
if err := s.buildSnapshot(ctx, signedInUser, initResp.MaxItemsPerPartition, snapshot); err != nil {
|
||||
s.report(ctx, session, gmsclient.EventStartBuildingSnapshot, 0, nil)
|
||||
|
||||
start := time.Now()
|
||||
err := s.buildSnapshot(ctx, signedInUser, initResp.MaxItemsPerPartition, snapshot)
|
||||
if err != nil {
|
||||
s.log.Error("building snapshot", "err", err.Error())
|
||||
// Update status to error with retries
|
||||
if err := s.updateSnapshotWithRetries(context.Background(), cloudmigration.UpdateSnapshotCmd{
|
||||
@@ -521,6 +535,8 @@ func (s *Service) CreateSnapshot(ctx context.Context, signedInUser *user.SignedI
|
||||
s.log.Error("critical failure during snapshot creation - please report any error logs")
|
||||
}
|
||||
}
|
||||
|
||||
s.report(ctx, session, gmsclient.EventDoneBuildingSnapshot, time.Since(start), err)
|
||||
}()
|
||||
|
||||
return &snapshot, nil
|
||||
@@ -637,7 +653,11 @@ func (s *Service) UploadSnapshot(ctx context.Context, sessionUid string, snapsho
|
||||
ctx, cancelFunc := context.WithCancel(context.Background())
|
||||
s.cancelFunc = cancelFunc
|
||||
|
||||
if err := s.uploadSnapshot(ctx, session, snapshot, uploadUrl); err != nil {
|
||||
s.report(ctx, session, gmsclient.EventStartUploadingSnapshot, 0, nil)
|
||||
|
||||
start := time.Now()
|
||||
err := s.uploadSnapshot(ctx, session, snapshot, uploadUrl)
|
||||
if err != nil {
|
||||
s.log.Error("uploading snapshot", "err", err.Error())
|
||||
// Update status to error with retries
|
||||
if err := s.updateSnapshotWithRetries(context.Background(), cloudmigration.UpdateSnapshotCmd{
|
||||
@@ -647,6 +667,8 @@ func (s *Service) UploadSnapshot(ctx context.Context, sessionUid string, snapsho
|
||||
s.log.Error("critical failure during snapshot upload - please report any error logs")
|
||||
}
|
||||
}
|
||||
|
||||
s.report(ctx, session, gmsclient.EventDoneUploadingSnapshot, time.Since(start), err)
|
||||
}()
|
||||
|
||||
return nil
|
||||
@@ -678,3 +700,52 @@ func (s *Service) CancelSnapshot(ctx context.Context, sessionUid string, snapsho
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *Service) report(
|
||||
ctx context.Context,
|
||||
sess *cloudmigration.CloudMigrationSession,
|
||||
t gmsclient.LocalEventType,
|
||||
d time.Duration,
|
||||
evtErr error,
|
||||
) {
|
||||
id, err := s.getLocalEventId(ctx)
|
||||
if err != nil {
|
||||
s.log.Error("failed to report event", "type", t, "error", err.Error())
|
||||
return
|
||||
}
|
||||
|
||||
e := gmsclient.EventRequestDTO{
|
||||
Event: t,
|
||||
LocalID: id,
|
||||
}
|
||||
|
||||
if d != 0 {
|
||||
e.DurationIfFinished = d
|
||||
}
|
||||
if evtErr != nil {
|
||||
e.Error = evtErr.Error()
|
||||
}
|
||||
|
||||
s.gmsClient.ReportEvent(ctx, *sess, e)
|
||||
}
|
||||
|
||||
func (s *Service) getLocalEventId(ctx context.Context) (string, error) {
|
||||
anonId, ok, err := s.kvStore.Get(ctx, "anonymous_id")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to get usage stats id: %w", err)
|
||||
}
|
||||
|
||||
if ok {
|
||||
return anonId, nil
|
||||
}
|
||||
|
||||
anonId = uuid.NewString()
|
||||
|
||||
err = s.kvStore.Set(ctx, "anonymous_id", anonId)
|
||||
if err != nil {
|
||||
s.log.Error("Failed to store usage stats id", "error", err)
|
||||
return "", fmt.Errorf("failed to store usage stats id: %w", err)
|
||||
}
|
||||
|
||||
return anonId, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user