X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/0d63ac0c2486a43198eb1015ba8d1028239139ee..7d887106d3eabb9844c4a687403a18581167a823:/services/datamanager/datamanager.go diff --git a/services/datamanager/datamanager.go b/services/datamanager/datamanager.go index a8e506eacb..a9306ce83a 100644 --- a/services/datamanager/datamanager.go +++ b/services/datamanager/datamanager.go @@ -3,13 +3,17 @@ package main import ( + "errors" "flag" + "fmt" "git.curoverse.com/arvados.git/sdk/go/arvadosclient" + "git.curoverse.com/arvados.git/sdk/go/keepclient" "git.curoverse.com/arvados.git/sdk/go/logger" "git.curoverse.com/arvados.git/sdk/go/util" "git.curoverse.com/arvados.git/services/datamanager/collection" "git.curoverse.com/arvados.git/services/datamanager/keep" "git.curoverse.com/arvados.git/services/datamanager/loggerutil" + "git.curoverse.com/arvados.git/services/datamanager/summary" "log" "time" ) @@ -38,33 +42,44 @@ func init() { func main() { flag.Parse() if minutesBetweenRuns == 0 { - singlerun() + err := singlerun(makeArvadosClient()) + if err != nil { + log.Fatalf("singlerun: %v", err) + } } else { waitTime := time.Minute * time.Duration(minutesBetweenRuns) for { log.Println("Beginning Run") - singlerun() + err := singlerun(makeArvadosClient()) + if err != nil { + log.Printf("singlerun: %v", err) + } log.Printf("Sleeping for %d minutes", minutesBetweenRuns) time.Sleep(waitTime) } } } -func singlerun() { +func makeArvadosClient() arvadosclient.ArvadosClient { arv, err := arvadosclient.MakeArvadosClient() if err != nil { - log.Fatalf("Error setting up arvados client %s", err.Error()) + log.Fatalf("Error setting up arvados client: %s", err) } + return arv +} - if is_admin, err := util.UserIsAdmin(arv); err != nil { - log.Fatalf("Error querying current arvados user %s", err.Error()) - } else if !is_admin { - log.Fatalf("Current user is not an admin. 
Datamanager can only be run by admins.") +func singlerun(arv arvadosclient.ArvadosClient) error { + var err error + if isAdmin, err := util.UserIsAdmin(arv); err != nil { + return errors.New("Error verifying admin token: " + err.Error()) + } else if !isAdmin { + return errors.New("Current user is not an admin. Datamanager requires a privileged token.") } var arvLogger *logger.Logger if logEventTypePrefix != "" { - arvLogger = logger.NewLogger(logger.LoggerParams{Client: arv, + arvLogger = logger.NewLogger(logger.LoggerParams{ + Client: arv, EventTypePrefix: logEventTypePrefix, WriteInterval: time.Second * time.Duration(logFrequencySeconds)}) } @@ -74,28 +89,102 @@ func singlerun() { arvLogger.AddWriteHook(loggerutil.LogMemoryAlloc) } - collectionChannel := make(chan collection.ReadCollections) + var ( + dataFetcher summary.DataFetcher + readCollections collection.ReadCollections + keepServerInfo keep.ReadServers + ) - go func() { - collectionChannel <- collection.GetCollectionsAndSummarize( - collection.GetCollectionsParams{ - Client: arv, Logger: arvLogger, BatchSize: 50}) - }() + if summary.ShouldReadData() { + dataFetcher = summary.ReadData + } else { + dataFetcher = BuildDataFetcher(arv) + } - keepServerInfo := keep.GetKeepServersAndSummarize( - keep.GetKeepServersParams{Client: arv, Logger: arvLogger, Limit: 1000}) + dataFetcher(arvLogger, &readCollections, &keepServerInfo) - readCollections := <-collectionChannel + summary.MaybeWriteData(arvLogger, readCollections, keepServerInfo) - // TODO(misha): Use these together to verify replication. 
- _ = readCollections - _ = keepServerInfo + buckets := summary.BucketReplication(readCollections, keepServerInfo) + bucketCounts := buckets.Counts() + + replicationSummary := buckets.SummarizeBuckets(readCollections) + replicationCounts := replicationSummary.ComputeCounts() + + log.Printf("Blocks In Collections: %d, "+ + "\nBlocks In Keep: %d.", + len(readCollections.BlockToDesiredReplication), + len(keepServerInfo.BlockToServers)) + log.Println(replicationCounts.PrettyPrint()) + + log.Printf("Blocks Histogram:") + for _, rlbss := range bucketCounts { + log.Printf("%+v: %10d", + rlbss.Levels, + rlbss.Count) + } + + kc, err := keepclient.MakeKeepClient(&arv) + if err != nil { + loggerutil.FatalWithMessage(arvLogger, + fmt.Sprintf("Error setting up keep client %s", err.Error())) + } // Log that we're finished. We force the recording, since go will - // not wait for the timer before exiting. + // not wait for the write timer before exiting. if arvLogger != nil { - arvLogger.FinalUpdate(func(p map[string]interface{}, e map[string]interface{}) { + defer arvLogger.FinalUpdate(func(p map[string]interface{}, e map[string]interface{}) { + summaryInfo := logger.GetOrCreateMap(p, "summary_info") + summaryInfo["block_replication_counts"] = bucketCounts + summaryInfo["replication_summary"] = replicationCounts + p["summary_info"] = summaryInfo + p["run_info"].(map[string]interface{})["finished_at"] = time.Now() }) } + + pullServers := summary.ComputePullServers(kc, + &keepServerInfo, + readCollections.BlockToDesiredReplication, + replicationSummary.UnderReplicatedBlocks) + + pullLists := summary.BuildPullLists(pullServers) + + trashLists, trashErr := summary.BuildTrashLists(kc, + &keepServerInfo, + replicationSummary.KeepBlocksNotInCollections) + + summary.WritePullLists(arvLogger, pullLists) + + if trashErr != nil { + return trashErr + } + keep.SendTrashLists(kc, trashLists) + + return nil +} + +// BuildDataFetcher returns a data fetcher that fetches data from remote servers.
+func BuildDataFetcher(arv arvadosclient.ArvadosClient) summary.DataFetcher { + return func(arvLogger *logger.Logger, + readCollections *collection.ReadCollections, + keepServerInfo *keep.ReadServers) { + collectionChannel := make(chan collection.ReadCollections) + + go func() { + collectionChannel <- collection.GetCollectionsAndSummarize( + collection.GetCollectionsParams{ + Client: arv, + Logger: arvLogger, + BatchSize: 50}) + }() + + *keepServerInfo = keep.GetKeepServersAndSummarize( + keep.GetKeepServersParams{ + Client: arv, + Logger: arvLogger, + Limit: 1000}) + + *readCollections = <-collectionChannel + } }