"git.curoverse.com/arvados.git/services/datamanager/loggerutil"
"git.curoverse.com/arvados.git/services/datamanager/summary"
"log"
- "os"
"time"
)
logEventTypePrefix string
logFrequencySeconds int
minutesBetweenRuns int
+ collectionBatchSize int
dryRun bool
)
"minutes-between-runs",
0,
"How many minutes we wait between data manager runs. 0 means run once and exit.")
+ flag.IntVar(&collectionBatchSize,
+ "collection-batch-size",
+ 1000,
+ "How many collections to request in each batch.")
flag.BoolVar(&dryRun,
"dry-run",
false,
}
if logEventTypePrefix != "" {
- arvLogger = logger.NewLogger(logger.LoggerParams{
+ arvLogger, err = logger.NewLogger(logger.LoggerParams{
Client: arv,
EventTypePrefix: logEventTypePrefix,
WriteInterval: time.Second * time.Duration(logFrequencySeconds)})
dataFetcher = BuildDataFetcher(arv)
}
- dataFetcher(arvLogger, &readCollections, &keepServerInfo)
-
- if readCollections.Err != nil {
- return readCollections.Err
+ err = dataFetcher(arvLogger, &readCollections, &keepServerInfo)
+ if err != nil {
+ return err
}
err = summary.MaybeWriteData(arvLogger, readCollections, keepServerInfo)
rlbss.Count)
}
+ kc, err := keepclient.MakeKeepClient(&arv)
+ if err != nil {
+ return fmt.Errorf("Error setting up keep client %v", err.Error())
+ }
+
// Log that we're finished. We force the recording, since go will
// not wait for the write timer before exiting.
if arvLogger != nil {
p["summary_info"] = summaryInfo
p["run_info"].(map[string]interface{})["finished_at"] = time.Now()
- p["run_info"].(map[string]interface{})["args"] = os.Args
})
}
- // If dry-run, do not issue any changes to keepstore
- if dryRun {
- log.Printf("Datamanager dry-run. Returning without issuing any keepstore updates.")
- return nil
- }
-
- // Not dry-run; issue changes to keepstore
- kc, err := keepclient.MakeKeepClient(&arv)
- if err != nil {
- return fmt.Errorf("Error setting up keep client %v", err.Error())
- }
-
pullServers := summary.ComputePullServers(kc,
&keepServerInfo,
readCollections.BlockToDesiredReplication,
&keepServerInfo,
replicationSummary.KeepBlocksNotInCollections)
- err = summary.WritePullLists(arvLogger, pullLists)
+ err = summary.WritePullLists(arvLogger, pullLists, dryRun)
if err != nil {
return err
}
if trashErr != nil {
return err
}
- keep.SendTrashLists(kc, trashLists)
+ keep.SendTrashLists(arvLogger, kc, trashLists, dryRun)
return nil
}
// BuildDataFetcher returns a data fetcher that fetches data from remote servers.
func BuildDataFetcher(arv arvadosclient.ArvadosClient) summary.DataFetcher {
- return func(arvLogger *logger.Logger,
+ return func(
+ arvLogger *logger.Logger,
readCollections *collection.ReadCollections,
- keepServerInfo *keep.ReadServers) {
- collectionChannel := make(chan collection.ReadCollections)
-
+ keepServerInfo *keep.ReadServers,
+ ) error { // now returns an error so callers can act on fetch failures directly
+ collDone := make(chan struct{}) // pure completion signal for the collection goroutine
+ var collErr error // written by the goroutine before the collDone send, read only after the receive
go func() {
- collectionChannel <- collection.GetCollectionsAndSummarize(
+ *readCollections, collErr = collection.GetCollectionsAndSummarize(
collection.GetCollectionsParams{
Client: arv,
Logger: arvLogger,
- BatchSize: 50})
+ BatchSize: collectionBatchSize}) // previously hard-coded 50; now set via -collection-batch-size
+ collDone <- struct{}{}
}()
- var err error
- *keepServerInfo, err = keep.GetKeepServersAndSummarize(
+ var keepErr error
+ *keepServerInfo, keepErr = keep.GetKeepServersAndSummarize(
keep.GetKeepServersParams{
Client: arv,
Logger: arvLogger,
Limit: 1000})
- if err != nil {
- return
- }
+ <-collDone // always join the goroutine, even on keep error, so its send never blocks forever
- *readCollections = <-collectionChannel
+ // Return a nil error only if both parts succeeded.
+ if collErr != nil {
+ return collErr
+ }
+ return keepErr
}
}