logEventTypePrefix string
logFrequencySeconds int
minutesBetweenRuns int
+ collectionBatchSize int
+ dryRun bool
)
func init() {
flag.IntVar(&minutesBetweenRuns,
"minutes-between-runs",
0,
- "How many minutes we wait betwen data manager runs. 0 means run once and exit.")
+ "How many minutes we wait between data manager runs. 0 means run once and exit.")
+ flag.IntVar(&collectionBatchSize,
+ "collection-batch-size",
+ 1000,
+ "How many collections to request in each batch.")
+ flag.BoolVar(&dryRun,
+ "dry-run",
+ false,
+ "Perform a dry run. Log how many blocks would be deleted/moved, but do not issue any changes to keepstore.")
}
// NOTE(review): this hunk is an elided excerpt — context lines between the
// fragments below are missing from this view, so only comments are added here.
func main() {
flag.Parse()
+
if minutesBetweenRuns == 0 {
arv, err := arvadosclient.MakeArvadosClient()
// NOTE(review): building the fetcher inside the err != nil branch looks
// inverted (we would only proceed when client construction FAILED) — likely an
// artifact of elided lines; confirm against the full file that this branch
// actually reports the error and the success path builds dataFetcher.
if err != nil {
dataFetcher = BuildDataFetcher(arv)
}
// Post-image: the fetcher now returns an error directly instead of the caller
// inspecting readCollections.Err afterwards.
- dataFetcher(arvLogger, &readCollections, &keepServerInfo)
-
- if readCollections.Err != nil {
- return readCollections.Err
+ err = dataFetcher(arvLogger, &readCollections, &keepServerInfo)
+ if err != nil {
+ return err
}
err = summary.MaybeWriteData(arvLogger, readCollections, keepServerInfo)
&keepServerInfo,
replicationSummary.KeepBlocksNotInCollections)
// Post-image: pull lists honor the new dry-run flag.
- err = summary.WritePullLists(arvLogger, pullLists)
+ err = summary.WritePullLists(arvLogger, pullLists, dryRun)
if err != nil {
return err
}
// NOTE(review): BUG — this checks trashErr but returns err, silently dropping
// the trash error (err is nil here after the check above). Should be
// "return trashErr".
if trashErr != nil {
return err
}
// Post-image: trash lists also gain the logger and dry-run arguments.
- keep.SendTrashLists(kc, trashLists)
+ keep.SendTrashLists(arvLogger, kc, trashLists, dryRun)
// NOTE(review): "return nil" / "return err" inside func main() cannot compile —
// this body was presumably extracted into an error-returning helper (e.g.
// singlerun) in the full file; confirm the real enclosing signature.
return nil
}
// BuildDataFetcher returns a data fetcher that fetches data from remote servers.
func BuildDataFetcher(arv arvadosclient.ArvadosClient) summary.DataFetcher {
- return func(arvLogger *logger.Logger,
+ return func(
+ arvLogger *logger.Logger,
readCollections *collection.ReadCollections,
- keepServerInfo *keep.ReadServers) {
- collectionChannel := make(chan collection.ReadCollections)
-
+ keepServerInfo *keep.ReadServers,
+ ) error {
+ collDone := make(chan struct{})
+ var collErr error
go func() {
- collectionChannel <- collection.GetCollectionsAndSummarize(
+ *readCollections, collErr = collection.GetCollectionsAndSummarize(
collection.GetCollectionsParams{
Client: arv,
Logger: arvLogger,
- BatchSize: 50})
+ BatchSize: collectionBatchSize})
+ collDone <- struct{}{}
}()
- var err error
- *keepServerInfo, err = keep.GetKeepServersAndSummarize(
+ var keepErr error
+ *keepServerInfo, keepErr = keep.GetKeepServersAndSummarize(
keep.GetKeepServersParams{
Client: arv,
Logger: arvLogger,
Limit: 1000})
- if err != nil {
- return
- }
+ <-collDone
- *readCollections = <-collectionChannel
+ // Return a nil error only if both parts succeeded.
+ if collErr != nil {
+ return collErr
+ }
+ return keepErr
}
}