Modifies the loop termination condition in GetCollection so that the
loop continues until the API server reports that no more items are
available (as returned in collections.ItemsAvailable).
Modifies the query code so that it uses an offset to page through
results, so that the loop still makes progress when an entire batch
has equal modified_at timestamps.
sdkParams := arvadosclient.Dict{
"select": fieldsWanted,
"order": []string{"modified_at ASC"},
sdkParams := arvadosclient.Dict{
"select": fieldsWanted,
"order": []string{"modified_at ASC"},
- "filters": [][]string{[]string{"modified_at", ">=", "1900-01-01T00:00:00Z"}}}
+ "filters": [][]string{[]string{"modified_at", ">=", "1900-01-01T00:00:00Z"}},
+ "offset": 0}
if params.BatchSize > 0 {
sdkParams["limit"] = params.BatchSize
if params.BatchSize > 0 {
sdkParams["limit"] = params.BatchSize
// These values are just for getting the loop to run the first time,
// afterwards they'll be set to real values.
// These values are just for getting the loop to run the first time,
// afterwards they'll be set to real values.
- previousTotalCollections := -1
- totalCollections := 0
- for totalCollections > previousTotalCollections {
+ remainingCollections := 1
+ var totalCollections int
+ var previousTotalCollections int
+ for remainingCollections > 0 {
// We're still finding new collections
// Write the heap profile for examining memory usage
// We're still finding new collections
// Write the heap profile for examining memory usage
+ // Update count of remaining collections
+ remainingCollections = collections.ItemsAvailable - params.BatchSize - sdkParams["offset"].(int)
+ if remainingCollections < 0 {
+ remainingCollections = 0
+ }
+
// Process collection and update our date filter.
latestModificationDate, maxManifestSize, totalManifestSize, err := ProcessCollections(params.Logger,
collections.Items,
// Process collection and update our date filter.
latestModificationDate, maxManifestSize, totalManifestSize, err := ProcessCollections(params.Logger,
collections.Items,
if err != nil {
return results, err
}
if err != nil {
return results, err
}
- sdkParams["filters"].([][]string)[0][2] = latestModificationDate.Format(time.RFC3339)
+ if sdkParams["filters"].([][]string)[0][2] != latestModificationDate.Format(time.RFC3339) {
+ sdkParams["filters"].([][]string)[0][2] = latestModificationDate.Format(time.RFC3339)
+ sdkParams["offset"] = 0
+ } else {
+ sdkParams["offset"] = sdkParams["offset"].(int) + params.BatchSize
+ }
// update counts
previousTotalCollections = totalCollections
totalCollections = len(results.UUIDToCollection)
log.Printf("%d collections read, %d new in last batch, "+
// update counts
previousTotalCollections = totalCollections
totalCollections = len(results.UUIDToCollection)
log.Printf("%d collections read, %d new in last batch, "+
"%s latest modified date, %.0f %d %d avg,max,total manifest size",
totalCollections,
totalCollections-previousTotalCollections,
"%s latest modified date, %.0f %d %d avg,max,total manifest size",
totalCollections,
totalCollections-previousTotalCollections,
sdkParams["filters"].([][]string)[0][2],
float32(totalManifestSize)/float32(totalCollections),
maxManifestSize, totalManifestSize)
sdkParams["filters"].([][]string)[0][2],
float32(totalManifestSize)/float32(totalCollections),
maxManifestSize, totalManifestSize)