"flag"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
"git.curoverse.com/arvados.git/sdk/go/blockdigest"
+ "git.curoverse.com/arvados.git/sdk/go/logger"
"git.curoverse.com/arvados.git/sdk/go/manifest"
- //"git.curoverse.com/arvados.git/sdk/go/util"
"log"
"os"
"runtime"
// GetCollectionsParams bundles the arguments accepted by GetCollections.
type GetCollectionsParams struct {
// Client is the Arvados API client; it is handed to
// NumberCollectionsAvailable to size the result map up front.
Client arvadosclient.ArvadosClient
// Logger receives progress properties (set through Edit, flushed with
// Record) while collections are being read.
// NOTE(review): it is dereferenced without a nil check, so callers
// presumably must always supply one - confirm.
+ Logger *logger.Logger
// BatchSize may be copied into the "limit" entry of the SDK request
// parameters.
BatchSize int
}
sdkParams["limit"] = params.BatchSize
}
- // MISHA UNDO THIS TEMPORARY HACK TO FIND BUG!
- sdkParams["limit"] = 50
-
initialNumberOfCollectionsAvailable := NumberCollectionsAvailable(params.Client)
// Include a 1% margin for collections added while we're reading so
// that we don't have to grow the map in most cases.
float64(initialNumberOfCollectionsAvailable) * 1.01)
results.UuidToCollection = make(map[string]Collection, maxExpectedCollections)
+ {
+ properties,_ := params.Logger.Edit()
+ properties["num_collections_at_start"] = initialNumberOfCollectionsAvailable
+ }
+ params.Logger.Record()
+
// These values are just for getting the loop to run the first time;
// afterwards they'll be set to real values.
previousTotalCollections := -1
sdkParams["filters"].([][]string)[0][2],
float32(totalManifestSize)/float32(totalCollections),
maxManifestSize, totalManifestSize)
+
+ {
+ properties,_ := params.Logger.Edit()
+ properties["collections_read"] = totalCollections
+ properties["latest_modified_date"] = sdkParams["filters"].([][]string)[0][2]
+ properties["total_manifest_size"] = totalManifestSize
+ properties["max_manifest_size"] = maxManifestSize
+ }
+ params.Logger.Record()
}
// Just in case this lowers the numbers reported in the heap profile.
manifest := manifest.Manifest{sdkCollection.ManifestText}
manifestSize := uint64(len(sdkCollection.ManifestText))
- totalManifestSize += manifestSize
+ if _, alreadySeen := uuidToCollection[collection.Uuid]; !alreadySeen {
+ totalManifestSize += manifestSize
+ }
if manifestSize > maxManifestSize {
maxManifestSize = manifestSize
}
import (
"flag"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+ "git.curoverse.com/arvados.git/sdk/go/logger"
"git.curoverse.com/arvados.git/sdk/go/util"
"git.curoverse.com/arvados.git/services/datamanager/collection"
"git.curoverse.com/arvados.git/services/datamanager/keep"
"log"
+ "os"
+ "time"
)
// Command-line flag values. They are registered in init() and consumed
// in main() when the Arvados logger is constructed.
+var (
// logEventType is passed as the logger's EventType
// (flag: -log-event-type).
+ logEventType string
// logFrequencySeconds is converted to a time.Duration and passed as
// the logger's MinimumWriteInterval (flag: -log-frequency-seconds).
+ logFrequencySeconds int
+)
+
// init registers the logging-related command-line flags. The values are
// only populated once main() calls flag.Parse().
+func init() {
// -log-event-type defaults to "experimental-data-manager-report".
+ flag.StringVar(&logEventType,
+ "log-event-type",
+ "experimental-data-manager-report",
+ "event_type to use in our arvados log entries.")
// -log-frequency-seconds defaults to 20.
+ flag.IntVar(&logFrequencySeconds,
+ "log-frequency-seconds",
+ 20,
+ "How frequently we'll write log entries in seconds.")
+}
+
func main() {
flag.Parse()
log.Fatalf("Current user is not an admin. Datamanager can only be run by admins.")
}
+ arvLogger := logger.NewLogger(logger.LoggerParams{Client: arv,
+ EventType: logEventType,
+ MinimumWriteInterval: time.Second * time.Duration(logFrequencySeconds)})
+
+ {
+ properties, _ := arvLogger.Edit()
+ properties["start_time"] = time.Now()
+ properties["args"] = os.Args
+ hostname, err := os.Hostname()
+ if err != nil {
+ properties["hostname_error"] = err.Error()
+ } else {
+ properties["hostname"] = hostname
+ }
+ }
+ arvLogger.Record()
+
// TODO(misha): Read Collections and Keep Contents concurrently as goroutines.
+ // This requires waiting on them to finish before you let main() exit.
+
+ RunCollections(collection.GetCollectionsParams{
+ Client: arv, Logger: arvLogger, BatchSize: 500})
- // readCollections := collection.GetCollections(
- // collection.GetCollectionsParams{
- // Client: arv, BatchSize: 500})
+ RunKeep(keep.GetKeepServersParams{Client: arv, Limit: 1000})
+}
- // UserUsage := ComputeSizeOfOwnedCollections(readCollections)
- // log.Printf("Uuid to Size used: %v", UserUsage)
// RunCollections reads collections with collection.GetCollections using
// the given params, passes the result to ComputeSizeOfOwnedCollections,
// and logs both the resulting usage value and the number of collections
// held in UuidToCollection. It returns nothing; all output goes to the
// standard logger.
+func RunCollections(params collection.GetCollectionsParams) {
+ readCollections := collection.GetCollections(params)
- // // TODO(misha): Add a "readonly" flag. If we're in readonly mode,
- // // lots of behaviors can become warnings (and obviously we can't
- // // write anything).
- // // if !readCollections.ReadAllCollections {
- // // log.Fatalf("Did not read all collections")
- // // }
+ UserUsage := ComputeSizeOfOwnedCollections(readCollections)
+ log.Printf("Uuid to Size used: %v", UserUsage)
- // log.Printf("Read and processed %d collections",
- // len(readCollections.UuidToCollection))
+ // TODO(misha): Add a "readonly" flag. If we're in readonly mode,
+ // lots of behaviors can become warnings (and obviously we can't
+ // write anything).
+ // if !readCollections.ReadAllCollections {
+ // log.Fatalf("Did not read all collections")
+ // }
+
+ log.Printf("Read and processed %d collections",
+ len(readCollections.UuidToCollection))
+}
- readServers := keep.GetKeepServers(
- keep.GetKeepServersParams{Client: arv, Limit: 1000})
+func RunKeep(params keep.GetKeepServersParams) {
+ readServers := keep.GetKeepServers(params)
log.Printf("Returned %d keep disks", len(readServers.ServerToContents))