Added flags to write network data and then read it back. This is useful to speed...
authormishaz <misha@curoverse.com>
Mon, 23 Feb 2015 21:41:19 +0000 (21:41 +0000)
committermishaz <misha@curoverse.com>
Mon, 23 Feb 2015 21:41:19 +0000 (21:41 +0000)
Unfortunately, the H and L fields in blockdigest had to be switched from private (unexported) to public (exported); otherwise encoding/gob would skip them when serializing.

sdk/go/blockdigest/blockdigest.go
services/datamanager/datamanager.go
services/datamanager/summary/summary.go [new file with mode: 0644]

index 9b818d365303ac6805c15be534400a0c3c854448..ad2e36504a15e6937f5e4642a980dfc449066304 100644 (file)
@@ -11,12 +11,12 @@ import (
 // Stores a Block Locator Digest compactly, up to 128 bits.
 // Can be used as a map key.
 type BlockDigest struct {
-       h uint64
-       l uint64
+       H uint64
+       L uint64
 }
 
 func (d BlockDigest) String() string {
-       return fmt.Sprintf("%016x%016x", d.h, d.l)
+       return fmt.Sprintf("%016x%016x", d.H, d.L)
 }
 
 // Will create a new BlockDigest unless an error is encountered.
@@ -27,11 +27,11 @@ func FromString(s string) (dig BlockDigest, err error) {
        }
 
        var d BlockDigest
-       d.h, err = strconv.ParseUint(s[:16], 16, 64)
+       d.H, err = strconv.ParseUint(s[:16], 16, 64)
        if err != nil {
                return
        }
-       d.l, err = strconv.ParseUint(s[16:], 16, 64)
+       d.L, err = strconv.ParseUint(s[16:], 16, 64)
        if err != nil {
                return
        }
index 67dbf085b08a3557b78ee3cd033b80d6af5b4507..9035f11a6bece337c285808b7f2949a398c48f4a 100644 (file)
@@ -10,6 +10,7 @@ import (
        "git.curoverse.com/arvados.git/services/datamanager/collection"
        "git.curoverse.com/arvados.git/services/datamanager/keep"
        "git.curoverse.com/arvados.git/services/datamanager/loggerutil"
+       "git.curoverse.com/arvados.git/services/datamanager/summary"
        "log"
        "time"
 )
@@ -75,23 +76,32 @@ func singlerun() {
                arvLogger.AddWriteHook(loggerutil.LogMemoryAlloc)
        }
 
-       collectionChannel := make(chan collection.ReadCollections)
+       var (
+               readCollections collection.ReadCollections
+               keepServerInfo keep.ReadServers
+       )
 
-       go func() {
-               collectionChannel <- collection.GetCollectionsAndSummarize(
-                       collection.GetCollectionsParams{
+       if !summary.MaybeReadData(arvLogger, &readCollections, &keepServerInfo) {
+               collectionChannel := make(chan collection.ReadCollections)
+
+               go func() {
+                       collectionChannel <- collection.GetCollectionsAndSummarize(
+                               collection.GetCollectionsParams{
+                                       Client: arv,
+                                       Logger: arvLogger,
+                                       BatchSize: 50})
+               }()
+
+               keepServerInfo = keep.GetKeepServersAndSummarize(
+                       keep.GetKeepServersParams{
                                Client: arv,
                                Logger: arvLogger,
-                               BatchSize: 50})
-       }()
+                               Limit: 1000})
 
-       keepServerInfo := keep.GetKeepServersAndSummarize(
-               keep.GetKeepServersParams{
-                       Client: arv,
-                       Logger: arvLogger,
-                       Limit: 1000})
+               readCollections = <-collectionChannel
+       }
 
-       readCollections := <-collectionChannel
+       summary.MaybeWriteData(arvLogger, readCollections, keepServerInfo)
 
        // TODO(misha): Use these together to verify replication.
        _ = readCollections
diff --git a/services/datamanager/summary/summary.go b/services/datamanager/summary/summary.go
new file mode 100644 (file)
index 0000000..8a381eb
--- /dev/null
@@ -0,0 +1,101 @@
+/* Computes Summary based on data read from API server. */
+
+package summary
+
+import (
+       "encoding/gob"
+       "flag"
+       "fmt"
+       "git.curoverse.com/arvados.git/sdk/go/logger"
+       "git.curoverse.com/arvados.git/services/datamanager/collection"
+       "git.curoverse.com/arvados.git/services/datamanager/keep"
+       "git.curoverse.com/arvados.git/services/datamanager/loggerutil"
+       "log"
+       "os"
+)
+
+// Values of the command-line flags registered in init().
+var (
+       // These are just used for development, to save network i/o
+       writeDataTo  string // set by -write-data-to; "" means do not write
+       readDataFrom string // set by -read-data-from; "" means do not read
+)
+
+// serializedData is the single value that is gob-encoded to, and
+// gob-decoded from, the summary file. The fields are exported because
+// encoding/gob only transmits exported struct fields.
+type serializedData struct {
+       ReadCollections collection.ReadCollections
+       KeepServerInfo  keep.ReadServers
+}
+
+// init registers the development-only flags that select the file to
+// write summary data to and the file to read it back from. Both
+// default to the empty string.
+func init() {
+       const (
+               writeUsage = "Write summary of data received to this file. Used for development only."
+               readUsage  = "Avoid network i/o and read summary data from this file instead. Used for development only."
+       )
+
+       flag.StringVar(&writeDataTo, "write-data-to", "", writeUsage)
+       flag.StringVar(&readDataFrom, "read-data-from", "", readUsage)
+}
+
+// Writes data we've read to a file.
+//
+// This is useful for development, so that we don't need to read all our data from the network every time we tweak something.
+//
+// This should not be used outside of development, since you'll be
+// working with stale data.
+func MaybeWriteData(arvLogger *logger.Logger,
+       readCollections collection.ReadCollections,
+       keepServerInfo keep.ReadServers) bool {
+       if writeDataTo == "" {
+               return false
+       } else {
+               summaryFile, err := os.Create(writeDataTo)
+               if err != nil {
+                       loggerutil.FatalWithMessage(arvLogger,
+                               fmt.Sprintf("Failed to open %s: %v", writeDataTo, err))
+               }
+               defer summaryFile.Close()
+
+               enc := gob.NewEncoder(summaryFile)
+               data := serializedData{
+                       ReadCollections: readCollections,
+                       KeepServerInfo:  keepServerInfo}
+               err = enc.Encode(data)
+               if err != nil {
+                       loggerutil.FatalWithMessage(arvLogger,
+                               fmt.Sprintf("Failed to write summary data: %v", err))
+               }
+               log.Printf("Wrote summary data to: %s", writeDataTo)
+               return true
+       }
+}
+
+// MaybeReadData loads summary data that was previously written to a
+// file by MaybeWriteData.
+//
+// When the -read-data-from flag is unset this does nothing and returns
+// false, telling the caller to fetch the data itself. Otherwise the
+// file is gob-decoded, the decoded values are stored through
+// readCollections and keepServerInfo, and true is returned.
+//
+// This is useful for development, so that we don't need to read all
+// our data from the network every time we tweak something.
+//
+// This should not be used outside of development, since you'll be
+// working with stale data.
+func MaybeReadData(arvLogger *logger.Logger,
+       readCollections *collection.ReadCollections,
+       keepServerInfo *keep.ReadServers) bool {
+       if readDataFrom == "" {
+               return false
+       }
+
+       summaryFile, err := os.Open(readDataFrom)
+       if err != nil {
+               loggerutil.FatalWithMessage(arvLogger,
+                       fmt.Sprintf("Failed to open %s: %v", readDataFrom, err))
+       }
+       defer summaryFile.Close()
+
+       var data serializedData
+       if err = gob.NewDecoder(summaryFile).Decode(&data); err != nil {
+               loggerutil.FatalWithMessage(arvLogger,
+                       fmt.Sprintf("Failed to read summary data: %v", err))
+       }
+
+       // Hand the decoded values back to the caller. Without these
+       // assignments the caller's variables keep their zero values even
+       // though we report success.
+       *readCollections = data.ReadCollections
+       *keepServerInfo = data.KeepServerInfo
+
+       log.Printf("Read summary data from: %s", readDataFrom)
+       return true
+}