7167: Rename conf flags to -src and -dst to match arv-copy. Always add .conf for...
[arvados.git] / tools / keep-rsync / keep-rsync.go
index 9f14a9e5c08cd7e5670bc21e70017ede04fcb92e..c39f0675c6cc52e9935700bbfa157ecba6311f44 100644 (file)
@@ -24,19 +24,15 @@ func main() {
 
        flag.StringVar(
                &srcConfigFile,
-               "src-config-file",
+               "src",
                "",
-               "Source configuration filename with full path that contains "+
-                       "an ARVADOS_API_TOKEN which is a valid datamanager token recognized by the source keep servers, "+
-                       "ARVADOS_API_HOST, ARVADOS_API_HOST_INSECURE, ARVADOS_EXTERNAL_CLIENT and ARVADOS_BLOB_SIGNING_KEY.")
+               "Source configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf")
 
        flag.StringVar(
                &dstConfigFile,
-               "dst-config-file",
+               "dst",
                "",
-               "Destination configuration filename with full path that contains "+
-                       "an ARVADOS_API_TOKEN which is a valid datamanager token recognized by the destination keep servers, "+
-                       "ARVADOS_API_HOST, ARVADOS_API_HOST_INSECURE, ARVADOS_EXTERNAL_CLIENT and ARVADOS_BLOB_SIGNING_KEY.")
+               "Destination configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf")
 
        flag.StringVar(
                &srcKeepServicesJSON,
@@ -56,7 +52,8 @@ func main() {
                &replications,
                "replications",
                0,
-               "Number of replications to write to the destination.")
+               "Number of replications to write to the destination. If replications not specified, "+
+                       "default replication level configured on destination server will be used.")
 
        flag.StringVar(
                &prefix,
@@ -66,15 +63,25 @@ func main() {
 
        flag.Parse()
 
-       srcConfig, dstConfig, srcBlobSigningKey, _, err := loadConfig(srcConfigFile, dstConfigFile)
+       srcConfig, srcBlobSigningKey, err := loadConfig(srcConfigFile)
        if err != nil {
-               log.Fatalf("Error loading configuration from files: %s", err.Error())
+               log.Fatalf("Error loading src configuration from file: %s", err.Error())
+       }
+
+       dstConfig, _, err := loadConfig(dstConfigFile)
+       if err != nil {
+               log.Fatalf("Error loading dst configuration from file: %s", err.Error())
        }
 
        // setup src and dst keepclients
-       kcSrc, kcDst, err := setupKeepClients(srcConfig, dstConfig, srcKeepServicesJSON, dstKeepServicesJSON, replications)
+       kcSrc, err := setupKeepClient(srcConfig, srcKeepServicesJSON, false, 0)
+       if err != nil {
+               log.Fatalf("Error configuring src keepclient: %s", err.Error())
+       }
+
+       kcDst, err := setupKeepClient(dstConfig, dstKeepServicesJSON, true, replications)
        if err != nil {
-               log.Fatalf("Error configuring keep-rsync: %s", err.Error())
+               log.Fatalf("Error configuring dst keepclient: %s", err.Error())
        }
 
        // Copy blocks not found in dst from src
@@ -84,23 +91,22 @@ func main() {
        }
 }
 
-// Load src and dst config from given files
-func loadConfig(srcConfigFile, dstConfigFile string) (srcConfig, dstConfig arvadosclient.APIConfig, srcBlobSigningKey, dstBlobSigningKey string, err error) {
-       if srcConfigFile == "" {
-               return srcConfig, dstConfig, srcBlobSigningKey, dstBlobSigningKey, errors.New("-src-config-file must be specified")
-       }
+type apiConfig struct {
+       APIToken        string
+       APIHost         string
+       APIHostInsecure bool
+       ExternalClient  bool
+}
 
-       srcConfig, srcBlobSigningKey, err = readConfigFromFile(srcConfigFile)
-       if err != nil {
-               return srcConfig, dstConfig, srcBlobSigningKey, dstBlobSigningKey, fmt.Errorf("Error reading source configuration: %v", err)
+// Load config from the given file
+func loadConfig(configFile string) (config apiConfig, blobSigningKey string, err error) {
+       if configFile == "" {
+               return config, blobSigningKey, errors.New("config file not specified")
        }
 
-       if dstConfigFile == "" {
-               return srcConfig, dstConfig, srcBlobSigningKey, dstBlobSigningKey, errors.New("-dst-config-file must be specified")
-       }
-       dstConfig, dstBlobSigningKey, err = readConfigFromFile(dstConfigFile)
+       config, blobSigningKey, err = readConfigFromFile(configFile)
        if err != nil {
-               return srcConfig, dstConfig, srcBlobSigningKey, dstBlobSigningKey, fmt.Errorf("Error reading destination configuration: %v", err)
+               return config, blobSigningKey, fmt.Errorf("Error reading config file: %v", err)
        }
 
        return
@@ -109,9 +115,9 @@ func loadConfig(srcConfigFile, dstConfigFile string) (srcConfig, dstConfig arvad
 var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")
 
 // Read config from file
-func readConfigFromFile(filename string) (config arvadosclient.APIConfig, blobSigningKey string, err error) {
+func readConfigFromFile(filename string) (config apiConfig, blobSigningKey string, err error) {
        if !strings.Contains(filename, "/") {
-               filename = os.Getenv("HOME") + "/.config/arvados/" + filename
+               filename = os.Getenv("HOME") + "/.config/arvados/" + filename + ".conf"
        }
 
        content, err := ioutil.ReadFile(filename)
@@ -146,66 +152,46 @@ func readConfigFromFile(filename string) (config arvadosclient.APIConfig, blobSi
        return
 }
 
-// Initializes keep-rsync using the config provided
-func setupKeepClients(srcConfig, dstConfig arvadosclient.APIConfig, srcKeepServicesJSON, dstKeepServicesJSON string, replications int) (kcSrc, kcDst *keepclient.KeepClient, err error) {
-       // arvSrc from srcConfig
-       arvSrc := arvadosclient.ArvadosClient{
-               ApiToken:    srcConfig.APIToken,
-               ApiServer:   srcConfig.APIHost,
-               ApiInsecure: srcConfig.APIHostInsecure,
-               Client: &http.Client{Transport: &http.Transport{
-                       TLSClientConfig: &tls.Config{InsecureSkipVerify: srcConfig.APIHostInsecure}}},
-               External: srcConfig.ExternalClient,
-       }
-
-       // arvDst from dstConfig
-       arvDst := arvadosclient.ArvadosClient{
-               ApiToken:    dstConfig.APIToken,
-               ApiServer:   dstConfig.APIHost,
-               ApiInsecure: dstConfig.APIHostInsecure,
+// Set up a keepclient using the config provided; for the dst client, also set the desired replication level
+func setupKeepClient(config apiConfig, keepServicesJSON string, isDst bool, replications int) (kc *keepclient.KeepClient, err error) {
+       arv := arvadosclient.ArvadosClient{
+               ApiToken:    config.APIToken,
+               ApiServer:   config.APIHost,
+               ApiInsecure: config.APIHostInsecure,
                Client: &http.Client{Transport: &http.Transport{
-                       TLSClientConfig: &tls.Config{InsecureSkipVerify: dstConfig.APIHostInsecure}}},
-               External: dstConfig.ExternalClient,
+                       TLSClientConfig: &tls.Config{InsecureSkipVerify: config.APIHostInsecure}}},
+               External: config.ExternalClient,
        }
 
-       // Get default replications value from destination, if it is not already provided
-       if replications == 0 {
-               value, err := arvDst.Discovery("defaultCollectionReplication")
-               if err == nil {
-                       replications = int(value.(float64))
-               } else {
-                       replications = 2
-               }
-       }
-
-       // if srcKeepServicesJSON is provided, use it to load services; else, use DiscoverKeepServers
-       if srcKeepServicesJSON == "" {
-               kcSrc, err = keepclient.MakeKeepClient(&arvSrc)
+       // if keepServicesJSON is provided, use it to load services; else, use DiscoverKeepServers
+       if keepServicesJSON == "" {
+               kc, err = keepclient.MakeKeepClient(&arv)
                if err != nil {
-                       return nil, nil, err
+                       return nil, err
                }
        } else {
-               kcSrc, err = keepclient.MakeKeepClientFromJSON(&arvSrc, srcKeepServicesJSON)
+               kc = keepclient.New(&arv)
+               err = kc.LoadKeepServicesFromJSON(keepServicesJSON)
                if err != nil {
-                       return kcSrc, kcDst, err
+                       return kc, err
                }
        }
 
-       // if dstKeepServicesJSON is provided, use it to load services; else, use DiscoverKeepServers
-       if dstKeepServicesJSON == "" {
-               kcDst, err = keepclient.MakeKeepClient(&arvDst)
-               if err != nil {
-                       return kcSrc, kcDst, err
-               }
-       } else {
-               kcDst, err = keepclient.MakeKeepClientFromJSON(&arvDst, dstKeepServicesJSON)
-               if err != nil {
-                       return kcSrc, kcDst, err
+       if isDst {
+               // Get default replications value from destination, if it is not already provided
+               if replications == 0 {
+                       value, err := arv.Discovery("defaultCollectionReplication")
+                       if err == nil {
+                               replications = int(value.(float64))
+                       } else {
+                               return nil, err
+                       }
                }
+
+               kc.Want_replicas = replications
        }
-       kcDst.Want_replicas = replications
 
-       return kcSrc, kcDst, nil
+       return kc, nil
 }
 
 // Get unique block locators from src and dst
@@ -227,6 +213,9 @@ func performKeepRsync(kcSrc, kcDst *keepclient.KeepClient, blobSigningKey, prefi
        toBeCopied := getMissingLocators(srcIndex, dstIndex)
 
        // Copy each missing block to dst
+       log.Printf("Before keep-rsync, there are %d blocks in src and %d blocks in dst. Start copying %d blocks from src not found in dst.",
+               len(srcIndex), len(dstIndex), len(toBeCopied))
+
        err = copyBlocksToDst(toBeCopied, kcSrc, kcDst, blobSigningKey)
 
        return err
@@ -264,11 +253,18 @@ func getMissingLocators(srcLocators, dstLocators map[string]bool) []string {
 
 // Copy blocks from src to dst; only those that are missing in dst are copied
 func copyBlocksToDst(toBeCopied []string, kcSrc, kcDst *keepclient.KeepClient, blobSigningKey string) error {
-       done := 0
        total := len(toBeCopied)
 
-       for _, locator := range toBeCopied {
-               log.Printf("Getting block %d of %d: %v", done+1, total, locator)
+       startedAt := time.Now()
+       for done, locator := range toBeCopied {
+               if done == 0 {
+                       log.Printf("Copying data block %d of %d (%.2f%% done): %v", done+1, total,
+                               float64(done)/float64(total)*100, locator)
+               } else {
+                       timePerBlock := time.Since(startedAt) / time.Duration(done)
+                       log.Printf("Copying data block %d of %d (%.2f%% done, ETA %v): %v", done+1, total,
+                               float64(done)/float64(total)*100, timePerBlock*time.Duration(total-done), locator)
+               }
 
                getLocator := locator
                expiresAt := time.Now().AddDate(0, 0, 1)
@@ -276,23 +272,15 @@ func copyBlocksToDst(toBeCopied []string, kcSrc, kcDst *keepclient.KeepClient, b
                        getLocator = keepclient.SignLocator(getLocator, kcSrc.Arvados.ApiToken, expiresAt, []byte(blobSigningKey))
                }
 
-               reader, _, _, err := kcSrc.Get(getLocator)
+               reader, len, _, err := kcSrc.Get(getLocator)
                if err != nil {
                        return fmt.Errorf("Error getting block: %v %v", locator, err)
                }
-               data, err := ioutil.ReadAll(reader)
-               if err != nil {
-                       return fmt.Errorf("Error reading block data: %v %v", locator, err)
-               }
 
-               log.Printf("Writing block%d of %d: %v", locator)
-               _, _, err = kcDst.PutB(data)
+               _, _, err = kcDst.PutHR(getLocator[:32], reader, len)
                if err != nil {
-                       return fmt.Errorf("Error putting block data: %v %v", locator, err)
+                       return fmt.Errorf("Error copying data block: %v %v", locator, err)
                }
-
-               done++
-               log.Printf("%.2f%% done", float64(done)/float64(total)*100)
        }
 
        log.Printf("Successfully copied to destination %d blocks.", total)