+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
package main
import (
+ "bufio"
+ "crypto/tls"
+ "errors"
"flag"
- "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
- "git.curoverse.com/arvados.git/sdk/go/keepclient"
+ "fmt"
"io/ioutil"
"log"
+ "net/http"
+ "os"
"strings"
-)
+ "time"
-// keep-rsync arguments
-var (
- srcConfig map[string]string
- dstConfig map[string]string
- srcKeepServicesJSON string
- dstKeepServicesJSON string
- replications int
- prefix string
+ "git.arvados.org/arvados.git/sdk/go/arvadosclient"
+ "git.arvados.org/arvados.git/sdk/go/keepclient"
)
+// version is the string printed by the -version flag; it defaults to "dev".
+// NOTE(review): presumably release builds override it at link time
+// (e.g. -ldflags "-X main.version=...") — confirm against the build scripts.
+var version = "dev"
+
+// main runs doMain and, if it returns an error, logs that error and exits
+// through log.Fatalf.
func main() {
- var srcConfigFile string
- var dstConfigFile string
+ err := doMain()
+ if err != nil {
+ log.Fatalf("%v", err)
+ }
+}
+
+// doMain parses the command-line flags, loads the source and destination
+// configurations, builds a keepclient for each side, and copies every block
+// that is listed in the source index but not in the destination index.
+//
+// It returns an error naming the first step that failed. When -version is
+// given it prints the version and exits the process directly.
+func doMain() error {
+ flags := flag.NewFlagSet("keep-rsync", flag.ExitOnError)
- flag.StringVar(
- &srcConfigFile,
- "src-config-file",
+ srcConfigFile := flags.String(
+ "src",
"",
- "Source configuration filename with full path that contains "+
- "an ARVADOS_API_TOKEN which is a valid datamanager token recognized by the source keep servers, "+
- "ARVADOS_API_HOST, ARVADOS_API_HOST_INSECURE, and ARVADOS_BLOB_SIGNING_KEY.")
+ "Source configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf file. This file is expected to specify the values for ARVADOS_API_TOKEN, ARVADOS_API_HOST, ARVADOS_API_HOST_INSECURE, and ARVADOS_BLOB_SIGNING_KEY for the source.")
- flag.StringVar(
- &dstConfigFile,
- "dst-config-file",
+ dstConfigFile := flags.String(
+ "dst",
"",
- "Destination configuration filename with full path that contains "+
- "an ARVADOS_API_TOKEN which is a valid datamanager token recognized by the destination keep servers, "+
- "ARVADOS_API_HOST, ARVADOS_API_HOST_INSECURE, and ARVADOS_BLOB_SIGNING_KEY.")
+ "Destination configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf file. This file is expected to specify the values for ARVADOS_API_TOKEN, ARVADOS_API_HOST, and ARVADOS_API_HOST_INSECURE for the destination.")
- flag.StringVar(
- &srcKeepServicesJSON,
+ // NOTE(review): the help text of the two *-keep-services-json flags below
+ // still refers to "src-config-file" / "dst-config-file", but the flags
+ // registered above are named "src" and "dst" — confirm and update the text.
+ srcKeepServicesJSON := flags.String(
"src-keep-services-json",
"",
"An optional list of available source keepservices. "+
"If not provided, this list is obtained from api server configured in src-config-file.")
- flag.StringVar(
- &dstKeepServicesJSON,
+ dstKeepServicesJSON := flags.String(
"dst-keep-services-json",
"",
"An optional list of available destination keepservices. "+
"If not provided, this list is obtained from api server configured in dst-config-file.")
- flag.IntVar(
- &replications,
+ replications := flags.Int(
"replications",
- 3,
- "Number of replications to write to the destination.")
+ 0,
+ "Number of replications to write to the destination. If replications not specified, "+
+ "default replication level configured on destination server will be used.")
- flag.StringVar(
- &prefix,
+ prefix := flags.String(
"prefix",
"",
"Index prefix")
- flag.Parse()
+ srcBlobSignatureTTLFlag := flags.Duration(
+ "src-blob-signature-ttl",
+ 0,
+ "Lifetime of blob permission signatures on source keepservers. If not provided, this will be retrieved from the API server's discovery document.")
+
+ getVersion := flags.Bool(
+ "version",
+ false,
+ "Print version information and exit.")
+
+ // Parse args; omit the first arg which is the command name
+ // The flag set uses flag.ExitOnError, so the return value is not checked here.
+ flags.Parse(os.Args[1:])
- var err error
+ // Print version information if requested
+ if *getVersion {
+ fmt.Printf("keep-rsync %s\n", version)
+ os.Exit(0)
+ }
- if srcConfigFile == "" {
- log.Fatal("-src-config-file must be specified.")
+ srcConfig, srcBlobSigningKey, err := loadConfig(*srcConfigFile)
+ if err != nil {
+ return fmt.Errorf("Error loading src configuration from file: %s", err.Error())
}
- srcConfig, err = readConfigFromFile(srcConfigFile)
+
+ // The blob signing key read from the dst config is discarded; only the src key is used.
+ dstConfig, _, err := loadConfig(*dstConfigFile)
if err != nil {
- log.Fatal("Error reading source configuration: %s", err.Error())
+ return fmt.Errorf("Error loading dst configuration from file: %s", err.Error())
}
- if dstConfigFile == "" {
- log.Fatal("-dst-config-file must be specified.")
+ // setup src and dst keepclients
+ kcSrc, srcBlobSignatureTTL, err := setupKeepClient(srcConfig, *srcKeepServicesJSON, false, 0, *srcBlobSignatureTTLFlag)
+ if err != nil {
+ return fmt.Errorf("Error configuring src keepclient: %s", err.Error())
}
- dstConfig, err = readConfigFromFile(dstConfigFile)
+
+ kcDst, _, err := setupKeepClient(dstConfig, *dstKeepServicesJSON, true, *replications, 0)
if err != nil {
- log.Fatal("Error reading destination configuration: %s", err.Error())
+ return fmt.Errorf("Error configuring dst keepclient: %s", err.Error())
+ }
+
+ // Copy blocks not found in dst from src
+ err = performKeepRsync(kcSrc, kcDst, srcBlobSignatureTTL, srcBlobSigningKey, *prefix)
+ if err != nil {
+ return fmt.Errorf("Error while syncing data: %s", err.Error())
+ }
+
+ return nil
+}
+
+// apiConfig holds the API connection settings that readConfigFromFile
+// extracts from a config file and that setupKeepClient copies into an
+// arvadosclient.ArvadosClient.
+type apiConfig struct {
+ // APIToken is the value of ARVADOS_API_TOKEN.
+ APIToken string
+ // APIHost is the value of ARVADOS_API_HOST.
+ APIHost string
+ // APIHostInsecure is ARVADOS_API_HOST_INSECURE as parsed by
+ // arvadosclient.StringBool; setupKeepClient uses it as InsecureSkipVerify.
+ APIHostInsecure bool
+ // ExternalClient is ARVADOS_EXTERNAL_CLIENT as parsed by arvadosclient.StringBool.
+ ExternalClient bool
+}
+
+// loadConfig reads the API settings and the blob signing key from a single
+// config file by calling readConfigFromFile.
+// It returns an error if configFile is empty or if reading the file fails.
+func loadConfig(configFile string) (config apiConfig, blobSigningKey string, err error) {
+ if configFile == "" {
+ return config, blobSigningKey, errors.New("config file not specified")
}
- err = initializeKeepRsync()
+ config, blobSigningKey, err = readConfigFromFile(configFile)
if err != nil {
- log.Fatal("Error configurating keep-rsync: %s", err.Error())
+ return config, blobSigningKey, fmt.Errorf("Error reading config file: %v", err)
}
+
+ return
}
-// Reads config from file
-func readConfigFromFile(filename string) (map[string]string, error) {
+// readConfigFromFile parses a settings file made of KEY=VALUE lines.
+//
+// If filename contains no "/", it is treated as shorthand for
+// $HOME/.config/arvados/<filename>.conf. Each line is split at its first "="
+// and both sides are trimmed of surrounding whitespace. The recognized keys
+// are ARVADOS_API_TOKEN, ARVADOS_API_HOST, ARVADOS_API_HOST_INSECURE,
+// ARVADOS_EXTERNAL_CLIENT and ARVADOS_BLOB_SIGNING_KEY (returned separately as
+// blobSigningKey). Empty lines, lines without "=", and unrecognized keys are
+// ignored.
+//
+// It returns the error from reading the file, if any.
+func readConfigFromFile(filename string) (config apiConfig, blobSigningKey string, err error) {
+ if !strings.Contains(filename, "/") {
+ filename = os.Getenv("HOME") + "/.config/arvados/" + filename + ".conf"
+ }
+
content, err := ioutil.ReadFile(filename)
+
if err != nil {
- return nil, err
+ return config, "", err
}
- config := make(map[string]string)
lines := strings.Split(string(content), "\n")
for _, line := range lines {
if line == "" {
continue
}
- kv := strings.Split(line, "=")
- config[kv[0]] = kv[1]
+
+ kv := strings.SplitN(line, "=", 2)
+ // A line without "=" (stray text, a comment, whitespace only) splits
+ // into a single element; skip it instead of panicking on kv[1].
+ if len(kv) != 2 {
+ continue
+ }
+ key := strings.TrimSpace(kv[0])
+ value := strings.TrimSpace(kv[1])
+
+ switch key {
+ case "ARVADOS_API_TOKEN":
+ config.APIToken = value
+ case "ARVADOS_API_HOST":
+ config.APIHost = value
+ case "ARVADOS_API_HOST_INSECURE":
+ config.APIHostInsecure = arvadosclient.StringBool(value)
+ case "ARVADOS_EXTERNAL_CLIENT":
+ config.ExternalClient = arvadosclient.StringBool(value)
+ case "ARVADOS_BLOB_SIGNING_KEY":
+ blobSigningKey = value
+ }
}
- return config, nil
+ return
}
-// keep-rsync source and destination clients
-var (
- arvSrc arvadosclient.ArvadosClient
- arvDst arvadosclient.ArvadosClient
- kcSrc *keepclient.KeepClient
- kcDst *keepclient.KeepClient
-)
+// setupKeepClient builds a keepclient for one side of the sync.
+//
+// config supplies the API host, token and TLS settings. If keepServicesJSON
+// is non-empty, the keep services are loaded from it; otherwise
+// keepclient.MakeKeepClient is used. When isDst is true, replications is
+// stored as the client's Want_replicas, and 0 means "use the
+// defaultCollectionReplication value from the discovery document".
+//
+// The returned blobSignatureTTL is srcBlobSignatureTTL, except that when
+// isDst is false and srcBlobSignatureTTL is 0 it is the discovery document's
+// blobSignatureTtl value interpreted as seconds.
+//
+// On any error the returned client is nil. A discovery value that is not a
+// number is reported as an error.
+func setupKeepClient(config apiConfig, keepServicesJSON string, isDst bool, replications int, srcBlobSignatureTTL time.Duration) (kc *keepclient.KeepClient, blobSignatureTTL time.Duration, err error) {
+ arv := arvadosclient.ArvadosClient{
+ ApiToken: config.APIToken,
+ ApiServer: config.APIHost,
+ ApiInsecure: config.APIHostInsecure,
+ Client: &http.Client{Transport: &http.Transport{
+ TLSClientConfig: &tls.Config{InsecureSkipVerify: config.APIHostInsecure}}},
+ External: config.ExternalClient,
+ }
+
+ // If keepServicesJSON is provided, use it instead of service discovery
+ if keepServicesJSON == "" {
+ kc, err = keepclient.MakeKeepClient(&arv)
+ if err != nil {
+ return nil, 0, err
+ }
+ } else {
+ kc = keepclient.New(&arv)
+ if err = kc.LoadKeepServicesFromJSON(keepServicesJSON); err != nil {
+ return nil, 0, err
+ }
+ }
+
+ if isDst {
+ // Get default replications value from destination, if it is not already provided
+ if replications == 0 {
+ value, err := arv.Discovery("defaultCollectionReplication")
+ if err != nil {
+ return nil, 0, err
+ }
+ // Check the dynamic type so an unexpected discovery value is an
+ // error rather than a panic.
+ n, ok := value.(float64)
+ if !ok {
+ return nil, 0, fmt.Errorf("discovery value defaultCollectionReplication has type %T, expected a number", value)
+ }
+ replications = int(n)
+ }
+
+ kc.Want_replicas = replications
+ }
-// Initializes keep-rsync using the config provided
-func initializeKeepRsync() (err error) {
- // arvSrc from srcConfig
- arvSrc, err = arvadosclient.MakeArvadosClientWithConfig(srcConfig)
+ // If srcBlobSignatureTTL is not provided, get it from API server discovery doc
+ blobSignatureTTL = srcBlobSignatureTTL
+ if !isDst && srcBlobSignatureTTL == 0 {
+ value, err := arv.Discovery("blobSignatureTtl")
+ if err != nil {
+ return nil, 0, err
+ }
+ seconds, ok := value.(float64)
+ if !ok {
+ return nil, 0, fmt.Errorf("discovery value blobSignatureTtl has type %T, expected a number", value)
+ }
+ blobSignatureTTL = time.Duration(int(seconds)) * time.Second
+ }
+
+ return kc, blobSignatureTTL, nil
+}
+
+// performKeepRsync fetches the locator sets of kcSrc and kcDst for prefix,
+// computes the locators present in src but absent from dst, logs the counts,
+// and copies those blocks to dst with copyBlocksToDst.
+// It returns the first error from fetching either index or from copying.
+func performKeepRsync(kcSrc, kcDst *keepclient.KeepClient, srcBlobSignatureTTL time.Duration, blobSigningKey, prefix string) error {
+ // Get unique locators from src
+ srcIndex, err := getUniqueLocators(kcSrc, prefix)
if err != nil {
- return
+ return err
}
- // arvDst from dstConfig
- arvDst, err = arvadosclient.MakeArvadosClientWithConfig(dstConfig)
+ // Get unique locators from dst
+ dstIndex, err := getUniqueLocators(kcDst, prefix)
if err != nil {
- return
+ return err
}
- // if srcKeepServicesJSON is provided, use it to load services; else, use DiscoverKeepServers
- if srcKeepServicesJSON == "" {
- kcSrc, err = keepclient.MakeKeepClient(&arvSrc)
+ // Get list of locators found in src, but missing in dst
+ toBeCopied := getMissingLocators(srcIndex, dstIndex)
+
+ // Copy each missing block to dst
+ log.Printf("Before keep-rsync, there are %d blocks in src and %d blocks in dst. Start copying %d blocks from src not found in dst.",
+ len(srcIndex), len(dstIndex), len(toBeCopied))
+
+ err = copyBlocksToDst(toBeCopied, kcSrc, kcDst, srcBlobSignatureTTL, blobSigningKey)
+
+ return err
+}
+
+// getUniqueLocators returns the set of locators listed in the indexes of all
+// of kc's local roots, restricted to prefix.
+//
+// Each index line is cut at its first space and only the leading field is
+// kept, so the same locator reported by several roots appears once. Lines
+// whose leading field is empty are skipped.
+//
+// It returns an error if an index cannot be fetched or cannot be read to the
+// end; the map returned alongside an error may be incomplete.
+func getUniqueLocators(kc *keepclient.KeepClient, prefix string) (map[string]bool, error) {
+ uniqueLocators := map[string]bool{}
+
+ // Get index and dedup
+ for uuid := range kc.LocalRoots() {
+ reader, err := kc.GetIndex(uuid, prefix)
if err != nil {
- return
+ return uniqueLocators, err
}
- } else {
- kcSrc, err = keepclient.MakeKeepClientFromJSON(&arvSrc, srcKeepServicesJSON)
- if err != nil {
- return
+ scanner := bufio.NewScanner(reader)
+ for scanner.Scan() {
+ locator := strings.Split(scanner.Text(), " ")[0]
+ // An empty locator is not a block; storing it would later break
+ // the copy step, which slices the first 32 characters.
+ if locator == "" {
+ continue
+ }
+ uniqueLocators[locator] = true
+ }
+ // Scan returns false on a read error as well as at EOF; without this
+ // check a truncated index would silently produce a partial set.
+ if err := scanner.Err(); err != nil {
+ return uniqueLocators, fmt.Errorf("Error reading index from %v: %v", uuid, err)
+ }
+ }
+
+ return uniqueLocators, nil
+}
+
+// getMissingLocators returns the keys of srcLocators that are not keys of
+// dstLocators. Only key presence is consulted, not the stored bool values.
+// The result follows map iteration order and is nil when nothing is missing.
+func getMissingLocators(srcLocators, dstLocators map[string]bool) []string {
+ var onlyInSrc []string
+ for candidate := range srcLocators {
+ if _, present := dstLocators[candidate]; present {
+ continue
+ }
+ onlyInSrc = append(onlyInSrc, candidate)
}
+ return onlyInSrc
+}
+
+// copyBlocksToDst copies each block named in toBeCopied from kcSrc to kcDst,
+// one at a time, logging progress and an estimated time remaining.
+//
+// If blobSigningKey is non-empty, each locator is signed with
+// keepclient.SignLocator (using the source API token, an expiry one day from
+// now, and srcBlobSignatureTTL) before it is fetched. The block is written to
+// the destination under the first 32 characters of the locator.
+// NOTE(review): those 32 characters are presumably the MD5 hex digest part
+// of the locator — confirm against the keepclient documentation.
+//
+// It stops at the first failure and returns an error naming the locator. A
+// locator shorter than 32 characters is reported as an error.
+func copyBlocksToDst(toBeCopied []string, kcSrc, kcDst *keepclient.KeepClient, srcBlobSignatureTTL time.Duration, blobSigningKey string) error {
+ total := len(toBeCopied)
- // if dstKeepServicesJSON is provided, use it to load services; else, use DiscoverKeepServers
- if dstKeepServicesJSON == "" {
- kcDst, err = keepclient.MakeKeepClient(&arvDst)
+ startedAt := time.Now()
+ for done, locator := range toBeCopied {
+ if done == 0 {
+ log.Printf("Copying data block %d of %d (%.2f%% done): %v", done+1, total,
+ float64(done)/float64(total)*100, locator)
+ } else {
+ timePerBlock := time.Since(startedAt) / time.Duration(done)
+ log.Printf("Copying data block %d of %d (%.2f%% done, %v est. time remaining): %v", done+1, total,
+ float64(done)/float64(total)*100, timePerBlock*time.Duration(total-done), locator)
+ }
+
+ // The write below slices the first 32 characters; reject a shorter
+ // locator with an error instead of panicking.
+ if len(locator) < 32 {
+ return fmt.Errorf("Error copying data block: invalid locator %q", locator)
+ }
+
+ getLocator := locator
+ expiresAt := time.Now().AddDate(0, 0, 1)
+ if blobSigningKey != "" {
+ getLocator = keepclient.SignLocator(getLocator, kcSrc.Arvados.ApiToken, expiresAt, srcBlobSignatureTTL, []byte(blobSigningKey))
+ }
+
+ // Named size rather than len so the builtin is not shadowed.
+ reader, size, _, err := kcSrc.Get(getLocator)
if err != nil {
- return
+ return fmt.Errorf("Error getting block: %v %v", locator, err)
}
- } else {
- kcDst, err = keepclient.MakeKeepClientFromJSON(&arvDst, dstKeepServicesJSON)
+
+ _, _, err = kcDst.PutHR(getLocator[:32], reader, size)
+ // Close the source reader on every iteration, whether or not the write
+ // succeeded, so it is not leaked. The PutHR error is what gets reported,
+ // so the Close error is ignored.
+ reader.Close()
if err != nil {
- return
+ return fmt.Errorf("Error copying data block: %v %v", locator, err)
}
}
- return
+ log.Printf("Successfully copied to destination %d blocks.", total)
+ return nil
}