X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/db728a63e741c61f93a200129997c74fdc3065b7..10440ac12d6771ab80469adf551d2cac8d3461e6:/tools/keep-rsync/keep-rsync.go diff --git a/tools/keep-rsync/keep-rsync.go b/tools/keep-rsync/keep-rsync.go index 88caf903b8..7e519f775b 100644 --- a/tools/keep-rsync/keep-rsync.go +++ b/tools/keep-rsync/keep-rsync.go @@ -1,114 +1,149 @@ +// Copyright (C) The Arvados Authors. All rights reserved. +// +// SPDX-License-Identifier: AGPL-3.0 + package main import ( - "bytes" + "bufio" + "crypto/tls" + "errors" "flag" - "git.curoverse.com/arvados.git/sdk/go/arvadosclient" - "git.curoverse.com/arvados.git/sdk/go/keepclient" + "fmt" "io/ioutil" "log" - "regexp" + "net/http" + "os" "strings" "time" -) -// keep-rsync arguments -var ( - srcConfig arvadosclient.APIConfig - dstConfig arvadosclient.APIConfig - blobSigningKey string - srcKeepServicesJSON string - dstKeepServicesJSON string - replications int - prefix string + "git.arvados.org/arvados.git/lib/cmd" + "git.arvados.org/arvados.git/sdk/go/arvadosclient" + "git.arvados.org/arvados.git/sdk/go/keepclient" ) +var version = "dev" + func main() { - var srcConfigFile string - var dstConfigFile string + err := doMain() + if err != nil { + log.Fatalf("%v", err) + } +} + +func doMain() error { + flags := flag.NewFlagSet("keep-rsync", flag.ExitOnError) - flag.StringVar( - &srcConfigFile, - "src-config-file", + srcConfigFile := flags.String( + "src", "", - "Source configuration filename with full path that contains "+ - "an ARVADOS_API_TOKEN which is a valid datamanager token recognized by the source keep servers, "+ - "ARVADOS_API_HOST, ARVADOS_API_HOST_INSECURE, ARVADOS_EXTERNAL_CLIENT and ARVADOS_BLOB_SIGNING_KEY.") + "Source configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf file. This file is expected to specify the values for ARVADOS_API_TOKEN, ARVADOS_API_HOST, ARVADOS_API_HOST_INSECURE, and ARVADOS_BLOB_SIGNING_KEY for the source.") - flag.StringVar( - &dstConfigFile, - "dst-config-file", + dstConfigFile := flags.String( + "dst", "", - "Destination configuration filename with full path that contains "+ - "an ARVADOS_API_TOKEN which is a valid datamanager token recognized by the destination keep servers, "+ - "ARVADOS_API_HOST, ARVADOS_API_HOST_INSECURE, ARVADOS_EXTERNAL_CLIENT and ARVADOS_BLOB_SIGNING_KEY.") + "Destination configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf file. This file is expected to specify the values for ARVADOS_API_TOKEN, ARVADOS_API_HOST, and ARVADOS_API_HOST_INSECURE for the destination.") - flag.StringVar( - &srcKeepServicesJSON, + srcKeepServicesJSON := flags.String( "src-keep-services-json", "", "An optional list of available source keepservices. "+ "If not provided, this list is obtained from api server configured in src-config-file.") - flag.StringVar( - &dstKeepServicesJSON, + dstKeepServicesJSON := flags.String( "dst-keep-services-json", "", "An optional list of available destination keepservices. "+ "If not provided, this list is obtained from api server configured in dst-config-file.") - flag.IntVar( - &replications, + replications := flags.Int( "replications", 0, - "Number of replications to write to the destination.") + "Number of replications to write to the destination. If replications not specified, "+ + "default replication level configured on destination server will be used.") - flag.StringVar( - &prefix, + prefix := flags.String( "prefix", "", "Index prefix") - flag.Parse() - - var err error - - // Load config - if srcConfigFile == "" { - log.Fatal("-src-config-file must be specified.") + srcBlobSignatureTTLFlag := flags.Duration( + "src-blob-signature-ttl", + 0, + "Lifetime of blob permission signatures on source keepservers. If not provided, this will be retrieved from the API server's discovery document.") + + getVersion := flags.Bool( + "version", + false, + "Print version information and exit.") + + if ok, code := cmd.ParseFlags(flags, os.Args[0], os.Args[1:], "", os.Stderr); !ok { + os.Exit(code) + } else if *getVersion { + fmt.Printf("%s %s\n", os.Args[0], version) + os.Exit(0) } - srcConfig, err = readConfigFromFile(srcConfigFile) + + srcConfig, srcBlobSigningKey, err := loadConfig(*srcConfigFile) if err != nil { - log.Fatal("Error reading source configuration: %s", err.Error()) + return fmt.Errorf("Error loading src configuration from file: %s", err.Error()) } - if dstConfigFile == "" { - log.Fatal("-dst-config-file must be specified.") + dstConfig, _, err := loadConfig(*dstConfigFile) + if err != nil { + return fmt.Errorf("Error loading dst configuration from file: %s", err.Error()) } - dstConfig, err = readConfigFromFile(dstConfigFile) + + // setup src and dst keepclients + kcSrc, srcBlobSignatureTTL, err := setupKeepClient(srcConfig, *srcKeepServicesJSON, false, 0, *srcBlobSignatureTTLFlag) if err != nil { - log.Fatal("Error reading destination configuration: %s", err.Error()) + return fmt.Errorf("Error configuring src keepclient: %s", err.Error()) } - // Initialize keep-rsync - err = initializeKeepRsync() + kcDst, _, err := setupKeepClient(dstConfig, *dstKeepServicesJSON, true, *replications, 0) if err != nil { - log.Fatal("Error configuring keep-rsync: %s", err.Error()) + return fmt.Errorf("Error configuring dst keepclient: %s", err.Error()) } // Copy blocks not found in dst from src - performKeepRsync() + err = performKeepRsync(kcSrc, kcDst, srcBlobSignatureTTL, srcBlobSigningKey, *prefix) + if err != nil { + return fmt.Errorf("Error while syncing data: %s", err.Error()) + } + + return nil +} + +type apiConfig struct { + APIToken string + APIHost string + APIHostInsecure bool } -var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$") +// Load src and dst config from given files +func loadConfig(configFile string) (config apiConfig, blobSigningKey string, err error) { + if configFile == "" { + return config, blobSigningKey, errors.New("config file not specified") + } + + config, blobSigningKey, err = readConfigFromFile(configFile) + if err != nil { + return config, blobSigningKey, fmt.Errorf("Error reading config file: %v", err) + } + + return +} -// Reads config from file -func readConfigFromFile(filename string) (arvadosclient.APIConfig, error) { - var config arvadosclient.APIConfig +// Read config from file +func readConfigFromFile(filename string) (config apiConfig, blobSigningKey string, err error) { + if !strings.Contains(filename, "/") { + filename = os.Getenv("HOME") + "/.config/arvados/" + filename + ".conf" + } content, err := ioutil.ReadFile(filename) + if err != nil { - return config, err + return config, "", err } lines := strings.Split(string(content), "\n") @@ -116,89 +151,80 @@ func readConfigFromFile(filename string) (arvadosclient.APIConfig, error) { if line == "" { continue } - kv := strings.Split(line, "=") - switch kv[0] { + kv := strings.SplitN(line, "=", 2) + key := strings.TrimSpace(kv[0]) + value := strings.TrimSpace(kv[1]) + + switch key { case "ARVADOS_API_TOKEN": - config.APIToken = kv[1] + config.APIToken = value case "ARVADOS_API_HOST": - config.APIHost = kv[1] + config.APIHost = value case "ARVADOS_API_HOST_INSECURE": - config.APIHostInsecure = matchTrue.MatchString(kv[1]) - case "ARVADOS_EXTERNAL_CLIENT": - config.ExternalClient = matchTrue.MatchString(kv[1]) + config.APIHostInsecure = arvadosclient.StringBool(value) case "ARVADOS_BLOB_SIGNING_KEY": - blobSigningKey = kv[1] + blobSigningKey = value } } - return config, nil + return } -// keep-rsync source and destination clients -var ( - arvSrc arvadosclient.ArvadosClient - arvDst arvadosclient.ArvadosClient - kcSrc *keepclient.KeepClient - kcDst *keepclient.KeepClient -) - -// Initializes keep-rsync using the config provided -func initializeKeepRsync() (err error) { - // arvSrc from srcConfig - arvSrc, err = arvadosclient.New(srcConfig) - if err != nil { - return +// setup keepclient using the config provided +func setupKeepClient(config apiConfig, keepServicesJSON string, isDst bool, replications int, srcBlobSignatureTTL time.Duration) (kc *keepclient.KeepClient, blobSignatureTTL time.Duration, err error) { + arv := arvadosclient.ArvadosClient{ + ApiToken: config.APIToken, + ApiServer: config.APIHost, + ApiInsecure: config.APIHostInsecure, + Client: &http.Client{Transport: &http.Transport{ + TLSClientConfig: &tls.Config{InsecureSkipVerify: config.APIHostInsecure}}}, } - // arvDst from dstConfig - arvDst, err = arvadosclient.New(dstConfig) - if err != nil { - return - } - - // Get default replications value from destination, if it is not already provided - if replications == 0 { - value, err := arvDst.Discovery("defaultCollectionReplication") - if err == nil { - replications = int(value.(float64)) - } else { - replications = 2 - } - } - - // if srcKeepServicesJSON is provided, use it to load services; else, use DiscoverKeepServers - if srcKeepServicesJSON == "" { - kcSrc, err = keepclient.MakeKeepClient(&arvSrc) + // If keepServicesJSON is provided, use it instead of service discovery + if keepServicesJSON == "" { + kc, err = keepclient.MakeKeepClient(&arv) if err != nil { - return + return nil, 0, err } } else { - kcSrc, err = keepclient.MakeKeepClientFromJSON(&arvSrc, srcKeepServicesJSON) + kc = keepclient.New(&arv) + err = kc.LoadKeepServicesFromJSON(keepServicesJSON) if err != nil { - return + return kc, 0, err } } - // if dstKeepServicesJSON is provided, use it to load services; else, use DiscoverKeepServers - if dstKeepServicesJSON == "" { - kcDst, err = keepclient.MakeKeepClient(&arvDst) - if err != nil { - return + if isDst { + // Get default replications value from destination, if it is not already provided + if replications == 0 { + value, err := arv.Discovery("defaultCollectionReplication") + if err == nil { + replications = int(value.(float64)) + } else { + return nil, 0, err + } } - } else { - kcDst, err = keepclient.MakeKeepClientFromJSON(&arvDst, dstKeepServicesJSON) - if err != nil { - return + + kc.Want_replicas = replications + } + + // If srcBlobSignatureTTL is not provided, get it from API server discovery doc + blobSignatureTTL = srcBlobSignatureTTL + if !isDst && srcBlobSignatureTTL == 0 { + value, err := arv.Discovery("blobSignatureTtl") + if err == nil { + blobSignatureTTL = time.Duration(int(value.(float64))) * time.Second + } else { + return nil, 0, err } } - kcDst.Want_replicas = replications - return + return kc, blobSignatureTTL, nil } // Get unique block locators from src and dst // Copy any blocks missing in dst -func performKeepRsync() error { +func performKeepRsync(kcSrc, kcDst *keepclient.KeepClient, srcBlobSignatureTTL time.Duration, blobSigningKey, prefix string) error { // Get unique locators from src srcIndex, err := getUniqueLocators(kcSrc, prefix) if err != nil { @@ -215,49 +241,35 @@ func performKeepRsync() error { toBeCopied := getMissingLocators(srcIndex, dstIndex) // Copy each missing block to dst - copyBlocksToDst(toBeCopied) + log.Printf("Before keep-rsync, there are %d blocks in src and %d blocks in dst. Start copying %d blocks from src not found in dst.", + len(srcIndex), len(dstIndex), len(toBeCopied)) - return nil + err = copyBlocksToDst(toBeCopied, kcSrc, kcDst, srcBlobSignatureTTL, blobSigningKey) + + return err } // Get list of unique locators from the specified cluster -func getUniqueLocators(kc *keepclient.KeepClient, indexPrefix string) (map[string]bool, error) { - var indexBytes []byte +func getUniqueLocators(kc *keepclient.KeepClient, prefix string) (map[string]bool, error) { + uniqueLocators := map[string]bool{} + // Get index and dedup for uuid := range kc.LocalRoots() { - reader, err := kc.GetIndex(uuid, indexPrefix) + reader, err := kc.GetIndex(uuid, prefix) if err != nil { - return nil, err + return uniqueLocators, err } - - var readBytes []byte - readBytes, err = ioutil.ReadAll(reader) - if err != nil { - return nil, err + scanner := bufio.NewScanner(reader) + for scanner.Scan() { + uniqueLocators[strings.Split(scanner.Text(), " ")[0]] = true } - - indexBytes = append(indexBytes, readBytes...) } - // Got index; Now dedup it - locators := bytes.Split(indexBytes, []byte("\n")) - - uniqueLocators := map[string]bool{} - for _, loc := range locators { - if len(loc) == 0 { - continue - } - - locator := string(bytes.Split(loc, []byte(" "))[0]) - if _, ok := uniqueLocators[locator]; !ok { - uniqueLocators[locator] = true - } - } return uniqueLocators, nil } // Get list of locators that are in src but not in dst -func getMissingLocators(srcLocators map[string]bool, dstLocators map[string]bool) []string { +func getMissingLocators(srcLocators, dstLocators map[string]bool) []string { var missingLocators []string for locator := range srcLocators { if _, ok := dstLocators[locator]; !ok { @@ -268,52 +280,37 @@ func getMissingLocators(srcLocators map[string]bool, dstLocators map[string]bool } // Copy blocks from src to dst; only those that are missing in dst are copied -func copyBlocksToDst(toBeCopied []string) { - done := 0 +func copyBlocksToDst(toBeCopied []string, kcSrc, kcDst *keepclient.KeepClient, srcBlobSignatureTTL time.Duration, blobSigningKey string) error { total := len(toBeCopied) - var failed []string - - for _, locator := range toBeCopied { - log.Printf("Getting block %d of %d", done+1, total) - log.Printf("Getting block: %v", locator) + startedAt := time.Now() + for done, locator := range toBeCopied { + if done == 0 { + log.Printf("Copying data block %d of %d (%.2f%% done): %v", done+1, total, + float64(done)/float64(total)*100, locator) + } else { + timePerBlock := time.Since(startedAt) / time.Duration(done) + log.Printf("Copying data block %d of %d (%.2f%% done, %v est. time remaining): %v", done+1, total, + float64(done)/float64(total)*100, timePerBlock*time.Duration(total-done), locator) + } getLocator := locator expiresAt := time.Now().AddDate(0, 0, 1) if blobSigningKey != "" { - getLocator = keepclient.SignLocator(getLocator, arvSrc.ApiToken, expiresAt, []byte(blobSigningKey)) + getLocator = keepclient.SignLocator(getLocator, kcSrc.Arvados.ApiToken, expiresAt, srcBlobSignatureTTL, []byte(blobSigningKey)) } - reader, _, _, err := kcSrc.Get(getLocator) + reader, len, _, err := kcSrc.Get(getLocator) if err != nil { - log.Printf("Error getting block: %q %v", locator, err) - failed = append(failed, locator) - continue - } - data, err := ioutil.ReadAll(reader) - if err != nil { - log.Printf("Error reading block data: %q %v", locator, err) - failed = append(failed, locator) - continue + return fmt.Errorf("Error getting block: %v %v", locator, err) } - log.Printf("Copying block: %q", locator) - _, rep, err := kcDst.PutB(data) + _, _, err = kcDst.PutHR(getLocator[:32], reader, len) if err != nil { - log.Printf("Error putting block data: %q %v", locator, err) - failed = append(failed, locator) - continue + return fmt.Errorf("Error copying data block: %v %v", locator, err) } - if rep != replications { - log.Printf("Failed to put enough number of replicas. Wanted: %d; Put: %d", replications, rep) - failed = append(failed, locator) - continue - } - - done++ - log.Printf("%.2f%% done", float64(done)/float64(total)*100) } - log.Printf("Successfully copied to destination %d and failed %d out of a total of %d", done, len(failed), total) - log.Printf("Failed blocks %v", failed) + log.Printf("Successfully copied to destination %d blocks.", total) + return nil }