1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
20 "git.arvados.org/arvados.git/lib/cmd"
21 "git.arvados.org/arvados.git/sdk/go/arvadosclient"
22 "git.arvados.org/arvados.git/sdk/go/keepclient"
// NOTE(review): this span is the body of the command's entry routine. The
// enclosing func header and a number of short lines (flag names, defaults,
// error checks, closing braces) are not visible in this view, so the code is
// left untouched and only annotated.
//
// Flags are registered on a dedicated FlagSet named "keep-rsync".
35 flags := flag.NewFlagSet("keep-rsync", flag.ExitOnError)
// Config file locations for the source and destination clusters.
37 srcConfigFile := flags.String(
40 "Source configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf file. This file is expected to specify the values for ARVADOS_API_TOKEN, ARVADOS_API_HOST, ARVADOS_API_HOST_INSECURE, and ARVADOS_BLOB_SIGNING_KEY for the source.")
42 dstConfigFile := flags.String(
45 "Destination configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf file. This file is expected to specify the values for ARVADOS_API_TOKEN, ARVADOS_API_HOST, and ARVADOS_API_HOST_INSECURE for the destination.")
// Optional keepservices lists. When one is empty, setupKeepClient calls
// keepclient.MakeKeepClient instead of loading the JSON.
47 srcKeepServicesJSON := flags.String(
48 "src-keep-services-json",
50 "An optional list of available source keepservices. "+
51 "If not provided, this list is obtained from api server configured in src-config-file.")
53 dstKeepServicesJSON := flags.String(
54 "dst-keep-services-json",
56 "An optional list of available destination keepservices. "+
57 "If not provided, this list is obtained from api server configured in dst-config-file.")
// Replication level requested for blocks written to the destination.
59 replications := flags.Int(
62 "Number of replications to write to the destination. If replications not specified, "+
63 "default replication level configured on destination server will be used.")
// Passed through to performKeepRsync as the index prefix.
65 prefix := flags.String(
// Optional override for the source blob signature TTL.
70 srcBlobSignatureTTLFlag := flags.Duration(
71 "src-blob-signature-ttl",
73 "Lifetime of blob permission signatures on source keepservers. If not provided, this will be retrieved from the API server's discovery document.")
75 getVersion := flags.Bool(
78 "Print version information and exit.")
// Parse the command line; when the version flag is set, print the program
// name and version.
// NOTE(review): the statements that run on a parse failure and after the
// version is printed are not visible in this view.
80 if ok, code := cmd.ParseFlags(flags, os.Args[0], os.Args[1:], "", os.Stderr); !ok {
82 } else if *getVersion {
83 fmt.Printf("%s %s\n", os.Args[0], version)
// Load API settings for both clusters. Only the source's blob signing key is
// kept; the second value returned for the destination is discarded.
87 srcConfig, srcBlobSigningKey, err := loadConfig(*srcConfigFile)
89 return fmt.Errorf("Error loading src configuration from file: %s", err.Error())
92 dstConfig, _, err := loadConfig(*dstConfigFile)
94 return fmt.Errorf("Error loading dst configuration from file: %s", err.Error())
97 // setup src and dst keepclients
// Source: isDst=false, replications=0, TTL taken from the flag.
// Destination: isDst=true, replications from the flag, TTL 0, and the
// returned TTL is discarded.
98 kcSrc, srcBlobSignatureTTL, err := setupKeepClient(srcConfig, *srcKeepServicesJSON, false, 0, *srcBlobSignatureTTLFlag)
100 return fmt.Errorf("Error configuring src keepclient: %s", err.Error())
103 kcDst, _, err := setupKeepClient(dstConfig, *dstKeepServicesJSON, true, *replications, 0)
105 return fmt.Errorf("Error configuring dst keepclient: %s", err.Error())
108 // Copy blocks not found in dst from src
109 err = performKeepRsync(kcSrc, kcDst, srcBlobSignatureTTL, srcBlobSigningKey, *prefix)
111 return fmt.Errorf("Error while syncing data: %s", err.Error())
// apiConfig holds the API connection settings for one cluster, as read from
// a config file by readConfigFromFile and consumed by setupKeepClient.
type apiConfig struct {
	APIToken        string // value of ARVADOS_API_TOKEN
	APIHost         string // value of ARVADOS_API_HOST
	APIHostInsecure bool   // ARVADOS_API_HOST_INSECURE as a boolean; when true, TLS certificate verification is skipped
}
123 // Load src and dst config from given files
124 func loadConfig(configFile string) (config apiConfig, blobSigningKey string, err error) {
125 if configFile == "" {
126 return config, blobSigningKey, errors.New("config file not specified")
129 config, blobSigningKey, err = readConfigFromFile(configFile)
131 return config, blobSigningKey, fmt.Errorf("Error reading config file: %v", err)
137 // Read config from file
138 func readConfigFromFile(filename string) (config apiConfig, blobSigningKey string, err error) {
139 if !strings.Contains(filename, "/") {
140 filename = os.Getenv("HOME") + "/.config/arvados/" + filename + ".conf"
143 content, err := ioutil.ReadFile(filename)
146 return config, "", err
149 lines := strings.Split(string(content), "\n")
150 for _, line := range lines {
155 kv := strings.SplitN(line, "=", 2)
156 key := strings.TrimSpace(kv[0])
157 value := strings.TrimSpace(kv[1])
160 case "ARVADOS_API_TOKEN":
161 config.APIToken = value
162 case "ARVADOS_API_HOST":
163 config.APIHost = value
164 case "ARVADOS_API_HOST_INSECURE":
165 config.APIHostInsecure = arvadosclient.StringBool(value)
166 case "ARVADOS_BLOB_SIGNING_KEY":
167 blobSigningKey = value
173 // setup keepclient using the config provided
174 func setupKeepClient(config apiConfig, keepServicesJSON string, isDst bool, replications int, srcBlobSignatureTTL time.Duration) (kc *keepclient.KeepClient, blobSignatureTTL time.Duration, err error) {
175 arv := arvadosclient.ArvadosClient{
176 ApiToken: config.APIToken,
177 ApiServer: config.APIHost,
178 ApiInsecure: config.APIHostInsecure,
179 Client: &http.Client{Transport: &http.Transport{
180 TLSClientConfig: &tls.Config{InsecureSkipVerify: config.APIHostInsecure}}},
183 // If keepServicesJSON is provided, use it instead of service discovery
184 if keepServicesJSON == "" {
185 kc, err = keepclient.MakeKeepClient(&arv)
190 kc = keepclient.New(&arv)
191 err = kc.LoadKeepServicesFromJSON(keepServicesJSON)
196 kc.DiskCacheSize = keepclient.DiskCacheDisabled
199 // Get default replications value from destination, if it is not already provided
200 if replications == 0 {
201 value, err := arv.Discovery("defaultCollectionReplication")
203 replications = int(value.(float64))
209 kc.Want_replicas = replications
212 // If srcBlobSignatureTTL is not provided, get it from API server discovery doc
213 blobSignatureTTL = srcBlobSignatureTTL
214 if !isDst && srcBlobSignatureTTL == 0 {
215 value, err := arv.Discovery("blobSignatureTtl")
217 blobSignatureTTL = time.Duration(int(value.(float64))) * time.Second
223 return kc, blobSignatureTTL, nil
226 // Get unique block locators from src and dst
227 // Copy any blocks missing in dst
228 func performKeepRsync(kcSrc, kcDst *keepclient.KeepClient, srcBlobSignatureTTL time.Duration, blobSigningKey, prefix string) error {
229 // Get unique locators from src
230 srcIndex, err := getUniqueLocators(kcSrc, prefix)
235 // Get unique locators from dst
236 dstIndex, err := getUniqueLocators(kcDst, prefix)
241 // Get list of locators found in src, but missing in dst
242 toBeCopied := getMissingLocators(srcIndex, dstIndex)
244 // Copy each missing block to dst
245 log.Printf("Before keep-rsync, there are %d blocks in src and %d blocks in dst. Start copying %d blocks from src not found in dst.",
246 len(srcIndex), len(dstIndex), len(toBeCopied))
248 err = copyBlocksToDst(toBeCopied, kcSrc, kcDst, srcBlobSignatureTTL, blobSigningKey)
253 // Get list of unique locators from the specified cluster
254 func getUniqueLocators(kc *keepclient.KeepClient, prefix string) (map[string]bool, error) {
255 uniqueLocators := map[string]bool{}
257 // Get index and dedup
258 for uuid := range kc.LocalRoots() {
259 reader, err := kc.GetIndex(uuid, prefix)
261 return uniqueLocators, err
263 scanner := bufio.NewScanner(reader)
265 uniqueLocators[strings.Split(scanner.Text(), " ")[0]] = true
269 return uniqueLocators, nil
// getMissingLocators returns the locators that are keys of srcLocators but
// not keys of dstLocators.
//
// Only key presence matters; the bool values are ignored. The result is nil
// when nothing is missing, and its order follows map iteration and is
// therefore unspecified.
func getMissingLocators(srcLocators, dstLocators map[string]bool) []string {
	var missingLocators []string
	for locator := range srcLocators {
		if _, ok := dstLocators[locator]; !ok {
			missingLocators = append(missingLocators, locator)
		}
	}
	return missingLocators
}
283 // Copy blocks from src to dst; only those that are missing in dst are copied
284 func copyBlocksToDst(toBeCopied []string, kcSrc, kcDst *keepclient.KeepClient, srcBlobSignatureTTL time.Duration, blobSigningKey string) error {
285 total := len(toBeCopied)
287 startedAt := time.Now()
288 for done, locator := range toBeCopied {
290 log.Printf("Copying data block %d of %d (%.2f%% done): %v", done+1, total,
291 float64(done)/float64(total)*100, locator)
293 timePerBlock := time.Since(startedAt) / time.Duration(done)
294 log.Printf("Copying data block %d of %d (%.2f%% done, %v est. time remaining): %v", done+1, total,
295 float64(done)/float64(total)*100, timePerBlock*time.Duration(total-done), locator)
298 getLocator := locator
299 expiresAt := time.Now().AddDate(0, 0, 1)
300 if blobSigningKey != "" {
301 getLocator = keepclient.SignLocator(getLocator, kcSrc.Arvados.ApiToken, expiresAt, srcBlobSignatureTTL, []byte(blobSigningKey))
304 reader, len, _, err := kcSrc.Get(getLocator)
306 return fmt.Errorf("Error getting block: %v %v", locator, err)
309 _, _, err = kcDst.PutHR(getLocator[:32], reader, len)
311 return fmt.Errorf("Error copying data block: %v %v", locator, err)
315 log.Printf("Successfully copied to destination %d blocks.", total)