9 "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
10 "git.curoverse.com/arvados.git/sdk/go/keepclient"
21 var srcConfigFile, dstConfigFile, srcKeepServicesJSON, dstKeepServicesJSON, prefix string
23 var srcBlobSigningKey string
29 "Source configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf")
35 "Destination configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf")
39 "src-keep-services-json",
41 "An optional list of available source keepservices. "+
42 "If not provided, this list is obtained from api server configured in src-config-file.")
46 "dst-keep-services-json",
48 "An optional list of available destination keepservices. "+
49 "If not provided, this list is obtained from api server configured in dst-config-file.")
55 "Number of replications to write to the destination. If replications not specified, "+
56 "default replication level configured on destination server will be used.")
66 srcConfig, srcBlobSigningKey, err := loadConfig(srcConfigFile)
68 log.Fatalf("Error loading src configuration from file: %s", err.Error())
71 dstConfig, _, err := loadConfig(dstConfigFile)
73 log.Fatalf("Error loading dst configuration from file: %s", err.Error())
76 // setup src and dst keepclients
77 kcSrc, err := setupKeepClient(srcConfig, srcKeepServicesJSON, false, 0)
79 log.Fatalf("Error configuring src keepclient: %s", err.Error())
82 kcDst, err := setupKeepClient(dstConfig, dstKeepServicesJSON, true, replications)
84 log.Fatalf("Error configuring dst keepclient: %s", err.Error())
87 // Copy blocks not found in dst from src
88 err = performKeepRsync(kcSrc, kcDst, srcBlobSigningKey, prefix)
90 log.Fatalf("Error while syncing data: %s", err.Error())
// apiConfig holds the API server settings read from a configuration file.
type apiConfig struct {
	APIToken        string // value of ARVADOS_API_TOKEN
	APIHost         string // value of ARVADOS_API_HOST
	APIHostInsecure bool   // true when ARVADOS_API_HOST_INSECURE matches matchTrue
	ExternalClient  bool   // true when ARVADOS_EXTERNAL_CLIENT matches matchTrue
}

// loadConfig reads the API settings and the blob signing key from
// configFile. It returns an error when configFile is empty or when the
// file cannot be read.
func loadConfig(configFile string) (config apiConfig, blobSigningKey string, err error) {
	if configFile == "" {
		return config, blobSigningKey, errors.New("config file not specified")
	}

	config, blobSigningKey, err = readConfigFromFile(configFile)
	if err != nil {
		return config, blobSigningKey, fmt.Errorf("Error reading config file: %v", err)
	}

	return config, blobSigningKey, nil
}

// matchTrue recognizes the values treated as boolean true in a config
// file: "1", "yes" or "true", compared case-insensitively.
var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")

// readConfigFromFile parses KEY=VALUE lines from filename.
//
// A filename without any "/" is treated as shorthand for
// $HOME/.config/arvados/<filename>, with ".conf" appended unless already
// present. Keys and values are trimmed of surrounding whitespace; only the
// first "=" on a line separates key from value, so values may themselves
// contain "=". Unrecognized keys are ignored.
//
// Lines that do not contain "=" (blank lines, comments, stray text) are
// skipped instead of being indexed blindly, which used to panic.
func readConfigFromFile(filename string) (config apiConfig, blobSigningKey string, err error) {
	if !strings.Contains(filename, "/") {
		filename = os.Getenv("HOME") + "/.config/arvados/" + filename
		if !strings.HasSuffix(filename, ".conf") {
			filename = filename + ".conf"
		}
	}

	content, err := ioutil.ReadFile(filename)
	if err != nil {
		return config, "", err
	}

	for _, line := range strings.Split(string(content), "\n") {
		kv := strings.SplitN(line, "=", 2)
		if len(kv) != 2 {
			// Not a KEY=VALUE line; nothing to parse.
			continue
		}
		key := strings.TrimSpace(kv[0])
		value := strings.TrimSpace(kv[1])

		switch key {
		case "ARVADOS_API_TOKEN":
			config.APIToken = value
		case "ARVADOS_API_HOST":
			config.APIHost = value
		case "ARVADOS_API_HOST_INSECURE":
			config.APIHostInsecure = matchTrue.MatchString(value)
		case "ARVADOS_EXTERNAL_CLIENT":
			config.ExternalClient = matchTrue.MatchString(value)
		case "ARVADOS_BLOB_SIGNING_KEY":
			blobSigningKey = value
		}
	}

	return config, blobSigningKey, nil
}
158 // setup keepclient using the config provided
159 func setupKeepClient(config apiConfig, keepServicesJSON string, isDst bool, replications int) (kc *keepclient.KeepClient, err error) {
160 arv := arvadosclient.ArvadosClient{
161 ApiToken: config.APIToken,
162 ApiServer: config.APIHost,
163 ApiInsecure: config.APIHostInsecure,
164 Client: &http.Client{Transport: &http.Transport{
165 TLSClientConfig: &tls.Config{InsecureSkipVerify: config.APIHostInsecure}}},
166 External: config.ExternalClient,
169 // if keepServicesJSON is provided, use it to load services; else, use DiscoverKeepServers
170 if keepServicesJSON == "" {
171 kc, err = keepclient.MakeKeepClient(&arv)
176 kc = keepclient.New(&arv)
177 err = kc.LoadKeepServicesFromJSON(keepServicesJSON)
184 // Get default replications value from destination, if it is not already provided
185 if replications == 0 {
186 value, err := arv.Discovery("defaultCollectionReplication")
188 replications = int(value.(float64))
194 kc.Want_replicas = replications
200 // Get unique block locators from src and dst
201 // Copy any blocks missing in dst
202 func performKeepRsync(kcSrc, kcDst *keepclient.KeepClient, blobSigningKey, prefix string) error {
203 // Get unique locators from src
204 srcIndex, err := getUniqueLocators(kcSrc, prefix)
209 // Get unique locators from dst
210 dstIndex, err := getUniqueLocators(kcDst, prefix)
215 // Get list of locators found in src, but missing in dst
216 toBeCopied := getMissingLocators(srcIndex, dstIndex)
218 // Copy each missing block to dst
219 log.Printf("Before keep-rsync, there are %d blocks in src and %d blocks in dst. Start copying %d blocks from src not found in dst.",
220 len(srcIndex), len(dstIndex), len(toBeCopied))
222 err = copyBlocksToDst(toBeCopied, kcSrc, kcDst, blobSigningKey)
227 // Get list of unique locators from the specified cluster
228 func getUniqueLocators(kc *keepclient.KeepClient, prefix string) (map[string]bool, error) {
229 uniqueLocators := map[string]bool{}
231 // Get index and dedup
232 for uuid := range kc.LocalRoots() {
233 reader, err := kc.GetIndex(uuid, prefix)
235 return uniqueLocators, err
237 scanner := bufio.NewScanner(reader)
239 uniqueLocators[strings.Split(scanner.Text(), " ")[0]] = true
243 return uniqueLocators, nil
// getMissingLocators returns every key of srcLocators that is not a key of
// dstLocators. Only key presence matters; the boolean values are ignored.
// The result is nil when nothing is missing, and its order follows map
// iteration order, so it is unspecified.
func getMissingLocators(srcLocators, dstLocators map[string]bool) []string {
	var missing []string
	for loc := range srcLocators {
		_, present := dstLocators[loc]
		if present {
			continue
		}
		missing = append(missing, loc)
	}
	return missing
}
257 // Copy blocks from src to dst; only those that are missing in dst are copied
258 func copyBlocksToDst(toBeCopied []string, kcSrc, kcDst *keepclient.KeepClient, blobSigningKey string) error {
260 total := len(toBeCopied)
262 startedAt := time.Now()
264 for _, locator := range toBeCopied {
265 log.Printf("Getting block %d of %d: %v", done+1, total, locator)
267 getLocator := locator
268 expiresAt := time.Now().AddDate(0, 0, 1)
269 if blobSigningKey != "" {
270 getLocator = keepclient.SignLocator(getLocator, kcSrc.Arvados.ApiToken, expiresAt, []byte(blobSigningKey))
273 reader, len, _, err := kcSrc.Get(getLocator)
275 return fmt.Errorf("Error getting block: %v %v", locator, err)
279 log.Printf("Copying data block %d of %d (%.2f%% done): %v", done+1, total,
280 float64(done)/float64(total)*100, locator)
282 log.Printf("Copying data block %d of %d (%.2f%% done, ETA %v): %v", done+1, total,
283 float64(done)/float64(total)*100, time.Duration(blockTime*int64(total-done)), locator)
285 _, _, err = kcDst.PutHR(getLocator[:32], reader, len)
287 return fmt.Errorf("Error copying data block: %v %v", locator, err)
291 blockTime = int64(time.Now().Sub(startedAt))
297 log.Printf("Successfully copied to destination %d blocks.", total)