9 "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
10 "git.curoverse.com/arvados.git/sdk/go/keepclient"
28 flags := flag.NewFlagSet("keep-rsync", flag.ExitOnError)
30 srcConfigFile := flags.String(
33 "Source configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf file. This file is expected to specify the values for ARVADOS_API_TOKEN, ARVADOS_API_HOST, ARVADOS_API_HOST_INSECURE, and ARVADOS_BLOB_SIGNING_KEY for the source.")
35 dstConfigFile := flags.String(
38 "Destination configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf file. This file is expected to specify the values for ARVADOS_API_TOKEN, ARVADOS_API_HOST, and ARVADOS_API_HOST_INSECURE for the destination.")
40 srcKeepServicesJSON := flags.String(
41 "src-keep-services-json",
43 "An optional list of available source keepservices. "+
44 "If not provided, this list is obtained from api server configured in src-config-file.")
46 dstKeepServicesJSON := flags.String(
47 "dst-keep-services-json",
49 "An optional list of available destination keepservices. "+
50 "If not provided, this list is obtained from api server configured in dst-config-file.")
52 replications := flags.Int(
55 "Number of replications to write to the destination. If replications not specified, "+
56 "default replication level configured on destination server will be used.")
58 prefix := flags.String(
63 blobSigningTTL := flags.Duration(
66 "Lifetime of blob permission signatures on source keepservers. If not provided, this will be retrieved from the keepservers.")
68 // Parse args; omit the first arg which is the command name
69 flags.Parse(os.Args[1:])
71 srcConfig, srcBlobSigningKey, err := loadConfig(*srcConfigFile)
73 return fmt.Errorf("Error loading src configuration from file: %s", err.Error())
76 dstConfig, _, err := loadConfig(*dstConfigFile)
78 return fmt.Errorf("Error loading dst configuration from file: %s", err.Error())
81 // setup src and dst keepclients
82 kcSrc, err := setupKeepClient(srcConfig, *srcKeepServicesJSON, false, 0, *blobSigningTTL)
84 return fmt.Errorf("Error configuring src keepclient: %s", err.Error())
87 kcDst, err := setupKeepClient(dstConfig, *dstKeepServicesJSON, true, *replications, *blobSigningTTL)
89 return fmt.Errorf("Error configuring dst keepclient: %s", err.Error())
92 // Copy blocks not found in dst from src
93 err = performKeepRsync(kcSrc, kcDst, *blobSigningTTL, srcBlobSigningKey, *prefix)
95 return fmt.Errorf("Error while syncing data: %s", err.Error())
101 type apiConfig struct {
108 // Load src and dst config from given files
109 func loadConfig(configFile string) (config apiConfig, blobSigningKey string, err error) {
110 if configFile == "" {
111 return config, blobSigningKey, errors.New("config file not specified")
114 config, blobSigningKey, err = readConfigFromFile(configFile)
116 return config, blobSigningKey, fmt.Errorf("Error reading config file: %v", err)
// matchTrue recognizes the config values treated as boolean true: exactly
// "1", "yes" or "true", compared case-insensitively, with nothing before or
// after.
var matchTrue = regexp.MustCompile(`^(?i:1|yes|true)$`)
124 // Read config from file
125 func readConfigFromFile(filename string) (config apiConfig, blobSigningKey string, err error) {
126 if !strings.Contains(filename, "/") {
127 filename = os.Getenv("HOME") + "/.config/arvados/" + filename + ".conf"
130 content, err := ioutil.ReadFile(filename)
133 return config, "", err
136 lines := strings.Split(string(content), "\n")
137 for _, line := range lines {
142 kv := strings.SplitN(line, "=", 2)
143 key := strings.TrimSpace(kv[0])
144 value := strings.TrimSpace(kv[1])
147 case "ARVADOS_API_TOKEN":
148 config.APIToken = value
149 case "ARVADOS_API_HOST":
150 config.APIHost = value
151 case "ARVADOS_API_HOST_INSECURE":
152 config.APIHostInsecure = matchTrue.MatchString(value)
153 case "ARVADOS_EXTERNAL_CLIENT":
154 config.ExternalClient = matchTrue.MatchString(value)
155 case "ARVADOS_BLOB_SIGNING_KEY":
156 blobSigningKey = value
162 // setup keepclient using the config provided
163 func setupKeepClient(config apiConfig, keepServicesJSON string, isDst bool, replications int, blobSigningTTL time.Duration) (kc *keepclient.KeepClient, err error) {
164 arv := arvadosclient.ArvadosClient{
165 ApiToken: config.APIToken,
166 ApiServer: config.APIHost,
167 ApiInsecure: config.APIHostInsecure,
168 Client: &http.Client{Transport: &http.Transport{
169 TLSClientConfig: &tls.Config{InsecureSkipVerify: config.APIHostInsecure}}},
170 External: config.ExternalClient,
173 // if keepServicesJSON is provided, use it to load services; else, use DiscoverKeepServers
174 if keepServicesJSON == "" {
175 kc, err = keepclient.MakeKeepClient(&arv)
180 kc = keepclient.New(&arv)
181 err = kc.LoadKeepServicesFromJSON(keepServicesJSON)
188 // Get default replications value from destination, if it is not already provided
189 if replications == 0 {
190 value, err := arv.Discovery("defaultCollectionReplication")
192 replications = int(value.(float64))
198 kc.Want_replicas = replications
201 // If blobSigningTTL is not provided, get it from source
202 if !isDst && blobSigningTTL == 0 {
203 value, err := arv.Discovery("blobSignatureTtl")
205 blobSigningTTL = time.Duration(int(value.(float64))) * time.Second
214 // Get unique block locators from src and dst
215 // Copy any blocks missing in dst
216 func performKeepRsync(kcSrc, kcDst *keepclient.KeepClient, blobSigningTTL time.Duration, blobSigningKey, prefix string) error {
217 // Get unique locators from src
218 srcIndex, err := getUniqueLocators(kcSrc, prefix)
223 // Get unique locators from dst
224 dstIndex, err := getUniqueLocators(kcDst, prefix)
229 // Get list of locators found in src, but missing in dst
230 toBeCopied := getMissingLocators(srcIndex, dstIndex)
232 // Copy each missing block to dst
233 log.Printf("Before keep-rsync, there are %d blocks in src and %d blocks in dst. Start copying %d blocks from src not found in dst.",
234 len(srcIndex), len(dstIndex), len(toBeCopied))
236 err = copyBlocksToDst(toBeCopied, kcSrc, kcDst, blobSigningTTL, blobSigningKey)
241 // Get list of unique locators from the specified cluster
242 func getUniqueLocators(kc *keepclient.KeepClient, prefix string) (map[string]bool, error) {
243 uniqueLocators := map[string]bool{}
245 // Get index and dedup
246 for uuid := range kc.LocalRoots() {
247 reader, err := kc.GetIndex(uuid, prefix)
249 return uniqueLocators, err
251 scanner := bufio.NewScanner(reader)
253 uniqueLocators[strings.Split(scanner.Text(), " ")[0]] = true
257 return uniqueLocators, nil
// getMissingLocators lists the keys of srcLocators that are not keys of
// dstLocators. Only key presence is considered; the bool values are ignored.
// The order of the result follows map iteration and is therefore
// unspecified. The result is nil when nothing is missing.
func getMissingLocators(srcLocators, dstLocators map[string]bool) []string {
	var absent []string
	for candidate := range srcLocators {
		if _, present := dstLocators[candidate]; present {
			continue
		}
		absent = append(absent, candidate)
	}
	return absent
}
271 // Copy blocks from src to dst; only those that are missing in dst are copied
272 func copyBlocksToDst(toBeCopied []string, kcSrc, kcDst *keepclient.KeepClient, blobSigningTTL time.Duration, blobSigningKey string) error {
273 total := len(toBeCopied)
275 startedAt := time.Now()
276 for done, locator := range toBeCopied {
278 log.Printf("Copying data block %d of %d (%.2f%% done): %v", done+1, total,
279 float64(done)/float64(total)*100, locator)
281 timePerBlock := time.Since(startedAt) / time.Duration(done)
282 log.Printf("Copying data block %d of %d (%.2f%% done, %v est. time remaining): %v", done+1, total,
283 float64(done)/float64(total)*100, timePerBlock*time.Duration(total-done), locator)
286 getLocator := locator
287 expiresAt := time.Now().AddDate(0, 0, 1)
288 if blobSigningKey != "" {
289 getLocator = keepclient.SignLocator(getLocator, kcSrc.Arvados.ApiToken, expiresAt, blobSigningTTL, []byte(blobSigningKey))
292 reader, len, _, err := kcSrc.Get(getLocator)
294 return fmt.Errorf("Error getting block: %v %v", locator, err)
297 _, _, err = kcDst.PutHR(getLocator[:32], reader, len)
299 return fmt.Errorf("Error copying data block: %v %v", locator, err)
303 log.Printf("Successfully copied to destination %d blocks.", total)