package main
import (
- "bytes"
+ "bufio"
+ "crypto/tls"
"errors"
"flag"
+ "fmt"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
"git.curoverse.com/arvados.git/sdk/go/keepclient"
"io/ioutil"
"log"
+ "net/http"
+ "os"
"regexp"
"strings"
"time"
)
-// keep-rsync arguments
-var (
- srcConfig arvadosclient.APIConfig
- dstConfig arvadosclient.APIConfig
- blobSigningKey string
- srcKeepServicesJSON string
- dstKeepServicesJSON string
- replications int
- prefix string
-)
-
-var srcConfigFile string
-var dstConfigFile string
-
func main() {
+ var srcConfigFile, dstConfigFile, srcKeepServicesJSON, dstKeepServicesJSON, prefix string
+ var replications int
+ var srcBlobSigningKey string
+
flag.StringVar(
&srcConfigFile,
"src-config-file",
"",
- "Source configuration filename with full path that contains "+
- "an ARVADOS_API_TOKEN which is a valid datamanager token recognized by the source keep servers, "+
- "ARVADOS_API_HOST, ARVADOS_API_HOST_INSECURE, ARVADOS_EXTERNAL_CLIENT and ARVADOS_BLOB_SIGNING_KEY.")
+ "Source configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf")
flag.StringVar(
&dstConfigFile,
"dst-config-file",
"",
- "Destination configuration filename with full path that contains "+
- "an ARVADOS_API_TOKEN which is a valid datamanager token recognized by the destination keep servers, "+
- "ARVADOS_API_HOST, ARVADOS_API_HOST_INSECURE, ARVADOS_EXTERNAL_CLIENT and ARVADOS_BLOB_SIGNING_KEY.")
+ "Destination configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf")
flag.StringVar(
&srcKeepServicesJSON,
&replications,
"replications",
0,
- "Number of replications to write to the destination.")
+ "Number of replications to write to the destination. If replications not specified, "+
+ "default replication level configured on destination server will be used.")
flag.StringVar(
&prefix,
flag.Parse()
- var err error
-
- err = loadConfig()
+ srcConfig, srcBlobSigningKey, err := loadConfig(srcConfigFile)
if err != nil {
- log.Fatal("Error loading configuration from files: %s", err.Error())
+ log.Fatalf("Error loading src configuration from file: %s", err.Error())
}
- // Initialize keep-rsync
- err = initializeKeepRsync()
+ dstConfig, _, err := loadConfig(dstConfigFile)
if err != nil {
- log.Fatal("Error configuring keep-rsync: %s", err.Error())
+ log.Fatalf("Error loading dst configuration from file: %s", err.Error())
}
- // Copy blocks not found in dst from src
- err = performKeepRsync()
+ // setup src and dst keepclients
+ kcSrc, err := setupKeepClient(srcConfig, srcKeepServicesJSON, false, 0)
if err != nil {
- log.Fatal("Error while syncing data: %s", err.Error())
+ log.Fatalf("Error configuring src keepclient: %s", err.Error())
}
-}
-// Load src and dst config from given files
-func loadConfig() error {
- if srcConfigFile == "" {
- return errors.New("-src-config-file must be specified")
+ kcDst, err := setupKeepClient(dstConfig, dstKeepServicesJSON, true, replications)
+ if err != nil {
+ log.Fatalf("Error configuring dst keepclient: %s", err.Error())
}
- var err error
-
- srcConfig, err = readConfigFromFile(srcConfigFile)
+ // Copy blocks not found in dst from src
+ err = performKeepRsync(kcSrc, kcDst, srcBlobSigningKey, prefix)
if err != nil {
- log.Printf("Error reading source configuration: %s", err.Error())
- return err
+ log.Fatalf("Error while syncing data: %s", err.Error())
}
+}
+
+type apiConfig struct { // API connection settings parsed from a keep-rsync config file
+ APIToken string // value of ARVADOS_API_TOKEN
+ APIHost string // value of ARVADOS_API_HOST
+ APIHostInsecure bool // true when ARVADOS_API_HOST_INSECURE matches matchTrue
+ ExternalClient bool // true when ARVADOS_EXTERNAL_CLIENT matches matchTrue
+}
- if dstConfigFile == "" {
- return errors.New("-dst-config-file must be specified")
+// Load the API config and blob signing key from a single config file; an empty configFile is reported as an error
+func loadConfig(configFile string) (config apiConfig, blobSigningKey string, err error) {
+ if configFile == "" {
+ return config, blobSigningKey, errors.New("config file not specified")
}
- dstConfig, err = readConfigFromFile(dstConfigFile)
+
+ config, blobSigningKey, err = readConfigFromFile(configFile)
if err != nil {
- log.Printf("Error reading destination configuration: %s", err.Error())
+ return config, blobSigningKey, fmt.Errorf("Error reading config file: %v", err) // wraps the read error's text; the values from readConfigFromFile are passed through as-is
}
- return err
+ return // naked return of the named results assigned by readConfigFromFile
}
var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$") // case-insensitive whole-string match used to interpret boolean config values
-// Reads config from file
-func readConfigFromFile(filename string) (arvadosclient.APIConfig, error) {
- var config arvadosclient.APIConfig
+// Read config from file
+func readConfigFromFile(filename string) (config apiConfig, blobSigningKey string, err error) {
+ if !strings.Contains(filename, "/") {
+ filename = os.Getenv("HOME") + "/.config/arvados/" + filename
+ if !strings.HasSuffix(filename, ".conf") {
+ filename = filename + ".conf"
+ }
+ }
content, err := ioutil.ReadFile(filename)
+
if err != nil {
- return config, err
+ return config, "", err
}
lines := strings.Split(string(content), "\n")
if line == "" {
continue
}
- kv := strings.Split(line, "=")
- switch kv[0] {
+ kv := strings.SplitN(line, "=", 2)
+ key := strings.TrimSpace(kv[0])
+ value := strings.TrimSpace(kv[1])
+
+ switch key {
case "ARVADOS_API_TOKEN":
- config.APIToken = kv[1]
+ config.APIToken = value
case "ARVADOS_API_HOST":
- config.APIHost = kv[1]
+ config.APIHost = value
case "ARVADOS_API_HOST_INSECURE":
- config.APIHostInsecure = matchTrue.MatchString(kv[1])
+ config.APIHostInsecure = matchTrue.MatchString(value)
case "ARVADOS_EXTERNAL_CLIENT":
- config.ExternalClient = matchTrue.MatchString(kv[1])
+ config.ExternalClient = matchTrue.MatchString(value)
case "ARVADOS_BLOB_SIGNING_KEY":
- blobSigningKey = kv[1]
+ blobSigningKey = value
}
}
- return config, nil
+ return
}
-// keep-rsync source and destination clients
-var (
- arvSrc arvadosclient.ArvadosClient
- arvDst arvadosclient.ArvadosClient
- kcSrc *keepclient.KeepClient
- kcDst *keepclient.KeepClient
-)
-
-// Initializes keep-rsync using the config provided
-func initializeKeepRsync() (err error) {
- // arvSrc from srcConfig
- arvSrc, err = arvadosclient.New(srcConfig)
- if err != nil {
- return
- }
-
- // arvDst from dstConfig
- arvDst, err = arvadosclient.New(dstConfig)
- if err != nil {
- return
- }
-
- // Get default replications value from destination, if it is not already provided
- if replications == 0 {
- value, err := arvDst.Discovery("defaultCollectionReplication")
- if err == nil {
- replications = int(value.(float64))
- } else {
- replications = 2
- }
+// setupKeepClient builds an ArvadosClient from config and returns a KeepClient for it; when isDst is true it also sets Want_replicas from replications (or, if that is 0, from the defaultCollectionReplication discovery value)
+func setupKeepClient(config apiConfig, keepServicesJSON string, isDst bool, replications int) (kc *keepclient.KeepClient, err error) {
+ arv := arvadosclient.ArvadosClient{
+ ApiToken: config.APIToken,
+ ApiServer: config.APIHost,
+ ApiInsecure: config.APIHostInsecure,
+ Client: &http.Client{Transport: &http.Transport{
+ TLSClientConfig: &tls.Config{InsecureSkipVerify: config.APIHostInsecure}}}, // certificate verification is skipped when APIHostInsecure is set
+ External: config.ExternalClient,
}
- // if srcKeepServicesJSON is provided, use it to load services; else, use DiscoverKeepServers
- if srcKeepServicesJSON == "" {
- kcSrc, err = keepclient.MakeKeepClient(&arvSrc)
+ // if keepServicesJSON is provided, use it to load services; else, use DiscoverKeepServers
+ if keepServicesJSON == "" {
+ kc, err = keepclient.MakeKeepClient(&arv)
if err != nil {
- return
+ return nil, err
}
} else {
- kcSrc, err = keepclient.MakeKeepClientFromJSON(&arvSrc, srcKeepServicesJSON)
+ kc = keepclient.New(&arv)
+ err = kc.LoadKeepServicesFromJSON(keepServicesJSON)
if err != nil {
- return
+ return kc, err // NOTE(review): returns a non-nil kc with the error, unlike the other error paths here which return nil
}
}
- // if dstKeepServicesJSON is provided, use it to load services; else, use DiscoverKeepServers
- if dstKeepServicesJSON == "" {
- kcDst, err = keepclient.MakeKeepClient(&arvDst)
- if err != nil {
- return
- }
- } else {
- kcDst, err = keepclient.MakeKeepClientFromJSON(&arvDst, dstKeepServicesJSON)
- if err != nil {
- return
+ if isDst {
+ // Get default replications value from destination, if it is not already provided
+ if replications == 0 {
+ value, err := arv.Discovery("defaultCollectionReplication") // this err is a new variable that shadows the named result inside this if block
+ if err == nil {
+ replications = int(value.(float64)) // NOTE(review): unchecked type assertion; panics if the discovery value is not a float64
+ } else {
+ return nil, err
+ }
}
+
+ kc.Want_replicas = replications
}
- kcDst.Want_replicas = replications
- return
+ return kc, nil
}
// Get unique block locators from src and dst
// Copy any blocks missing in dst
-func performKeepRsync() error {
+func performKeepRsync(kcSrc, kcDst *keepclient.KeepClient, blobSigningKey, prefix string) error {
// Get unique locators from src
srcIndex, err := getUniqueLocators(kcSrc, prefix)
if err != nil {
toBeCopied := getMissingLocators(srcIndex, dstIndex)
// Copy each missing block to dst
- err = copyBlocksToDst(toBeCopied)
+ log.Printf("Before keep-rsync, there are %d blocks in src and %d blocks in dst. Start copying %d blocks from src not found in dst.",
+ len(srcIndex), len(dstIndex), len(toBeCopied))
+
+ err = copyBlocksToDst(toBeCopied, kcSrc, kcDst, blobSigningKey)
return err
}
// Get list of unique locators from the specified cluster
-func getUniqueLocators(kc *keepclient.KeepClient, indexPrefix string) (map[string]bool, error) {
- var indexBytes []byte
+func getUniqueLocators(kc *keepclient.KeepClient, prefix string) (map[string]bool, error) {
+ uniqueLocators := map[string]bool{}
+ // Fetch the index for each entry of kc.LocalRoots() and record the first space-separated field of every line as a locator
for uuid := range kc.LocalRoots() {
- reader, err := kc.GetIndex(uuid, indexPrefix)
+ reader, err := kc.GetIndex(uuid, prefix)
if err != nil {
- return nil, err
+ return uniqueLocators, err // returns the locators gathered so far together with the error
}
-
- var readBytes []byte
- readBytes, err = ioutil.ReadAll(reader)
- if err != nil {
- return nil, err
+ scanner := bufio.NewScanner(reader) // NOTE(review): scanner.Err() is never checked, so a read error ends the loop below silently
+ for scanner.Scan() {
+ uniqueLocators[strings.Split(scanner.Text(), " ")[0]] = true // NOTE(review): a blank line would add the "" key; the removed code skipped empty lines
}
-
- indexBytes = append(indexBytes, readBytes...)
}
- // Got index; Now dedup it
- locators := bytes.Split(indexBytes, []byte("\n"))
-
- uniqueLocators := map[string]bool{}
- for _, loc := range locators {
- if len(loc) == 0 {
- continue
- }
-
- locator := string(bytes.Split(loc, []byte(" "))[0])
- if _, ok := uniqueLocators[locator]; !ok {
- uniqueLocators[locator] = true
- }
- }
return uniqueLocators, nil
}
// Get list of locators that are in src but not in dst
-func getMissingLocators(srcLocators map[string]bool, dstLocators map[string]bool) []string {
+func getMissingLocators(srcLocators, dstLocators map[string]bool) []string {
var missingLocators []string
for locator := range srcLocators {
if _, ok := dstLocators[locator]; !ok {
}
// Copy blocks from src to dst; only those that are missing in dst are copied
-func copyBlocksToDst(toBeCopied []string) error {
+func copyBlocksToDst(toBeCopied []string, kcSrc, kcDst *keepclient.KeepClient, blobSigningKey string) error {
done := 0
total := len(toBeCopied)
+ startedAt := time.Now()
+ var blockTime int64
for _, locator := range toBeCopied {
- log.Printf("Getting block %d of %d", done+1, total)
-
- log.Printf("Getting block: %v", locator)
+ log.Printf("Getting block %d of %d: %v", done+1, total, locator)
getLocator := locator
expiresAt := time.Now().AddDate(0, 0, 1)
if blobSigningKey != "" {
- getLocator = keepclient.SignLocator(getLocator, arvSrc.ApiToken, expiresAt, []byte(blobSigningKey))
+ getLocator = keepclient.SignLocator(getLocator, kcSrc.Arvados.ApiToken, expiresAt, []byte(blobSigningKey))
}
- reader, _, _, err := kcSrc.Get(getLocator)
+ reader, len, _, err := kcSrc.Get(getLocator)
if err != nil {
- log.Printf("Error getting block: %q %v", locator, err)
- return err
+ return fmt.Errorf("Error getting block: %v %v", locator, err)
}
- data, err := ioutil.ReadAll(reader)
+
+ if done == 0 {
+ log.Printf("Copying data block %d of %d (%.2f%% done): %v", done+1, total,
+ float64(done)/float64(total)*100, locator)
+ } else {
+ log.Printf("Copying data block %d of %d (%.2f%% done, ETA %v): %v", done+1, total,
+ float64(done)/float64(total)*100, time.Duration(blockTime*int64(total-done)), locator)
+ }
+ _, _, err = kcDst.PutHR(getLocator[:32], reader, len)
if err != nil {
- log.Printf("Error reading block data: %q %v", locator, err)
- return err
+ return fmt.Errorf("Error copying data block: %v %v", locator, err)
}
- log.Printf("Copying block: %q", locator)
- _, _, err = kcDst.PutB(data)
- if err != nil {
- log.Printf("Error putting block data: %q %v", locator, err)
- return err
+ if done == 0 {
+ blockTime = int64(time.Now().Sub(startedAt))
}
done++
- log.Printf("%.2f%% done", float64(done)/float64(total)*100)
}
log.Printf("Successfully copied to destination %d blocks.", total)