2960: Merge branch 'main' into 2960-keepstore-streaming
[arvados.git] / sdk / go / keepclient / keepclient.go
index 64f7e47b7e14e85aa86d369da58c6fd8d1273ad7..d97a2d1fcd2096a7f44983bbc7349ce11c24d307 100644 (file)
@@ -44,6 +44,9 @@ var (
        DefaultProxyTLSHandshakeTimeout = 10 * time.Second
        DefaultProxyKeepAlive           = 120 * time.Second
 
+       DefaultRetryDelay = 2 * time.Second // see KeepClient.RetryDelay
+       MinimumRetryDelay = time.Millisecond
+
        rootCacheDir = "/var/cache/arvados/keep"
        userCacheDir = ".cache/arvados/keep" // relative to HOME
 )
@@ -107,14 +110,25 @@ const DiskCacheDisabled = arvados.ByteSizeOrPercent(1)
 
 // KeepClient holds information about Arvados and Keep servers.
 type KeepClient struct {
-       Arvados               *arvadosclient.ArvadosClient
-       Want_replicas         int
-       localRoots            map[string]string
-       writableLocalRoots    map[string]string
-       gatewayRoots          map[string]string
-       lock                  sync.RWMutex
-       HTTPClient            HTTPClient
-       Retries               int
+       Arvados            *arvadosclient.ArvadosClient
+       Want_replicas      int
+       localRoots         map[string]string
+       writableLocalRoots map[string]string
+       gatewayRoots       map[string]string
+       lock               sync.RWMutex
+       HTTPClient         HTTPClient
+
+       // Number of times to automatically retry a read/write
+       // operation after a transient failure.
+       Retries int
+
+       // Initial maximum delay for automatic retry. If zero,
+       // DefaultRetryDelay is used.  The delay after attempt N
+       // (0-based) will be a random duration between
+       // MinimumRetryDelay and RetryDelay * 2^N, not to exceed a cap
+       // of RetryDelay * 10.
+       RetryDelay time.Duration
+
        RequestID             string
        StorageClasses        []string
        DefaultStorageClasses []string                  // Set by cluster's exported config
@@ -143,6 +157,7 @@ func (kc *KeepClient) Clone() *KeepClient {
                gatewayRoots:          kc.gatewayRoots,
                HTTPClient:            kc.HTTPClient,
                Retries:               kc.Retries,
+               RetryDelay:            kc.RetryDelay,
                RequestID:             kc.RequestID,
                StorageClasses:        kc.StorageClasses,
                DefaultStorageClasses: kc.DefaultStorageClasses,
@@ -271,6 +286,7 @@ func (kc *KeepClient) getOrHead(method string, locator string, header http.Heade
 
        var errs []string
 
+       delay := delayCalculator{InitialMaxDelay: kc.RetryDelay}
        triesRemaining := 1 + kc.Retries
 
        serversToTry := kc.getSortedRoots(locator)
@@ -350,6 +366,9 @@ func (kc *KeepClient) getOrHead(method string, locator string, header http.Heade
                        return nil, expectLength, url, resp.Header, nil
                }
                serversToTry = retryList
+               if len(serversToTry) > 0 && triesRemaining > 0 {
+                       time.Sleep(delay.Next())
+               }
        }
        DebugPrintf("DEBUG: %s %s failed: %v", method, locator, errs)