Merge branch '21535-multi-wf-delete'
[arvados.git] / sdk / go / keepclient / keepclient.go
index 2bd7996b59c0260caf1d61560316c3bc42e09357..1c72e583cbc7f1cf97887fdc914550d5282ee33e 100644 (file)
@@ -44,6 +44,9 @@ var (
        DefaultProxyTLSHandshakeTimeout = 10 * time.Second
        DefaultProxyKeepAlive           = 120 * time.Second
 
+       DefaultRetryDelay = 2 * time.Second // see KeepClient.RetryDelay
+       MinimumRetryDelay = time.Millisecond
+
        rootCacheDir = "/var/cache/arvados/keep"
        userCacheDir = ".cache/arvados/keep" // relative to HOME
 )
@@ -75,6 +78,8 @@ type ErrNotFound struct {
        multipleResponseError
 }
 
+func (*ErrNotFound) HTTPStatus() int { return http.StatusNotFound } // HTTPStatus always reports http.StatusNotFound for this error type.
+
 type InsufficientReplicasError struct{ error }
 
 type OversizeBlockError struct{ error }
@@ -95,6 +100,8 @@ const (
        XKeepReplicasStored          = "X-Keep-Replicas-Stored"
        XKeepStorageClasses          = "X-Keep-Storage-Classes"
        XKeepStorageClassesConfirmed = "X-Keep-Storage-Classes-Confirmed"
+       XKeepSignature               = "X-Keep-Signature"
+       XKeepLocator                 = "X-Keep-Locator"
 )
 
 type HTTPClient interface {
@@ -105,14 +112,25 @@ const DiskCacheDisabled = arvados.ByteSizeOrPercent(1)
 
 // KeepClient holds information about Arvados and Keep servers.
 type KeepClient struct {
-       Arvados               *arvadosclient.ArvadosClient
-       Want_replicas         int
-       localRoots            map[string]string
-       writableLocalRoots    map[string]string
-       gatewayRoots          map[string]string
-       lock                  sync.RWMutex
-       HTTPClient            HTTPClient
-       Retries               int
+       Arvados            *arvadosclient.ArvadosClient
+       Want_replicas      int
+       localRoots         map[string]string
+       writableLocalRoots map[string]string
+       gatewayRoots       map[string]string
+       lock               sync.RWMutex
+       HTTPClient         HTTPClient
+
+       // Number of times to automatically retry a read/write
+       // operation after a transient failure.
+       Retries int
+
+       // Initial maximum delay for automatic retry. If zero,
+       // DefaultRetryDelay is used.  The delay after attempt N
+       // (0-based) will be a random duration between
+       // MinimumRetryDelay and RetryDelay * 2^N, not to exceed a cap
+       // of RetryDelay * 10.
+       RetryDelay time.Duration
+
        RequestID             string
        StorageClasses        []string
        DefaultStorageClasses []string                  // Set by cluster's exported config
@@ -141,6 +159,7 @@ func (kc *KeepClient) Clone() *KeepClient {
                gatewayRoots:          kc.gatewayRoots,
                HTTPClient:            kc.HTTPClient,
                Retries:               kc.Retries,
+               RetryDelay:            kc.RetryDelay,
                RequestID:             kc.RequestID,
                StorageClasses:        kc.StorageClasses,
                DefaultStorageClasses: kc.DefaultStorageClasses,
@@ -192,8 +211,8 @@ func New(arv *arvadosclient.ArvadosClient) *KeepClient {
                Retries:       2,
        }
        err = kc.loadDefaultClasses()
-       if err != nil {
-               DebugPrintf("DEBUG: Unable to load the default storage classes cluster config")
+       if err != nil && arv.Logger != nil {
+               arv.Logger.WithError(err).Debug("unable to load the default storage classes cluster config")
        }
        return kc
 }
@@ -269,6 +288,7 @@ func (kc *KeepClient) getOrHead(method string, locator string, header http.Heade
 
        var errs []string
 
+       delay := delayCalculator{InitialMaxDelay: kc.RetryDelay}
        triesRemaining := 1 + kc.Retries
 
        serversToTry := kc.getSortedRoots(locator)
@@ -348,8 +368,13 @@ func (kc *KeepClient) getOrHead(method string, locator string, header http.Heade
                        return nil, expectLength, url, resp.Header, nil
                }
                serversToTry = retryList
+               if len(serversToTry) > 0 && triesRemaining > 0 {
+                       time.Sleep(delay.Next())
+               }
+       }
+       if kc.Arvados.Logger != nil {
+               kc.Arvados.Logger.Debugf("DEBUG: %s %s failed: %v", method, locator, errs)
        }
-       DebugPrintf("DEBUG: %s %s failed: %v", method, locator, errs)
 
        var err error
        if count404 == numServers {
@@ -397,6 +422,7 @@ func (kc *KeepClient) upstreamGateway() arvados.KeepGateway {
                        Dir:         cachedir,
                        MaxSize:     kc.DiskCacheSize,
                        KeepGateway: backend,
+                       Logger:      kc.Arvados.Logger,
                }
        }
        return kc.gatewayStack
@@ -704,6 +730,13 @@ func (kc *KeepClient) getRequestID() string {
        return reqIDGen.Next()
 }
 
+func (kc *KeepClient) debugf(format string, args ...interface{}) {
+       // Debug output is optional: when the client has no logger, stay silent.
+       if logger := kc.Arvados.Logger; logger != nil {
+               logger.Debugf(format, args...)
+       }
+}
+
 type Locator struct {
        Hash  string
        Size  int      // -1 if data size is not known