From 6f18406060f2ac32d505db14ceab97b08431ce04 Mon Sep 17 00:00:00 2001 From: Tom Clegg Date: Thu, 25 Apr 2019 13:54:03 -0400 Subject: [PATCH 1/1] 14399: Retry Azure ListBlobs call after 503 errors. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- services/keepstore/azure_blob_volume.go | 38 +++++++++++++++++--- services/keepstore/azure_blob_volume_test.go | 24 +++++++++---- 2 files changed, 50 insertions(+), 12 deletions(-) diff --git a/services/keepstore/azure_blob_volume.go b/services/keepstore/azure_blob_volume.go index 6b5b233c2a..3c17b3bd06 100644 --- a/services/keepstore/azure_blob_volume.go +++ b/services/keepstore/azure_blob_volume.go @@ -26,7 +26,11 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -const azureDefaultRequestTimeout = arvados.Duration(10 * time.Minute) +const ( + azureDefaultRequestTimeout = arvados.Duration(10 * time.Minute) + azureDefaultListBlobsMaxAttempts = 12 + azureDefaultListBlobsRetryDelay = arvados.Duration(10 * time.Second) +) var ( azureMaxGetBytes int @@ -108,6 +112,8 @@ type AzureBlobVolume struct { ReadOnly bool RequestTimeout arvados.Duration StorageClasses []string + ListBlobsRetryDelay arvados.Duration + ListBlobsMaxAttempts int azClient storage.Client container *azureContainer @@ -149,6 +155,12 @@ func (v *AzureBlobVolume) Type() string { // Start implements Volume. func (v *AzureBlobVolume) Start(vm *volumeMetricsVecs) error { + if v.ListBlobsRetryDelay == 0 { + v.ListBlobsRetryDelay = azureDefaultListBlobsRetryDelay + } + if v.ListBlobsMaxAttempts == 0 { + v.ListBlobsMaxAttempts = azureDefaultListBlobsMaxAttempts + } if v.ContainerName == "" { return errors.New("no container name given") } @@ -486,8 +498,8 @@ func (v *AzureBlobVolume) IndexTo(prefix string, writer io.Writer) error { Prefix: prefix, Include: &storage.IncludeBlobDataset{Metadata: true}, } - for { - resp, err := v.container.ListBlobs(params) + for page := 1; ; page++ { + resp, err := v.listBlobs(page, params) if err != nil { return err } @@ -517,6 +529,22 @@ func (v *AzureBlobVolume) IndexTo(prefix string, writer io.Writer) error { } } +// call v.container.ListBlobs, retrying if needed. +func (v *AzureBlobVolume) listBlobs(page int, params storage.ListBlobsParameters) (resp storage.BlobListResponse, err error) { + for i := 0; i < v.ListBlobsMaxAttempts; i++ { + resp, err = v.container.ListBlobs(params) + err = v.translateError(err) + if err == VolumeBusyError { + log.Printf("ListBlobs: will retry page %d in %s after error: %s", page, v.ListBlobsRetryDelay, err) + time.Sleep(time.Duration(v.ListBlobsRetryDelay)) + continue + } else { + break + } + } + return +} + // Trash a Keep block. func (v *AzureBlobVolume) Trash(loc string) error { if v.ReadOnly { @@ -674,8 +702,8 @@ func (v *AzureBlobVolume) EmptyTrash() { } params := storage.ListBlobsParameters{Include: &storage.IncludeBlobDataset{Metadata: true}} - for { - resp, err := v.container.ListBlobs(params) + for page := 1; ; page++ { + resp, err := v.listBlobs(page, params) if err != nil { log.Printf("EmptyTrash: ListBlobs: %v", err) break diff --git a/services/keepstore/azure_blob_volume_test.go b/services/keepstore/azure_blob_volume_test.go index cfad7577c5..8d02def144 100644 --- a/services/keepstore/azure_blob_volume_test.go +++ b/services/keepstore/azure_blob_volume_test.go @@ -27,6 +27,7 @@ import ( "testing" "time" + "git.curoverse.com/arvados.git/sdk/go/arvados" "github.com/Azure/azure-sdk-for-go/storage" "github.com/ghodss/yaml" "github.com/prometheus/client_golang/prometheus" @@ -65,8 +66,9 @@ type azBlob struct { type azStubHandler struct { sync.Mutex - blobs map[string]*azBlob - race chan chan struct{} + blobs map[string]*azBlob + race chan chan struct{} + didlist503 bool } func newAzStubHandler() *azStubHandler { @@ -281,6 +283,11 @@ func (h *azStubHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) { rw.WriteHeader(http.StatusAccepted) case r.Method == "GET" && r.Form.Get("comp") == "list" && r.Form.Get("restype") == "container": // "List Blobs" API + if !h.didlist503 { + h.didlist503 = true + rw.WriteHeader(http.StatusServiceUnavailable) + return + } prefix := container + "|" + r.Form.Get("prefix") marker := r.Form.Get("marker") @@ -388,14 +395,17 @@ func NewTestableAzureBlobVolume(t TB, readonly bool, replication int) *TestableA t.Fatal(err) } } + azClient.Sender = &singleSender{} bs := azClient.GetBlobService() v := &AzureBlobVolume{ - ContainerName: container, - ReadOnly: readonly, - AzureReplication: replication, - azClient: azClient, - container: &azureContainer{ctr: bs.GetContainerReference(container)}, + ContainerName: container, + ReadOnly: readonly, + AzureReplication: replication, + ListBlobsMaxAttempts: 2, + ListBlobsRetryDelay: arvados.Duration(time.Millisecond), + azClient: azClient, + container: &azureContainer{ctr: bs.GetContainerReference(container)}, } return &TestableAzureBlobVolume{ -- 2.30.2