X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/6fe8e52020d421797306e5c6536afbcee761510a..93cfe7c262708fb09eda5aad1839c832816d4591:/services/keepstore/azure_blob_volume.go diff --git a/services/keepstore/azure_blob_volume.go b/services/keepstore/azure_blob_volume.go index 5da2055b77..3c17b3bd06 100644 --- a/services/keepstore/azure_blob_volume.go +++ b/services/keepstore/azure_blob_volume.go @@ -23,9 +23,14 @@ import ( "git.curoverse.com/arvados.git/sdk/go/arvados" "github.com/Azure/azure-sdk-for-go/storage" + "github.com/prometheus/client_golang/prometheus" ) -const azureDefaultRequestTimeout = arvados.Duration(10 * time.Minute) +const ( + azureDefaultRequestTimeout = arvados.Duration(10 * time.Minute) + azureDefaultListBlobsMaxAttempts = 12 + azureDefaultListBlobsRetryDelay = arvados.Duration(10 * time.Second) +) var ( azureMaxGetBytes int @@ -107,6 +112,8 @@ type AzureBlobVolume struct { ReadOnly bool RequestTimeout arvados.Duration StorageClasses []string + ListBlobsRetryDelay arvados.Duration + ListBlobsMaxAttempts int azClient storage.Client container *azureContainer @@ -147,7 +154,13 @@ func (v *AzureBlobVolume) Type() string { } // Start implements Volume. -func (v *AzureBlobVolume) Start() error { +func (v *AzureBlobVolume) Start(vm *volumeMetricsVecs) error { + if v.ListBlobsRetryDelay == 0 { + v.ListBlobsRetryDelay = azureDefaultListBlobsRetryDelay + } + if v.ListBlobsMaxAttempts == 0 { + v.ListBlobsMaxAttempts = azureDefaultListBlobsMaxAttempts + } if v.ContainerName == "" { return errors.New("no container name given") } @@ -183,6 +196,10 @@ func (v *AzureBlobVolume) Start() error { } else if !ok { return fmt.Errorf("Azure container %q does not exist", v.ContainerName) } + // Set up prometheus metrics + lbls := prometheus.Labels{"device_id": v.DeviceID()} + v.container.stats.opsCounters, v.container.stats.errCounters, v.container.stats.ioBytes = vm.getCounterVecsFor(lbls) + return nil } @@ -481,8 +498,8 @@ func (v *AzureBlobVolume) IndexTo(prefix string, writer io.Writer) error { Prefix: prefix, Include: &storage.IncludeBlobDataset{Metadata: true}, } - for { - resp, err := v.container.ListBlobs(params) + for page := 1; ; page++ { + resp, err := v.listBlobs(page, params) if err != nil { return err } @@ -512,6 +529,22 @@ func (v *AzureBlobVolume) IndexTo(prefix string, writer io.Writer) error { } } +// call v.container.ListBlobs, retrying if needed. +func (v *AzureBlobVolume) listBlobs(page int, params storage.ListBlobsParameters) (resp storage.BlobListResponse, err error) { + for i := 0; i < v.ListBlobsMaxAttempts; i++ { + resp, err = v.container.ListBlobs(params) + err = v.translateError(err) + if err == VolumeBusyError { + log.Printf("ListBlobs: will retry page %d in %s after error: %s", page, v.ListBlobsRetryDelay, err) + time.Sleep(time.Duration(v.ListBlobsRetryDelay)) + continue + } else { + break + } + } + return +} + // Trash a Keep block. func (v *AzureBlobVolume) Trash(loc string) error { if v.ReadOnly { @@ -603,6 +636,9 @@ func (v *AzureBlobVolume) translateError(err error) error { switch { case err == nil: return err + case strings.Contains(err.Error(), "StatusCode=503"): + // "storage: service returned error: StatusCode=503, ErrorCode=ServerBusy, ErrorMessage=The server is busy" (See #14804) + return VolumeBusyError case strings.Contains(err.Error(), "Not Found"): // "storage: service returned without a response body (404 Not Found)" return os.ErrNotExist @@ -666,8 +702,8 @@ func (v *AzureBlobVolume) EmptyTrash() { } params := storage.ListBlobsParameters{Include: &storage.IncludeBlobDataset{Metadata: true}} - for { - resp, err := v.container.ListBlobs(params) + for page := 1; ; page++ { + resp, err := v.listBlobs(page, params) if err != nil { log.Printf("EmptyTrash: ListBlobs: %v", err) break @@ -724,6 +760,7 @@ type azureContainer struct { } func (c *azureContainer) Exists() (bool, error) { + c.stats.TickOps("exists") c.stats.Tick(&c.stats.Ops) ok, err := c.ctr.Exists() c.stats.TickErr(err) @@ -731,6 +768,7 @@ func (c *azureContainer) Exists() (bool, error) { } func (c *azureContainer) GetBlobMetadata(bname string) (storage.BlobMetadata, error) { + c.stats.TickOps("get_metadata") c.stats.Tick(&c.stats.Ops, &c.stats.GetMetadataOps) b := c.ctr.GetBlobReference(bname) err := b.GetMetadata(nil) @@ -739,6 +777,7 @@ func (c *azureContainer) GetBlobMetadata(bname string) (storage.BlobMetadata, er } func (c *azureContainer) GetBlobProperties(bname string) (*storage.BlobProperties, error) { + c.stats.TickOps("get_properties") c.stats.Tick(&c.stats.Ops, &c.stats.GetPropertiesOps) b := c.ctr.GetBlobReference(bname) err := b.GetProperties(nil) @@ -747,6 +786,7 @@ func (c *azureContainer) GetBlobProperties(bname string) (*storage.BlobPropertie } func (c *azureContainer) GetBlob(bname string) (io.ReadCloser, error) { + c.stats.TickOps("get") c.stats.Tick(&c.stats.Ops, &c.stats.GetOps) b := c.ctr.GetBlobReference(bname) rdr, err := b.Get(nil) @@ -755,6 +795,7 @@ func (c *azureContainer) GetBlob(bname string) (io.ReadCloser, error) { } func (c *azureContainer) GetBlobRange(bname string, start, end int, opts *storage.GetBlobOptions) (io.ReadCloser, error) { + c.stats.TickOps("get_range") c.stats.Tick(&c.stats.Ops, &c.stats.GetRangeOps) b := c.ctr.GetBlobReference(bname) rdr, err := b.GetRange(&storage.GetBlobRangeOptions{ @@ -782,6 +823,7 @@ func (r *readerWithAzureLen) Len() int { } func (c *azureContainer) CreateBlockBlobFromReader(bname string, size int, rdr io.Reader, opts *storage.PutBlobOptions) error { + c.stats.TickOps("create") c.stats.Tick(&c.stats.Ops, &c.stats.CreateOps) if size != 0 { rdr = &readerWithAzureLen{ @@ -796,6 +838,7 @@ func (c *azureContainer) CreateBlockBlobFromReader(bname string, size int, rdr i } func (c *azureContainer) SetBlobMetadata(bname string, m storage.BlobMetadata, opts *storage.SetBlobMetadataOptions) error { + c.stats.TickOps("set_metadata") c.stats.Tick(&c.stats.Ops, &c.stats.SetMetadataOps) b := c.ctr.GetBlobReference(bname) b.Metadata = m @@ -805,6 +848,7 @@ func (c *azureContainer) SetBlobMetadata(bname string, m storage.BlobMetadata, o } func (c *azureContainer) ListBlobs(params storage.ListBlobsParameters) (storage.BlobListResponse, error) { + c.stats.TickOps("list") c.stats.Tick(&c.stats.Ops, &c.stats.ListOps) resp, err := c.ctr.ListBlobs(params) c.stats.TickErr(err) @@ -812,6 +856,7 @@ func (c *azureContainer) ListBlobs(params storage.ListBlobsParameters) (storage. } func (c *azureContainer) DeleteBlob(bname string, opts *storage.DeleteBlobOptions) error { + c.stats.TickOps("delete") c.stats.Tick(&c.stats.Ops, &c.stats.DelOps) b := c.ctr.GetBlobReference(bname) err := b.Delete(opts)