+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
package main
import (
"bytes"
+ "context"
"errors"
"flag"
"fmt"
"io"
"io/ioutil"
- "log"
+ "net/http"
"os"
"regexp"
"strconv"
"sync"
"time"
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
+ log "github.com/Sirupsen/logrus"
"github.com/curoverse/azure-sdk-for-go/storage"
)
+// azureDefaultRequestTimeout is the request timeout applied to a
+// volume whose RequestTimeout is left at zero.
+const azureDefaultRequestTimeout = arvados.Duration(10 * time.Minute)
+
var (
azureMaxGetBytes int
azureStorageAccountName string
type AzureBlobVolume struct {
StorageAccountName string
StorageAccountKeyFile string
+ StorageBaseURL string // "" means default, "core.windows.net"
ContainerName string
AzureReplication int
+ // ReadOnly makes Put fail with MethodDisabledError.
ReadOnly bool
+ // RequestTimeout is used as the Timeout of the HTTP client that
+ // talks to the storage service. Zero is replaced with
+ // azureDefaultRequestTimeout when the volume is set up.
+ RequestTimeout arvados.Duration
azClient storage.Client
- bsClient storage.BlobStorageClient
+ // bsClient wraps the blob service obtained from azClient so that
+ // API calls and transferred bytes are counted.
+ bsClient *azureBlobClient
}
// Examples implements VolumeWithExamples.
StorageAccountKeyFile: "/etc/azure_storage_account_key.txt",
ContainerName: "example-container-name",
AzureReplication: 3,
+ RequestTimeout: azureDefaultRequestTimeout,
+ },
+ &AzureBlobVolume{
+ StorageAccountName: "cn-account-name",
+ StorageAccountKeyFile: "/etc/azure_cn_storage_account_key.txt",
+ StorageBaseURL: "core.chinacloudapi.cn",
+ ContainerName: "cn-container-name",
+ AzureReplication: 3,
+ RequestTimeout: azureDefaultRequestTimeout,
},
}
}
if err != nil {
return err
}
- v.azClient, err = storage.NewBasicClient(v.StorageAccountName, accountKey)
+ if v.StorageBaseURL == "" {
+ v.StorageBaseURL = storage.DefaultBaseURL
+ }
+ v.azClient, err = storage.NewClient(v.StorageAccountName, accountKey, v.StorageBaseURL, storage.DefaultAPIVersion, true)
if err != nil {
return fmt.Errorf("creating Azure storage client: %s", err)
}
- v.bsClient = v.azClient.GetBlobService()
+
+ if v.RequestTimeout == 0 {
+ v.RequestTimeout = azureDefaultRequestTimeout
+ }
+ v.azClient.HTTPClient = &http.Client{
+ Timeout: time.Duration(v.RequestTimeout),
+ }
+ bs := v.azClient.GetBlobService()
+ v.bsClient = &azureBlobClient{
+ client: &bs,
+ }
ok, err := v.bsClient.ContainerExists(v.ContainerName)
if err != nil {
return nil
}
+// DeviceID returns an identifier for the storage container, built as
+// an "azure://" URL from the storage base URL, the storage account
+// name and the container name.
+func (v *AzureBlobVolume) DeviceID() string {
+	return fmt.Sprintf("azure://%s/%s/%s", v.StorageBaseURL, v.StorageAccountName, v.ContainerName)
+}
+
// Return true if expires_at metadata attribute is found on the block
func (v *AzureBlobVolume) checkTrashed(loc string) (bool, map[string]string, error) {
metadata, err := v.bsClient.GetBlobMetadata(v.ContainerName, loc)
// If the block is younger than azureWriteRaceInterval and is
// unexpectedly empty, assume a PutBlob operation is in progress, and
// wait for it to finish writing.
-func (v *AzureBlobVolume) Get(loc string, buf []byte) (int, error) {
+func (v *AzureBlobVolume) Get(ctx context.Context, loc string, buf []byte) (int, error) {
trashed, _, err := v.checkTrashed(loc)
if err != nil {
return 0, err
}
var deadline time.Time
haveDeadline := false
- size, err := v.get(loc, buf)
+ size, err := v.get(ctx, loc, buf)
for err == nil && size == 0 && loc != "d41d8cd98f00b204e9800998ecf8427e" {
// Seeing a brand new empty block probably means we're
// in a race with CreateBlob, which under the hood
} else if time.Now().After(deadline) {
break
}
- time.Sleep(azureWriteRacePollTime)
- size, err = v.get(loc, buf)
+ select {
+ case <-ctx.Done():
+ return 0, ctx.Err()
+ case <-time.After(azureWriteRacePollTime):
+ }
+ size, err = v.get(ctx, loc, buf)
}
if haveDeadline {
log.Printf("Race ended with size==%d", size)
return size, err
}
-func (v *AzureBlobVolume) get(loc string, buf []byte) (int, error) {
+func (v *AzureBlobVolume) get(ctx context.Context, loc string, buf []byte) (int, error) {
+ ctx, cancel := context.WithCancel(ctx)
+ defer cancel()
expectSize := len(buf)
if azureMaxGetBytes < BlockSize {
// Unfortunately the handler doesn't tell us how long the blob
// We'll update this actualSize if/when we get the last piece.
actualSize := -1
pieces := (expectSize + azureMaxGetBytes - 1) / azureMaxGetBytes
- errors := make([]error, pieces)
+ errors := make(chan error, pieces)
var wg sync.WaitGroup
wg.Add(pieces)
for p := 0; p < pieces; p++ {
+ // Each goroutine retrieves one piece. If we hit an
+ // error, it is sent to the errors chan so get() can
+ // return it -- but only if the error happens before
+ // ctx is done. This way, if ctx is done before we hit
+ // any other error (e.g., requesting client has hung
+ // up), we return the original ctx.Err() instead of
+ // the secondary errors from the transfers that got
+ // interrupted as a result.
go func(p int) {
defer wg.Done()
startPos := p * azureMaxGetBytes
}
var rdr io.ReadCloser
var err error
- if startPos == 0 && endPos == expectSize {
- rdr, err = v.bsClient.GetBlob(v.ContainerName, loc)
- } else {
- rdr, err = v.bsClient.GetBlobRange(v.ContainerName, loc, fmt.Sprintf("%d-%d", startPos, endPos-1), nil)
+ gotRdr := make(chan struct{})
+ go func() {
+ defer close(gotRdr)
+ if startPos == 0 && endPos == expectSize {
+ rdr, err = v.bsClient.GetBlob(v.ContainerName, loc)
+ } else {
+ rdr, err = v.bsClient.GetBlobRange(v.ContainerName, loc, fmt.Sprintf("%d-%d", startPos, endPos-1), nil)
+ }
+ }()
+ select {
+ case <-ctx.Done():
+ go func() {
+ <-gotRdr
+ if err == nil {
+ rdr.Close()
+ }
+ }()
+ return
+ case <-gotRdr:
}
if err != nil {
- errors[p] = err
+ errors <- err
+ cancel()
return
}
- defer rdr.Close()
+ go func() {
+ // Close the reader when the client
+ // hangs up or another piece fails
+ // (possibly interrupting ReadFull())
+ // or when all pieces succeed and
+ // get() returns.
+ <-ctx.Done()
+ rdr.Close()
+ }()
n, err := io.ReadFull(rdr, buf[startPos:endPos])
if pieces == 1 && (err == io.ErrUnexpectedEOF || err == io.EOF) {
// If we don't know the actual size,
// and just tried reading 64 MiB, it's
// normal to encounter EOF.
} else if err != nil {
- errors[p] = err
+ if ctx.Err() == nil {
+ errors <- err
+ }
+ cancel()
+ return
}
if p == pieces-1 {
actualSize = startPos + n
}(p)
}
wg.Wait()
- for _, err := range errors {
- if err != nil {
- return 0, v.translateError(err)
- }
+ close(errors)
+ if len(errors) > 0 {
+ return 0, v.translateError(<-errors)
+ }
+ if ctx.Err() != nil {
+ return 0, ctx.Err()
}
return actualSize, nil
}
// Compare the given data with existing stored data.
-func (v *AzureBlobVolume) Compare(loc string, expect []byte) error {
+func (v *AzureBlobVolume) Compare(ctx context.Context, loc string, expect []byte) error {
trashed, _, err := v.checkTrashed(loc)
if err != nil {
return err
if trashed {
return os.ErrNotExist
}
- rdr, err := v.bsClient.GetBlob(v.ContainerName, loc)
+ var rdr io.ReadCloser
+ gotRdr := make(chan struct{})
+ go func() {
+ defer close(gotRdr)
+ rdr, err = v.bsClient.GetBlob(v.ContainerName, loc)
+ }()
+ select {
+ case <-ctx.Done():
+ go func() {
+ <-gotRdr
+ if err == nil {
+ rdr.Close()
+ }
+ }()
+ return ctx.Err()
+ case <-gotRdr:
+ }
if err != nil {
return v.translateError(err)
}
defer rdr.Close()
- return compareReaderWithBuf(rdr, expect, loc[:32])
+ return compareReaderWithBuf(ctx, rdr, expect, loc[:32])
}
// Put stores a Keep block as a block blob in the container.
+//
+// It returns MethodDisabledError if the volume is read-only. If ctx
+// is done before the upload finishes, Put returns ctx.Err() right
+// away and leaves the upload goroutine to finish by itself; otherwise
+// it returns the result of CreateBlockBlobFromReader.
-func (v *AzureBlobVolume) Put(loc string, block []byte) error {
+func (v *AzureBlobVolume) Put(ctx context.Context, loc string, block []byte) error {
if v.ReadOnly {
return MethodDisabledError
}
- return v.bsClient.CreateBlockBlobFromReader(v.ContainerName, loc, uint64(len(block)), bytes.NewReader(block), nil)
+ // Send the block data through a pipe, so that (if we need to)
+ // we can close the pipe early and abandon our
+ // CreateBlockBlobFromReader() goroutine, without worrying
+ // about CreateBlockBlobFromReader() accessing our block
+ // buffer after we release it.
+ bufr, bufw := io.Pipe()
+ go func() {
+ io.Copy(bufw, bytes.NewReader(block))
+ bufw.Close()
+ }()
+ // errChan has room for one value so that the upload goroutine
+ // can always deliver its result and exit, even after Put has
+ // already returned on ctx.Done(). With an unbuffered channel
+ // that send would block forever and leak the goroutine.
+ errChan := make(chan error, 1)
+ go func() {
+ errChan <- v.bsClient.CreateBlockBlobFromReader(v.ContainerName, loc, uint64(len(block)), bufr, nil)
+ }()
+ select {
+ case <-ctx.Done():
+ theConfig.debugLogf("%s: taking CreateBlockBlobFromReader's input away: %s", v, ctx.Err())
+ // Our pipe might be stuck in Write(), waiting for
+ // io.Copy() to read. If so, un-stick it. This means
+ // CreateBlockBlobFromReader will get corrupt data,
+ // but that's OK: the size won't match, so the write
+ // will fail.
+ go io.Copy(ioutil.Discard, bufr)
+ // CloseWithError() will return once pending I/O is done.
+ bufw.CloseWithError(ctx.Err())
+ theConfig.debugLogf("%s: abandoning CreateBlockBlobFromReader goroutine", v)
+ return ctx.Err()
+ case err := <-errChan:
+ return err
+ }
}
// Touch updates the last-modified property of a block blob.
log.Printf("EmptyTrash stats for %v: Deleted %v bytes in %v blocks. Remaining in trash: %v bytes in %v blocks.", v.String(), bytesDeleted, blocksDeleted, bytesInTrash-bytesDeleted, blocksInTrash-blocksDeleted)
}
+
+// InternalStats returns a pointer to the I/O and API call counters
+// kept by the volume's blob client.
+func (v *AzureBlobVolume) InternalStats() interface{} {
+	counters := &v.bsClient.stats
+	return counters
+}
+
+// azureBlobStats holds the counters returned by InternalStats. Each
+// azureBlobClient wrapper method increments Ops, and all but
+// ContainerExists also increment the counter for their operation.
+type azureBlobStats struct {
+ statsTicker
+ Ops uint64 // every call made through azureBlobClient
+ GetOps uint64 // GetBlob calls
+ GetRangeOps uint64 // GetBlobRange calls
+ GetMetadataOps uint64 // GetBlobMetadata calls
+ GetPropertiesOps uint64 // GetBlobProperties calls
+ CreateOps uint64 // CreateBlockBlobFromReader calls
+ SetMetadataOps uint64 // SetBlobMetadata calls
+ DelOps uint64 // DeleteBlob calls
+ ListOps uint64 // ListBlobs calls
+}
+
+// TickErr records a non-nil err with the embedded statsTicker. The
+// error is labelled with its Go type; for an AzureStorageServiceError
+// the label also carries the error's StatusCode and Code. Every
+// non-nil error is logged as well. A nil err is ignored.
+func (s *azureBlobStats) TickErr(err error) {
+	if err == nil {
+		return
+	}
+	label := fmt.Sprintf("%T", err)
+	if azErr, isAzure := err.(storage.AzureStorageServiceError); isAzure {
+		label = fmt.Sprintf("%s %d (%s)", label, azErr.StatusCode, azErr.Code)
+	}
+	log.Printf("errType %T, err %s", err, err)
+	s.statsTicker.TickErr(err, label)
+}
+
+// azureBlobClient wraps storage.BlobStorageClient in order to count
+// I/O and API usage stats.
+type azureBlobClient struct {
+ client *storage.BlobStorageClient // wrapped client that performs the actual calls
+ stats azureBlobStats // counters updated by every wrapper method
+}
+
+// ContainerExists counts the call in Ops, forwards it to the wrapped
+// client, and records any resulting error.
+func (c *azureBlobClient) ContainerExists(cname string) (bool, error) {
+	st := &c.stats
+	st.Tick(&st.Ops)
+	exists, err := c.client.ContainerExists(cname)
+	st.TickErr(err)
+	return exists, err
+}
+
+// GetBlobMetadata counts the call in Ops and GetMetadataOps, forwards
+// it to the wrapped client, and records any resulting error.
+func (c *azureBlobClient) GetBlobMetadata(cname, bname string) (map[string]string, error) {
+	st := &c.stats
+	st.Tick(&st.Ops, &st.GetMetadataOps)
+	metadata, err := c.client.GetBlobMetadata(cname, bname)
+	st.TickErr(err)
+	return metadata, err
+}
+
+// GetBlobProperties counts the call in Ops and GetPropertiesOps,
+// forwards it to the wrapped client, and records any resulting error.
+func (c *azureBlobClient) GetBlobProperties(cname, bname string) (*storage.BlobProperties, error) {
+	st := &c.stats
+	st.Tick(&st.Ops, &st.GetPropertiesOps)
+	props, err := c.client.GetBlobProperties(cname, bname)
+	st.TickErr(err)
+	return props, err
+}
+
+// GetBlob counts the call in Ops and GetOps, forwards it to the
+// wrapped client, and records any resulting error. The returned
+// reader is wrapped so that bytes read are reported to TickInBytes.
+func (c *azureBlobClient) GetBlob(cname, bname string) (io.ReadCloser, error) {
+	st := &c.stats
+	st.Tick(&st.Ops, &st.GetOps)
+	body, err := c.client.GetBlob(cname, bname)
+	st.TickErr(err)
+	// NOTE(review): body is wrapped even when err != nil, so callers
+	// must check err before using the returned reader.
+	return NewCountingReader(body, st.TickInBytes), err
+}
+
+// GetBlobRange counts the call in Ops and GetRangeOps, forwards it to
+// the wrapped client, and records any resulting error. The returned
+// reader is wrapped so that bytes read are reported to TickInBytes.
+func (c *azureBlobClient) GetBlobRange(cname, bname, byterange string, hdrs map[string]string) (io.ReadCloser, error) {
+	st := &c.stats
+	st.Tick(&st.Ops, &st.GetRangeOps)
+	body, err := c.client.GetBlobRange(cname, bname, byterange, hdrs)
+	st.TickErr(err)
+	// NOTE(review): body is wrapped even when err != nil, so callers
+	// must check err before using the returned reader.
+	return NewCountingReader(body, st.TickInBytes), err
+}
+
+// CreateBlockBlobFromReader counts the call in Ops and CreateOps,
+// forwards it to the wrapped client with rdr wrapped so that bytes
+// read from it are reported to TickOutBytes, and records any
+// resulting error.
+func (c *azureBlobClient) CreateBlockBlobFromReader(cname, bname string, size uint64, rdr io.Reader, hdrs map[string]string) error {
+	st := &c.stats
+	st.Tick(&st.Ops, &st.CreateOps)
+	counted := NewCountingReader(rdr, st.TickOutBytes)
+	err := c.client.CreateBlockBlobFromReader(cname, bname, size, counted, hdrs)
+	st.TickErr(err)
+	return err
+}
+
+// SetBlobMetadata counts the call in Ops and SetMetadataOps, forwards
+// it to the wrapped client, and records any resulting error.
+func (c *azureBlobClient) SetBlobMetadata(cname, bname string, m, hdrs map[string]string) error {
+	st := &c.stats
+	st.Tick(&st.Ops, &st.SetMetadataOps)
+	setErr := c.client.SetBlobMetadata(cname, bname, m, hdrs)
+	st.TickErr(setErr)
+	return setErr
+}
+
+// ListBlobs counts the call in Ops and ListOps, forwards it to the
+// wrapped client, and records any resulting error.
+func (c *azureBlobClient) ListBlobs(cname string, params storage.ListBlobsParameters) (storage.BlobListResponse, error) {
+	st := &c.stats
+	st.Tick(&st.Ops, &st.ListOps)
+	listing, err := c.client.ListBlobs(cname, params)
+	st.TickErr(err)
+	return listing, err
+}
+
+// DeleteBlob counts the call in Ops and DelOps, forwards it to the
+// wrapped client, and records any resulting error.
+func (c *azureBlobClient) DeleteBlob(cname, bname string, hdrs map[string]string) error {
+	st := &c.stats
+	st.Tick(&st.Ops, &st.DelOps)
+	delErr := c.client.DeleteBlob(cname, bname, hdrs)
+	st.TickErr(delErr)
+	return delErr
+}