18 "github.com/curoverse/azure-sdk-for-go/storage"
23 azureStorageAccountName string
24 azureStorageAccountKeyFile string
25 azureStorageReplication int
26 azureWriteRaceInterval = 15 * time.Second
27 azureWriteRacePollTime = time.Second
// readKeyFromFile loads an Azure storage account key from the named
// file. Leading and trailing whitespace is stripped from the file
// contents. An error is returned if the file cannot be read, or if
// nothing is left once the whitespace has been stripped.
func readKeyFromFile(file string) (string, error) {
	raw, readErr := ioutil.ReadFile(file)
	if readErr != nil {
		return "", errors.New("reading key from " + file + ": " + readErr.Error())
	}
	if key := strings.TrimSpace(string(raw)); key != "" {
		return key, nil
	}
	return "", errors.New("empty account key in " + file)
}
42 type azureVolumeAdder struct {
// Set handles one -azure-storage-container-volume argument (the type
// is registered through flag.Var). It builds an Azure blob volume for
// containerName from the account name and key file given by earlier
// flags, runs Check on it, and appends it to the volume set.
46 func (s *azureVolumeAdder) Set(containerName string) error {
// Azure volumes are refused whenever a nonzero trashLifetime is configured.
47 if trashLifetime != 0 {
48 return ErrNotImplemented
51 if containerName == "" {
52 return errors.New("no container name given")
// The account flags must already have been parsed, because the client
// is created right here from their current values.
// NOTE(review): the message below reads "must given"; it presumably
// should read "must be given".
54 if azureStorageAccountName == "" || azureStorageAccountKeyFile == "" {
55 return errors.New("-azure-storage-account-name and -azure-storage-account-key-file arguments must given before -azure-storage-container-volume")
57 accountKey, err := readKeyFromFile(azureStorageAccountKeyFile)
61 azClient, err := storage.NewBasicClient(azureStorageAccountName, accountKey)
63 return errors.New("creating Azure storage client: " + err.Error())
66 log.Print("Notice: -serialize is not supported by azure-blob-container volumes.")
// The volume inherits the global read-only flag and the configured
// replication level.
68 v := NewAzureBlobVolume(azClient, containerName, flagReadonly, azureStorageReplication)
69 if err := v.Check(); err != nil {
72 *s.volumeSet = append(*s.volumeSet, v)
77 flag.Var(&azureVolumeAdder{&volumes},
78 "azure-storage-container-volume",
79 "Use the given container as a storage volume. Can be given multiple times.")
81 &azureStorageAccountName,
82 "azure-storage-account-name",
84 "Azure storage account name used for subsequent --azure-storage-container-volume arguments.")
86 &azureStorageAccountKeyFile,
87 "azure-storage-account-key-file",
89 "File containing the account key used for subsequent --azure-storage-container-volume arguments.")
91 &azureStorageReplication,
92 "azure-storage-replication",
94 "Replication level to report to clients when data is stored in an Azure container.")
97 "azure-max-get-bytes",
99 fmt.Sprintf("Maximum bytes to request in a single GET request. If smaller than %d, use multiple concurrent range requests to retrieve a block.", BlockSize))
102 // An AzureBlobVolume stores and retrieves blocks in an Azure Blob
104 type AzureBlobVolume struct {
105 azClient storage.Client
106 bsClient storage.BlobStorageClient
112 // NewAzureBlobVolume returns a new AzureBlobVolume using the given
113 // client and container name. The replication argument specifies the
114 // replication level to report when writing data.
115 func NewAzureBlobVolume(client storage.Client, containerName string, readonly bool, replication int) *AzureBlobVolume {
116 return &AzureBlobVolume{
118 bsClient: client.GetBlobService(),
119 containerName: containerName,
121 replication: replication,
125 // Check returns nil if the volume is usable.
126 func (v *AzureBlobVolume) Check() error {
127 ok, err := v.bsClient.ContainerExists(v.containerName)
132 return errors.New("container does not exist")
137 // Return NotFoundError if trash marker is found on the block
138 func (v *AzureBlobVolume) checkTrashed(loc string) (bool, error) {
139 metadata, err := v.bsClient.GetBlobMetadata(v.containerName, loc)
141 return false, v.translateError(err)
143 if metadata["expires_at"] != "" {
144 return true, v.translateError(NotFoundError)
149 // Get reads a Keep block that has been stored as a block blob in the
152 // If the block is younger than azureWriteRaceInterval and is
153 // unexpectedly empty, assume a PutBlob operation is in progress, and
154 // wait for it to finish writing.
//
// The trash check runs first, so a block flagged by checkTrashed is
// never read. The data itself is fetched into buf by the get helper.
155 func (v *AzureBlobVolume) Get(loc string, buf []byte) (int, error) {
156 trashed, err := v.checkTrashed(loc)
161 return 0, os.ErrNotExist
163 var deadline time.Time
164 haveDeadline := false
165 size, err := v.get(loc, buf)
// d41d8cd98f00b204e9800998ecf8427e is the MD5 digest of empty input,
// so a zero-length result is legitimate for that locator and needs no
// polling.
166 for err == nil && size == 0 && loc != "d41d8cd98f00b204e9800998ecf8427e" {
167 // Seeing a brand new empty block probably means we're
168 // in a race with CreateBlob, which under the hood
169 // (apparently) does "CreateEmpty" and "CommitData"
170 // with no additional transaction locking.
// NOTE(review): := declares a new err local to the enclosing block;
// the err tested by the loop condition is not updated by this call.
172 t, err := v.Mtime(loc)
174 log.Print("Got empty block (possible race) but Mtime failed: ", err)
// Polling is bounded: the deadline is the block's mtime plus
// azureWriteRaceInterval.
177 deadline = t.Add(azureWriteRaceInterval)
178 if time.Now().After(deadline) {
181 log.Printf("Race? Block %s is 0 bytes, %s old. Polling until %s", loc, time.Since(t), deadline)
183 } else if time.Now().After(deadline) {
// Pause for azureWriteRacePollTime, then re-read the block.
186 time.Sleep(azureWriteRacePollTime)
187 size, err = v.get(loc, buf)
190 log.Printf("Race ended with size==%d", size)
// get reads the blob named loc into buf, fetching it as one or more
// byte ranges of at most azureMaxGetBytes each, and returns the number
// of bytes read. Errors from Azure are passed through translateError.
195 func (v *AzureBlobVolume) get(loc string, buf []byte) (int, error) {
// Without better information, plan to fill the whole buffer.
196 expectSize := len(buf)
197 if azureMaxGetBytes < BlockSize {
198 // Unfortunately the handler doesn't tell us how long the blob
199 // is expected to be, so we have to ask Azure.
200 props, err := v.bsClient.GetBlobProperties(v.containerName, loc)
202 return 0, v.translateError(err)
// Reject a reported length that is negative or exceeds BlockSize.
204 if props.ContentLength > int64(BlockSize) || props.ContentLength < 0 {
205 return 0, fmt.Errorf("block %s invalid size %d (max %d)", loc, props.ContentLength, BlockSize)
207 expectSize = int(props.ContentLength)
214 // We'll update this actualSize if/when we get the last piece.
// Number of range requests needed: expectSize divided by
// azureMaxGetBytes, rounded up.
// NOTE(review): a zero azureMaxGetBytes would panic on this division;
// confirm the flag value is validated elsewhere.
216 pieces := (expectSize + azureMaxGetBytes - 1) / azureMaxGetBytes
// One error slot per piece.
// NOTE(review): this local shadows the errors package for the rest of
// the function.
217 errors := make([]error, pieces)
218 var wg sync.WaitGroup
220 for p := 0; p < pieces; p++ {
// Piece p covers buf[startPos:endPos].
223 startPos := p * azureMaxGetBytes
224 endPos := startPos + azureMaxGetBytes
225 if endPos > expectSize {
228 var rdr io.ReadCloser
// A single piece spanning the whole blob uses a plain GetBlob;
// otherwise an inclusive byte range is requested (hence endPos-1).
230 if startPos == 0 && endPos == expectSize {
231 rdr, err = v.bsClient.GetBlob(v.containerName, loc)
233 rdr, err = v.bsClient.GetBlobRange(v.containerName, loc, fmt.Sprintf("%d-%d", startPos, endPos-1), nil)
240 n, err := io.ReadFull(rdr, buf[startPos:endPos])
241 if pieces == 1 && (err == io.ErrUnexpectedEOF || err == io.EOF) {
242 // If we don't know the actual size,
243 // and just tried reading 64 MiB, it's
244 // normal to encounter EOF.
245 } else if err != nil {
// The total size is the offset of this piece plus what was read.
249 actualSize = startPos + n
// Report a recorded per-piece error, translated, if there is one.
254 for _, err := range errors {
256 return 0, v.translateError(err)
259 return actualSize, nil
262 // Compare the given data with existing stored data.
263 func (v *AzureBlobVolume) Compare(loc string, expect []byte) error {
264 trashed, err := v.checkTrashed(loc)
269 return os.ErrNotExist
271 rdr, err := v.bsClient.GetBlob(v.containerName, loc)
273 return v.translateError(err)
276 return compareReaderWithBuf(rdr, expect, loc[:32])
279 // Put stores a Keep block as a block blob in the container.
280 func (v *AzureBlobVolume) Put(loc string, block []byte) error {
282 return MethodDisabledError
284 extraHeaders := make(map[string]string)
285 extraHeaders["x-ms-meta-last_write_at"] = fmt.Sprintf("%d", time.Now().Add(trashLifetime).Unix())
286 return v.bsClient.CreateBlockBlobFromReader(v.containerName, loc, uint64(len(block)), bytes.NewReader(block), extraHeaders)
289 func (v *AzureBlobVolume) addToMetadata(loc, name, value string) error {
290 metadata, err := v.bsClient.GetBlobMetadata(v.containerName, loc)
294 metadata[name] = value
295 return v.bsClient.SetBlobMetadata(v.containerName, loc, metadata)
298 func (v *AzureBlobVolume) removeFromMetadata(loc, name string) error {
299 metadata, err := v.bsClient.GetBlobMetadata(v.containerName, loc)
304 return v.bsClient.SetBlobMetadata(v.containerName, loc, metadata)
307 // Touch updates the last-modified property of a block blob.
308 func (v *AzureBlobVolume) Touch(loc string) error {
310 return MethodDisabledError
312 trashed, err := v.checkTrashed(loc)
317 return os.ErrNotExist
319 return v.addToMetadata(loc, "last_write_at", fmt.Sprintf("%d", time.Now()))
322 // Mtime returns the last-modified property of a block blob.
323 func (v *AzureBlobVolume) Mtime(loc string) (time.Time, error) {
324 trashed, err := v.checkTrashed(loc)
326 return time.Time{}, err
329 return time.Time{}, os.ErrNotExist
331 metadata, err := v.bsClient.GetBlobMetadata(v.containerName, loc)
333 return time.Time{}, v.translateError(err)
336 lastWriteAt, err := strconv.ParseInt(metadata["last_write_at"], 10, 64)
338 return time.Time{}, v.translateError(err)
340 return time.Unix(lastWriteAt, 0), nil
343 // IndexTo writes a list of Keep blocks that are stored in the
// (container). Each listed blob is written to writer on its own line
// as "<name>+<size> <unix timestamp>".
345 func (v *AzureBlobVolume) IndexTo(prefix string, writer io.Writer) error {
346 params := storage.ListBlobsParameters{
350 resp, err := v.bsClient.ListBlobs(v.containerName, params)
354 for _, b := range resp.Blobs {
// The timestamp comes from the blob's LastModified property, parsed
// as RFC 1123.
// NOTE(review): Mtime reports the last_write_at metadata value
// instead, so the two can disagree; confirm which one is intended.
355 t, err := time.Parse(time.RFC1123, b.Properties.LastModified)
// Names that are not bare block hashes are tested for here.
359 if !v.isKeepBlock(b.Name) {
362 if b.Properties.ContentLength == 0 && t.Add(azureWriteRaceInterval).After(time.Now()) {
363 // A new zero-length blob is probably
364 // just a new non-empty blob that
365 // hasn't committed its data yet (see
366 // Get()), and in any case has no
// NOTE(review): this check relies on ListBlobs returning each blob's
// metadata; confirm the list parameters actually request it.
370 if b.Metadata["expires_at"] != "" {
371 // Trashed blob; exclude it from response
374 fmt.Fprintf(writer, "%s+%d %d\n", b.Name, b.Properties.ContentLength, t.Unix())
// Results are paged: an empty NextMarker means the listing is
// complete; otherwise the next request resumes from that marker.
376 if resp.NextMarker == "" {
379 params.Marker = resp.NextMarker
383 // Trash a Keep block.
384 func (v *AzureBlobVolume) Trash(loc string) error {
386 return MethodDisabledError
389 // Ideally we would use If-Unmodified-Since, but that
390 // particular condition seems to be ignored by Azure. Instead,
391 // we get the Etag before checking Mtime, and use If-Match to
392 // ensure we don't delete data if Put() or Touch() happens
393 // between our calls to Mtime() and DeleteBlob().
394 props, err := v.bsClient.GetBlobProperties(v.containerName, loc)
398 if t, err := v.Mtime(loc); err != nil {
400 } else if time.Since(t) < blobSignatureTTL {
403 if trashLifetime == 0 {
404 return v.bsClient.DeleteBlob(v.containerName, loc, map[string]string{
405 "If-Match": props.Etag,
409 err = v.addToMetadata(loc, "expires_at", fmt.Sprintf("%d", time.Now().Add(trashLifetime).Unix()))
413 return v.bsClient.CreateBlockBlobFromReader(v.containerName,
414 fmt.Sprintf("trash.%d.%v", time.Now().Add(trashLifetime).Unix(), loc), 0, nil, nil)
417 // Untrash a Keep block.
418 // Delete the expires_at metadata attribute and trash marker
419 func (v *AzureBlobVolume) Untrash(loc string) error {
420 // if expires_at does not exist, return NotFoundError
421 metadata, err := v.bsClient.GetBlobMetadata(v.containerName, loc)
423 return v.translateError(err)
425 if metadata["expires_at"] == "" {
426 return v.translateError(NotFoundError)
428 // reset expires_at metadata attribute
429 err = v.removeFromMetadata(loc, "expires_at")
431 return v.translateError(err)
434 // delete trash marker if exists
435 _, err = v.bsClient.DeleteBlobIfExists(v.containerName, fmt.Sprintf("trash.%v.%v", metadata["expires_at"], loc), map[string]string{})
436 return v.translateError(err)
439 // Status returns a VolumeStatus struct with placeholder data.
440 func (v *AzureBlobVolume) Status() *VolumeStatus {
441 return &VolumeStatus{
443 BytesFree: BlockSize * 1000,
448 // String returns a volume label, including the container name.
449 func (v *AzureBlobVolume) String() string {
450 return fmt.Sprintf("azure-storage-container:%+q", v.containerName)
453 // Writable returns true, unless the -readonly flag was on when the
455 func (v *AzureBlobVolume) Writable() bool {
459 // Replication returns the replication level of the container, as
460 // specified by the -azure-storage-replication argument.
461 func (v *AzureBlobVolume) Replication() int {
465 // If possible, translate an Azure SDK error to a recognizable error
466 // like os.ErrNotExist.
467 func (v *AzureBlobVolume) translateError(err error) error {
471 case strings.Contains(err.Error(), "Not Found"):
472 // "storage: service returned without a response body (404 Not Found)"
473 return os.ErrNotExist
479 var keepBlockRegexp = regexp.MustCompile(`^[0-9a-f]{32}$`)
481 func (v *AzureBlobVolume) isKeepBlock(s string) bool {
482 return keepBlockRegexp.MatchString(s)
// azTrashLocRegexp matches trash marker blob names ending in
// "trash.<digits>.<32 hex digits>", capturing the digits and the hash.
// NOTE(review): the pattern is anchored at the end only, not at the
// start of the name.
485 var azTrashLocRegexp = regexp.MustCompile(`trash\.(\d+)\.([0-9a-f]{32})$`)
487 // EmptyTrash looks for trashed blocks that exceeded trashLifetime
488 // and deletes them from the volume.
//
// Failures are logged rather than returned; the function has no
// result.
489 func (v *AzureBlobVolume) EmptyTrash() {
490 var bytesDeleted, bytesInTrash int64
491 var blocksDeleted, blocksInTrash int
492 params := storage.ListBlobsParameters{
497 resp, err := v.bsClient.ListBlobs(v.containerName, params)
499 log.Printf("EmptyTrash: ListBlobs: %v", err)
502 for _, b := range resp.Blobs {
// A full match yields three elements: the whole match, the deadline
// digits (matches[1]) and the block hash (matches[2]).
503 matches := azTrashLocRegexp.FindStringSubmatch(b.Name)
504 if len(matches) != 3 {
505 log.Printf("EmptyTrash: regexp mismatch for: %v", b.Name)
// The deadline embedded in the marker name is a Unix timestamp.
509 deadline, err := strconv.ParseInt(matches[1], 10, 64)
// NOTE(review): this format string has three verbs but only two
// arguments; the GetBlobMetadata and both DeleteBlob messages below
// have the same mismatch.
511 log.Printf("EmptyTrash: %v: ParseInt(%v): %v", matches[1], err)
// Compare the marker's deadline with the current time.
514 if deadline > time.Now().Unix() {
518 metadata, err := v.bsClient.GetBlobMetadata(v.containerName, matches[2])
520 log.Printf("EmptyTrash: %v: GetBlobMetadata(%v): %v", matches[2], err)
524 // Make sure the marker is for the current block, not an older one
525 if metadata["expires_at"] == matches[1] {
// Delete the data block itself.
526 err = v.bsClient.DeleteBlob(v.containerName, matches[2], map[string]string{})
528 log.Printf("EmptyTrash: %v: DeleteBlob(%v): %v", matches[2], err)
// Delete the trash marker blob.
533 err = v.bsClient.DeleteBlob(v.containerName, b.Name, map[string]string{})
535 log.Printf("EmptyTrash: %v: DeleteBlob(%v): %v", b.Name, err)
// Results are paged: an empty NextMarker means the listing is
// complete; otherwise the next request resumes from that marker.
538 if resp.NextMarker == "" {
541 params.Marker = resp.NextMarker
// NOTE(review): no update of the byte/block counters is visible in
// this excerpt; confirm the totals logged here are maintained.
544 log.Printf("EmptyTrash stats for %v: Deleted %v bytes in %v blocks. Remaining in trash: %v bytes in %v blocks.", v.String(), bytesDeleted, blocksDeleted, bytesInTrash-bytesDeleted, blocksInTrash-blocksDeleted)