+ v.logger.WithError(err).Error("EmptyTrash: lister failed")
+ }
+ v.logger.Infof("EmptyTrash: stats for %v: Deleted %v bytes in %v blocks. Remaining in trash: %v bytes in %v blocks.", v.DeviceID(), bytesDeleted, blocksDeleted, bytesInTrash-bytesDeleted, blocksInTrash-blocksDeleted)
+}
+
+// fixRace(X) is called when "recent/X" exists but "X" doesn't
+// exist. If the timestamps on "recent/X" and "trash/X" indicate there
+// was a race between Put and Trash, fixRace recovers from the race by
+// Untrashing the block.
+func (v *s3Volume) fixRace(key string) bool {
+ trash, err := v.head("trash/" + key)
+ if err != nil {
+ if !os.IsNotExist(v.translateError(err)) {
+ v.logger.WithError(err).Errorf("fixRace: HEAD %q failed", "trash/"+key)
+ }
+ return false
+ }
+
+ recent, err := v.head("recent/" + key)
+ if err != nil {
+ v.logger.WithError(err).Errorf("fixRace: HEAD %q failed", "recent/"+key)
+ return false
+ }
+
+ recentTime := *recent.LastModified
+ trashTime := *trash.LastModified
+ ageWhenTrashed := trashTime.Sub(recentTime)
+ if ageWhenTrashed >= v.cluster.Collections.BlobSigningTTL.Duration() {
+ // No evidence of a race: block hasn't been written
+ // since it became eligible for Trash. No fix needed.
+ return false
+ }
+
+ v.logger.Infof("fixRace: %q: trashed at %s but touched at %s (age when trashed = %s < %s)", key, trashTime, recentTime, ageWhenTrashed, v.cluster.Collections.BlobSigningTTL)
+ v.logger.Infof("fixRace: copying %q to %q to recover from race between Put/Touch and Trash", "recent/"+key, key)
+ err = v.safeCopy(key, "trash/"+key)
+ if err != nil {
+ v.logger.WithError(err).Error("fixRace: copy failed")
+ return false
+ }
+ return true
+}
+
+func (v *s3Volume) head(key string) (result *s3.HeadObjectOutput, err error) {
+ input := &s3.HeadObjectInput{
+ Bucket: aws.String(v.bucket.bucket),
+ Key: aws.String(key),
+ }
+
+ req := v.bucket.svc.HeadObjectRequest(input)
+ res, err := req.Send(context.TODO())
+
+ v.bucket.stats.TickOps("head")
+ v.bucket.stats.Tick(&v.bucket.stats.Ops, &v.bucket.stats.HeadOps)
+ v.bucket.stats.TickErr(err)
+
+ if err != nil {
+ return nil, v.translateError(err)
+ }
+ result = res.HeadObjectOutput
+ return
+}
+
+// BlockRead reads a Keep block that has been stored as a block blob
+// in the S3 bucket.
+func (v *s3Volume) BlockRead(ctx context.Context, hash string, w io.WriterAt) error {
+ key := v.key(hash)
+ err := v.readWorker(ctx, key, w)
+ if err != nil {
+ err = v.translateError(err)
+ if !os.IsNotExist(err) {
+ return err
+ }
+
+ _, err = v.head("recent/" + key)
+ err = v.translateError(err)
+ if err != nil {
+ // If we can't read recent/X, there's no point in
+ // trying fixRace. Give up.
+ return err
+ }
+ if !v.fixRace(key) {
+ err = os.ErrNotExist
+ return err
+ }
+
+ err = v.readWorker(ctx, key, w)
+ if err != nil {
+ v.logger.Warnf("reading %s after successful fixRace: %s", hash, err)
+ err = v.translateError(err)
+ return err
+ }
+ }
+ return nil
+}
+
+func (v *s3Volume) readWorker(ctx context.Context, key string, dst io.WriterAt) error {
+ downloader := s3manager.NewDownloaderWithClient(v.bucket.svc, func(u *s3manager.Downloader) {
+ u.PartSize = s3downloaderPartSize
+ u.Concurrency = s3downloaderReadConcurrency
+ })
+ count, err := downloader.DownloadWithContext(ctx, dst, &s3.GetObjectInput{
+ Bucket: aws.String(v.bucket.bucket),
+ Key: aws.String(key),
+ })
+ v.bucket.stats.TickOps("get")
+ v.bucket.stats.Tick(&v.bucket.stats.Ops, &v.bucket.stats.GetOps)
+ v.bucket.stats.TickErr(err)
+ v.bucket.stats.TickInBytes(uint64(count))
+ return v.translateError(err)
+}
+
+func (v *s3Volume) writeObject(ctx context.Context, key string, r io.Reader) error {
+ if r == nil {
+ // r == nil leads to a memory violation in func readFillBuf in
+ // aws-sdk-go-v2@v0.23.0/service/s3/s3manager/upload.go
+ r = bytes.NewReader(nil)