13383: Add PullWorkers, TrashWorkers, EmptyTrashWorkers configs.
authorTom Clegg <tclegg@veritasgenetics.com>
Tue, 24 Apr 2018 20:32:24 +0000 (16:32 -0400)
committerTom Clegg <tclegg@veritasgenetics.com>
Fri, 4 May 2018 20:50:47 +0000 (16:50 -0400)
EmptyTrashWorkers only applies to S3 volumes.

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tclegg@veritasgenetics.com>

services/keepstore/config.go
services/keepstore/keepstore.go
services/keepstore/s3_volume.go

index 17d6acdb68cca7463b9a7e9e49b9e1d9f3510229..19dc7f69bed5077744c346567c1ee8a8de8f81d4 100644 (file)
@@ -40,6 +40,9 @@ type Config struct {
        EnableDelete        bool
        TrashLifetime       arvados.Duration
        TrashCheckInterval  arvados.Duration
+       PullWorkers         int
+       TrashWorkers        int
+       EmptyTrashWorkers   int
 
        Volumes VolumeList
 
index 03eef7e76b0b897ed2cb70b95f22989b76436123..c74275201753787189e34604227ba0719c398bf8 100644 (file)
@@ -165,19 +165,23 @@ func main() {
                log.Fatal(err)
        }
 
-       // Initialize Pull queue and worker
+       // Initialize keepclient for pull workers
        keepClient := &keepclient.KeepClient{
                Arvados:       &arvadosclient.ArvadosClient{},
                Want_replicas: 1,
        }
 
-       // Initialize the pullq and worker
+       // Initialize the pullq and workers
        pullq = NewWorkQueue()
-       go RunPullWorker(pullq, keepClient)
+       for i := 0; i < 1 || i < theConfig.PullWorkers; i++ {
+               go RunPullWorker(pullq, keepClient)
+       }
 
-       // Initialize the trashq and worker
+       // Initialize the trashq and workers
        trashq = NewWorkQueue()
-       go RunTrashWorker(trashq)
+       for i := 0; i < 1 || i < theConfig.TrashWorkers; i++ {
+               go RunTrashWorker(trashq)
+       }
 
        // Start emptyTrash goroutine
        doneEmptyingTrash := make(chan bool)
index a60b2fc27e321f553c9784691702282ecb39a6e4..532a0823e2da2aefa5120a5fa2d5326e557bfa5e 100644 (file)
@@ -18,6 +18,7 @@ import (
        "regexp"
        "strings"
        "sync"
+       "sync/atomic"
        "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
@@ -764,26 +765,21 @@ func (v *S3Volume) translateError(err error) error {
 func (v *S3Volume) EmptyTrash() {
        var bytesInTrash, blocksInTrash, bytesDeleted, blocksDeleted int64
 
-       // Use a merge sort to find matching sets of trash/X and recent/X.
-       trashL := s3Lister{
-               Bucket:   v.bucket.Bucket,
-               Prefix:   "trash/",
-               PageSize: v.IndexPageSize,
-       }
        // Define "ready to delete" as "...when EmptyTrash started".
        startT := time.Now()
-       for trash := trashL.First(); trash != nil; trash = trashL.Next() {
+
+       emptyOneKey := func(trash *s3.Key) {
                loc := trash.Key[6:]
                if !v.isKeepBlock(loc) {
-                       continue
+                       return
                }
-               bytesInTrash += trash.Size
-               blocksInTrash++
+               atomic.AddInt64(&bytesInTrash, trash.Size)
+               atomic.AddInt64(&blocksInTrash, 1)
 
                trashT, err := time.Parse(time.RFC3339, trash.LastModified)
                if err != nil {
                        log.Printf("warning: %s: EmptyTrash: %q: parse %q: %s", v, trash.Key, trash.LastModified, err)
-                       continue
+                       return
                }
                recent, err := v.bucket.Head("recent/"+loc, nil)
                if err != nil && os.IsNotExist(v.translateError(err)) {
@@ -792,15 +788,15 @@ func (v *S3Volume) EmptyTrash() {
                        if err != nil {
                                log.Printf("error: %s: EmptyTrash: Untrash(%q): %s", v, loc, err)
                        }
-                       continue
+                       return
                } else if err != nil {
                        log.Printf("warning: %s: EmptyTrash: HEAD %q: %s", v, "recent/"+loc, err)
-                       continue
+                       return
                }
                recentT, err := v.lastModified(recent)
                if err != nil {
                        log.Printf("warning: %s: EmptyTrash: %q: parse %q: %s", v, "recent/"+loc, recent.Header.Get("Last-Modified"), err)
-                       continue
+                       return
                }
                if trashT.Sub(recentT) < theConfig.BlobSignatureTTL.Duration() {
                        if age := startT.Sub(recentT); age >= theConfig.BlobSignatureTTL.Duration()-time.Duration(v.RaceWindow) {
@@ -815,28 +811,28 @@ func (v *S3Volume) EmptyTrash() {
                                log.Printf("notice: %s: EmptyTrash: detected old race for %q, calling fixRace + Touch", v, loc)
                                v.fixRace(loc)
                                v.Touch(loc)
-                               continue
+                               return
                        }
                        _, err := v.bucket.Head(loc, nil)
                        if os.IsNotExist(err) {
                                log.Printf("notice: %s: EmptyTrash: detected recent race for %q, calling fixRace", v, loc)
                                v.fixRace(loc)
-                               continue
+                               return
                        } else if err != nil {
                                log.Printf("warning: %s: EmptyTrash: HEAD %q: %s", v, loc, err)
-                               continue
+                               return
                        }
                }
                if startT.Sub(trashT) < theConfig.TrashLifetime.Duration() {
-                       continue
+                       return
                }
                err = v.bucket.Del(trash.Key)
                if err != nil {
                        log.Printf("warning: %s: EmptyTrash: deleting %q: %s", v, trash.Key, err)
-                       continue
+                       return
                }
-               bytesDeleted += trash.Size
-               blocksDeleted++
+               atomic.AddInt64(&bytesDeleted, trash.Size)
+               atomic.AddInt64(&blocksDeleted, 1)
 
                _, err = v.bucket.Head(loc, nil)
                if os.IsNotExist(err) {
@@ -848,6 +844,30 @@ func (v *S3Volume) EmptyTrash() {
                        log.Printf("warning: %s: EmptyTrash: HEAD %q: %s", v, "recent/"+loc, err)
                }
        }
+
+       var wg sync.WaitGroup
+       todo := make(chan *s3.Key)
+       for i := 0; i < 1 || i < theConfig.EmptyTrashWorkers; i++ {
+               wg.Add(1)
+               go func() {
+                       defer wg.Done()
+                       for key := range todo {
+                               emptyOneKey(key)
+                       }
+               }()
+       }
+
+       trashL := s3Lister{
+               Bucket:   v.bucket.Bucket,
+               Prefix:   "trash/",
+               PageSize: v.IndexPageSize,
+       }
+       for trash := trashL.First(); trash != nil; trash = trashL.Next() {
+               todo <- trash
+       }
+       close(todo)
+       wg.Wait()
+
        if err := trashL.Error(); err != nil {
                log.Printf("error: %s: EmptyTrash: lister: %s", v, err)
        }