20540: Implement nearly-full jitter for exponential backoff.
authorTom Clegg <tom@curii.com>
Tue, 30 May 2023 20:54:03 +0000 (16:54 -0400)
committerTom Clegg <tom@curii.com>
Tue, 30 May 2023 20:54:03 +0000 (16:54 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

sdk/go/arvados/client.go
sdk/go/arvados/client_test.go

index 6316d1bedaceb35bc7d3f5578aff3425078aabb7..c4031465034a9e792c46ffbcb5ae1445fbe70230 100644 (file)
@@ -16,12 +16,15 @@ import (
        "io/fs"
        "io/ioutil"
        "log"
+       "math"
        "math/big"
+       mathrand "math/rand"
        "net"
        "net/http"
        "net/url"
        "os"
        "regexp"
+       "strconv"
        "strings"
        "sync/atomic"
        "time"
@@ -274,6 +277,7 @@ func (c *Client) Do(req *http.Request) (*http.Response, error) {
 
        rclient := retryablehttp.NewClient()
        rclient.HTTPClient = c.httpClient()
+       rclient.Backoff = exponentialBackoff
        if c.Timeout > 0 {
                rclient.RetryWaitMax = c.Timeout / 10
                rclient.RetryMax = 32
@@ -370,6 +374,35 @@ func isRedirectStatus(code int) bool {
        }
 }
 
+// Implements retryablehttp.Backoff using the server-provided
+// Retry-After header if available, otherwise nearly-full jitter
+// exponential backoff (similar to
+// https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/),
+// in all cases respecting the provided min and max.
+func exponentialBackoff(min, max time.Duration, attemptNum int, resp *http.Response) time.Duration {
+       var t time.Duration
+       if resp != nil && (resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode == http.StatusServiceUnavailable) {
+               if s := resp.Header.Get("Retry-After"); s != "" {
+                       if sleep, err := strconv.ParseInt(s, 10, 64); err == nil {
+                               t = time.Second * time.Duration(sleep)
+                       } else if stamp, err := time.Parse(time.RFC1123, s); err == nil {
+                               t = stamp.Sub(time.Now())
+                       }
+               }
+       }
+       if t == 0 {
+               jitter := mathrand.New(mathrand.NewSource(int64(time.Now().Nanosecond()))).Float64()
+               t = min + time.Duration((math.Pow(2, float64(attemptNum))*float64(min)-float64(min))*jitter)
+       }
+       if t < min {
+               return min
+       } else if t > max {
+               return max
+       } else {
+               return t
+       }
+}
+
 // DoAndDecode performs req and unmarshals the response (which must be
 // JSON) into dst. Use this instead of RequestAndDecode if you need
 // more control of the http.Request object.
index 422aca9f68f9cdf47612170a1feee6416d89e389..b1aae00daf42fd860be2ecbbbf131f108a4be8bd 100644 (file)
@@ -9,6 +9,7 @@ import (
        "context"
        "fmt"
        "io/ioutil"
+       "math"
        "math/rand"
        "net/http"
        "net/http/httptest"
@@ -339,3 +340,61 @@ func (s *clientRetrySuite) TestContextAlreadyCanceled(c *check.C) {
        err := s.client.RequestAndDecodeContext(ctx, &struct{}{}, http.MethodGet, "test", nil, nil)
        c.Check(err, check.Equals, context.Canceled)
 }
+
+func (s *clientRetrySuite) TestExponentialBackoff(c *check.C) {
+       var min, max time.Duration
+       min, max = time.Second, 64*time.Second
+
+       t := exponentialBackoff(min, max, 0, nil)
+       c.Check(t, check.Equals, min)
+
+       for e := float64(1); e < 5; e += 1 {
+               ok := false
+               for i := 0; i < 20; i++ {
+                       t = exponentialBackoff(min, max, int(e), nil)
+                       // Every returned value must be between min and min(2^e, max)
+                       c.Check(t >= min, check.Equals, true)
+                       c.Check(t <= min*time.Duration(math.Pow(2, e)), check.Equals, true)
+                       c.Check(t <= max, check.Equals, true)
+                       // Check that jitter is actually happening by
+                       // checking that at least one in 20 trials is
+                       // between min*2^(e-.75) and min*2^(e-.25)
+                       jittermin := time.Duration(float64(min) * math.Pow(2, e-0.75))
+                       jittermax := time.Duration(float64(min) * math.Pow(2, e-0.25))
+                       c.Logf("min %v max %v e %v jittermin %v jittermax %v t %v", min, max, e, jittermin, jittermax, t)
+                       if t > jittermin && t < jittermax {
+                               ok = true
+                               break
+                       }
+               }
+               c.Check(ok, check.Equals, true)
+       }
+
+       for i := 0; i < 20; i++ {
+               t := exponentialBackoff(min, max, 100, nil)
+               c.Check(t < max, check.Equals, true)
+       }
+
+       for _, trial := range []struct {
+               retryAfter string
+               expect     time.Duration
+       }{
+               {"1", time.Second * 4},             // minimum enforced
+               {"5", time.Second * 5},             // header used
+               {"55", time.Second * 10},           // maximum enforced
+               {"eleventy-nine", time.Second * 4}, // invalid header, exponential backoff used
+               {time.Now().UTC().Add(time.Second).Format(time.RFC1123), time.Second * 4},  // minimum enforced
+               {time.Now().UTC().Add(time.Minute).Format(time.RFC1123), time.Second * 10}, // maximum enforced
+               {time.Now().UTC().Add(-time.Minute).Format(time.RFC1123), time.Second * 4}, // minimum enforced
+       } {
+               c.Logf("trial %+v", trial)
+               t := exponentialBackoff(time.Second*4, time.Second*10, 0, &http.Response{
+                       StatusCode: http.StatusTooManyRequests,
+                       Header:     http.Header{"Retry-After": {trial.retryAfter}}})
+               c.Check(t, check.Equals, trial.expect)
+       }
+       t = exponentialBackoff(time.Second*4, time.Second*10, 0, &http.Response{
+               StatusCode: http.StatusTooManyRequests,
+       })
+       c.Check(t, check.Equals, time.Second*4)
+}