From: Tom Clegg Date: Tue, 30 May 2023 20:54:03 +0000 (-0400) Subject: 20540: Implement nearly-full jitter for exponential backoff. X-Git-Tag: 2.7.0~93^2~2 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/54e9c47a8340c5952e8e4c0e96e33eb47c3cb963 20540: Implement nearly-full jitter for exponential backoff. Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- diff --git a/sdk/go/arvados/client.go b/sdk/go/arvados/client.go index 6316d1beda..c403146503 100644 --- a/sdk/go/arvados/client.go +++ b/sdk/go/arvados/client.go @@ -16,12 +16,15 @@ import ( "io/fs" "io/ioutil" "log" + "math" "math/big" + mathrand "math/rand" "net" "net/http" "net/url" "os" "regexp" + "strconv" "strings" "sync/atomic" "time" @@ -274,6 +277,7 @@ func (c *Client) Do(req *http.Request) (*http.Response, error) { rclient := retryablehttp.NewClient() rclient.HTTPClient = c.httpClient() + rclient.Backoff = exponentialBackoff if c.Timeout > 0 { rclient.RetryWaitMax = c.Timeout / 10 rclient.RetryMax = 32 @@ -370,6 +374,35 @@ func isRedirectStatus(code int) bool { } } +// Implements retryablehttp.Backoff using the server-provided +// Retry-After header if available, otherwise nearly-full jitter +// exponential backoff (similar to +// https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/), +// in all cases respecting the provided min and max. +func exponentialBackoff(min, max time.Duration, attemptNum int, resp *http.Response) time.Duration { + var t time.Duration + if resp != nil && (resp.StatusCode == http.StatusTooManyRequests || resp.StatusCode == http.StatusServiceUnavailable) { + if s := resp.Header.Get("Retry-After"); s != "" { + if sleep, err := strconv.ParseInt(s, 10, 64); err == nil { + t = time.Second * time.Duration(sleep) + } else if stamp, err := time.Parse(time.RFC1123, s); err == nil { + t = stamp.Sub(time.Now()) + } + } + } + if t == 0 { + jitter := mathrand.New(mathrand.NewSource(int64(time.Now().Nanosecond()))).Float64() + t = min + time.Duration((math.Pow(2, float64(attemptNum))*float64(min)-float64(min))*jitter) + } + if t < min { + return min + } else if t > max { + return max + } else { + return t + } +} + // DoAndDecode performs req and unmarshals the response (which must be // JSON) into dst. Use this instead of RequestAndDecode if you need // more control of the http.Request object. diff --git a/sdk/go/arvados/client_test.go b/sdk/go/arvados/client_test.go index 422aca9f68..b1aae00daf 100644 --- a/sdk/go/arvados/client_test.go +++ b/sdk/go/arvados/client_test.go @@ -9,6 +9,7 @@ import ( "context" "fmt" "io/ioutil" + "math" "math/rand" "net/http" "net/http/httptest" @@ -339,3 +340,61 @@ func (s *clientRetrySuite) TestContextAlreadyCanceled(c *check.C) { err := s.client.RequestAndDecodeContext(ctx, &struct{}{}, http.MethodGet, "test", nil, nil) c.Check(err, check.Equals, context.Canceled) } + +func (s *clientRetrySuite) TestExponentialBackoff(c *check.C) { + var min, max time.Duration + min, max = time.Second, 64*time.Second + + t := exponentialBackoff(min, max, 0, nil) + c.Check(t, check.Equals, min) + + for e := float64(1); e < 5; e += 1 { + ok := false + for i := 0; i < 20; i++ { + t = exponentialBackoff(min, max, int(e), nil) + // Every returned value must be between min and min(2^e, max) + c.Check(t >= min, check.Equals, true) + c.Check(t <= min*time.Duration(math.Pow(2, e)), check.Equals, true) + c.Check(t <= max, check.Equals, true) + // Check that jitter is actually happening by + // checking that at least one in 20 trials is + // between min*2^(e-.75) and min*2^(e-.25) + jittermin := time.Duration(float64(min) * math.Pow(2, e-0.75)) + jittermax := time.Duration(float64(min) * math.Pow(2, e-0.25)) + c.Logf("min %v max %v e %v jittermin %v jittermax %v t %v", min, max, e, jittermin, jittermax, t) + if t > jittermin && t < jittermax { + ok = true + break + } + } + c.Check(ok, check.Equals, true) + } + + for i := 0; i < 20; i++ { + t := exponentialBackoff(min, max, 100, nil) + c.Check(t < max, check.Equals, true) + } + + for _, trial := range []struct { + retryAfter string + expect time.Duration + }{ + {"1", time.Second * 4}, // minimum enforced + {"5", time.Second * 5}, // header used + {"55", time.Second * 10}, // maximum enforced + {"eleventy-nine", time.Second * 4}, // invalid header, exponential backoff used + {time.Now().UTC().Add(time.Second).Format(time.RFC1123), time.Second * 4}, // minimum enforced + {time.Now().UTC().Add(time.Minute).Format(time.RFC1123), time.Second * 10}, // maximum enforced + {time.Now().UTC().Add(-time.Minute).Format(time.RFC1123), time.Second * 4}, // minimum enforced + } { + c.Logf("trial %+v", trial) + t := exponentialBackoff(time.Second*4, time.Second*10, 0, &http.Response{ + StatusCode: http.StatusTooManyRequests, + Header: http.Header{"Retry-After": {trial.retryAfter}}}) + c.Check(t, check.Equals, trial.expect) + } + t = exponentialBackoff(time.Second*4, time.Second*10, 0, &http.Response{ + StatusCode: http.StatusTooManyRequests, + }) + c.Check(t, check.Equals, time.Second*4) +}