[arvados] updated: 2.7.0-6019-gbffc439a72

git repository hosting git at public.arvados.org
Thu Feb 15 22:22:49 UTC 2024


Summary of changes:
 sdk/go/keepclient/keepclient.go      | 26 +++++---------
 sdk/go/keepclient/keepclient_test.go | 67 +++++++++++++++++++++++++++++++++---
 sdk/go/keepclient/support.go         | 48 +++++++++++++++++++-------
 services/keepproxy/keepproxy.go      |  1 -
 4 files changed, 106 insertions(+), 36 deletions(-)

       via  bffc439a72f03126359d468329b8d25febc7bc9e (commit)
       via  33bb622ec2846f9e3c788655f42d7a7f25a4651c (commit)
      from  22361307cf41f916afd562e7f33fcdaacefe5f9d (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit bffc439a72f03126359d468329b8d25febc7bc9e
Author: Tom Clegg <tom at curii.com>
Date:   Thu Feb 15 17:21:06 2024 -0500

    21023: Just use the default retry delay profile in keepproxy.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/services/keepproxy/keepproxy.go b/services/keepproxy/keepproxy.go
index 8afe20f911..39ffd45cbe 100644
--- a/services/keepproxy/keepproxy.go
+++ b/services/keepproxy/keepproxy.go
@@ -320,7 +320,6 @@ func (h *proxyHandler) Get(resp http.ResponseWriter, req *http.Request) {
 
 	kc := h.makeKeepClient(req)
 	kc.DiskCacheSize = keepclient.DiskCacheDisabled
-	kc.RetryDelay = time.Second
 
 	var pass bool
 	var tok string

commit 33bb622ec2846f9e3c788655f42d7a7f25a4651c
Author: Tom Clegg <tom at curii.com>
Date:   Thu Feb 15 16:30:31 2024 -0500

    21023: Use known-good exponential backoff with jitter strategy.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/sdk/go/keepclient/keepclient.go b/sdk/go/keepclient/keepclient.go
index a455b96fb7..6b9ff181e5 100644
--- a/sdk/go/keepclient/keepclient.go
+++ b/sdk/go/keepclient/keepclient.go
@@ -15,7 +15,6 @@ import (
 	"fmt"
 	"io"
 	"io/ioutil"
-	"math/rand"
 	"net"
 	"net/http"
 	"os"
@@ -46,6 +45,7 @@ var (
 	DefaultProxyKeepAlive           = 120 * time.Second
 
 	DefaultRetryDelay = 2 * time.Second // see KeepClient.RetryDelay
+	MinimumRetryDelay = time.Millisecond
 
 	rootCacheDir = "/var/cache/arvados/keep"
 	userCacheDir = ".cache/arvados/keep" // relative to HOME
@@ -120,11 +120,11 @@ type KeepClient struct {
 	// operation after a transient failure.
 	Retries int
 
-	// Initial delay after first attempt, used when automatic
-	// retry is invoked. If zero, DefaultRetryDelay is used.
-	// Delays after subsequent attempts are increased by a random
-	// factor between 1.75x and 2x, up to a maximum of 10x the
-	// initial delay.
+	// Initial maximum delay for automatic retry. If zero,
+	// DefaultRetryDelay is used.  The delay after attempt N
+	// (0-based) will be a random duration between
+	// MinimumRetryDelay and RetryDelay * 2^N, not to exceed a cap
+	// of RetryDelay * 10.
 	RetryDelay time.Duration
 
 	RequestID             string
@@ -284,11 +284,7 @@ func (kc *KeepClient) getOrHead(method string, locator string, header http.Heade
 
 	var errs []string
 
-	retryDelay := kc.RetryDelay
-	if retryDelay < 1 {
-		retryDelay = DefaultRetryDelay
-	}
-	maxRetryDelay := retryDelay * 10
+	delay := delayCalculator{InitialMaxDelay: kc.RetryDelay}
 	triesRemaining := 1 + kc.Retries
 
 	serversToTry := kc.getSortedRoots(locator)
@@ -369,13 +365,7 @@ func (kc *KeepClient) getOrHead(method string, locator string, header http.Heade
 		}
 		serversToTry = retryList
 		if len(serversToTry) > 0 && triesRemaining > 0 {
-			time.Sleep(retryDelay)
-			// Increase delay by a random factor between
-			// 1.75x and 2x
-			retryDelay = time.Duration((2 - rand.Float64()/4) * float64(retryDelay))
-			if retryDelay > maxRetryDelay {
-				retryDelay = maxRetryDelay
-			}
+			time.Sleep(delay.Next())
 		}
 	}
 	DebugPrintf("DEBUG: %s %s failed: %v", method, locator, errs)
diff --git a/sdk/go/keepclient/keepclient_test.go b/sdk/go/keepclient/keepclient_test.go
index 19a6d4e03b..4cfe1a0896 100644
--- a/sdk/go/keepclient/keepclient_test.go
+++ b/sdk/go/keepclient/keepclient_test.go
@@ -798,8 +798,12 @@ func (s *StandaloneSuite) TestGetFail(c *C) {
 }
 
 func (s *StandaloneSuite) TestGetFailRetry(c *C) {
-	defer func(orig time.Duration) { DefaultRetryDelay = orig }(DefaultRetryDelay)
+	defer func(origDefault, origMinimum time.Duration) {
+		DefaultRetryDelay = origDefault
+		MinimumRetryDelay = origMinimum
+	}(DefaultRetryDelay, MinimumRetryDelay)
 	DefaultRetryDelay = time.Second / 8
+	MinimumRetryDelay = time.Millisecond
 
 	hash := fmt.Sprintf("%x+3", md5.Sum([]byte("foo")))
 
@@ -839,7 +843,7 @@ func (s *StandaloneSuite) TestGetFailRetry(c *C) {
 		if expect == 0 {
 			expect = DefaultRetryDelay
 		}
-		min := expect + expect*7/4 + expect*7/4*7/4
+		min := MinimumRetryDelay * 3
 		max := expect + expect*2 + expect*2*2 + nonsleeptime
 		c.Check(elapsed >= min, Equals, true, Commentf("elapsed %v / expect min %v", elapsed, min))
 		c.Check(elapsed <= max, Equals, true, Commentf("elapsed %v / expect max %v", elapsed, max))
@@ -1502,8 +1506,12 @@ func (s *StandaloneSuite) TestGetIndexWithNoSuchPrefix(c *C) {
 }
 
 func (s *StandaloneSuite) TestPutBRetry(c *C) {
-	defer func(orig time.Duration) { DefaultRetryDelay = orig }(DefaultRetryDelay)
+	defer func(origDefault, origMinimum time.Duration) {
+		DefaultRetryDelay = origDefault
+		MinimumRetryDelay = origMinimum
+	}(DefaultRetryDelay, MinimumRetryDelay)
 	DefaultRetryDelay = time.Second / 8
+	MinimumRetryDelay = time.Millisecond
 
 	for _, delay := range []time.Duration{0, time.Nanosecond, time.Second / 8, time.Second / 16} {
 		c.Logf("=== initial delay %v", delay)
@@ -1556,8 +1564,9 @@ func (s *StandaloneSuite) TestPutBRetry(c *C) {
 		if expect == 0 {
 			expect = DefaultRetryDelay
 		}
-		min := expect + expect*7/4 + expect*7/4*7/4
-		max := expect + expect*2 + expect*2*2 + nonsleeptime
+		min := MinimumRetryDelay * 3
+		max := expect + expect*2 + expect*2*2
+		max += nonsleeptime
 		c.Check(elapsed >= min, Equals, true, Commentf("elapsed %v / expect min %v", elapsed, min))
 		c.Check(elapsed <= max, Equals, true, Commentf("elapsed %v / expect max %v", elapsed, max))
 	}
@@ -1608,3 +1617,51 @@ func (s *ServerRequiredSuite) TestMakeKeepClientWithNonDiskTypeService(c *C) {
 	c.Assert(kc.foundNonDiskSvc, Equals, true)
 	c.Assert(kc.httpClient().(*http.Client).Timeout, Equals, 300*time.Second)
 }
+
+func (s *StandaloneSuite) TestDelayCalculator(c *C) {
+	defer func(origDefault, origMinimum time.Duration) {
+		DefaultRetryDelay = origDefault
+		MinimumRetryDelay = origMinimum
+	}(DefaultRetryDelay, MinimumRetryDelay)
+
+	checkInterval := func(d, min, max time.Duration) {
+		c.Check(d >= min, Equals, true)
+		c.Check(d <= max, Equals, true)
+	}
+
+	MinimumRetryDelay = time.Second / 2
+	DefaultRetryDelay = time.Second
+	dc := delayCalculator{InitialMaxDelay: 0}
+	checkInterval(dc.Next(), time.Second/2, time.Second)
+	checkInterval(dc.Next(), time.Second/2, time.Second*2)
+	checkInterval(dc.Next(), time.Second/2, time.Second*4)
+	checkInterval(dc.Next(), time.Second/2, time.Second*8)
+	checkInterval(dc.Next(), time.Second/2, time.Second*10)
+	checkInterval(dc.Next(), time.Second/2, time.Second*10)
+
+	// Enforce non-zero InitialMaxDelay
+	dc = delayCalculator{InitialMaxDelay: time.Second}
+	checkInterval(dc.Next(), time.Second/2, time.Second*2)
+	checkInterval(dc.Next(), time.Second/2, time.Second*4)
+	checkInterval(dc.Next(), time.Second/2, time.Second*8)
+	checkInterval(dc.Next(), time.Second/2, time.Second*16)
+	checkInterval(dc.Next(), time.Second/2, time.Second*20)
+	checkInterval(dc.Next(), time.Second/2, time.Second*20)
+
+	// Enforce MinimumRetryDelay
+	dc = delayCalculator{InitialMaxDelay: time.Millisecond}
+	checkInterval(dc.Next(), time.Second/2, time.Second/2)
+	checkInterval(dc.Next(), time.Second/2, time.Second)
+	checkInterval(dc.Next(), time.Second/2, time.Second*2)
+	checkInterval(dc.Next(), time.Second/2, time.Second*4)
+	checkInterval(dc.Next(), time.Second/2, time.Second*8)
+	checkInterval(dc.Next(), time.Second/2, time.Second*10)
+	checkInterval(dc.Next(), time.Second/2, time.Second*10)
+
+	// If InitialMaxDelay is less than MinimumRetryDelay/10, then
+	// delay is always MinimumRetryDelay.
+	dc = delayCalculator{InitialMaxDelay: time.Millisecond}
+	for i := 0; i < 20; i++ {
+		c.Check(dc.Next(), Equals, time.Second/2)
+	}
+}
diff --git a/sdk/go/keepclient/support.go b/sdk/go/keepclient/support.go
index d5e3d0ec1c..d3d799dc5d 100644
--- a/sdk/go/keepclient/support.go
+++ b/sdk/go/keepclient/support.go
@@ -220,11 +220,7 @@ func (kc *KeepClient) httpBlockWrite(ctx context.Context, req arvados.BlockWrite
 		replicasPerThread = req.Replicas
 	}
 
-	retryDelay := kc.RetryDelay
-	if retryDelay < 1 {
-		retryDelay = DefaultRetryDelay
-	}
-	maxRetryDelay := retryDelay * 10
+	delay := delayCalculator{InitialMaxDelay: kc.RetryDelay}
 	retriesRemaining := req.Attempts
 	var retryServers []string
 
@@ -322,13 +318,7 @@ func (kc *KeepClient) httpBlockWrite(ctx context.Context, req arvados.BlockWrite
 
 		sv = retryServers
 		if len(sv) > 0 {
-			time.Sleep(retryDelay)
-			// Increase delay by a random factor between
-			// 1.75x and 2x
-			retryDelay = time.Duration((2 - rand.Float64()/4) * float64(retryDelay))
-			if retryDelay > maxRetryDelay {
-				retryDelay = maxRetryDelay
-			}
+			time.Sleep(delay.Next())
 		}
 	}
 
@@ -361,3 +351,37 @@ func parseStorageClassesConfirmedHeader(hdr string) (map[string]int, error) {
 	}
 	return classesStored, nil
 }
+
+// delayCalculator calculates a series of delays for implementing
+// exponential backoff with jitter.  The first call to Next() returns
+// a random duration between MinimumRetryDelay and the specified
+// InitialMaxDelay (or DefaultRetryDelay if 0).  The max delay is
+// doubled on each subsequent call to Next(), up to 10x the initial
+// max delay.
+type delayCalculator struct {
+	InitialMaxDelay time.Duration
+	n               int // number of delays returned so far
+	nextmax         time.Duration
+	limit           time.Duration
+}
+
+func (dc *delayCalculator) Next() time.Duration {
+	if dc.nextmax <= MinimumRetryDelay {
+		// initialize
+		if dc.InitialMaxDelay > 0 {
+			dc.nextmax = dc.InitialMaxDelay
+		} else {
+			dc.nextmax = DefaultRetryDelay
+		}
+		dc.limit = 10 * dc.nextmax
+	}
+	d := time.Duration(rand.Float64() * float64(dc.nextmax))
+	if d < MinimumRetryDelay {
+		d = MinimumRetryDelay
+	}
+	dc.nextmax *= 2
+	if dc.nextmax > dc.limit {
+		dc.nextmax = dc.limit
+	}
+	return d
+}

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list