[arvados] updated: 2.7.0-5652-g84e451a572

git repository hosting git at public.arvados.org
Fri Dec 22 16:12:49 UTC 2023


Summary of changes:
 lib/config/config.default.yml         | 12 ++++----
 lib/config/deprecated.go              |  2 +-
 lib/config/deprecated_test.go         |  2 +-
 sdk/go/arvados/byte_size.go           | 56 +++++++++++++++++++++++++++++++++++
 sdk/go/arvados/byte_size_test.go      | 49 +++++++++++++++++++++++++++++-
 sdk/go/arvados/client.go              |  6 ++++
 sdk/go/arvados/config.go              |  4 +--
 sdk/go/arvados/keep_cache.go          | 27 ++++++++++++-----
 sdk/go/arvadosclient/arvadosclient.go |  6 ++++
 sdk/go/keepclient/keepclient.go       |  3 +-
 services/keep-web/cache.go            |  2 +-
 services/keep-web/handler.go          | 12 ++------
 services/keep-web/handler_test.go     | 18 ++++-------
 13 files changed, 158 insertions(+), 41 deletions(-)

       via  84e451a572012c115801e1a72fc95ec79b4640e8 (commit)
      from  461fdaa1b96142b8065c131ae0334046fc71ea56 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.


commit 84e451a572012c115801e1a72fc95ec79b4640e8
Author: Tom Clegg <tom at curii.com>
Date:   Fri Dec 22 11:11:51 2023 -0500

    20318: Add config entry for keep-web cache size.
    
    Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>

diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 05bc1309cd..872501f915 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -721,16 +721,18 @@ Clusters:
         # Time to cache manifests, permission checks, and sessions.
         TTL: 300s
 
-        # Block cache entries. Each block consumes up to 64 MiB RAM.
-        MaxBlockEntries: 20
+        # Maximum amount of data cached in /var/cache/arvados/keep.
+        # Can be given as a percentage ("10%") or a number of bytes
+        # ("10 GiB")
+        DiskCacheSize: 10%
 
         # Approximate memory limit (in bytes) for session cache.
         #
         # Note this applies to the in-memory representation of
         # projects and collections -- metadata, block locators,
-        # filenames, etc. -- excluding cached file content, which is
-        # limited by MaxBlockEntries.
-        MaxCollectionBytes: 100000000
+        # filenames, etc. -- not the file data itself (see
+        # DiskCacheSize).
+        MaxCollectionBytes: 100 MB
 
         # Persistent sessions.
         MaxSessions: 100
diff --git a/lib/config/deprecated.go b/lib/config/deprecated.go
index d5c09d6706..d518b3414a 100644
--- a/lib/config/deprecated.go
+++ b/lib/config/deprecated.go
@@ -495,7 +495,7 @@ func (ldr *Loader) loadOldKeepWebConfig(cfg *arvados.Config) error {
 		cluster.Collections.WebDAVCache.TTL = *oc.Cache.TTL
 	}
 	if oc.Cache.MaxCollectionBytes != nil {
-		cluster.Collections.WebDAVCache.MaxCollectionBytes = *oc.Cache.MaxCollectionBytes
+		cluster.Collections.WebDAVCache.MaxCollectionBytes = arvados.ByteSize(*oc.Cache.MaxCollectionBytes)
 	}
 	if oc.AnonymousTokens != nil {
 		if len(*oc.AnonymousTokens) > 0 {
diff --git a/lib/config/deprecated_test.go b/lib/config/deprecated_test.go
index f9b1d1661b..e06a1f231d 100644
--- a/lib/config/deprecated_test.go
+++ b/lib/config/deprecated_test.go
@@ -199,7 +199,7 @@ func (s *LoadSuite) TestLegacyKeepWebConfig(c *check.C) {
 	c.Check(cluster.SystemRootToken, check.Equals, "abcdefg")
 
 	c.Check(cluster.Collections.WebDAVCache.TTL, check.Equals, arvados.Duration(60*time.Second))
-	c.Check(cluster.Collections.WebDAVCache.MaxCollectionBytes, check.Equals, int64(1234567890))
+	c.Check(cluster.Collections.WebDAVCache.MaxCollectionBytes, check.Equals, arvados.ByteSize(1234567890))
 
 	c.Check(cluster.Services.WebDAVDownload.ExternalURL, check.Equals, arvados.URL{Host: "download.example.com", Path: "/"})
 	c.Check(cluster.Services.WebDAVDownload.InternalURLs[arvados.URL{Host: ":80"}], check.NotNil)
diff --git a/sdk/go/arvados/byte_size.go b/sdk/go/arvados/byte_size.go
index 08cc83e126..7cc2c69781 100644
--- a/sdk/go/arvados/byte_size.go
+++ b/sdk/go/arvados/byte_size.go
@@ -8,11 +8,16 @@ import (
 	"encoding/json"
 	"fmt"
 	"math"
+	"strconv"
 	"strings"
 )
 
 type ByteSize int64
 
+// ByteSizeOrPercent indicates either a number of bytes or a
+// percentage from 1 to 100 (stored internally as -1 to -100).
+type ByteSizeOrPercent ByteSize
+
 var prefixValue = map[string]int64{
 	"":   1,
 	"K":  1000,
@@ -89,3 +94,54 @@ func (n *ByteSize) UnmarshalJSON(data []byte) error {
 		return fmt.Errorf("bug: json.Number for %q is not int64 or float64: %s", s, err)
 	}
 }
+
+func (n ByteSizeOrPercent) MarshalJSON() ([]byte, error) {
+	if n < 0 && n >= -100 {
+		return []byte(fmt.Sprintf("\"%d%%\"", -n)), nil
+	} else {
+		return json.Marshal(int64(n))
+	}
+}
+
+func (n *ByteSizeOrPercent) UnmarshalJSON(data []byte) error {
+	if len(data) == 0 || data[0] != '"' {
+		return (*ByteSize)(n).UnmarshalJSON(data)
+	}
+	var s string
+	err := json.Unmarshal(data, &s)
+	if err != nil {
+		return err
+	}
+	if s := strings.TrimSpace(s); len(s) > 0 && s[len(s)-1] == '%' {
+		pct, err := strconv.ParseInt(strings.TrimSpace(s[:len(s)-1]), 10, 64)
+		if err != nil {
+			return err
+		}
+		if pct < 0 || pct > 100 {
+			return fmt.Errorf("invalid value %q (percentage must be between 0 and 100)", s)
+		}
+		*n = ByteSizeOrPercent(-pct)
+		return nil
+	}
+	return (*ByteSize)(n).UnmarshalJSON(data)
+}
+
+// ByteSize returns the absolute byte size specified by n, or 0 if n
+// specifies a percent.
+func (n ByteSizeOrPercent) ByteSize() ByteSize {
+	if n >= -100 && n < 0 {
+		return 0
+	} else {
+		return ByteSize(n)
+	}
+}
+
+// Percent returns the percentage specified by n, or 0 if n specifies
+// an absolute byte size.
+func (n ByteSizeOrPercent) Percent() int64 {
+	if n >= -100 && n < 0 {
+		return int64(-n)
+	} else {
+		return 0
+	}
+}
diff --git a/sdk/go/arvados/byte_size_test.go b/sdk/go/arvados/byte_size_test.go
index 7c4aff2072..e5fb10ebdb 100644
--- a/sdk/go/arvados/byte_size_test.go
+++ b/sdk/go/arvados/byte_size_test.go
@@ -64,7 +64,54 @@ func (s *ByteSizeSuite) TestUnmarshal(c *check.C) {
 	} {
 		var n ByteSize
 		err := yaml.Unmarshal([]byte(testcase+"\n"), &n)
-		c.Logf("%v => error: %v", n, err)
+		c.Logf("%s => error: %v", testcase, err)
+		c.Check(err, check.NotNil)
+	}
+}
+
+func (s *ByteSizeSuite) TestMarshalByteSizeOrPercent(c *check.C) {
+	for _, testcase := range []struct {
+		in  ByteSizeOrPercent
+		out string
+	}{
+		{0, "0"},
+		{-1, "1%"},
+		{-100, "100%"},
+		{8, "8"},
+	} {
+		out, err := yaml.Marshal(&testcase.in)
+		c.Check(err, check.IsNil)
+		c.Check(string(out), check.Equals, testcase.out+"\n")
+	}
+}
+
+func (s *ByteSizeSuite) TestUnmarshalByteSizeOrPercent(c *check.C) {
+	for _, testcase := range []struct {
+		in  string
+		out int64
+	}{
+		{"0", 0},
+		{"100", 100},
+		{"0%", 0},
+		{"1%", -1},
+		{"100%", -100},
+		{"8 GB", 8000000000},
+	} {
+		var n ByteSizeOrPercent
+		err := yaml.Unmarshal([]byte(testcase.in+"\n"), &n)
+		c.Logf("%v => %v: %v", testcase.in, testcase.out, n)
+		c.Check(err, check.IsNil)
+		c.Check(int64(n), check.Equals, testcase.out)
+	}
+	for _, testcase := range []string{
+		"1000%", "101%", "-1%",
+		"%", "-%", "%%", "%1",
+		"400000 EB",
+		"4.11e4 EB",
+	} {
+		var n ByteSizeOrPercent
+		err := yaml.Unmarshal([]byte(testcase+"\n"), &n)
+		c.Logf("%s => error: %v", testcase, err)
 		c.Check(err, check.NotNil)
 	}
 }
diff --git a/sdk/go/arvados/client.go b/sdk/go/arvados/client.go
index e3c1432660..991de1caa9 100644
--- a/sdk/go/arvados/client.go
+++ b/sdk/go/arvados/client.go
@@ -77,6 +77,11 @@ type Client struct {
 	// context deadline to establish a maximum request time.
 	Timeout time.Duration
 
+	// Maximum disk cache size in bytes or percent of available
+	// filesystem space. If zero, use default, currently 10% of
+	// available filesystem space.
+	DiskCacheSize ByteSizeOrPercent
+
 	dd *DiscoveryDocument
 
 	defaultRequestID string
@@ -154,6 +159,7 @@ func NewClientFromConfig(cluster *Cluster) (*Client, error) {
 		APIHost:        ctrlURL.Host,
 		Insecure:       cluster.TLS.Insecure,
 		Timeout:        5 * time.Minute,
+		DiskCacheSize:  cluster.Collections.WebDAVCache.DiskCacheSize,
 		requestLimiter: &requestLimiter{maxlimit: int64(cluster.API.MaxConcurrentRequests / 4)},
 	}, nil
 }
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index 6301ed047a..acc091a90f 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -63,8 +63,8 @@ func (sc *Config) GetCluster(clusterID string) (*Cluster, error) {
 
 type WebDAVCacheConfig struct {
 	TTL                Duration
-	MaxBlockEntries    int
-	MaxCollectionBytes int64
+	DiskCacheSize      ByteSizeOrPercent
+	MaxCollectionBytes ByteSize
 	MaxSessions        int
 }
 
diff --git a/sdk/go/arvados/keep_cache.go b/sdk/go/arvados/keep_cache.go
index af80daa2e0..a657153876 100644
--- a/sdk/go/arvados/keep_cache.go
+++ b/sdk/go/arvados/keep_cache.go
@@ -37,7 +37,7 @@ type KeepGateway interface {
 type DiskCache struct {
 	KeepGateway
 	Dir     string
-	MaxSize int64
+	MaxSize ByteSizeOrPercent
 	Logger  logrus.FieldLogger
 
 	tidying        int32 // see tidy()
@@ -534,7 +534,7 @@ func (cache *DiskCache) gotidy() {
 	// is below MaxSize, and we haven't reached the "recheck
 	// anyway" time threshold.
 	if cache.sizeMeasured > 0 &&
-		atomic.LoadInt64(&cache.sizeEstimated) < cache.MaxSize &&
+		atomic.LoadInt64(&cache.sizeEstimated) < atomic.LoadInt64(&cache.defaultMaxSize) &&
 		time.Now().Before(cache.tidyHoldUntil) {
 		atomic.AddInt32(&cache.tidying, -1)
 		return
@@ -548,14 +548,26 @@ func (cache *DiskCache) gotidy() {
 
 // Delete cache files as needed to control disk usage.
 func (cache *DiskCache) tidy() {
-	maxsize := cache.MaxSize
+	maxsize := int64(cache.MaxSize.ByteSize())
 	if maxsize < 1 {
-		if maxsize = atomic.LoadInt64(&cache.defaultMaxSize); maxsize == 0 {
+		maxsize = atomic.LoadInt64(&cache.defaultMaxSize)
+		if maxsize == 0 {
+			// defaultMaxSize not yet computed. Use 10% of
+			// available filesystem space (or a different
+			// percentage if indicated by cache.MaxSize).
+			pct := cache.MaxSize.Percent()
+			if pct == 0 {
+				pct = 10
+			}
 			var stat unix.Statfs_t
 			if nil == unix.Statfs(cache.Dir, &stat) {
-				maxsize = int64(stat.Bavail) * stat.Bsize / 10
+				maxsize = int64(stat.Bavail) * stat.Bsize * pct / 100
+				atomic.StoreInt64(&cache.defaultMaxSize, maxsize)
+			} else {
+				// In this case we will set
+				// defaultMaxSize below after
+				// measuring current usage.
 			}
-			atomic.StoreInt64(&cache.defaultMaxSize, maxsize)
 		}
 	}
 
@@ -611,7 +623,8 @@ func (cache *DiskCache) tidy() {
 
 	// If MaxSize wasn't specified and we failed to come up with a
 	// defaultSize above, use the larger of {current cache size, 1
-	// GiB} as the defaultSize for subsequent tidy() operations.
+	// GiB} as the defaultMaxSize for subsequent tidy()
+	// operations.
 	if maxsize == 0 {
 		if totalsize < 1<<30 {
 			atomic.StoreInt64(&cache.defaultMaxSize, 1<<30)
diff --git a/sdk/go/arvadosclient/arvadosclient.go b/sdk/go/arvadosclient/arvadosclient.go
index 461320eca9..d0ebdc1b01 100644
--- a/sdk/go/arvadosclient/arvadosclient.go
+++ b/sdk/go/arvadosclient/arvadosclient.go
@@ -105,6 +105,11 @@ type ArvadosClient struct {
 	// available services.
 	KeepServiceURIs []string
 
+	// Maximum disk cache size in bytes or percent of available
+	// filesystem space. If zero, use default, currently 10% of
+	// available filesystem space.
+	DiskCacheSize arvados.ByteSizeOrPercent
+
 	// Discovery document
 	DiscoveryDoc Dict
 
@@ -144,6 +149,7 @@ func New(c *arvados.Client) (*ArvadosClient, error) {
 		Client:            hc,
 		Retries:           2,
 		KeepServiceURIs:   c.KeepServiceURIs,
+		DiskCacheSize:     c.DiskCacheSize,
 		lastClosedIdlesAt: time.Now(),
 	}
 
diff --git a/sdk/go/keepclient/keepclient.go b/sdk/go/keepclient/keepclient.go
index 08fa455ff4..b03362ee48 100644
--- a/sdk/go/keepclient/keepclient.go
+++ b/sdk/go/keepclient/keepclient.go
@@ -113,6 +113,7 @@ type KeepClient struct {
 	RequestID             string
 	StorageClasses        []string
 	DefaultStorageClasses []string // Set by cluster's exported config
+	DiskCacheSize         arvados.ByteSizeOrPercent
 
 	// set to 1 if all writable services are of disk type, otherwise 0
 	replicasPerService int
@@ -137,7 +138,6 @@ func (kc *KeepClient) Clone() *KeepClient {
 		gatewayRoots:          kc.gatewayRoots,
 		HTTPClient:            kc.HTTPClient,
 		Retries:               kc.Retries,
-		BlockCache:            kc.BlockCache,
 		RequestID:             kc.RequestID,
 		StorageClasses:        kc.StorageClasses,
 		DefaultStorageClasses: kc.DefaultStorageClasses,
@@ -387,6 +387,7 @@ func (kc *KeepClient) upstreamGateway() arvados.KeepGateway {
 	}
 	kc.gatewayStack = &arvados.DiskCache{
 		Dir:         cachedir,
+		MaxSize:     kc.DiskCacheSize,
 		KeepGateway: &keepViaHTTP{kc},
 	}
 	return kc.gatewayStack
diff --git a/services/keep-web/cache.go b/services/keep-web/cache.go
index df5705ed32..d443bc0829 100644
--- a/services/keep-web/cache.go
+++ b/services/keep-web/cache.go
@@ -303,7 +303,7 @@ func (c *cache) pruneSessions() {
 	// Mark more sessions for deletion until reaching desired
 	// memory size limit, starting with the oldest entries.
 	for i, snap := range snaps {
-		if size <= c.cluster.Collections.WebDAVCache.MaxCollectionBytes {
+		if size <= int64(c.cluster.Collections.WebDAVCache.MaxCollectionBytes) {
 			break
 		}
 		if snap.prune {
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 123c4fe34d..12c2839f8c 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -27,15 +27,13 @@ import (
 	"git.arvados.org/arvados.git/sdk/go/auth"
 	"git.arvados.org/arvados.git/sdk/go/ctxlog"
 	"git.arvados.org/arvados.git/sdk/go/httpserver"
-	"git.arvados.org/arvados.git/sdk/go/keepclient"
 	"github.com/sirupsen/logrus"
 	"golang.org/x/net/webdav"
 )
 
 type handler struct {
-	Cache     cache
-	Cluster   *arvados.Cluster
-	setupOnce sync.Once
+	Cache   cache
+	Cluster *arvados.Cluster
 
 	lockMtx    sync.Mutex
 	lock       map[string]*sync.RWMutex
@@ -60,10 +58,6 @@ func parseCollectionIDFromURL(s string) string {
 	return ""
 }
 
-func (h *handler) setup() {
-	keepclient.DefaultBlockCache.MaxBlocks = h.Cluster.Collections.WebDAVCache.MaxBlockEntries
-}
-
 func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
 	json.NewEncoder(w).Encode(struct{ Version string }{cmd.Version.String()})
 }
@@ -179,8 +173,6 @@ func (h *handler) Done() <-chan struct{} {
 
 // ServeHTTP implements http.Handler.
 func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
-	h.setupOnce.Do(h.setup)
-
 	if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
 		r.URL.Scheme = xfp
 	}
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index 5a12e26e9d..85c7801cd4 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -1469,20 +1469,14 @@ func (s *IntegrationSuite) TestFileContentType(c *check.C) {
 	}
 }
 
-func (s *IntegrationSuite) TestKeepClientBlockCache(c *check.C) {
-	s.handler.Cluster.Collections.WebDAVCache.MaxBlockEntries = 42
-	c.Check(keepclient.DefaultBlockCache.MaxBlocks, check.Not(check.Equals), 42)
-	u := mustParseURL("http://keep-web.example/c=" + arvadostest.FooCollection + "/t=" + arvadostest.ActiveToken + "/foo")
-	req := &http.Request{
-		Method:     "GET",
-		Host:       u.Host,
-		URL:        u,
-		RequestURI: u.RequestURI(),
-	}
+func (s *IntegrationSuite) TestCacheSize(c *check.C) {
+	req, err := http.NewRequest("GET", "http://"+arvadostest.FooCollection+".example.com/foo", nil)
+	req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveTokenV2)
+	c.Assert(err, check.IsNil)
 	resp := httptest.NewRecorder()
 	s.handler.ServeHTTP(resp, req)
-	c.Check(resp.Code, check.Equals, http.StatusOK)
-	c.Check(keepclient.DefaultBlockCache.MaxBlocks, check.Equals, 42)
+	c.Assert(resp.Code, check.Equals, http.StatusOK)
+	c.Check(s.handler.Cache.sessions[arvadostest.ActiveTokenV2].client.DiskCacheSize.Percent(), check.Equals, int64(10))
 }
 
 // Writing to a collection shouldn't affect its entry in the

-----------------------------------------------------------------------


hooks/post-receive
-- 




More information about the arvados-commits mailing list