[arvados] updated: 2.7.0-5652-g84e451a572
git repository hosting
git at public.arvados.org
Fri Dec 22 16:12:49 UTC 2023
Summary of changes:
lib/config/config.default.yml | 12 ++++----
lib/config/deprecated.go | 2 +-
lib/config/deprecated_test.go | 2 +-
sdk/go/arvados/byte_size.go | 56 +++++++++++++++++++++++++++++++++++
sdk/go/arvados/byte_size_test.go | 49 +++++++++++++++++++++++++++++-
sdk/go/arvados/client.go | 6 ++++
sdk/go/arvados/config.go | 4 +--
sdk/go/arvados/keep_cache.go | 27 ++++++++++++-----
sdk/go/arvadosclient/arvadosclient.go | 6 ++++
sdk/go/keepclient/keepclient.go | 3 +-
services/keep-web/cache.go | 2 +-
services/keep-web/handler.go | 12 ++------
services/keep-web/handler_test.go | 18 ++++-------
13 files changed, 158 insertions(+), 41 deletions(-)
via 84e451a572012c115801e1a72fc95ec79b4640e8 (commit)
from 461fdaa1b96142b8065c131ae0334046fc71ea56 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
commit 84e451a572012c115801e1a72fc95ec79b4640e8
Author: Tom Clegg <tom at curii.com>
Date: Fri Dec 22 11:11:51 2023 -0500
20318: Add config entry for keep-web cache size.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 05bc1309cd..872501f915 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -721,16 +721,18 @@ Clusters:
# Time to cache manifests, permission checks, and sessions.
TTL: 300s
- # Block cache entries. Each block consumes up to 64 MiB RAM.
- MaxBlockEntries: 20
+ # Maximum amount of data cached in /var/cache/arvados/keep.
+ # Can be given as a percentage ("10%") or a number of bytes
+ # ("10 GiB")
+ DiskCacheSize: 10%
# Approximate memory limit (in bytes) for session cache.
#
# Note this applies to the in-memory representation of
# projects and collections -- metadata, block locators,
- # filenames, etc. -- excluding cached file content, which is
- # limited by MaxBlockEntries.
- MaxCollectionBytes: 100000000
+ # filenames, etc. -- not the file data itself (see
+ # DiskCacheSize).
+ MaxCollectionBytes: 100 MB
# Persistent sessions.
MaxSessions: 100
diff --git a/lib/config/deprecated.go b/lib/config/deprecated.go
index d5c09d6706..d518b3414a 100644
--- a/lib/config/deprecated.go
+++ b/lib/config/deprecated.go
@@ -495,7 +495,7 @@ func (ldr *Loader) loadOldKeepWebConfig(cfg *arvados.Config) error {
cluster.Collections.WebDAVCache.TTL = *oc.Cache.TTL
}
if oc.Cache.MaxCollectionBytes != nil {
- cluster.Collections.WebDAVCache.MaxCollectionBytes = *oc.Cache.MaxCollectionBytes
+ cluster.Collections.WebDAVCache.MaxCollectionBytes = arvados.ByteSize(*oc.Cache.MaxCollectionBytes)
}
if oc.AnonymousTokens != nil {
if len(*oc.AnonymousTokens) > 0 {
diff --git a/lib/config/deprecated_test.go b/lib/config/deprecated_test.go
index f9b1d1661b..e06a1f231d 100644
--- a/lib/config/deprecated_test.go
+++ b/lib/config/deprecated_test.go
@@ -199,7 +199,7 @@ func (s *LoadSuite) TestLegacyKeepWebConfig(c *check.C) {
c.Check(cluster.SystemRootToken, check.Equals, "abcdefg")
c.Check(cluster.Collections.WebDAVCache.TTL, check.Equals, arvados.Duration(60*time.Second))
- c.Check(cluster.Collections.WebDAVCache.MaxCollectionBytes, check.Equals, int64(1234567890))
+ c.Check(cluster.Collections.WebDAVCache.MaxCollectionBytes, check.Equals, arvados.ByteSize(1234567890))
c.Check(cluster.Services.WebDAVDownload.ExternalURL, check.Equals, arvados.URL{Host: "download.example.com", Path: "/"})
c.Check(cluster.Services.WebDAVDownload.InternalURLs[arvados.URL{Host: ":80"}], check.NotNil)
diff --git a/sdk/go/arvados/byte_size.go b/sdk/go/arvados/byte_size.go
index 08cc83e126..7cc2c69781 100644
--- a/sdk/go/arvados/byte_size.go
+++ b/sdk/go/arvados/byte_size.go
@@ -8,11 +8,16 @@ import (
"encoding/json"
"fmt"
"math"
+ "strconv"
"strings"
)
type ByteSize int64
+// ByteSizeOrPercent indicates either a number of bytes or a
+// percentage from 1 to 100.
+type ByteSizeOrPercent ByteSize
+
var prefixValue = map[string]int64{
"": 1,
"K": 1000,
@@ -89,3 +94,54 @@ func (n *ByteSize) UnmarshalJSON(data []byte) error {
return fmt.Errorf("bug: json.Number for %q is not int64 or float64: %s", s, err)
}
}
+
+func (n ByteSizeOrPercent) MarshalJSON() ([]byte, error) {
+ if n < 0 && n >= -100 {
+ return []byte(fmt.Sprintf("\"%d%%\"", -n)), nil
+ } else {
+ return json.Marshal(int64(n))
+ }
+}
+
+func (n *ByteSizeOrPercent) UnmarshalJSON(data []byte) error {
+ if len(data) == 0 || data[0] != '"' {
+ return (*ByteSize)(n).UnmarshalJSON(data)
+ }
+ var s string
+ err := json.Unmarshal(data, &s)
+ if err != nil {
+ return err
+ }
+ if s := strings.TrimSpace(s); len(s) > 0 && s[len(s)-1] == '%' {
+ pct, err := strconv.ParseInt(strings.TrimSpace(s[:len(s)-1]), 10, 64)
+ if err != nil {
+ return err
+ }
+ if pct < 0 || pct > 100 {
+ return fmt.Errorf("invalid value %q (percentage must be between 0 and 100)", s)
+ }
+ *n = ByteSizeOrPercent(-pct)
+ return nil
+ }
+ return (*ByteSize)(n).UnmarshalJSON(data)
+}
+
+// ByteSize returns the absolute byte size specified by n, or 0 if n
+// specifies a percent.
+func (n ByteSizeOrPercent) ByteSize() ByteSize {
+ if n >= -100 && n < 0 {
+ return 0
+ } else {
+ return ByteSize(n)
+ }
+}
+
+// Percent returns the percentage specified by n, or 0 if n specifies
+// an absolute byte size.
+func (n ByteSizeOrPercent) Percent() int64 {
+ if n >= -100 && n < 0 {
+ return int64(-n)
+ } else {
+ return 0
+ }
+}
diff --git a/sdk/go/arvados/byte_size_test.go b/sdk/go/arvados/byte_size_test.go
index 7c4aff2072..e5fb10ebdb 100644
--- a/sdk/go/arvados/byte_size_test.go
+++ b/sdk/go/arvados/byte_size_test.go
@@ -64,7 +64,54 @@ func (s *ByteSizeSuite) TestUnmarshal(c *check.C) {
} {
var n ByteSize
err := yaml.Unmarshal([]byte(testcase+"\n"), &n)
- c.Logf("%v => error: %v", n, err)
+ c.Logf("%s => error: %v", testcase, err)
+ c.Check(err, check.NotNil)
+ }
+}
+
+func (s *ByteSizeSuite) TestMarshalByteSizeOrPercent(c *check.C) {
+ for _, testcase := range []struct {
+ in ByteSizeOrPercent
+ out string
+ }{
+ {0, "0"},
+ {-1, "1%"},
+ {-100, "100%"},
+ {8, "8"},
+ } {
+ out, err := yaml.Marshal(&testcase.in)
+ c.Check(err, check.IsNil)
+ c.Check(string(out), check.Equals, testcase.out+"\n")
+ }
+}
+
+func (s *ByteSizeSuite) TestUnmarshalByteSizeOrPercent(c *check.C) {
+ for _, testcase := range []struct {
+ in string
+ out int64
+ }{
+ {"0", 0},
+ {"100", 100},
+ {"0%", 0},
+ {"1%", -1},
+ {"100%", -100},
+ {"8 GB", 8000000000},
+ } {
+ var n ByteSizeOrPercent
+ err := yaml.Unmarshal([]byte(testcase.in+"\n"), &n)
+ c.Logf("%v => %v: %v", testcase.in, testcase.out, n)
+ c.Check(err, check.IsNil)
+ c.Check(int64(n), check.Equals, testcase.out)
+ }
+ for _, testcase := range []string{
+ "1000%", "101%", "-1%",
+ "%", "-%", "%%", "%1",
+ "400000 EB",
+ "4.11e4 EB",
+ } {
+ var n ByteSizeOrPercent
+ err := yaml.Unmarshal([]byte(testcase+"\n"), &n)
+ c.Logf("%s => error: %v", testcase, err)
c.Check(err, check.NotNil)
}
}
diff --git a/sdk/go/arvados/client.go b/sdk/go/arvados/client.go
index e3c1432660..991de1caa9 100644
--- a/sdk/go/arvados/client.go
+++ b/sdk/go/arvados/client.go
@@ -77,6 +77,11 @@ type Client struct {
// context deadline to establish a maximum request time.
Timeout time.Duration
+ // Maximum disk cache size in bytes or percent of total
+ // filesystem size. If zero, use default, currently 10% of
+ // filesystem size.
+ DiskCacheSize ByteSizeOrPercent
+
dd *DiscoveryDocument
defaultRequestID string
@@ -154,6 +159,7 @@ func NewClientFromConfig(cluster *Cluster) (*Client, error) {
APIHost: ctrlURL.Host,
Insecure: cluster.TLS.Insecure,
Timeout: 5 * time.Minute,
+ DiskCacheSize: cluster.Collections.WebDAVCache.DiskCacheSize,
requestLimiter: &requestLimiter{maxlimit: int64(cluster.API.MaxConcurrentRequests / 4)},
}, nil
}
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index 6301ed047a..acc091a90f 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -63,8 +63,8 @@ func (sc *Config) GetCluster(clusterID string) (*Cluster, error) {
type WebDAVCacheConfig struct {
TTL Duration
- MaxBlockEntries int
- MaxCollectionBytes int64
+ DiskCacheSize ByteSizeOrPercent
+ MaxCollectionBytes ByteSize
MaxSessions int
}
diff --git a/sdk/go/arvados/keep_cache.go b/sdk/go/arvados/keep_cache.go
index af80daa2e0..a657153876 100644
--- a/sdk/go/arvados/keep_cache.go
+++ b/sdk/go/arvados/keep_cache.go
@@ -37,7 +37,7 @@ type KeepGateway interface {
type DiskCache struct {
KeepGateway
Dir string
- MaxSize int64
+ MaxSize ByteSizeOrPercent
Logger logrus.FieldLogger
tidying int32 // see tidy()
@@ -534,7 +534,7 @@ func (cache *DiskCache) gotidy() {
// is below MaxSize, and we haven't reached the "recheck
// anyway" time threshold.
if cache.sizeMeasured > 0 &&
- atomic.LoadInt64(&cache.sizeEstimated) < cache.MaxSize &&
+ atomic.LoadInt64(&cache.sizeEstimated) < atomic.LoadInt64(&cache.defaultMaxSize) &&
time.Now().Before(cache.tidyHoldUntil) {
atomic.AddInt32(&cache.tidying, -1)
return
@@ -548,14 +548,26 @@ func (cache *DiskCache) gotidy() {
// Delete cache files as needed to control disk usage.
func (cache *DiskCache) tidy() {
- maxsize := cache.MaxSize
+ maxsize := int64(cache.MaxSize.ByteSize())
if maxsize < 1 {
- if maxsize = atomic.LoadInt64(&cache.defaultMaxSize); maxsize == 0 {
+ maxsize = atomic.LoadInt64(&cache.defaultMaxSize)
+ if maxsize == 0 {
+ // defaultMaxSize not yet computed. Use 10% of
+ // filesystem capacity (or different
+ // percentage if indicated by cache.MaxSize)
+ pct := cache.MaxSize.Percent()
+ if pct == 0 {
+ pct = 10
+ }
var stat unix.Statfs_t
if nil == unix.Statfs(cache.Dir, &stat) {
- maxsize = int64(stat.Bavail) * stat.Bsize / 10
+ maxsize = int64(stat.Bavail) * stat.Bsize * pct / 100
+ atomic.StoreInt64(&cache.defaultMaxSize, maxsize)
+ } else {
+ // In this case we will set
+ // defaultMaxSize below after
+ // measuring current usage.
}
- atomic.StoreInt64(&cache.defaultMaxSize, maxsize)
}
}
@@ -611,7 +623,8 @@ func (cache *DiskCache) tidy() {
// If MaxSize wasn't specified and we failed to come up with a
// defaultMaxSize above, use the larger of {current cache size, 1
- // GiB} as the defaultSize for subsequent tidy() operations.
+ // GiB} as the defaultMaxSize for subsequent tidy()
+ // operations.
if maxsize == 0 {
if totalsize < 1<<30 {
atomic.StoreInt64(&cache.defaultMaxSize, 1<<30)
diff --git a/sdk/go/arvadosclient/arvadosclient.go b/sdk/go/arvadosclient/arvadosclient.go
index 461320eca9..d0ebdc1b01 100644
--- a/sdk/go/arvadosclient/arvadosclient.go
+++ b/sdk/go/arvadosclient/arvadosclient.go
@@ -105,6 +105,11 @@ type ArvadosClient struct {
// available services.
KeepServiceURIs []string
+ // Maximum disk cache size in bytes or percent of total
+ // filesystem size. If zero, use default, currently 10% of
+ // filesystem size.
+ DiskCacheSize arvados.ByteSizeOrPercent
+
// Discovery document
DiscoveryDoc Dict
@@ -144,6 +149,7 @@ func New(c *arvados.Client) (*ArvadosClient, error) {
Client: hc,
Retries: 2,
KeepServiceURIs: c.KeepServiceURIs,
+ DiskCacheSize: c.DiskCacheSize,
lastClosedIdlesAt: time.Now(),
}
diff --git a/sdk/go/keepclient/keepclient.go b/sdk/go/keepclient/keepclient.go
index 08fa455ff4..b03362ee48 100644
--- a/sdk/go/keepclient/keepclient.go
+++ b/sdk/go/keepclient/keepclient.go
@@ -113,6 +113,7 @@ type KeepClient struct {
RequestID string
StorageClasses []string
DefaultStorageClasses []string // Set by cluster's exported config
+ DiskCacheSize arvados.ByteSizeOrPercent
// set to 1 if all writable services are of disk type, otherwise 0
replicasPerService int
@@ -137,7 +138,6 @@ func (kc *KeepClient) Clone() *KeepClient {
gatewayRoots: kc.gatewayRoots,
HTTPClient: kc.HTTPClient,
Retries: kc.Retries,
- BlockCache: kc.BlockCache,
RequestID: kc.RequestID,
StorageClasses: kc.StorageClasses,
DefaultStorageClasses: kc.DefaultStorageClasses,
@@ -387,6 +387,7 @@ func (kc *KeepClient) upstreamGateway() arvados.KeepGateway {
}
kc.gatewayStack = &arvados.DiskCache{
Dir: cachedir,
+ MaxSize: kc.DiskCacheSize,
KeepGateway: &keepViaHTTP{kc},
}
return kc.gatewayStack
diff --git a/services/keep-web/cache.go b/services/keep-web/cache.go
index df5705ed32..d443bc0829 100644
--- a/services/keep-web/cache.go
+++ b/services/keep-web/cache.go
@@ -303,7 +303,7 @@ func (c *cache) pruneSessions() {
// Mark more sessions for deletion until reaching desired
// memory size limit, starting with the oldest entries.
for i, snap := range snaps {
- if size <= c.cluster.Collections.WebDAVCache.MaxCollectionBytes {
+ if size <= int64(c.cluster.Collections.WebDAVCache.MaxCollectionBytes) {
break
}
if snap.prune {
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 123c4fe34d..12c2839f8c 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -27,15 +27,13 @@ import (
"git.arvados.org/arvados.git/sdk/go/auth"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
"git.arvados.org/arvados.git/sdk/go/httpserver"
- "git.arvados.org/arvados.git/sdk/go/keepclient"
"github.com/sirupsen/logrus"
"golang.org/x/net/webdav"
)
type handler struct {
- Cache cache
- Cluster *arvados.Cluster
- setupOnce sync.Once
+ Cache cache
+ Cluster *arvados.Cluster
lockMtx sync.Mutex
lock map[string]*sync.RWMutex
@@ -60,10 +58,6 @@ func parseCollectionIDFromURL(s string) string {
return ""
}
-func (h *handler) setup() {
- keepclient.DefaultBlockCache.MaxBlocks = h.Cluster.Collections.WebDAVCache.MaxBlockEntries
-}
-
func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
json.NewEncoder(w).Encode(struct{ Version string }{cmd.Version.String()})
}
@@ -179,8 +173,6 @@ func (h *handler) Done() <-chan struct{} {
// ServeHTTP implements http.Handler.
func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
- h.setupOnce.Do(h.setup)
-
if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
r.URL.Scheme = xfp
}
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index 5a12e26e9d..85c7801cd4 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -1469,20 +1469,14 @@ func (s *IntegrationSuite) TestFileContentType(c *check.C) {
}
}
-func (s *IntegrationSuite) TestKeepClientBlockCache(c *check.C) {
- s.handler.Cluster.Collections.WebDAVCache.MaxBlockEntries = 42
- c.Check(keepclient.DefaultBlockCache.MaxBlocks, check.Not(check.Equals), 42)
- u := mustParseURL("http://keep-web.example/c=" + arvadostest.FooCollection + "/t=" + arvadostest.ActiveToken + "/foo")
- req := &http.Request{
- Method: "GET",
- Host: u.Host,
- URL: u,
- RequestURI: u.RequestURI(),
- }
+func (s *IntegrationSuite) TestCacheSize(c *check.C) {
+ req, err := http.NewRequest("GET", "http://"+arvadostest.FooCollection+".example.com/foo", nil)
+ req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveTokenV2)
+ c.Assert(err, check.IsNil)
resp := httptest.NewRecorder()
s.handler.ServeHTTP(resp, req)
- c.Check(resp.Code, check.Equals, http.StatusOK)
- c.Check(keepclient.DefaultBlockCache.MaxBlocks, check.Equals, 42)
+ c.Assert(resp.Code, check.Equals, http.StatusOK)
+ c.Check(s.handler.Cache.sessions[arvadostest.ActiveTokenV2].client.DiskCacheSize.Percent(), check.Equals, int64(10))
}
// Writing to a collection shouldn't affect its entry in the
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list