[arvados] created: 2.7.0-5657-gd1f5285cdf
From: git repository hosting <git at public.arvados.org>
Date: Tue Dec 26 21:57:16 UTC 2023
at d1f5285cdf294074bb1835b502f0dc2f638f7399 (commit)
commit d1f5285cdf294074bb1835b502f0dc2f638f7399
Author: Tom Clegg <tom at curii.com>
Date: Tue Dec 26 16:52:11 2023 -0500
20318: Ensure empty cache for test case.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index 85c7801cd4..eefab36e69 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -209,6 +209,10 @@ func (s *UnitSuite) TestWebdavPrefixAndSource(c *check.C) {
}
func (s *UnitSuite) TestEmptyResponse(c *check.C) {
+ // Ensure we start with an empty cache
+ defer os.Setenv("HOME", os.Getenv("HOME"))
+ os.Setenv("HOME", c.MkDir())
+
for _, trial := range []struct {
dataExists bool
sendIMSHeader bool
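The HOME swap above works because the disk cache directory is derived from the home directory (something like ~/.cache/arvados/keep; the exact path is an assumption here), so pointing HOME at a fresh temp dir guarantees a cold cache, and the deferred Setenv restores the real value afterwards. A minimal sketch of the same isolation pattern using the standard testing package:

    package example

    import "testing"

    // withEmptyCache runs fn with HOME pointed at an empty temp dir, so
    // any $HOME-derived cache starts cold. t.Setenv restores the
    // original value automatically when the test finishes.
    func withEmptyCache(t *testing.T, fn func()) {
        t.Setenv("HOME", t.TempDir())
        fn()
    }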
commit 199572df1070802cbb06059e53b9a096a702e3ec
Author: Tom Clegg <tom at curii.com>
Date: Tue Dec 26 15:44:12 2023 -0500
20318: Add -cache-size flag to arvados-client mount.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/mount/command.go b/lib/mount/command.go
index 666f2cf4ac..7ab518faee 100644
--- a/lib/mount/command.go
+++ b/lib/mount/command.go
@@ -19,6 +19,7 @@ import (
"git.arvados.org/arvados.git/sdk/go/arvadosclient"
"git.arvados.org/arvados.git/sdk/go/keepclient"
"github.com/arvados/cgofuse/fuse"
+ "github.com/ghodss/yaml"
)
var Command = &mountCommand{}
@@ -43,6 +44,7 @@ func (c *mountCommand) RunCommand(prog string, args []string, stdin io.Reader, s
flags := flag.NewFlagSet(prog, flag.ContinueOnError)
ro := flags.Bool("ro", false, "read-only")
experimental := flags.Bool("experimental", false, "acknowledge this is an experimental command, and should not be used in production (required)")
+ cacheSizeStr := flags.String("cache-size", "0", "cache size as percent of home filesystem size (\"5%\") or size (\"10GiB\") or 0 for automatic")
pprof := flags.String("pprof", "", "serve Go profile data at `[addr]:port`")
if ok, code := cmd.ParseFlags(flags, prog, args, "[FUSE mount options]", stderr); !ok {
return code
@@ -58,6 +60,10 @@ func (c *mountCommand) RunCommand(prog string, args []string, stdin io.Reader, s
}
client := arvados.NewClientFromEnv()
+ if err := yaml.Unmarshal([]byte(*cacheSizeStr), &client.DiskCacheSize); err != nil {
+ logger.Printf("error parsing -cache-size argument: %s", err)
+ return 2
+ }
ac, err := arvadosclient.New(client)
if err != nil {
logger.Print(err)
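Example invocations of the new flag (mount point path hypothetical; -experimental is required, as the flag definition above notes):

    arvados-client mount -experimental -cache-size 10GiB /mnt/keep
    arvados-client mount -experimental -cache-size 5% /mnt/keep
    arvados-client mount -experimental /mnt/keep    # 0 = automatic, currently 10% of filesystem size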
commit ab0330ed429bfd576b58f37ab8ea4c3c5dd20871
Author: Tom Clegg <tom at curii.com>
Date: Tue Dec 26 15:43:20 2023 -0500
20318: Fix shared var in test.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache_test.go b/sdk/go/arvados/keep_cache_test.go
index ffca531cd7..776d9bb652 100644
--- a/sdk/go/arvados/keep_cache_test.go
+++ b/sdk/go/arvados/keep_cache_test.go
@@ -418,6 +418,7 @@ func (s *fileOpsSuite) BenchmarkOpenClose(c *check.C) {
os.WriteFile(fnm, make([]byte, 64000000), 0700)
var wg sync.WaitGroup
for i := 0; i < c.N; i++ {
+ i := i
wg.Add(1)
go func() {
defer wg.Done()
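The one-line fix above addresses the classic Go loop-variable capture: before Go 1.22, a for loop reuses a single i across iterations, so goroutines started inside the loop can all observe its final value; the i := i shadow gives each goroutine its own copy. A self-contained illustration (a sketch, not the benchmark itself):

    package main

    import (
        "fmt"
        "sync"
    )

    func main() {
        var wg sync.WaitGroup
        for i := 0; i < 3; i++ {
            i := i // shadow the loop variable so each goroutine gets its own copy
            wg.Add(1)
            go func() {
                defer wg.Done()
                fmt.Println(i) // prints 0, 1, 2 in some order -- not 3, 3, 3
            }()
        }
        wg.Wait()
    }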
commit fd608866afd57f3f407d6103f770fad8b58eb564
Author: Tom Clegg <tom at curii.com>
Date: Tue Dec 26 15:16:28 2023 -0500
20318: Use one tidying goroutine and filehandle pool per cache dir.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache.go b/sdk/go/arvados/keep_cache.go
index a657153876..1d12f4cdc6 100644
--- a/sdk/go/arvados/keep_cache.go
+++ b/sdk/go/arvados/keep_cache.go
@@ -40,6 +40,26 @@ type DiskCache struct {
MaxSize ByteSizeOrPercent
Logger logrus.FieldLogger
+ *sharedCache
+ setupOnce sync.Once
+}
+
+var (
+ sharedCachesLock sync.Mutex
+ sharedCaches = map[string]*sharedCache{}
+)
+
+// sharedCache has fields that coordinate the cache usage in a single
+// cache directory; it can be shared by multiple DiskCaches.
+//
+// This serves to share a single pool of held-open filehandles, a
+// single tidying goroutine, etc., even when the program (like
+// keep-web) uses multiple KeepGateway stacks that use different auth
+// tokens, etc.
+type sharedCache struct {
+ dir string
+ maxSize ByteSizeOrPercent
+
tidying int32 // see tidy()
tidyHoldUntil time.Time
defaultMaxSize int64
@@ -82,12 +102,22 @@ const (
tidyHoldDuration = 5 * time.Minute // time to re-check cache size even if estimated size is below max
)
+func (cache *DiskCache) setup() {
+ sharedCachesLock.Lock()
+ defer sharedCachesLock.Unlock()
+ dir := cache.Dir
+ if sharedCaches[dir] == nil {
+ sharedCaches[dir] = &sharedCache{dir: dir, maxSize: cache.MaxSize}
+ }
+ cache.sharedCache = sharedCaches[dir]
+}
+
func (cache *DiskCache) cacheFile(locator string) string {
hash := locator
if i := strings.Index(hash, "+"); i > 0 {
hash = hash[:i]
}
- return filepath.Join(cache.Dir, hash[:3], hash+cacheFileSuffix)
+ return filepath.Join(cache.dir, hash[:3], hash+cacheFileSuffix)
}
// Open a cache file, creating the parent dir if necessary.
@@ -126,8 +156,9 @@ func (cache *DiskCache) debugf(format string, args ...interface{}) {
// BlockWrite writes through to the wrapped KeepGateway, and (if
// possible) retains a copy of the written block in the cache.
func (cache *DiskCache) BlockWrite(ctx context.Context, opts BlockWriteOptions) (BlockWriteResponse, error) {
+ cache.setupOnce.Do(cache.setup)
unique := fmt.Sprintf("%x.%p%s", os.Getpid(), &opts, tmpFileSuffix)
- tmpfilename := filepath.Join(cache.Dir, "tmp", unique)
+ tmpfilename := filepath.Join(cache.dir, "tmp", unique)
tmpfile, err := cache.openFile(tmpfilename, os.O_CREATE|os.O_EXCL|os.O_RDWR)
if err != nil {
cache.debugf("BlockWrite: open(%s) failed: %s", tmpfilename, err)
@@ -235,6 +266,7 @@ func (fw funcwriter) Write(p []byte) (int, error) {
// cache. The remainder of the block may continue to be copied into
// the cache in the background.
func (cache *DiskCache) ReadAt(locator string, dst []byte, offset int) (int, error) {
+ cache.setupOnce.Do(cache.setup)
cachefilename := cache.cacheFile(locator)
if n, err := cache.quickReadAt(cachefilename, dst, offset); err == nil {
return n, err
@@ -382,11 +414,11 @@ func (cache *DiskCache) quickReadAt(cachefilename string, dst []byte, offset int
lim := syscall.Rlimit{}
err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &lim)
if err != nil {
- cache.heldopenMax = 256
- } else if lim.Cur > 40000 {
+ cache.heldopenMax = 100
+ } else if lim.Cur > 400000 {
cache.heldopenMax = 10000
} else {
- cache.heldopenMax = int(lim.Cur / 4)
+ cache.heldopenMax = int(lim.Cur / 40)
}
}
heldopen := cache.heldopen[cachefilename]
@@ -483,6 +515,7 @@ func (cache *DiskCache) quickReadAt(cachefilename string, dst []byte, offset int
// BlockRead reads an entire block using a 128 KiB buffer.
func (cache *DiskCache) BlockRead(ctx context.Context, opts BlockReadOptions) (int, error) {
+ cache.setupOnce.Do(cache.setup)
i := strings.Index(opts.Locator, "+")
if i < 0 || i >= len(opts.Locator) {
return 0, errors.New("invalid block locator: no size hint")
@@ -531,7 +564,7 @@ func (cache *DiskCache) gotidy() {
return
}
// Skip if sizeEstimated is based on an actual measurement and
- // is below MaxSize, and we haven't reached the "recheck
+ // is below maxSize, and we haven't reached the "recheck
// anyway" time threshold.
if cache.sizeMeasured > 0 &&
atomic.LoadInt64(&cache.sizeEstimated) < atomic.LoadInt64(&cache.defaultMaxSize) &&
@@ -548,19 +581,19 @@ func (cache *DiskCache) gotidy() {
// Delete cache files as needed to control disk usage.
func (cache *DiskCache) tidy() {
- maxsize := int64(cache.MaxSize.ByteSize())
+ maxsize := int64(cache.maxSize.ByteSize())
if maxsize < 1 {
maxsize = atomic.LoadInt64(&cache.defaultMaxSize)
if maxsize == 0 {
// defaultMaxSize not yet computed. Use 10% of
// filesystem capacity (or different
- // percentage if indicated by cache.MaxSize)
- pct := cache.MaxSize.Percent()
+ // percentage if indicated by cache.maxSize)
+ pct := cache.maxSize.Percent()
if pct == 0 {
pct = 10
}
var stat unix.Statfs_t
- if nil == unix.Statfs(cache.Dir, &stat) {
+ if nil == unix.Statfs(cache.dir, &stat) {
maxsize = int64(stat.Bavail) * stat.Bsize * pct / 100
atomic.StoreInt64(&cache.defaultMaxSize, maxsize)
} else {
@@ -572,7 +605,7 @@ func (cache *DiskCache) tidy() {
}
// Bail if a tidy goroutine is running in a different process.
- lockfile, err := cache.openFile(filepath.Join(cache.Dir, "tmp", "tidy.lock"), os.O_CREATE|os.O_WRONLY)
+ lockfile, err := cache.openFile(filepath.Join(cache.dir, "tmp", "tidy.lock"), os.O_CREATE|os.O_WRONLY)
if err != nil {
return
}
@@ -589,7 +622,7 @@ func (cache *DiskCache) tidy() {
}
var ents []entT
var totalsize int64
- filepath.Walk(cache.Dir, func(path string, info fs.FileInfo, err error) error {
+ filepath.Walk(cache.dir, func(path string, info fs.FileInfo, err error) error {
if err != nil {
cache.debugf("tidy: skipping dir %s: %s", path, err)
return nil
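The refactor above is a common Go shape: per-key shared state kept in a package-level map behind a mutex, attached lazily to each lightweight wrapper via sync.Once. A distilled sketch of the pattern (names simplified; not the Arvados code itself):

    package main

    import (
        "fmt"
        "sync"
    )

    // shared holds whatever must exist exactly once per directory:
    // the held-open filehandle pool, tidying state, and so on.
    type shared struct{ dir string }

    var (
        sharedLock  sync.Mutex
        sharedByDir = map[string]*shared{}
    )

    // Cache is cheap to construct; every Cache with the same Dir ends
    // up pointing at the same *shared.
    type Cache struct {
        Dir string
        *shared
        setupOnce sync.Once
    }

    func (c *Cache) setup() {
        sharedLock.Lock()
        defer sharedLock.Unlock()
        if sharedByDir[c.Dir] == nil {
            sharedByDir[c.Dir] = &shared{dir: c.Dir}
        }
        c.shared = sharedByDir[c.Dir]
    }

    func main() {
        a := &Cache{Dir: "/tmp/keep"}
        b := &Cache{Dir: "/tmp/keep"}
        a.setupOnce.Do(a.setup)
        b.setupOnce.Do(b.setup)
        fmt.Println(a.shared == b.shared) // true: one shared pool per dir
    }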
commit 907db4d46fe506ad3dbc88a3d57a99041595c28c
Author: Tom Clegg <tom at curii.com>
Date: Tue Dec 26 12:25:34 2023 -0500
20318: Fix write using Reader and unspecified Hash.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/keepclient/keepclient_test.go b/sdk/go/keepclient/keepclient_test.go
index a6e0a11d51..ad5d12b505 100644
--- a/sdk/go/keepclient/keepclient_test.go
+++ b/sdk/go/keepclient/keepclient_test.go
@@ -1101,19 +1101,20 @@ func (s *ServerRequiredSuite) TestPutGetHead(c *C) {
}
{
hash2, replicas, err := kc.PutB(content)
+ c.Check(err, IsNil)
c.Check(hash2, Matches, fmt.Sprintf(`%s\+%d\b.*`, hash, len(content)))
c.Check(replicas, Equals, 2)
- c.Check(err, Equals, nil)
}
{
r, n, url2, err := kc.Get(hash)
c.Check(err, Equals, nil)
c.Check(n, Equals, int64(len(content)))
c.Check(url2, Matches, fmt.Sprintf("http://localhost:\\d+/%s", hash))
-
- readContent, err2 := ioutil.ReadAll(r)
- c.Check(err2, Equals, nil)
- c.Check(readContent, DeepEquals, content)
+ if c.Check(r, NotNil) {
+ readContent, err2 := ioutil.ReadAll(r)
+ c.Check(err2, Equals, nil)
+ c.Check(readContent, DeepEquals, content)
+ }
}
{
n, url2, err := kc.Ask(hash)
diff --git a/sdk/go/keepclient/support.go b/sdk/go/keepclient/support.go
index c4144bf871..6acaf64baa 100644
--- a/sdk/go/keepclient/support.go
+++ b/sdk/go/keepclient/support.go
@@ -149,8 +149,12 @@ func (kc *KeepClient) httpBlockWrite(ctx context.Context, req arvados.BlockWrite
getReader = func() io.Reader { return bytes.NewReader(req.Data[:req.DataSize]) }
} else {
buf := asyncbuf.NewBuffer(make([]byte, 0, req.DataSize))
+ reader := req.Reader
+ if req.Hash != "" {
+ reader = HashCheckingReader{req.Reader, md5.New(), req.Hash}
+ }
go func() {
- _, err := io.Copy(buf, HashCheckingReader{req.Reader, md5.New(), req.Hash})
+ _, err := io.Copy(buf, reader)
buf.CloseWithError(err)
}()
getReader = buf.NewReader
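The bug being fixed: httpBlockWrite unconditionally wrapped req.Reader in a HashCheckingReader, so a write that supplied a Reader but left Hash empty compared the computed MD5 against "" and always failed. A sketch of the reader's behavior (HashCheckingReader is the exported keepclient type used above; exact error text may differ):

    package main

    import (
        "crypto/md5"
        "fmt"
        "io"
        "strings"

        "git.arvados.org/arvados.git/sdk/go/keepclient"
    )

    func main() {
        data := "foo"
        sum := fmt.Sprintf("%x", md5.Sum([]byte(data)))

        // Correct hash supplied: the copy succeeds.
        ok := keepclient.HashCheckingReader{strings.NewReader(data), md5.New(), sum}
        _, err := io.Copy(io.Discard, ok)
        fmt.Println(err) // <nil>

        // Empty hash -- the pre-fix behavior whenever req.Hash == "":
        // the final checksum comparison fails despite intact data.
        bad := keepclient.HashCheckingReader{strings.NewReader(data), md5.New(), ""}
        _, err = io.Copy(io.Discard, bad)
        fmt.Println(err) // checksum mismatch error
    }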
commit ac9b29f30413eaa3fbcbead1b094dd75dc6092a8
Author: Tom Clegg <tom at curii.com>
Date: Fri Dec 22 11:11:51 2023 -0500
20318: Add config entry for keep-web cache size.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/config/config.default.yml b/lib/config/config.default.yml
index 05bc1309cd..872501f915 100644
--- a/lib/config/config.default.yml
+++ b/lib/config/config.default.yml
@@ -721,16 +721,18 @@ Clusters:
# Time to cache manifests, permission checks, and sessions.
TTL: 300s
- # Block cache entries. Each block consumes up to 64 MiB RAM.
- MaxBlockEntries: 20
+ # Maximum amount of data cached in /var/cache/arvados/keep.
+ # Can be given as a percentage ("10%") or a number of bytes
+ # ("10 GiB")
+ DiskCacheSize: 10%
# Approximate memory limit (in bytes) for session cache.
#
# Note this applies to the in-memory representation of
# projects and collections -- metadata, block locators,
- # filenames, etc. -- excluding cached file content, which is
- # limited by MaxBlockEntries.
- MaxCollectionBytes: 100000000
+ # filenames, etc. -- not the file data itself (see
+ # DiskCacheSize).
+ MaxCollectionBytes: 100 MB
# Persistent sessions.
MaxSessions: 100
diff --git a/lib/config/deprecated.go b/lib/config/deprecated.go
index d5c09d6706..d518b3414a 100644
--- a/lib/config/deprecated.go
+++ b/lib/config/deprecated.go
@@ -495,7 +495,7 @@ func (ldr *Loader) loadOldKeepWebConfig(cfg *arvados.Config) error {
cluster.Collections.WebDAVCache.TTL = *oc.Cache.TTL
}
if oc.Cache.MaxCollectionBytes != nil {
- cluster.Collections.WebDAVCache.MaxCollectionBytes = *oc.Cache.MaxCollectionBytes
+ cluster.Collections.WebDAVCache.MaxCollectionBytes = arvados.ByteSize(*oc.Cache.MaxCollectionBytes)
}
if oc.AnonymousTokens != nil {
if len(*oc.AnonymousTokens) > 0 {
diff --git a/lib/config/deprecated_test.go b/lib/config/deprecated_test.go
index f9b1d1661b..e06a1f231d 100644
--- a/lib/config/deprecated_test.go
+++ b/lib/config/deprecated_test.go
@@ -199,7 +199,7 @@ func (s *LoadSuite) TestLegacyKeepWebConfig(c *check.C) {
c.Check(cluster.SystemRootToken, check.Equals, "abcdefg")
c.Check(cluster.Collections.WebDAVCache.TTL, check.Equals, arvados.Duration(60*time.Second))
- c.Check(cluster.Collections.WebDAVCache.MaxCollectionBytes, check.Equals, int64(1234567890))
+ c.Check(cluster.Collections.WebDAVCache.MaxCollectionBytes, check.Equals, arvados.ByteSize(1234567890))
c.Check(cluster.Services.WebDAVDownload.ExternalURL, check.Equals, arvados.URL{Host: "download.example.com", Path: "/"})
c.Check(cluster.Services.WebDAVDownload.InternalURLs[arvados.URL{Host: ":80"}], check.NotNil)
diff --git a/sdk/go/arvados/byte_size.go b/sdk/go/arvados/byte_size.go
index 08cc83e126..7cc2c69781 100644
--- a/sdk/go/arvados/byte_size.go
+++ b/sdk/go/arvados/byte_size.go
@@ -8,11 +8,16 @@ import (
"encoding/json"
"fmt"
"math"
+ "strconv"
"strings"
)
type ByteSize int64
+// ByteSizeOrPercent indicates either a number of bytes or a
+// percentage from 1 to 100.
+type ByteSizeOrPercent ByteSize
+
var prefixValue = map[string]int64{
"": 1,
"K": 1000,
@@ -89,3 +94,54 @@ func (n *ByteSize) UnmarshalJSON(data []byte) error {
return fmt.Errorf("bug: json.Number for %q is not int64 or float64: %s", s, err)
}
}
+
+func (n ByteSizeOrPercent) MarshalJSON() ([]byte, error) {
+ if n < 0 && n >= -100 {
+ return []byte(fmt.Sprintf("\"%d%%\"", -n)), nil
+ } else {
+ return json.Marshal(int64(n))
+ }
+}
+
+func (n *ByteSizeOrPercent) UnmarshalJSON(data []byte) error {
+ if len(data) == 0 || data[0] != '"' {
+ return (*ByteSize)(n).UnmarshalJSON(data)
+ }
+ var s string
+ err := json.Unmarshal(data, &s)
+ if err != nil {
+ return err
+ }
+ if s := strings.TrimSpace(s); len(s) > 0 && s[len(s)-1] == '%' {
+ pct, err := strconv.ParseInt(strings.TrimSpace(s[:len(s)-1]), 10, 64)
+ if err != nil {
+ return err
+ }
+ if pct < 0 || pct > 100 {
+ return fmt.Errorf("invalid value %q (percentage must be between 0 and 100)", s)
+ }
+ *n = ByteSizeOrPercent(-pct)
+ return nil
+ }
+ return (*ByteSize)(n).UnmarshalJSON(data)
+}
+
+// ByteSize returns the absolute byte size specified by n, or 0 if n
+// specifies a percent.
+func (n ByteSizeOrPercent) ByteSize() ByteSize {
+ if n >= -100 && n < 0 {
+ return 0
+ } else {
+ return ByteSize(n)
+ }
+}
+
+// Percent returns the percentage specified by n, or 0 if n specifies
+// an absolute byte size.
+func (n ByteSizeOrPercent) Percent() int64 {
+ if n >= -100 && n < 0 {
+ return int64(-n)
+ } else {
+ return 0
+ }
+}
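The encoding above packs both cases into a single int64: absolute sizes are stored as-is (>= 0), percentages as negative values in [-100, -1], which the ByteSize() and Percent() accessors decode. A quick round-trip demonstration (values match the tests below; errors elided for brevity):

    package main

    import (
        "fmt"

        "git.arvados.org/arvados.git/sdk/go/arvados"
        "github.com/ghodss/yaml"
    )

    func main() {
        var n arvados.ByteSizeOrPercent

        yaml.Unmarshal([]byte("5%"), &n)
        fmt.Println(int64(n), n.Percent(), n.ByteSize()) // -5 5 0

        yaml.Unmarshal([]byte("8 GB"), &n)
        fmt.Println(int64(n), n.Percent(), n.ByteSize()) // 8000000000 0 8000000000
    }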
diff --git a/sdk/go/arvados/byte_size_test.go b/sdk/go/arvados/byte_size_test.go
index 7c4aff2072..e5fb10ebdb 100644
--- a/sdk/go/arvados/byte_size_test.go
+++ b/sdk/go/arvados/byte_size_test.go
@@ -64,7 +64,54 @@ func (s *ByteSizeSuite) TestUnmarshal(c *check.C) {
} {
var n ByteSize
err := yaml.Unmarshal([]byte(testcase+"\n"), &n)
- c.Logf("%v => error: %v", n, err)
+ c.Logf("%s => error: %v", testcase, err)
+ c.Check(err, check.NotNil)
+ }
+}
+
+func (s *ByteSizeSuite) TestMarshalByteSizeOrPercent(c *check.C) {
+ for _, testcase := range []struct {
+ in ByteSizeOrPercent
+ out string
+ }{
+ {0, "0"},
+ {-1, "1%"},
+ {-100, "100%"},
+ {8, "8"},
+ } {
+ out, err := yaml.Marshal(&testcase.in)
+ c.Check(err, check.IsNil)
+ c.Check(string(out), check.Equals, testcase.out+"\n")
+ }
+}
+
+func (s *ByteSizeSuite) TestUnmarshalByteSizeOrPercent(c *check.C) {
+ for _, testcase := range []struct {
+ in string
+ out int64
+ }{
+ {"0", 0},
+ {"100", 100},
+ {"0%", 0},
+ {"1%", -1},
+ {"100%", -100},
+ {"8 GB", 8000000000},
+ } {
+ var n ByteSizeOrPercent
+ err := yaml.Unmarshal([]byte(testcase.in+"\n"), &n)
+ c.Logf("%v => %v: %v", testcase.in, testcase.out, n)
+ c.Check(err, check.IsNil)
+ c.Check(int64(n), check.Equals, testcase.out)
+ }
+ for _, testcase := range []string{
+ "1000%", "101%", "-1%",
+ "%", "-%", "%%", "%1",
+ "400000 EB",
+ "4.11e4 EB",
+ } {
+ var n ByteSizeOrPercent
+ err := yaml.Unmarshal([]byte(testcase+"\n"), &n)
+ c.Logf("%s => error: %v", testcase, err)
c.Check(err, check.NotNil)
}
}
diff --git a/sdk/go/arvados/client.go b/sdk/go/arvados/client.go
index e3c1432660..991de1caa9 100644
--- a/sdk/go/arvados/client.go
+++ b/sdk/go/arvados/client.go
@@ -77,6 +77,11 @@ type Client struct {
// context deadline to establish a maximum request time.
Timeout time.Duration
+ // Maximum disk cache size in bytes or percent of total
+ // filesystem size. If zero, use default, currently 10% of
+ // filesystem size.
+ DiskCacheSize ByteSizeOrPercent
+
dd *DiscoveryDocument
defaultRequestID string
@@ -154,6 +159,7 @@ func NewClientFromConfig(cluster *Cluster) (*Client, error) {
APIHost: ctrlURL.Host,
Insecure: cluster.TLS.Insecure,
Timeout: 5 * time.Minute,
+ DiskCacheSize: cluster.Collections.WebDAVCache.DiskCacheSize,
requestLimiter: &requestLimiter{maxlimit: int64(cluster.API.MaxConcurrentRequests / 4)},
}, nil
}
diff --git a/sdk/go/arvados/config.go b/sdk/go/arvados/config.go
index 6301ed047a..acc091a90f 100644
--- a/sdk/go/arvados/config.go
+++ b/sdk/go/arvados/config.go
@@ -63,8 +63,8 @@ func (sc *Config) GetCluster(clusterID string) (*Cluster, error) {
type WebDAVCacheConfig struct {
TTL Duration
- MaxBlockEntries int
- MaxCollectionBytes int64
+ DiskCacheSize ByteSizeOrPercent
+ MaxCollectionBytes ByteSize
MaxSessions int
}
diff --git a/sdk/go/arvados/keep_cache.go b/sdk/go/arvados/keep_cache.go
index af80daa2e0..a657153876 100644
--- a/sdk/go/arvados/keep_cache.go
+++ b/sdk/go/arvados/keep_cache.go
@@ -37,7 +37,7 @@ type KeepGateway interface {
type DiskCache struct {
KeepGateway
Dir string
- MaxSize int64
+ MaxSize ByteSizeOrPercent
Logger logrus.FieldLogger
tidying int32 // see tidy()
@@ -534,7 +534,7 @@ func (cache *DiskCache) gotidy() {
// is below MaxSize, and we haven't reached the "recheck
// anyway" time threshold.
if cache.sizeMeasured > 0 &&
- atomic.LoadInt64(&cache.sizeEstimated) < cache.MaxSize &&
+ atomic.LoadInt64(&cache.sizeEstimated) < atomic.LoadInt64(&cache.defaultMaxSize) &&
time.Now().Before(cache.tidyHoldUntil) {
atomic.AddInt32(&cache.tidying, -1)
return
@@ -548,14 +548,26 @@ func (cache *DiskCache) gotidy() {
// Delete cache files as needed to control disk usage.
func (cache *DiskCache) tidy() {
- maxsize := cache.MaxSize
+ maxsize := int64(cache.MaxSize.ByteSize())
if maxsize < 1 {
- if maxsize = atomic.LoadInt64(&cache.defaultMaxSize); maxsize == 0 {
+ maxsize = atomic.LoadInt64(&cache.defaultMaxSize)
+ if maxsize == 0 {
+ // defaultMaxSize not yet computed. Use 10% of
+ // filesystem capacity (or different
+ // percentage if indicated by cache.MaxSize)
+ pct := cache.MaxSize.Percent()
+ if pct == 0 {
+ pct = 10
+ }
var stat unix.Statfs_t
if nil == unix.Statfs(cache.Dir, &stat) {
- maxsize = int64(stat.Bavail) * stat.Bsize / 10
+ maxsize = int64(stat.Bavail) * stat.Bsize * pct / 100
+ atomic.StoreInt64(&cache.defaultMaxSize, maxsize)
+ } else {
+ // In this case we will set
+ // defaultMaxSize below after
+ // measuring current usage.
}
- atomic.StoreInt64(&cache.defaultMaxSize, maxsize)
}
}
@@ -611,7 +623,8 @@ func (cache *DiskCache) tidy() {
// If MaxSize wasn't specified and we failed to come up with a
// defaultSize above, use the larger of {current cache size, 1
- // GiB} as the defaultSize for subsequent tidy() operations.
+ // GiB} as the defaultMaxSize for subsequent tidy()
+ // operations.
if maxsize == 0 {
if totalsize < 1<<30 {
atomic.StoreInt64(&cache.defaultMaxSize, 1<<30)
diff --git a/sdk/go/arvados/keep_cache_test.go b/sdk/go/arvados/keep_cache_test.go
index e4d1790cac..ffca531cd7 100644
--- a/sdk/go/arvados/keep_cache_test.go
+++ b/sdk/go/arvados/keep_cache_test.go
@@ -201,7 +201,7 @@ func (s *keepCacheSuite) testConcurrentReaders(c *check.C, cannotRefresh, mangle
backend := &keepGatewayMemoryBacked{}
cache := DiskCache{
KeepGateway: backend,
- MaxSize: int64(blksize),
+ MaxSize: ByteSizeOrPercent(blksize),
Dir: c.MkDir(),
Logger: ctxlog.TestLogger(c),
}
@@ -286,7 +286,7 @@ func (s *keepCacheSuite) TestStreaming(c *check.C) {
}
cache := DiskCache{
KeepGateway: backend,
- MaxSize: int64(blksize),
+ MaxSize: ByteSizeOrPercent(blksize),
Dir: c.MkDir(),
Logger: ctxlog.TestLogger(c),
}
@@ -354,7 +354,7 @@ func (s *keepCacheBenchSuite) SetUpTest(c *check.C) {
s.backend = &keepGatewayMemoryBacked{}
s.cache = &DiskCache{
KeepGateway: s.backend,
- MaxSize: int64(s.blksize),
+ MaxSize: ByteSizeOrPercent(s.blksize),
Dir: c.MkDir(),
Logger: ctxlog.TestLogger(c),
}
diff --git a/sdk/go/arvadosclient/arvadosclient.go b/sdk/go/arvadosclient/arvadosclient.go
index 461320eca9..d0ebdc1b01 100644
--- a/sdk/go/arvadosclient/arvadosclient.go
+++ b/sdk/go/arvadosclient/arvadosclient.go
@@ -105,6 +105,11 @@ type ArvadosClient struct {
// available services.
KeepServiceURIs []string
+ // Maximum disk cache size in bytes or percent of total
+ // filesystem size. If zero, use default, currently 10% of
+ // filesystem size.
+ DiskCacheSize arvados.ByteSizeOrPercent
+
// Discovery document
DiscoveryDoc Dict
@@ -144,6 +149,7 @@ func New(c *arvados.Client) (*ArvadosClient, error) {
Client: hc,
Retries: 2,
KeepServiceURIs: c.KeepServiceURIs,
+ DiskCacheSize: c.DiskCacheSize,
lastClosedIdlesAt: time.Now(),
}
diff --git a/sdk/go/keepclient/keepclient.go b/sdk/go/keepclient/keepclient.go
index 08fa455ff4..b03362ee48 100644
--- a/sdk/go/keepclient/keepclient.go
+++ b/sdk/go/keepclient/keepclient.go
@@ -113,6 +113,7 @@ type KeepClient struct {
RequestID string
StorageClasses []string
DefaultStorageClasses []string // Set by cluster's exported config
+ DiskCacheSize arvados.ByteSizeOrPercent
// set to 1 if all writable services are of disk type, otherwise 0
replicasPerService int
@@ -137,7 +138,6 @@ func (kc *KeepClient) Clone() *KeepClient {
gatewayRoots: kc.gatewayRoots,
HTTPClient: kc.HTTPClient,
Retries: kc.Retries,
- BlockCache: kc.BlockCache,
RequestID: kc.RequestID,
StorageClasses: kc.StorageClasses,
DefaultStorageClasses: kc.DefaultStorageClasses,
@@ -387,6 +387,7 @@ func (kc *KeepClient) upstreamGateway() arvados.KeepGateway {
}
kc.gatewayStack = &arvados.DiskCache{
Dir: cachedir,
+ MaxSize: kc.DiskCacheSize,
KeepGateway: &keepViaHTTP{kc},
}
return kc.gatewayStack
diff --git a/services/keep-web/cache.go b/services/keep-web/cache.go
index df5705ed32..d443bc0829 100644
--- a/services/keep-web/cache.go
+++ b/services/keep-web/cache.go
@@ -303,7 +303,7 @@ func (c *cache) pruneSessions() {
// Mark more sessions for deletion until reaching desired
// memory size limit, starting with the oldest entries.
for i, snap := range snaps {
- if size <= c.cluster.Collections.WebDAVCache.MaxCollectionBytes {
+ if size <= int64(c.cluster.Collections.WebDAVCache.MaxCollectionBytes) {
break
}
if snap.prune {
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
index 123c4fe34d..12c2839f8c 100644
--- a/services/keep-web/handler.go
+++ b/services/keep-web/handler.go
@@ -27,15 +27,13 @@ import (
"git.arvados.org/arvados.git/sdk/go/auth"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
"git.arvados.org/arvados.git/sdk/go/httpserver"
- "git.arvados.org/arvados.git/sdk/go/keepclient"
"github.com/sirupsen/logrus"
"golang.org/x/net/webdav"
)
type handler struct {
- Cache cache
- Cluster *arvados.Cluster
- setupOnce sync.Once
+ Cache cache
+ Cluster *arvados.Cluster
lockMtx sync.Mutex
lock map[string]*sync.RWMutex
@@ -60,10 +58,6 @@ func parseCollectionIDFromURL(s string) string {
return ""
}
-func (h *handler) setup() {
- keepclient.DefaultBlockCache.MaxBlocks = h.Cluster.Collections.WebDAVCache.MaxBlockEntries
-}
-
func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
json.NewEncoder(w).Encode(struct{ Version string }{cmd.Version.String()})
}
@@ -179,8 +173,6 @@ func (h *handler) Done() <-chan struct{} {
// ServeHTTP implements http.Handler.
func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
- h.setupOnce.Do(h.setup)
-
if xfp := r.Header.Get("X-Forwarded-Proto"); xfp != "" && xfp != "http" {
r.URL.Scheme = xfp
}
diff --git a/services/keep-web/handler_test.go b/services/keep-web/handler_test.go
index 5a12e26e9d..85c7801cd4 100644
--- a/services/keep-web/handler_test.go
+++ b/services/keep-web/handler_test.go
@@ -1469,20 +1469,14 @@ func (s *IntegrationSuite) TestFileContentType(c *check.C) {
}
}
-func (s *IntegrationSuite) TestKeepClientBlockCache(c *check.C) {
- s.handler.Cluster.Collections.WebDAVCache.MaxBlockEntries = 42
- c.Check(keepclient.DefaultBlockCache.MaxBlocks, check.Not(check.Equals), 42)
- u := mustParseURL("http://keep-web.example/c=" + arvadostest.FooCollection + "/t=" + arvadostest.ActiveToken + "/foo")
- req := &http.Request{
- Method: "GET",
- Host: u.Host,
- URL: u,
- RequestURI: u.RequestURI(),
- }
+func (s *IntegrationSuite) TestCacheSize(c *check.C) {
+ req, err := http.NewRequest("GET", "http://"+arvadostest.FooCollection+".example.com/foo", nil)
+ req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveTokenV2)
+ c.Assert(err, check.IsNil)
resp := httptest.NewRecorder()
s.handler.ServeHTTP(resp, req)
- c.Check(resp.Code, check.Equals, http.StatusOK)
- c.Check(keepclient.DefaultBlockCache.MaxBlocks, check.Equals, 42)
+ c.Assert(resp.Code, check.Equals, http.StatusOK)
+ c.Check(s.handler.Cache.sessions[arvadostest.ActiveTokenV2].client.DiskCacheSize.Percent(), check.Equals, int64(10))
}
// Writing to a collection shouldn't affect its entry in the
commit 461fdaa1b96142b8065c131ae0334046fc71ea56
Merge: dbd3e4e8a3 afc02fe04d
Author: Tom Clegg <tom at curii.com>
Date: Wed Dec 20 17:40:10 2023 -0500
20318: Merge branch 'main' into 20318-disk-cache
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --cc sdk/go/keepclient/keepclient.go
index 4e935812be,86001c01e0..08fa455ff4
--- a/sdk/go/keepclient/keepclient.go
+++ b/sdk/go/keepclient/keepclient.go
@@@ -122,10 -118,29 +122,31 @@@ type KeepClient struct
// Disable automatic discovery of keep services
disableDiscovery bool
+
+ gatewayStack arvados.KeepGateway
}
+ func (kc *KeepClient) Clone() *KeepClient {
+ kc.lock.Lock()
+ defer kc.lock.Unlock()
+ return &KeepClient{
+ Arvados: kc.Arvados,
+ Want_replicas: kc.Want_replicas,
+ localRoots: kc.localRoots,
+ writableLocalRoots: kc.writableLocalRoots,
+ gatewayRoots: kc.gatewayRoots,
+ HTTPClient: kc.HTTPClient,
+ Retries: kc.Retries,
+ BlockCache: kc.BlockCache,
+ RequestID: kc.RequestID,
+ StorageClasses: kc.StorageClasses,
+ DefaultStorageClasses: kc.DefaultStorageClasses,
+ replicasPerService: kc.replicasPerService,
+ foundNonDiskSvc: kc.foundNonDiskSvc,
+ disableDiscovery: kc.disableDiscovery,
+ }
+ }
+
func (kc *KeepClient) loadDefaultClasses() error {
scData, err := kc.Arvados.ClusterConfig("StorageClasses")
if err != nil {
commit dbd3e4e8a3216dcf4942ded00a546649777245c2
Author: Tom Clegg <tom at curii.com>
Date: Wed Dec 20 14:37:28 2023 -0500
20318: Test hash check on block write.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache_test.go b/sdk/go/arvados/keep_cache_test.go
index 5fe1a6a08e..e4d1790cac 100644
--- a/sdk/go/arvados/keep_cache_test.go
+++ b/sdk/go/arvados/keep_cache_test.go
@@ -114,6 +114,36 @@ func (k *keepGatewayMemoryBacked) BlockWrite(ctx context.Context, opts BlockWrit
return BlockWriteResponse{Locator: locator, Replicas: 1}, nil
}
+func (s *keepCacheSuite) TestBlockWrite(c *check.C) {
+ backend := &keepGatewayMemoryBacked{}
+ cache := DiskCache{
+ KeepGateway: backend,
+ MaxSize: 40000000,
+ Dir: c.MkDir(),
+ Logger: ctxlog.TestLogger(c),
+ }
+ ctx := context.Background()
+ real, err := cache.BlockWrite(ctx, BlockWriteOptions{
+ Data: make([]byte, 100000),
+ })
+ c.Assert(err, check.IsNil)
+
+ // Write different data but supply the same hash. Should be
+ // rejected (even though our fake backend doesn't notice).
+ _, err = cache.BlockWrite(ctx, BlockWriteOptions{
+ Hash: real.Locator[:32],
+ Data: make([]byte, 10),
+ })
+ c.Check(err, check.ErrorMatches, `block hash .+ did not match provided hash .+`)
+
+ // Ensure the bogus write didn't overwrite (or delete) the
+ // real cached data associated with that hash.
+ delete(backend.data, real.Locator)
+ n, err := cache.ReadAt(real.Locator, make([]byte, 100), 0)
+ c.Check(n, check.Equals, 100)
+ c.Check(err, check.IsNil)
+}
+
func (s *keepCacheSuite) TestMaxSize(c *check.C) {
backend := &keepGatewayMemoryBacked{}
cache := DiskCache{
commit a32cbc86cef9c05cc63a4bd749553c13befff730
Author: Tom Clegg <tom at curii.com>
Date: Wed Dec 20 13:33:56 2023 -0500
20318: Track estimated cache usage, and tidy more diligently.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache.go b/sdk/go/arvados/keep_cache.go
index b366d6f1b0..af80daa2e0 100644
--- a/sdk/go/arvados/keep_cache.go
+++ b/sdk/go/arvados/keep_cache.go
@@ -58,6 +58,9 @@ type DiskCache struct {
writing map[string]*writeprogress
writingCond *sync.Cond
writingLock sync.Mutex
+
+ sizeMeasured int64 // actual size on disk after last tidy(); zero if not measured yet
+ sizeEstimated int64 // last measured size, plus files we have written since
}
type writeprogress struct {
@@ -76,7 +79,7 @@ type openFileEnt struct {
const (
cacheFileSuffix = ".keepcacheblock"
tmpFileSuffix = ".tmp"
- tidyHoldDuration = 10 * time.Second
+ tidyHoldDuration = 5 * time.Minute // time to re-check cache size even if estimated size is below max
)
func (cache *DiskCache) cacheFile(locator string) string {
@@ -123,7 +126,6 @@ func (cache *DiskCache) debugf(format string, args ...interface{}) {
// BlockWrite writes through to the wrapped KeepGateway, and (if
// possible) retains a copy of the written block in the cache.
func (cache *DiskCache) BlockWrite(ctx context.Context, opts BlockWriteOptions) (BlockWriteResponse, error) {
- cache.gotidy()
unique := fmt.Sprintf("%x.%p%s", os.Getpid(), &opts, tmpFileSuffix)
tmpfilename := filepath.Join(cache.Dir, "tmp", unique)
tmpfile, err := cache.openFile(tmpfilename, os.O_CREATE|os.O_EXCL|os.O_RDWR)
@@ -187,6 +189,8 @@ func (cache *DiskCache) BlockWrite(ctx context.Context, opts BlockWriteOptions)
if err != nil {
cache.debugf("BlockWrite: rename(%s, %s) failed: %s", tmpfilename, cachefilename, err)
}
+ atomic.AddInt64(&cache.sizeEstimated, int64(n))
+ cache.gotidy()
}()
// Write through to the wrapped KeepGateway from the pipe,
@@ -231,7 +235,6 @@ func (fw funcwriter) Write(p []byte) (int, error) {
// cache. The remainder of the block may continue to be copied into
// the cache in the background.
func (cache *DiskCache) ReadAt(locator string, dst []byte, offset int) (int, error) {
- cache.gotidy()
cachefilename := cache.cacheFile(locator)
if n, err := cache.quickReadAt(cachefilename, dst, offset); err == nil {
return n, err
@@ -305,6 +308,8 @@ func (cache *DiskCache) ReadAt(locator string, dst []byte, offset int) (int, err
}
return n, err
})})
+ atomic.AddInt64(&cache.sizeEstimated, int64(size))
+ cache.gotidy()
}()
}
progress.cond.L.Lock()
@@ -519,9 +524,18 @@ func (cache *DiskCache) BlockRead(ctx context.Context, opts BlockReadOptions) (i
// Start a tidy() goroutine, unless one is already running / recently
// finished.
func (cache *DiskCache) gotidy() {
- // Return quickly if another tidy goroutine is running in this process.
+ // Skip if another tidy goroutine is running in this process.
n := atomic.AddInt32(&cache.tidying, 1)
- if n != 1 || time.Now().Before(cache.tidyHoldUntil) {
+ if n != 1 {
+ atomic.AddInt32(&cache.tidying, -1)
+ return
+ }
+ // Skip if sizeEstimated is based on an actual measurement and
+ // is below MaxSize, and we haven't reached the "recheck
+ // anyway" time threshold.
+ if cache.sizeMeasured > 0 &&
+ atomic.LoadInt64(&cache.sizeEstimated) < cache.MaxSize &&
+ time.Now().Before(cache.tidyHoldUntil) {
atomic.AddInt32(&cache.tidying, -1)
return
}
@@ -608,11 +622,26 @@ func (cache *DiskCache) tidy() {
return
}
- if totalsize <= maxsize {
+ // If we're below MaxSize or there's only one block in the
+ // cache, just update the usage estimate and return.
+ //
+ // (We never delete the last block because that would merely
+ // cause the same block to get re-fetched repeatedly from the
+ // backend.)
+ if totalsize <= maxsize || len(ents) == 1 {
+ atomic.StoreInt64(&cache.sizeMeasured, totalsize)
+ atomic.StoreInt64(&cache.sizeEstimated, totalsize)
return
}
- // Delete oldest entries until totalsize < maxsize.
+ // Set a new size target of maxsize minus 5%. This makes some
+ // room for sizeEstimated to grow before it triggers another
+ // tidy. We don't want to walk/sort an entire large cache
+ // directory each time we write a block.
+ target := maxsize - (maxsize / 20)
+
+ // Delete oldest entries until totalsize < target or we're
+ // down to a single cached block.
sort.Slice(ents, func(i, j int) bool {
return ents[i].atime.Before(ents[j].atime)
})
@@ -622,7 +651,7 @@ func (cache *DiskCache) tidy() {
go cache.deleteHeldopen(ent.path, nil)
deleted++
totalsize -= ent.size
- if totalsize <= maxsize {
+ if totalsize <= target || deleted == len(ents)-1 {
break
}
}
@@ -633,4 +662,6 @@ func (cache *DiskCache) tidy() {
"totalsize": totalsize,
}).Debugf("DiskCache: remaining cache usage after deleting")
}
+ atomic.StoreInt64(&cache.sizeMeasured, totalsize)
+ atomic.StoreInt64(&cache.sizeEstimated, totalsize)
}
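For a concrete sense of the new 5% headroom: with a maxsize of 10 GiB, tidy() now trims the cache down to 9.5 GiB, leaving roughly 512 MiB for sizeEstimated to grow before the next full directory walk is triggered. The arithmetic, as a sketch:

    maxsize := int64(10 << 30)     // 10 GiB
    target := maxsize - maxsize/20 // 9.5 GiB: maxsize minus 5%
    fmt.Println(target)            // 10200547328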
diff --git a/sdk/go/arvados/keep_cache_test.go b/sdk/go/arvados/keep_cache_test.go
index 5ae932a782..5fe1a6a08e 100644
--- a/sdk/go/arvados/keep_cache_test.go
+++ b/sdk/go/arvados/keep_cache_test.go
@@ -127,15 +127,29 @@ func (s *keepCacheSuite) TestMaxSize(c *check.C) {
Data: make([]byte, 44000000),
})
c.Check(err, check.IsNil)
+
+ // Wait for tidy to finish, check that it doesn't delete the
+ // only block.
time.Sleep(time.Millisecond)
+ for atomic.LoadInt32(&cache.tidying) > 0 {
+ time.Sleep(time.Millisecond)
+ }
+ c.Check(atomic.LoadInt64(&cache.sizeMeasured), check.Equals, int64(44000000))
+
resp2, err := cache.BlockWrite(ctx, BlockWriteOptions{
Data: make([]byte, 32000000),
})
c.Check(err, check.IsNil)
delete(backend.data, resp1.Locator)
delete(backend.data, resp2.Locator)
- cache.tidyHoldUntil = time.Time{}
- cache.tidy()
+
+ // Wait for tidy to finish, check that it deleted the older
+ // block.
+ time.Sleep(time.Millisecond)
+ for atomic.LoadInt32(&cache.tidying) > 0 {
+ time.Sleep(time.Millisecond)
+ }
+ c.Check(atomic.LoadInt64(&cache.sizeMeasured), check.Equals, int64(32000000))
n, err := cache.ReadAt(resp1.Locator, make([]byte, 2), 0)
c.Check(n, check.Equals, 0)
commit 899185924c97c0e981c2c40ab115d7572ce79811
Author: Tom Clegg <tom at curii.com>
Date: Wed Dec 20 13:19:32 2023 -0500
20318: Implement BlockRead via ReadAt so it supports streaming.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache.go b/sdk/go/arvados/keep_cache.go
index 2a33871ca4..b366d6f1b0 100644
--- a/sdk/go/arvados/keep_cache.go
+++ b/sdk/go/arvados/keep_cache.go
@@ -476,18 +476,8 @@ func (cache *DiskCache) quickReadAt(cachefilename string, dst []byte, offset int
return n, err
}
-// BlockRead reads the entire block from the wrapped KeepGateway into
-// the cache if needed, and writes it to the provided writer.
+// BlockRead reads an entire block using a 128 KiB buffer.
func (cache *DiskCache) BlockRead(ctx context.Context, opts BlockReadOptions) (int, error) {
- cache.gotidy()
- cachefilename := cache.cacheFile(opts.Locator)
- f, err := cache.openFile(cachefilename, os.O_CREATE|os.O_RDWR)
- if err != nil {
- cache.debugf("BlockRead: %s", cachefilename, err)
- return cache.KeepGateway.BlockRead(ctx, opts)
- }
- defer f.Close()
-
i := strings.Index(opts.Locator, "+")
if i < 0 || i >= len(opts.Locator) {
return 0, errors.New("invalid block locator: no size hint")
@@ -502,33 +492,28 @@ func (cache *DiskCache) BlockRead(ctx context.Context, opts BlockReadOptions) (i
return 0, errors.New("invalid block locator: invalid size hint")
}
- err = syscall.Flock(int(f.Fd()), syscall.LOCK_SH)
- if err != nil {
- return 0, err
- }
- filesize, err := f.Seek(0, io.SeekEnd)
- if err != nil {
- return 0, err
- }
- _, err = f.Seek(0, io.SeekStart)
- if err != nil {
- return 0, err
- }
- if filesize == blocksize {
- n, err := io.Copy(opts.WriteTo, f)
- return int(n), err
- }
- err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
- if err != nil {
- return 0, err
- }
- opts.WriteTo = io.MultiWriter(f, opts.WriteTo)
- n, err := cache.KeepGateway.BlockRead(ctx, opts)
- if err != nil {
- return int(n), err
+ offset := 0
+ buf := make([]byte, 131072)
+ for offset < int(blocksize) {
+ if ctx.Err() != nil {
+ return offset, ctx.Err()
+ }
+ if int(blocksize)-offset < len(buf) {
+ buf = buf[:int(blocksize)-offset]
+ }
+ nr, err := cache.ReadAt(opts.Locator, buf, offset)
+ if nr > 0 {
+ nw, err := opts.WriteTo.Write(buf[:nr])
+ if err != nil {
+ return offset + nw, err
+ }
+ }
+ offset += nr
+ if err != nil {
+ return offset, err
+ }
}
- f.Truncate(int64(n))
- return n, nil
+ return offset, nil
}
// Start a tidy() goroutine, unless one is already running / recently
commit 4121c9e9fc03ee474f01248d384c7d3281b34328
Author: Tom Clegg <tom at curii.com>
Date: Wed Dec 20 10:55:30 2023 -0500
20318: Test streaming.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache_test.go b/sdk/go/arvados/keep_cache_test.go
index 6d11410470..5ae932a782 100644
--- a/sdk/go/arvados/keep_cache_test.go
+++ b/sdk/go/arvados/keep_cache_test.go
@@ -13,7 +13,9 @@ import (
"io"
"math/rand"
"os"
+ "path/filepath"
"sync"
+ "sync/atomic"
"time"
"git.arvados.org/arvados.git/sdk/go/ctxlog"
@@ -49,8 +51,10 @@ func (*keepGatewayBlackHole) BlockWrite(ctx context.Context, opts BlockWriteOpti
}
type keepGatewayMemoryBacked struct {
- mtx sync.RWMutex
- data map[string][]byte
+ mtx sync.RWMutex
+ data map[string][]byte
+ pauseBlockReadAfter int
+ pauseBlockReadUntil chan error
}
func (k *keepGatewayMemoryBacked) ReadAt(locator string, dst []byte, offset int) (int, error) {
@@ -76,6 +80,16 @@ func (k *keepGatewayMemoryBacked) BlockRead(ctx context.Context, opts BlockReadO
if data == nil {
return 0, errors.New("block not found: " + opts.Locator)
}
+ if k.pauseBlockReadUntil != nil {
+ src := bytes.NewReader(data)
+ n, err := io.CopyN(opts.WriteTo, src, int64(k.pauseBlockReadAfter))
+ if err != nil {
+ return int(n), err
+ }
+ <-k.pauseBlockReadUntil
+ n2, err := io.Copy(opts.WriteTo, src)
+ return int(n + n2), err
+ }
return opts.WriteTo.Write(data)
}
func (k *keepGatewayMemoryBacked) LocalLocator(locator string) (string, error) {
@@ -220,6 +234,66 @@ func (s *keepCacheSuite) testConcurrentReaders(c *check.C, cannotRefresh, mangle
wg.Wait()
}
+func (s *keepCacheSuite) TestStreaming(c *check.C) {
+ blksize := 64000000
+ backend := &keepGatewayMemoryBacked{
+ pauseBlockReadUntil: make(chan error),
+ pauseBlockReadAfter: blksize / 8,
+ }
+ cache := DiskCache{
+ KeepGateway: backend,
+ MaxSize: int64(blksize),
+ Dir: c.MkDir(),
+ Logger: ctxlog.TestLogger(c),
+ }
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+
+ resp, err := cache.BlockWrite(ctx, BlockWriteOptions{
+ Data: make([]byte, blksize),
+ })
+ c.Check(err, check.IsNil)
+ os.RemoveAll(filepath.Join(cache.Dir, resp.Locator[:3]))
+
+ // Start a lot of concurrent requests for various ranges of
+ // the same block. Our backend will return the first 8MB and
+ // then pause. The requests that can be satisfied by the first
+ // 8MB of data should return quickly. The rest should wait,
+ // and return after we release pauseBlockReadUntil.
+ var wgEarly, wgLate sync.WaitGroup
+ var doneEarly, doneLate int32
+ for i := 0; i < 10000; i++ {
+ wgEarly.Add(1)
+ go func() {
+ offset := int(rand.Int63() % int64(blksize-benchReadSize))
+ if offset+benchReadSize > backend.pauseBlockReadAfter {
+ wgLate.Add(1)
+ defer wgLate.Done()
+ wgEarly.Done()
+ defer atomic.AddInt32(&doneLate, 1)
+ } else {
+ defer wgEarly.Done()
+ defer atomic.AddInt32(&doneEarly, 1)
+ }
+ buf := make([]byte, benchReadSize)
+ n, err := cache.ReadAt(resp.Locator, buf, offset)
+ c.Check(n, check.Equals, len(buf))
+ c.Check(err, check.IsNil)
+ }()
+ }
+
+ // Ensure all early ranges finish while backend request(s) are
+ // paused.
+ wgEarly.Wait()
+ c.Logf("doneEarly = %d", doneEarly)
+ c.Check(doneLate, check.Equals, int32(0))
+
+ // Unpause backend request(s).
+ close(backend.pauseBlockReadUntil)
+ wgLate.Wait()
+ c.Logf("doneLate = %d", doneLate)
+}
+
var _ = check.Suite(&keepCacheBenchSuite{})
type keepCacheBenchSuite struct {
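The gating trick in TestStreaming, a fake backend that emits a fixed prefix and then blocks on a channel until the test closes it, is a reusable way to prove that callers can make progress on partial data. A stripped-down sketch of that pattern:

    package example

    import "io"

    // pausingSource writes the first n bytes immediately, then blocks
    // until release is closed before writing the remainder.
    type pausingSource struct {
        data    []byte
        n       int
        release chan struct{}
    }

    func (p *pausingSource) WriteTo(w io.Writer) (int64, error) {
        n1, err := w.Write(p.data[:p.n])
        if err != nil {
            return int64(n1), err
        }
        <-p.release // hold the stream open until the test unblocks it
        n2, err := w.Write(p.data[p.n:])
        return int64(n1 + n2), err
    }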
commit 932ca9e698fb36bcdd0c558b50e6e965417409d0
Author: Tom Clegg <tom at curii.com>
Date: Wed Dec 20 10:22:31 2023 -0500
20318: Comment about error handling.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache.go b/sdk/go/arvados/keep_cache.go
index 567e0cbec3..2a33871ca4 100644
--- a/sdk/go/arvados/keep_cache.go
+++ b/sdk/go/arvados/keep_cache.go
@@ -458,6 +458,12 @@ func (cache *DiskCache) quickReadAt(cachefilename string, dst []byte, offset int
progress.cond.Wait()
}
progress.cond.L.Unlock()
+ // If size<needed && progress.err!=nil here, we'll end
+ // up reporting a less helpful "EOF reading from cache
+ // file" below, instead of the actual error fetching
+ // from upstream to cache file. This is OK though,
+ // because our caller (ReadAt) doesn't even report our
+ // error, it just retries.
}
n, err := heldopen.f.ReadAt(dst, int64(offset))
commit 4e69128e5e7aeb1a9c5e4462adb38ecc5f5bb8ea
Author: Tom Clegg <tom at curii.com>
Date: Tue Dec 19 16:48:58 2023 -0500
20318: Return requested range while fetching remainder of block.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache.go b/sdk/go/arvados/keep_cache.go
index b8b3d9f588..567e0cbec3 100644
--- a/sdk/go/arvados/keep_cache.go
+++ b/sdk/go/arvados/keep_cache.go
@@ -50,6 +50,21 @@ type DiskCache struct {
heldopen map[string]*openFileEnt
heldopenMax int
heldopenLock sync.Mutex
+
+ // The "writing" fields allow multiple concurrent/sequential
+ // ReadAt calls to be notified as a single
+ // read-block-from-backend-into-cache goroutine fills the
+ // cache file.
+ writing map[string]*writeprogress
+ writingCond *sync.Cond
+ writingLock sync.Mutex
+}
+
+type writeprogress struct {
+ cond *sync.Cond // broadcast whenever size or done changes
+ done bool // size and err have their final values
+ size int // bytes copied into cache file so far
+ err error // error encountered while copying from backend to cache
}
type openFileEnt struct {
@@ -202,59 +217,116 @@ func (cache *DiskCache) BlockWrite(ctx context.Context, opts BlockWriteOptions)
return resp, err
}
+type funcwriter func([]byte) (int, error)
+
+func (fw funcwriter) Write(p []byte) (int, error) {
+ return fw(p)
+}
+
// ReadAt reads the entire block from the wrapped KeepGateway into the
// cache if needed, and copies the requested portion into the provided
// slice.
+//
+// ReadAt returns as soon as the requested portion is available in the
+// cache. The remainder of the block may continue to be copied into
+// the cache in the background.
func (cache *DiskCache) ReadAt(locator string, dst []byte, offset int) (int, error) {
cache.gotidy()
cachefilename := cache.cacheFile(locator)
if n, err := cache.quickReadAt(cachefilename, dst, offset); err == nil {
return n, err
}
- f, err := cache.openFile(cachefilename, os.O_CREATE|os.O_RDWR)
+ readf, err := cache.openFile(cachefilename, os.O_CREATE|os.O_RDONLY)
if err != nil {
- return 0, fmt.Errorf("ReadAt: %s", cachefilename, err)
+ return 0, fmt.Errorf("ReadAt: %w", err)
}
- defer f.Close()
+ defer readf.Close()
- err = syscall.Flock(int(f.Fd()), syscall.LOCK_SH)
+ err = syscall.Flock(int(readf.Fd()), syscall.LOCK_SH)
if err != nil {
return 0, fmt.Errorf("flock(%s, lock_sh) failed: %w", cachefilename, err)
}
- size, err := f.Seek(0, io.SeekEnd)
- if err != nil {
- return 0, fmt.Errorf("seek(%s, seek_end) failed: %w", cachefilename, err)
- }
- if size < int64(len(dst)+offset) {
- // The cache file seems to be truncated or empty
- // (possibly because we just created it). Wait for an
- // exclusive lock, then check again (in case someone
- // else is doing the same thing) before trying to
- // retrieve the entire block.
- err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
- if err != nil {
- return 0, fmt.Errorf("flock(%s, lock_ex) failed: %w", cachefilename, err)
- }
- }
- size, err = f.Seek(0, io.SeekEnd)
- if err != nil {
- return 0, fmt.Errorf("seek(%s, seek_end) failed: %w", cachefilename, err)
- }
- if size < int64(len(dst)+offset) {
- // The cache file is truncated or empty, and we own it
- // now. Fill it.
- _, err = f.Seek(0, io.SeekStart)
- if err != nil {
- return 0, fmt.Errorf("seek(%s, seek_start) failed: %w", cachefilename, err)
+ cache.writingLock.Lock()
+ progress := cache.writing[cachefilename]
+ if progress != nil {
+ cache.writingLock.Unlock()
+ } else {
+ progress = &writeprogress{}
+ progress.cond = sync.NewCond(&sync.Mutex{})
+ if cache.writing == nil {
+ cache.writing = map[string]*writeprogress{}
}
- n, err := cache.KeepGateway.BlockRead(context.Background(), BlockReadOptions{Locator: locator, WriteTo: f})
- if err != nil {
- return 0, err
- }
- f.Truncate(int64(n))
+ cache.writing[cachefilename] = progress
+ cache.writingLock.Unlock()
+
+ // Start a goroutine to copy from backend to f. As
+ // data arrives, wake up any waiting loops (see below)
+ // so ReadAt() requests for partial data can return as
+ // soon as the relevant bytes have been copied.
+ go func() {
+ var size int
+ var writef *os.File
+ var err error
+ defer func() {
+ closeErr := writef.Close()
+ if err == nil {
+ err = closeErr
+ }
+ progress.cond.L.Lock()
+ progress.err = err
+ progress.done = true
+ progress.size = size
+ progress.cond.L.Unlock()
+ progress.cond.Broadcast()
+ cache.writingLock.Lock()
+ delete(cache.writing, cachefilename)
+ cache.writingLock.Unlock()
+ }()
+ writef, err = cache.openFile(cachefilename, os.O_WRONLY)
+ if err != nil {
+ err = fmt.Errorf("ReadAt: %w", err)
+ return
+ }
+ err = syscall.Flock(int(writef.Fd()), syscall.LOCK_SH)
+ if err != nil {
+ err = fmt.Errorf("flock(%s, lock_sh) failed: %w", cachefilename, err)
+ return
+ }
+ size, err = cache.KeepGateway.BlockRead(context.Background(), BlockReadOptions{
+ Locator: locator,
+ WriteTo: funcwriter(func(p []byte) (int, error) {
+ n, err := writef.Write(p)
+ if n > 0 {
+ progress.cond.L.Lock()
+ progress.size += n
+ progress.cond.L.Unlock()
+ progress.cond.Broadcast()
+ }
+ return n, err
+ })})
+ }()
+ }
+ progress.cond.L.Lock()
+ for !progress.done && progress.size < len(dst)+offset {
+ progress.cond.Wait()
+ }
+ ok := progress.size >= len(dst)+offset
+ err = progress.err
+ progress.cond.L.Unlock()
+
+ if !ok && err != nil {
+ // If the copy-from-backend goroutine encountered an
+ // error before copying enough bytes to satisfy our
+ // request, we return that error.
+ return 0, err
+ } else {
+ // Regardless of whether the copy-from-backend
+ // goroutine succeeded, or failed after copying the
+ // bytes we need, the only errors we need to report
+ // are errors reading from the cache file.
+ return readf.ReadAt(dst, int64(offset))
}
- return f.ReadAt(dst, int64(offset))
}
var quickReadAtLostRace = errors.New("quickReadAt: lost race")
@@ -374,6 +446,20 @@ func (cache *DiskCache) quickReadAt(cachefilename string, dst []byte, offset int
// Other goroutine closed the file before we got RLock
return 0, quickReadAtLostRace
}
+
+ // If another goroutine is currently writing the file, wait
+ // for it to catch up to the end of the range we need.
+ cache.writingLock.Lock()
+ progress := cache.writing[cachefilename]
+ cache.writingLock.Unlock()
+ if progress != nil {
+ progress.cond.L.Lock()
+ for !progress.done && progress.size < len(dst)+offset {
+ progress.cond.Wait()
+ }
+ progress.cond.L.Unlock()
+ }
+
n, err := heldopen.f.ReadAt(dst, int64(offset))
if err != nil {
// wait for any concurrent users to finish, then
commit 2553652e43229a872b93a5d011c25a2727d1d18f
Author: Tom Clegg <tom at curii.com>
Date: Tue Dec 19 14:07:28 2023 -0500
20318: Fix stuttering error message.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache.go b/sdk/go/arvados/keep_cache.go
index 189332c866..b8b3d9f588 100644
--- a/sdk/go/arvados/keep_cache.go
+++ b/sdk/go/arvados/keep_cache.go
@@ -213,7 +213,7 @@ func (cache *DiskCache) ReadAt(locator string, dst []byte, offset int) (int, err
}
f, err := cache.openFile(cachefilename, os.O_CREATE|os.O_RDWR)
if err != nil {
- return 0, fmt.Errorf("ReadAt: open(%s) failed: %s", cachefilename, err)
+ return 0, fmt.Errorf("ReadAt: %s", cachefilename, err)
}
defer f.Close()
@@ -391,7 +391,7 @@ func (cache *DiskCache) BlockRead(ctx context.Context, opts BlockReadOptions) (i
cachefilename := cache.cacheFile(opts.Locator)
f, err := cache.openFile(cachefilename, os.O_CREATE|os.O_RDWR)
if err != nil {
- cache.debugf("BlockRead: open(%s) failed: %s", cachefilename, err)
+ cache.debugf("BlockRead: %s", cachefilename, err)
return cache.KeepGateway.BlockRead(ctx, opts)
}
defer f.Close()
commit 82054f095dad0fb9d21842eac1cd9ade50ffc940
Author: Tom Clegg <tom at curii.com>
Date: Tue Dec 19 14:07:23 2023 -0500
20318: Remove memory-backed keep block cache.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/lib/crunchrun/crunchrun.go b/lib/crunchrun/crunchrun.go
index 0e0d3c43e4..bde13424dd 100644
--- a/lib/crunchrun/crunchrun.go
+++ b/lib/crunchrun/crunchrun.go
@@ -78,7 +78,6 @@ type IKeepClient interface {
ReadAt(locator string, p []byte, off int) (int, error)
ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error)
LocalLocator(locator string) (string, error)
- ClearBlockCache()
SetStorageClasses(sc []string)
}
@@ -2033,7 +2032,6 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
log.Printf("%s: %v", containerUUID, err)
return 1
}
- kc.BlockCache = &keepclient.BlockCache{MaxBlocks: 2}
kc.Retries = 4
cr, err := NewContainerRunner(arvados.NewClientFromEnv(), api, kc, containerUUID)
diff --git a/lib/crunchrun/crunchrun_test.go b/lib/crunchrun/crunchrun_test.go
index c533821351..276dd36661 100644
--- a/lib/crunchrun/crunchrun_test.go
+++ b/lib/crunchrun/crunchrun_test.go
@@ -368,9 +368,6 @@ func (client *KeepTestClient) ReadAt(string, []byte, int) (int, error) {
return 0, errors.New("not implemented")
}
-func (client *KeepTestClient) ClearBlockCache() {
-}
-
func (client *KeepTestClient) Close() {
client.Content = nil
}
diff --git a/lib/mount/command.go b/lib/mount/command.go
index f88d977c4c..666f2cf4ac 100644
--- a/lib/mount/command.go
+++ b/lib/mount/command.go
@@ -43,7 +43,6 @@ func (c *mountCommand) RunCommand(prog string, args []string, stdin io.Reader, s
flags := flag.NewFlagSet(prog, flag.ContinueOnError)
ro := flags.Bool("ro", false, "read-only")
experimental := flags.Bool("experimental", false, "acknowledge this is an experimental command, and should not be used in production (required)")
- blockCache := flags.Int("block-cache", 4, "read cache size (number of 64MiB blocks)")
pprof := flags.String("pprof", "", "serve Go profile data at `[addr]:port`")
if ok, code := cmd.ParseFlags(flags, prog, args, "[FUSE mount options]", stderr); !ok {
return code
@@ -69,7 +68,6 @@ func (c *mountCommand) RunCommand(prog string, args []string, stdin io.Reader, s
logger.Print(err)
return 1
}
- kc.BlockCache = &keepclient.BlockCache{MaxBlocks: *blockCache}
host := fuse.NewFileSystemHost(&keepFS{
Client: client,
KeepClient: kc,
diff --git a/sdk/go/keepclient/block_cache.go b/sdk/go/keepclient/block_cache.go
deleted file mode 100644
index 37eee4de20..0000000000
--- a/sdk/go/keepclient/block_cache.go
+++ /dev/null
@@ -1,134 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: Apache-2.0
-
-package keepclient
-
-import (
- "bytes"
- "context"
- "io"
- "sort"
- "strconv"
- "strings"
- "sync"
- "time"
-
- "git.arvados.org/arvados.git/sdk/go/arvados"
-)
-
-var DefaultBlockCache = &BlockCache{}
-
-type BlockCache struct {
- // Maximum number of blocks to keep in the cache. If 0, a
- // default size (currently 4) is used instead.
- MaxBlocks int
-
- cache map[string]*cacheBlock
- mtx sync.Mutex
-}
-
-const defaultMaxBlocks = 4
-
-// Sweep deletes the least recently used blocks from the cache until
-// there are no more than MaxBlocks left.
-func (c *BlockCache) Sweep() {
- max := c.MaxBlocks
- if max == 0 {
- max = defaultMaxBlocks
- }
- c.mtx.Lock()
- defer c.mtx.Unlock()
- if len(c.cache) <= max {
- return
- }
- lru := make([]time.Time, 0, len(c.cache))
- for _, b := range c.cache {
- lru = append(lru, b.lastUse)
- }
- sort.Sort(sort.Reverse(timeSlice(lru)))
- threshold := lru[max]
- for loc, b := range c.cache {
- if !b.lastUse.After(threshold) {
- delete(c.cache, loc)
- }
- }
-}
-
-// ReadAt returns data from the cache, first retrieving it from Keep if
-// necessary.
-func (c *BlockCache) ReadAt(upstream arvados.KeepGateway, locator string, p []byte, off int) (int, error) {
- buf, err := c.get(upstream, locator)
- if err != nil {
- return 0, err
- }
- if off > len(buf) {
- return 0, io.ErrUnexpectedEOF
- }
- return copy(p, buf[off:]), nil
-}
-
-// Get a block from the cache, first retrieving it from Keep if
-// necessary.
-func (c *BlockCache) get(upstream arvados.KeepGateway, locator string) ([]byte, error) {
- cacheKey := locator[:32]
- bufsize := BLOCKSIZE
- if parts := strings.SplitN(locator, "+", 3); len(parts) >= 2 {
- datasize, err := strconv.ParseInt(parts[1], 10, 32)
- if err == nil && datasize >= 0 {
- bufsize = int(datasize)
- }
- }
- c.mtx.Lock()
- if c.cache == nil {
- c.cache = make(map[string]*cacheBlock)
- }
- b, ok := c.cache[cacheKey]
- if !ok || b.err != nil {
- b = &cacheBlock{
- fetched: make(chan struct{}),
- lastUse: time.Now(),
- }
- c.cache[cacheKey] = b
- go func() {
- buf := bytes.NewBuffer(make([]byte, 0, bufsize))
- _, err := upstream.BlockRead(context.Background(), arvados.BlockReadOptions{Locator: locator, WriteTo: buf})
- c.mtx.Lock()
- b.data, b.err = buf.Bytes(), err
- c.mtx.Unlock()
- close(b.fetched)
- go c.Sweep()
- }()
- }
- c.mtx.Unlock()
-
- // Wait (with mtx unlocked) for the fetch goroutine to finish,
- // in case it hasn't already.
- <-b.fetched
-
- c.mtx.Lock()
- b.lastUse = time.Now()
- c.mtx.Unlock()
- return b.data, b.err
-}
-
-func (c *BlockCache) Clear() {
- c.mtx.Lock()
- c.cache = nil
- c.mtx.Unlock()
-}
-
-type timeSlice []time.Time
-
-func (ts timeSlice) Len() int { return len(ts) }
-
-func (ts timeSlice) Less(i, j int) bool { return ts[i].Before(ts[j]) }
-
-func (ts timeSlice) Swap(i, j int) { ts[i], ts[j] = ts[j], ts[i] }
-
-type cacheBlock struct {
- data []byte
- err error
- fetched chan struct{}
- lastUse time.Time
-}
diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
index 65dcd9ac8a..c1bad8557d 100644
--- a/sdk/go/keepclient/collectionreader_test.go
+++ b/sdk/go/keepclient/collectionreader_test.go
@@ -237,14 +237,8 @@ func (s *CollectionReaderUnit) TestCollectionReaderManyBlocks(c *check.C) {
}
func (s *CollectionReaderUnit) TestCollectionReaderCloseEarly(c *check.C) {
- // Disable disk cache
- defer func(home string) {
- os.Setenv("HOME", home)
- }(os.Getenv("HOME"))
- os.Setenv("HOME", "")
-
- // Disable memory cache
- s.kc.BlockCache = &BlockCache{}
+ // Disable cache
+ s.kc.gatewayStack = &keepViaHTTP{s.kc}
s.kc.PutB([]byte("foo"))
s.kc.PutB([]byte("bar"))
diff --git a/sdk/go/keepclient/gateway_shim.go b/sdk/go/keepclient/gateway_shim.go
index 35c191afe6..eeb187e107 100644
--- a/sdk/go/keepclient/gateway_shim.go
+++ b/sdk/go/keepclient/gateway_shim.go
@@ -67,28 +67,3 @@ func (kvh *keepViaHTTP) LocalLocator(locator string) (string, error) {
}
return loc, nil
}
-
-// keepViaBlockCache implements arvados.KeepGateway by using the given
-// KeepClient's BlockCache with the wrapped KeepGateway.
-//
-// Note the whole KeepClient gets passed in instead of just its
-// cache. This ensures the new BlockCache gets used if it changes
-// after keepViaBlockCache is initialized.
-type keepViaBlockCache struct {
- kc *KeepClient
- arvados.KeepGateway
-}
-
-func (kvbc *keepViaBlockCache) ReadAt(locator string, dst []byte, offset int) (int, error) {
- return kvbc.kc.cache().ReadAt(kvbc.KeepGateway, locator, dst, offset)
-}
-
-func (kvbc *keepViaBlockCache) BlockRead(ctx context.Context, opts arvados.BlockReadOptions) (int, error) {
- rdr, _, _, _, err := kvbc.kc.getOrHead("GET", opts.Locator, nil)
- if err != nil {
- return 0, err
- }
- defer rdr.Close()
- n, err := io.Copy(opts.WriteTo, rdr)
- return int(n), err
-}
diff --git a/sdk/go/keepclient/keepclient.go b/sdk/go/keepclient/keepclient.go
index 2712e9f3a5..4e935812be 100644
--- a/sdk/go/keepclient/keepclient.go
+++ b/sdk/go/keepclient/keepclient.go
@@ -110,7 +110,6 @@ type KeepClient struct {
lock sync.RWMutex
HTTPClient HTTPClient
Retries int
- BlockCache *BlockCache
RequestID string
StorageClasses []string
DefaultStorageClasses []string // Set by cluster's exported config
@@ -542,17 +541,6 @@ func (kc *KeepClient) getSortedRoots(locator string) []string {
return found
}
-func (kc *KeepClient) cache() *BlockCache {
- if kc.BlockCache != nil {
- return kc.BlockCache
- }
- return DefaultBlockCache
-}
-
-func (kc *KeepClient) ClearBlockCache() {
- kc.cache().Clear()
-}
-
func (kc *KeepClient) SetStorageClasses(sc []string) {
// make a copy so the caller can't mess with it.
kc.StorageClasses = append([]string{}, sc...)
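With keepViaBlockCache and the KeepClient.BlockCache field removed, the disk-backed cache is the only caching layer left in the client's gateway stack. A minimal sketch of driving arvados.DiskCache directly, assuming some arvados.KeepGateway implementation "backend" (for example, the in-memory test gateway used elsewhere in this series); the cache directory here is hypothetical:

    package main

    import (
        "context"

        "git.arvados.org/arvados.git/sdk/go/arvados"
    )

    func cachedReadExample(backend arvados.KeepGateway) ([]byte, error) {
        cache := &arvados.DiskCache{
            KeepGateway: backend,          // upstream (e.g. keepViaHTTP)
            Dir:         "/tmp/keepcache", // hypothetical cache directory
            MaxSize:     1 << 30,          // 1 GiB; 0 lets tidy() pick a default
        }
        ctx := context.Background()
        // Write through the cache; a copy is retained as a cache file.
        resp, err := cache.BlockWrite(ctx, arvados.BlockWriteOptions{
            Data: []byte("foo"),
        })
        if err != nil {
            return nil, err
        }
        // Read back; served from the cache file without re-fetching.
        buf := make([]byte, 3)
        _, err = cache.ReadAt(resp.Locator, buf, 0)
        return buf, err
    }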
commit 18541c985f7f19d9c200a592287333fb3fdab38b
Author: Tom Clegg <tom at curii.com>
Date: Tue Dec 19 10:57:27 2023 -0500
20318: Route KeepClient block writes through disk cache.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/keepclient/gateway_shim.go b/sdk/go/keepclient/gateway_shim.go
index 262d612640..35c191afe6 100644
--- a/sdk/go/keepclient/gateway_shim.go
+++ b/sdk/go/keepclient/gateway_shim.go
@@ -6,7 +6,11 @@ package keepclient
import (
"context"
+ "fmt"
"io"
+ "net/http"
+ "strings"
+ "time"
"git.arvados.org/arvados.git/sdk/go/arvados"
)
@@ -41,6 +45,29 @@ func (kvh *keepViaHTTP) BlockRead(ctx context.Context, opts arvados.BlockReadOpt
return int(n), err
}
+func (kvh *keepViaHTTP) BlockWrite(ctx context.Context, req arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
+ return kvh.httpBlockWrite(ctx, req)
+}
+
+func (kvh *keepViaHTTP) LocalLocator(locator string) (string, error) {
+ if !strings.Contains(locator, "+R") {
+ // Either it has +A, or it's unsigned and we assume
+ // it's a local locator on a site with signatures
+ // disabled.
+ return locator, nil
+ }
+ sighdr := fmt.Sprintf("local, time=%s", time.Now().UTC().Format(time.RFC3339))
+ _, _, url, hdr, err := kvh.KeepClient.getOrHead("HEAD", locator, http.Header{"X-Keep-Signature": []string{sighdr}})
+ if err != nil {
+ return "", err
+ }
+ loc := hdr.Get("X-Keep-Locator")
+ if loc == "" {
+ return "", fmt.Errorf("missing X-Keep-Locator header in HEAD response from %s", url)
+ }
+ return loc, nil
+}
+
// keepViaBlockCache implements arvados.KeepGateway by using the given
// KeepClient's BlockCache with the wrapped KeepGateway.
//
diff --git a/sdk/go/keepclient/keepclient.go b/sdk/go/keepclient/keepclient.go
index cae6ca1545..2712e9f3a5 100644
--- a/sdk/go/keepclient/keepclient.go
+++ b/sdk/go/keepclient/keepclient.go
@@ -376,22 +376,7 @@ func (kc *KeepClient) upstreamGateway() arvados.KeepGateway {
// with a valid signature from the local cluster. If the given locator
// already has a local signature, it is returned unchanged.
func (kc *KeepClient) LocalLocator(locator string) (string, error) {
- if !strings.Contains(locator, "+R") {
- // Either it has +A, or it's unsigned and we assume
- // it's a local locator on a site with signatures
- // disabled.
- return locator, nil
- }
- sighdr := fmt.Sprintf("local, time=%s", time.Now().UTC().Format(time.RFC3339))
- _, _, url, hdr, err := kc.getOrHead("HEAD", locator, http.Header{"X-Keep-Signature": []string{sighdr}})
- if err != nil {
- return "", err
- }
- loc := hdr.Get("X-Keep-Locator")
- if loc == "" {
- return "", fmt.Errorf("missing X-Keep-Locator header in HEAD response from %s", url)
- }
- return loc, nil
+ return kc.upstreamGateway().LocalLocator(locator)
}
// Get retrieves a block, given a locator. Returns a reader, the
@@ -412,6 +397,12 @@ func (kc *KeepClient) ReadAt(locator string, p []byte, off int) (int, error) {
return kc.upstreamGateway().ReadAt(locator, p, off)
}
+// BlockWrite writes a full block to upstream servers and saves a copy
+// in the local cache.
+func (kc *KeepClient) BlockWrite(ctx context.Context, req arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
+ return kc.upstreamGateway().BlockWrite(ctx, req)
+}
+
// Ask verifies that a block with the given hash is available and
// readable, according to at least one Keep service. Unlike Get, it
// does not retrieve the data or verify that the data content matches
diff --git a/sdk/go/keepclient/support.go b/sdk/go/keepclient/support.go
index 8d299815b2..c4144bf871 100644
--- a/sdk/go/keepclient/support.go
+++ b/sdk/go/keepclient/support.go
@@ -127,7 +127,7 @@ func (kc *KeepClient) uploadToKeepServer(host string, hash string, classesTodo [
}
}
-func (kc *KeepClient) BlockWrite(ctx context.Context, req arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
+func (kc *KeepClient) httpBlockWrite(ctx context.Context, req arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
var resp arvados.BlockWriteResponse
var getReader func() io.Reader
if req.Data == nil && req.Reader == nil {
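The effect of this commit is that the public KeepClient.BlockWrite API is unchanged, but writes now tee into the disk cache on their way upstream (DiskCache.BlockWrite writes a tmpfile and renames it into place). A hedged sketch of a caller, assuming kc is an already-configured *keepclient.KeepClient:

    ctx := context.Background()
    resp, err := kc.BlockWrite(ctx, arvados.BlockWriteOptions{
        Data: []byte("example data"),
    })
    if err != nil {
        // handle error
    }
    // A subsequent read of the same block can now be served from the
    // local cache file rather than going back to keepstore.
    buf := make([]byte, len("example data"))
    _, err = kc.ReadAt(resp.Locator, buf, 0)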
commit f68557c627b5a8472d33d973a2737448904c29bd
Author: Tom Clegg <tom at curii.com>
Date: Tue Dec 19 10:47:05 2023 -0500
20318: Don't use memory-backed block cache for short reads.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/keepclient/keepclient.go b/sdk/go/keepclient/keepclient.go
index 18614a77eb..cae6ca1545 100644
--- a/sdk/go/keepclient/keepclient.go
+++ b/sdk/go/keepclient/keepclient.go
@@ -42,6 +42,9 @@ var (
DefaultProxyConnectTimeout = 30 * time.Second
DefaultProxyTLSHandshakeTimeout = 10 * time.Second
DefaultProxyKeepAlive = 120 * time.Second
+
+ rootCacheDir = "/var/cache/arvados/keep"
+ userCacheDir = ".cache/arvados/keep" // relative to HOME
)
// Error interface with an error and boolean indicating whether the error is temporary
@@ -336,43 +339,37 @@ func (kc *KeepClient) getOrHead(method string, locator string, header http.Heade
return nil, 0, "", nil, err
}
+// attempt to create dir/subdir/ and its parents, up to but not
+// including dir itself, using mode 0700.
+func makedirs(dir, subdir string) {
+ for _, part := range strings.Split(subdir, string(os.PathSeparator)) {
+ dir = filepath.Join(dir, part)
+ os.Mkdir(dir, 0700)
+ }
+}
+
// upstreamGateway creates/returns the KeepGateway stack used to read
-// and write data: a memory cache, a disk-backed cache if available,
-// and an http backend.
+// and write data: a disk-backed cache on top of an http backend.
func (kc *KeepClient) upstreamGateway() arvados.KeepGateway {
kc.lock.Lock()
defer kc.lock.Unlock()
if kc.gatewayStack != nil {
return kc.gatewayStack
}
- var stack arvados.KeepGateway = &keepViaHTTP{kc}
-
- // Wrap with a disk cache, if cache dir is writable
- home := os.Getenv("HOME")
- if fi, err := os.Stat(home); home != "" && err == nil && fi.IsDir() {
- var err error
- dir := home
- for _, part := range []string{".cache", "arvados", "keep"} {
- dir = filepath.Join(dir, part)
- err = os.Mkdir(dir, 0700)
- }
- if err == nil || os.IsExist(err) {
- os.Mkdir(filepath.Join(dir, "tmp"), 0700)
- err = os.WriteFile(filepath.Join(dir, "tmp", "check.tmp"), nil, 0600)
- if err == nil {
- stack = &arvados.DiskCache{
- Dir: dir,
- KeepGateway: stack,
- }
- }
- }
+ var cachedir string
+ if os.Geteuid() == 0 {
+ cachedir = rootCacheDir
+ makedirs("/", cachedir)
+ } else {
+ home := "/" + os.Getenv("HOME")
+ makedirs(home, userCacheDir)
+ cachedir = filepath.Join(home, userCacheDir)
}
- stack = &keepViaBlockCache{
- kc: kc,
- KeepGateway: stack,
+ kc.gatewayStack = &arvados.DiskCache{
+ Dir: cachedir,
+ KeepGateway: &keepViaHTTP{kc},
}
- kc.gatewayStack = stack
- return stack
+ return kc.gatewayStack
}
// LocalLocator returns a locator equivalent to the one supplied, but
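The cache location policy introduced here: root processes share a system-wide directory, everyone else gets a per-user directory, and makedirs() creates the missing path components with mode 0700 while ignoring errors (a later open will fail loudly if the directory is truly unusable). The "/"+os.Getenv("HOME") form presumably anchors the path at the filesystem root even when HOME is empty or relative. An equivalent standalone sketch:

    cachedir := "/var/cache/arvados/keep" // rootCacheDir
    if os.Geteuid() != 0 {
        home := "/" + os.Getenv("HOME") // anchored even if HOME is "" or relative
        cachedir = filepath.Join(home, ".cache/arvados/keep")
    }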
commit 4ef866c34bba8bd73c0b1de48fb8c62d4f7d0661
Author: Tom Clegg <tom at curii.com>
Date: Tue Dec 19 10:07:36 2023 -0500
20318: Fail instead of reading through if cache dir is unusable.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache.go b/sdk/go/arvados/keep_cache.go
index 6aed35a215..189332c866 100644
--- a/sdk/go/arvados/keep_cache.go
+++ b/sdk/go/arvados/keep_cache.go
@@ -213,8 +213,7 @@ func (cache *DiskCache) ReadAt(locator string, dst []byte, offset int) (int, err
}
f, err := cache.openFile(cachefilename, os.O_CREATE|os.O_RDWR)
if err != nil {
- cache.debugf("ReadAt: open(%s) failed: %s", cachefilename, err)
- return cache.KeepGateway.ReadAt(locator, dst, offset)
+ return 0, fmt.Errorf("ReadAt: open(%s) failed: %s", cachefilename, err)
}
defer f.Close()
commit 85a85da473af4b66dbc92a5f8882eea8c7ce8ffd
Author: Tom Clegg <tom at curii.com>
Date: Mon Dec 18 20:59:31 2023 -0500
20318: Close held-open filehandles when deleting cache files.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache.go b/sdk/go/arvados/keep_cache.go
index eb9d4607bf..6aed35a215 100644
--- a/sdk/go/arvados/keep_cache.go
+++ b/sdk/go/arvados/keep_cache.go
@@ -260,12 +260,33 @@ func (cache *DiskCache) ReadAt(locator string, dst []byte, offset int) (int, err
var quickReadAtLostRace = errors.New("quickReadAt: lost race")
-func (cache *DiskCache) deleteHeldopen(cachefilename string, heldopen *openFileEnt) {
+// Remove the cache entry for the indicated cachefilename if it
+// matches expect (quickReadAt() usage), or if expect is nil (tidy()
+// usage).
+//
+// If expect is non-nil, close expect's filehandle.
+//
+// If expect is nil and a different cache entry is deleted, close its
+// filehandle.
+func (cache *DiskCache) deleteHeldopen(cachefilename string, expect *openFileEnt) {
+ needclose := expect
+
cache.heldopenLock.Lock()
- if cache.heldopen[cachefilename] == heldopen {
+ found := cache.heldopen[cachefilename]
+ if found != nil && (expect == nil || expect == found) {
delete(cache.heldopen, cachefilename)
+ needclose = found
}
cache.heldopenLock.Unlock()
+
+ if needclose != nil {
+ needclose.Lock()
+ defer needclose.Unlock()
+ if needclose.f != nil {
+ needclose.f.Close()
+ needclose.f = nil
+ }
+ }
}
// quickReadAt attempts to use a cached-filehandle approach to read
@@ -522,6 +543,7 @@ func (cache *DiskCache) tidy() {
deleted := 0
for _, ent := range ents {
os.Remove(ent.path)
+ go cache.deleteHeldopen(ent.path, nil)
deleted++
totalsize -= ent.size
if totalsize <= maxsize {
commit f9a0922e50904365e40b99372ed66f3a6f992cd7
Author: Tom Clegg <tom at curii.com>
Date: Mon Dec 18 20:19:35 2023 -0500
20318: Test mangling cache files while reading.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache_test.go b/sdk/go/arvados/keep_cache_test.go
index 23e7dfbd9f..6d11410470 100644
--- a/sdk/go/arvados/keep_cache_test.go
+++ b/sdk/go/arvados/keep_cache_test.go
@@ -11,6 +11,7 @@ import (
"errors"
"fmt"
"io"
+ "math/rand"
"os"
"sync"
"time"
@@ -131,7 +132,13 @@ func (s *keepCacheSuite) TestMaxSize(c *check.C) {
c.Check(err, check.IsNil)
}
-func (s *keepCacheSuite) TestConcurrentReaders(c *check.C) {
+func (s *keepCacheSuite) TestConcurrentReadersNoRefresh(c *check.C) {
+ s.testConcurrentReaders(c, true, false)
+}
+func (s *keepCacheSuite) TestConcurrentReadersMangleCache(c *check.C) {
+ s.testConcurrentReaders(c, false, true)
+}
+func (s *keepCacheSuite) testConcurrentReaders(c *check.C, cannotRefresh, mangleCache bool) {
blksize := 64000000
backend := &keepGatewayMemoryBacked{}
cache := DiskCache{
@@ -140,20 +147,61 @@ func (s *keepCacheSuite) TestConcurrentReaders(c *check.C) {
Dir: c.MkDir(),
Logger: ctxlog.TestLogger(c),
}
- ctx := context.Background()
+ ctx, cancel := context.WithCancel(context.Background())
+ defer cancel()
+
resp, err := cache.BlockWrite(ctx, BlockWriteOptions{
Data: make([]byte, blksize),
})
c.Check(err, check.IsNil)
- delete(backend.data, resp.Locator)
+ if cannotRefresh {
+ // Delete the block from the backing store, to ensure
+ // the cache doesn't rely on re-reading a block that
+ // it has just written.
+ delete(backend.data, resp.Locator)
+ }
+ if mangleCache {
+ // Replace cache files with truncated files (and
+ // delete them outright) while the ReadAt loop is
+ // running, to ensure the cache can re-fetch from the
+ // backend as needed.
+ var nRemove, nTrunc int
+ defer func() {
+ c.Logf("nRemove %d", nRemove)
+ c.Logf("nTrunc %d", nTrunc)
+ }()
+ go func() {
+ // Truncate/delete the cache file at various
+ // intervals. Readers should re-fetch/recover from
+ // this.
+ fnm := cache.cacheFile(resp.Locator)
+ for ctx.Err() == nil {
+ trunclen := rand.Int63() % int64(blksize*2)
+ if trunclen > int64(blksize) {
+ err := os.Remove(fnm)
+ if err == nil {
+ nRemove++
+ }
+ } else if os.WriteFile(fnm+"#", make([]byte, trunclen), 0700) == nil {
+ err := os.Rename(fnm+"#", fnm)
+ if err == nil {
+ nTrunc++
+ }
+ }
+ }
+ }()
+ }
failed := false
var wg sync.WaitGroup
- for offset := 0; offset < blksize; offset += 123456 {
- offset := offset
+ var slots = make(chan bool, 100) // limit concurrency / memory usage
+ for i := 0; i < 20000; i++ {
+ offset := (i * 123456) % blksize
+ slots <- true
wg.Add(1)
go func() {
defer wg.Done()
+ defer func() { <-slots }()
buf := make([]byte, 654321)
if offset+len(buf) > blksize {
buf = buf[:blksize-offset]
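Note the test's truncation trick uses the same write-to-temp-then-rename idiom the cache itself relies on when installing completed blocks, so readers observe either the old or the new file, never a partial write:

    tmp := fnm + "#" // temp name, as in the test above
    if os.WriteFile(tmp, data, 0700) == nil {
        os.Rename(tmp, fnm) // atomic replace on the same filesystem
    }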
commit fcfb6de8652973045d7c188d11817ef2471e7335
Author: Tom Clegg <tom at curii.com>
Date: Mon Dec 18 18:45:51 2023 -0500
20318: Add concurrent and sequential read benchmarks.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache_test.go b/sdk/go/arvados/keep_cache_test.go
index 128e47faa2..23e7dfbd9f 100644
--- a/sdk/go/arvados/keep_cache_test.go
+++ b/sdk/go/arvados/keep_cache_test.go
@@ -172,8 +172,73 @@ func (s *keepCacheSuite) TestConcurrentReaders(c *check.C) {
wg.Wait()
}
+var _ = check.Suite(&keepCacheBenchSuite{})
+
+type keepCacheBenchSuite struct {
+ blksize int
+ blkcount int
+ backend *keepGatewayMemoryBacked
+ cache *DiskCache
+ locators []string
+}
+
+func (s *keepCacheBenchSuite) SetUpTest(c *check.C) {
+ s.blksize = 64000000
+ s.blkcount = 8
+ s.backend = &keepGatewayMemoryBacked{}
+ s.cache = &DiskCache{
+ KeepGateway: s.backend,
+ MaxSize: int64(s.blksize),
+ Dir: c.MkDir(),
+ Logger: ctxlog.TestLogger(c),
+ }
+ s.locators = make([]string, s.blkcount)
+ data := make([]byte, s.blksize)
+ for b := 0; b < s.blkcount; b++ {
+ for i := range data {
+ data[i] = byte(b)
+ }
+ resp, err := s.cache.BlockWrite(context.Background(), BlockWriteOptions{
+ Data: data,
+ })
+ c.Assert(err, check.IsNil)
+ s.locators[b] = resp.Locator
+ }
+}
+
+func (s *keepCacheBenchSuite) BenchmarkConcurrentReads(c *check.C) {
+ var wg sync.WaitGroup
+ for i := 0; i < c.N; i++ {
+ i := i
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ buf := make([]byte, benchReadSize)
+ _, err := s.cache.ReadAt(s.locators[i%s.blkcount], buf, int((int64(i)*1234)%int64(s.blksize-benchReadSize)))
+ if err != nil {
+ c.Fail()
+ }
+ }()
+ }
+ wg.Wait()
+}
+
+func (s *keepCacheBenchSuite) BenchmarkSequentialReads(c *check.C) {
+ buf := make([]byte, benchReadSize)
+ for i := 0; i < c.N; i++ {
+ _, err := s.cache.ReadAt(s.locators[i%s.blkcount], buf, int((int64(i)*1234)%int64(s.blksize-benchReadSize)))
+ if err != nil {
+ c.Fail()
+ }
+ }
+}
+
const benchReadSize = 1000
+var _ = check.Suite(&fileOpsSuite{})
+
+type fileOpsSuite struct{}
+
// BenchmarkOpenClose and BenchmarkKeepOpen can be used to measure the
// potential performance improvement of caching filehandles rather
// than opening/closing the cache file for each read.
@@ -182,7 +247,7 @@ const benchReadSize = 1000
// improvement: ~636 MB/s when opening/closing the file for each
// 1000-byte read vs. ~2 GB/s when opening the file once and doing
// concurrent reads using the same file descriptor.
-func (s *keepCacheSuite) BenchmarkOpenClose(c *check.C) {
+func (s *fileOpsSuite) BenchmarkOpenClose(c *check.C) {
fnm := c.MkDir() + "/testfile"
os.WriteFile(fnm, make([]byte, 64000000), 0700)
var wg sync.WaitGroup
@@ -206,7 +271,7 @@ func (s *keepCacheSuite) BenchmarkOpenClose(c *check.C) {
wg.Wait()
}
-func (s *keepCacheSuite) BenchmarkKeepOpen(c *check.C) {
+func (s *fileOpsSuite) BenchmarkKeepOpen(c *check.C) {
fnm := c.MkDir() + "/testfile"
os.WriteFile(fnm, make([]byte, 64000000), 0700)
f, err := os.OpenFile(fnm, os.O_CREATE|os.O_RDWR, 0700)
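For reference, gocheck benchmarks like these do not run under a plain "go test"; with gopkg.in/check.v1 they are selected via the -check.b and -check.f flags (flag names come from the check package, not from Arvados tooling), e.g.:

    go test ./sdk/go/arvados -check.b -check.f 'BenchmarkConcurrentReads|BenchmarkSequentialReads'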
commit 6a54de705f1e129566ee7f5101fe5cbe3dbba548
Author: Tom Clegg <tom at curii.com>
Date: Thu Dec 14 20:17:04 2023 -0500
20318: Try to keep cache files open for subsequent/concurrent reads.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache.go b/sdk/go/arvados/keep_cache.go
index 4dabbd3506..eb9d4607bf 100644
--- a/sdk/go/arvados/keep_cache.go
+++ b/sdk/go/arvados/keep_cache.go
@@ -17,6 +17,7 @@ import (
"sort"
"strconv"
"strings"
+ "sync"
"sync/atomic"
"syscall"
"time"
@@ -42,6 +43,19 @@ type DiskCache struct {
tidying int32 // see tidy()
tidyHoldUntil time.Time
defaultMaxSize int64
+
+ // The "heldopen" fields are used to open cache files for
+ // reading, and leave them open for future/concurrent ReadAt
+ // operations. See quickReadAt.
+ heldopen map[string]*openFileEnt
+ heldopenMax int
+ heldopenLock sync.Mutex
+}
+
+type openFileEnt struct {
+ sync.RWMutex
+ f *os.File
+ err error // if err is non-nil, f should not be used.
}
const (
@@ -194,6 +208,9 @@ func (cache *DiskCache) BlockWrite(ctx context.Context, opts BlockWriteOptions)
func (cache *DiskCache) ReadAt(locator string, dst []byte, offset int) (int, error) {
cache.gotidy()
cachefilename := cache.cacheFile(locator)
+ if n, err := cache.quickReadAt(cachefilename, dst, offset); err == nil {
+ return n, err
+ }
f, err := cache.openFile(cachefilename, os.O_CREATE|os.O_RDWR)
if err != nil {
cache.debugf("ReadAt: open(%s) failed: %s", cachefilename, err)
@@ -241,8 +258,114 @@ func (cache *DiskCache) ReadAt(locator string, dst []byte, offset int) (int, err
return f.ReadAt(dst, int64(offset))
}
-// ReadAt reads the entire block from the wrapped KeepGateway into the
-// cache if needed, and writes it to the provided writer.
+var quickReadAtLostRace = errors.New("quickReadAt: lost race")
+
+func (cache *DiskCache) deleteHeldopen(cachefilename string, heldopen *openFileEnt) {
+ cache.heldopenLock.Lock()
+ if cache.heldopen[cachefilename] == heldopen {
+ delete(cache.heldopen, cachefilename)
+ }
+ cache.heldopenLock.Unlock()
+}
+
+// quickReadAt attempts to use a cached-filehandle approach to read
+// from the indicated file. The expectation is that the caller
+// (ReadAt) will try a more robust approach when this fails, so
+// quickReadAt doesn't try especially hard to ensure success in
+// races. In particular, when there are concurrent calls, and one
+// fails, that can cause others to fail too.
+func (cache *DiskCache) quickReadAt(cachefilename string, dst []byte, offset int) (int, error) {
+ isnew := false
+ cache.heldopenLock.Lock()
+ if cache.heldopenMax == 0 {
+ // Choose a reasonable limit on open cache files based
+ // on RLIMIT_NOFILE. Note Go automatically raises
+ // softlimit to hardlimit, so it's typically 1048576,
+ // not 1024.
+ lim := syscall.Rlimit{}
+ err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &lim)
+ if err != nil {
+ cache.heldopenMax = 256
+ } else if lim.Cur > 40000 {
+ cache.heldopenMax = 10000
+ } else {
+ cache.heldopenMax = int(lim.Cur / 4)
+ }
+ }
+ heldopen := cache.heldopen[cachefilename]
+ if heldopen == nil {
+ isnew = true
+ heldopen = &openFileEnt{}
+ if cache.heldopen == nil {
+ cache.heldopen = make(map[string]*openFileEnt, cache.heldopenMax)
+ } else if len(cache.heldopen) > cache.heldopenMax {
+ // Rather than go to the trouble of tracking
+ // last access time, just close all files, and
+ // open again as needed. Even in the worst
+ // pathological case, this causes one extra
+ // open+close per read, which is not
+ // especially bad (see benchmarks).
+ go func(m map[string]*openFileEnt) {
+ for _, heldopen := range m {
+ heldopen.Lock()
+ defer heldopen.Unlock()
+ if heldopen.f != nil {
+ heldopen.f.Close()
+ heldopen.f = nil
+ }
+ }
+ }(cache.heldopen)
+ cache.heldopen = nil
+ }
+ cache.heldopen[cachefilename] = heldopen
+ heldopen.Lock()
+ }
+ cache.heldopenLock.Unlock()
+
+ if isnew {
+ // Open and flock the file, then release the write
+ // lock to unblock any other goroutines that are
+ // waiting in the !isnew case above.
+ f, err := os.Open(cachefilename)
+ if err == nil {
+ err = syscall.Flock(int(f.Fd()), syscall.LOCK_SH)
+ if err == nil {
+ heldopen.f = f
+ } else {
+ f.Close()
+ }
+ }
+ if err != nil {
+ heldopen.err = err
+ go cache.deleteHeldopen(cachefilename, heldopen)
+ }
+ heldopen.Unlock()
+ }
+ // Acquire read lock to ensure (1) initialization is complete,
+ // if it's done by a different goroutine, and (2) any "delete
+ // old/unused entries" waits for our read to finish before
+ // closing the file.
+ heldopen.RLock()
+ defer heldopen.RUnlock()
+ if heldopen.err != nil {
+ // Other goroutine encountered an error during setup
+ return 0, heldopen.err
+ } else if heldopen.f == nil {
+ // Other goroutine closed the file before we got RLock
+ return 0, quickReadAtLostRace
+ }
+ n, err := heldopen.f.ReadAt(dst, int64(offset))
+ if err != nil {
+ // wait for any concurrent users to finish, then
+ // delete this cache entry in case reopening the
+ // backing file helps.
+ go cache.deleteHeldopen(cachefilename, heldopen)
+ }
+ return n, err
+}
+
+// BlockRead reads the entire block from the wrapped KeepGateway into
+// the cache if needed, and writes it to the provided writer.
func (cache *DiskCache) BlockRead(ctx context.Context, opts BlockReadOptions) (int, error) {
cache.gotidy()
cachefilename := cache.cacheFile(opts.Locator)
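The heldopen map above is a double-checked initialization pattern: the map lookup happens under heldopenLock, but the slow open+flock happens outside it, serialized by the entry's own RWMutex so that concurrent readers simply block until setup finishes. A stripped-down sketch of the same pattern (hypothetical names, error handling elided):

    type fileEnt struct {
        sync.RWMutex
        f   *os.File
        err error
    }

    var (
        poolMu sync.Mutex
        pool   = map[string]*fileEnt{}
    )

    func acquire(name string) *fileEnt {
        poolMu.Lock()
        ent, ok := pool[name]
        if !ok {
            ent = &fileEnt{}
            ent.Lock() // hold the write lock until initialized
            pool[name] = ent
            poolMu.Unlock()
            ent.f, ent.err = os.Open(name)
            ent.Unlock()
        } else {
            poolMu.Unlock()
        }
        ent.RLock() // blocks until initialization completes
        return ent  // caller checks ent.err, reads via ent.f, then RUnlocks
    }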
commit cdb63c3e5b6f11bfcb8244614d8a6fd309fbafce
Author: Tom Clegg <tom at curii.com>
Date: Thu Dec 14 20:12:41 2023 -0500
20318: Use const for cache-tidy hold timer.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache.go b/sdk/go/arvados/keep_cache.go
index b9c7fea4b0..4dabbd3506 100644
--- a/sdk/go/arvados/keep_cache.go
+++ b/sdk/go/arvados/keep_cache.go
@@ -44,9 +44,10 @@ type DiskCache struct {
defaultMaxSize int64
}
-var (
- cacheFileSuffix = ".keepcacheblock"
- tmpFileSuffix = ".tmp"
+const (
+ cacheFileSuffix = ".keepcacheblock"
+ tmpFileSuffix = ".tmp"
+ tidyHoldDuration = 10 * time.Second
)
func (cache *DiskCache) cacheFile(locator string) string {
@@ -306,7 +307,7 @@ func (cache *DiskCache) gotidy() {
}
go func() {
cache.tidy()
- cache.tidyHoldUntil = time.Now().Add(10 * time.Second)
+ cache.tidyHoldUntil = time.Now().Add(tidyHoldDuration)
atomic.AddInt32(&cache.tidying, -1)
}()
}
commit 01ecf2938246c47dda5cdf667c4dcf29f49693be
Author: Tom Clegg <tom at curii.com>
Date: Wed Nov 29 15:06:36 2023 -0500
20318: Benchmark open/close vs. shared fd.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache_test.go b/sdk/go/arvados/keep_cache_test.go
index 6cc5a2b8dd..128e47faa2 100644
--- a/sdk/go/arvados/keep_cache_test.go
+++ b/sdk/go/arvados/keep_cache_test.go
@@ -11,6 +11,7 @@ import (
"errors"
"fmt"
"io"
+ "os"
"sync"
"time"
@@ -170,3 +171,62 @@ func (s *keepCacheSuite) TestConcurrentReaders(c *check.C) {
}
wg.Wait()
}
+
+const benchReadSize = 1000
+
+// BenchmarkOpenClose and BenchmarkKeepOpen can be used to measure the
+// potential performance improvement of caching filehandles rather
+// than opening/closing the cache file for each read.
+//
+// Results from a development machine indicate a ~3x throughput
+// improvement: ~636 MB/s when opening/closing the file for each
+// 1000-byte read vs. ~2 GB/s when opening the file once and doing
+// concurrent reads using the same file descriptor.
+func (s *keepCacheSuite) BenchmarkOpenClose(c *check.C) {
+ fnm := c.MkDir() + "/testfile"
+ os.WriteFile(fnm, make([]byte, 64000000), 0700)
+ var wg sync.WaitGroup
+ for i := 0; i < c.N; i++ {
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ f, err := os.OpenFile(fnm, os.O_CREATE|os.O_RDWR, 0700)
+ if err != nil {
+ c.Fail()
+ return
+ }
+ _, err = f.ReadAt(make([]byte, benchReadSize), (int64(i)*1000000)%63123123)
+ if err != nil {
+ c.Fail()
+ return
+ }
+ f.Close()
+ }()
+ }
+ wg.Wait()
+}
+
+func (s *keepCacheSuite) BenchmarkKeepOpen(c *check.C) {
+ fnm := c.MkDir() + "/testfile"
+ os.WriteFile(fnm, make([]byte, 64000000), 0700)
+ f, err := os.OpenFile(fnm, os.O_CREATE|os.O_RDWR, 0700)
+ if err != nil {
+ c.Fail()
+ return
+ }
+ var wg sync.WaitGroup
+ for i := 0; i < c.N; i++ {
+ i := i
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ _, err = f.ReadAt(make([]byte, benchReadSize), (int64(i)*1000000)%63123123)
+ if err != nil {
+ c.Fail()
+ return
+ }
+ }()
+ }
+ wg.Wait()
+ f.Close()
+}
commit ef1a56aa4c6aff593767fcaba693ebc042df5d4b
Author: Tom Clegg <tom at curii.com>
Date: Mon Nov 27 16:25:56 2023 -0500
20318: Add filesystem cache tests.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/keep_cache_test.go b/sdk/go/arvados/keep_cache_test.go
new file mode 100644
index 0000000000..6cc5a2b8dd
--- /dev/null
+++ b/sdk/go/arvados/keep_cache_test.go
@@ -0,0 +1,172 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package arvados
+
+import (
+ "bytes"
+ "context"
+ "crypto/md5"
+ "errors"
+ "fmt"
+ "io"
+ "sync"
+ "time"
+
+ "git.arvados.org/arvados.git/sdk/go/ctxlog"
+ check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&keepCacheSuite{})
+
+type keepCacheSuite struct {
+}
+
+type keepGatewayBlackHole struct {
+}
+
+func (*keepGatewayBlackHole) ReadAt(locator string, dst []byte, offset int) (int, error) {
+ return 0, errors.New("block not found")
+}
+func (*keepGatewayBlackHole) BlockRead(ctx context.Context, opts BlockReadOptions) (int, error) {
+ return 0, errors.New("block not found")
+}
+func (*keepGatewayBlackHole) LocalLocator(locator string) (string, error) {
+ return locator, nil
+}
+func (*keepGatewayBlackHole) BlockWrite(ctx context.Context, opts BlockWriteOptions) (BlockWriteResponse, error) {
+ h := md5.New()
+ var size int64
+ if opts.Reader == nil {
+ size, _ = io.Copy(h, bytes.NewReader(opts.Data))
+ } else {
+ size, _ = io.Copy(h, opts.Reader)
+ }
+ return BlockWriteResponse{Locator: fmt.Sprintf("%x+%d", h.Sum(nil), size), Replicas: 1}, nil
+}
+
+type keepGatewayMemoryBacked struct {
+ mtx sync.RWMutex
+ data map[string][]byte
+}
+
+func (k *keepGatewayMemoryBacked) ReadAt(locator string, dst []byte, offset int) (int, error) {
+ k.mtx.RLock()
+ data := k.data[locator]
+ k.mtx.RUnlock()
+ if data == nil {
+ return 0, errors.New("block not found: " + locator)
+ }
+ var n int
+ if len(data) > offset {
+ n = copy(dst, data[offset:])
+ }
+ if n < len(dst) {
+ return n, io.EOF
+ }
+ return n, nil
+}
+func (k *keepGatewayMemoryBacked) BlockRead(ctx context.Context, opts BlockReadOptions) (int, error) {
+ k.mtx.RLock()
+ data := k.data[opts.Locator]
+ k.mtx.RUnlock()
+ if data == nil {
+ return 0, errors.New("block not found: " + opts.Locator)
+ }
+ return opts.WriteTo.Write(data)
+}
+func (k *keepGatewayMemoryBacked) LocalLocator(locator string) (string, error) {
+ return locator, nil
+}
+func (k *keepGatewayMemoryBacked) BlockWrite(ctx context.Context, opts BlockWriteOptions) (BlockWriteResponse, error) {
+ h := md5.New()
+ data := bytes.NewBuffer(nil)
+ if opts.Reader == nil {
+ data.Write(opts.Data)
+ h.Write(data.Bytes())
+ } else {
+ io.Copy(io.MultiWriter(h, data), opts.Reader)
+ }
+ locator := fmt.Sprintf("%x+%d", h.Sum(nil), data.Len())
+ k.mtx.Lock()
+ if k.data == nil {
+ k.data = map[string][]byte{}
+ }
+ k.data[locator] = data.Bytes()
+ k.mtx.Unlock()
+ return BlockWriteResponse{Locator: locator, Replicas: 1}, nil
+}
+
+func (s *keepCacheSuite) TestMaxSize(c *check.C) {
+ backend := &keepGatewayMemoryBacked{}
+ cache := DiskCache{
+ KeepGateway: backend,
+ MaxSize: 40000000,
+ Dir: c.MkDir(),
+ Logger: ctxlog.TestLogger(c),
+ }
+ ctx := context.Background()
+ resp1, err := cache.BlockWrite(ctx, BlockWriteOptions{
+ Data: make([]byte, 44000000),
+ })
+ c.Check(err, check.IsNil)
+ time.Sleep(time.Millisecond)
+ resp2, err := cache.BlockWrite(ctx, BlockWriteOptions{
+ Data: make([]byte, 32000000),
+ })
+ c.Check(err, check.IsNil)
+ delete(backend.data, resp1.Locator)
+ delete(backend.data, resp2.Locator)
+ cache.tidyHoldUntil = time.Time{}
+ cache.tidy()
+
+ n, err := cache.ReadAt(resp1.Locator, make([]byte, 2), 0)
+ c.Check(n, check.Equals, 0)
+ c.Check(err, check.ErrorMatches, `block not found: .*\+44000000`)
+
+ n, err = cache.ReadAt(resp2.Locator, make([]byte, 2), 0)
+ c.Check(n > 0, check.Equals, true)
+ c.Check(err, check.IsNil)
+}
+
+func (s *keepCacheSuite) TestConcurrentReaders(c *check.C) {
+ blksize := 64000000
+ backend := &keepGatewayMemoryBacked{}
+ cache := DiskCache{
+ KeepGateway: backend,
+ MaxSize: int64(blksize),
+ Dir: c.MkDir(),
+ Logger: ctxlog.TestLogger(c),
+ }
+ ctx := context.Background()
+ resp, err := cache.BlockWrite(ctx, BlockWriteOptions{
+ Data: make([]byte, blksize),
+ })
+ c.Check(err, check.IsNil)
+ delete(backend.data, resp.Locator)
+
+ failed := false
+ var wg sync.WaitGroup
+ for offset := 0; offset < blksize; offset += 123456 {
+ offset := offset
+ wg.Add(1)
+ go func() {
+ defer wg.Done()
+ buf := make([]byte, 654321)
+ if offset+len(buf) > blksize {
+ buf = buf[:blksize-offset]
+ }
+ n, err := cache.ReadAt(resp.Locator, buf, offset)
+ if failed {
+ // don't fill logs with subsequent errors
+ return
+ }
+ if !c.Check(err, check.IsNil, check.Commentf("offset=%d", offset)) {
+ failed = true
+ }
+ c.Assert(n, check.Equals, len(buf))
+ }()
+ }
+ wg.Wait()
+}
diff --git a/sdk/go/arvadostest/api.go b/sdk/go/arvadostest/api.go
index 4e214414d7..706e49542f 100644
--- a/sdk/go/arvadostest/api.go
+++ b/sdk/go/arvadostest/api.go
@@ -356,6 +356,26 @@ func (as *APIStub) APIClientAuthorizationGet(ctx context.Context, options arvado
as.appendCall(ctx, as.APIClientAuthorizationGet, options)
return arvados.APIClientAuthorization{}, as.Error
}
+func (as *APIStub) ReadAt(locator string, dst []byte, offset int) (int, error) {
+ as.appendCall(context.TODO(), as.ReadAt, struct {
+ locator string
+ dst []byte
+ offset int
+ }{locator, dst, offset})
+ return 0, as.Error
+}
+func (as *APIStub) BlockRead(ctx context.Context, options arvados.BlockReadOptions) (int, error) {
+ as.appendCall(ctx, as.BlockRead, options)
+ return 0, as.Error
+}
+func (as *APIStub) BlockWrite(ctx context.Context, options arvados.BlockWriteOptions) (arvados.BlockWriteResponse, error) {
+ as.appendCall(ctx, as.BlockWrite, options)
+ return arvados.BlockWriteResponse{}, as.Error
+}
+func (as *APIStub) LocalLocator(locator string) (int, error) {
+ as.appendCall(context.TODO(), as.LocalLocator, locator)
+ return 0, as.Error
+}
func (as *APIStub) appendCall(ctx context.Context, method interface{}, options interface{}) {
as.mtx.Lock()
diff --git a/sdk/go/arvadostest/keep_stub.go b/sdk/go/arvadostest/keep_stub.go
new file mode 100644
index 0000000000..ddfa3909bb
--- /dev/null
+++ b/sdk/go/arvadostest/keep_stub.go
@@ -0,0 +1,7 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package arvadostest
+
+type KeepStub struct{}
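Since arvados.KeepGateway is a plain interface, the black-hole and in-memory test gateways added above can be kept honest with compile-time assertions. These are not in the diff; a hedged suggestion, in the keep_cache_test.go (package arvados) context, would read:

    var _ KeepGateway = (*keepGatewayBlackHole)(nil)
    var _ KeepGateway = (*keepGatewayMemoryBacked)(nil)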
commit 52fa01051f4633e3bbbfcdf8c55994e7cd91212a
Author: Tom Clegg <tom at curii.com>
Date: Mon Nov 27 11:20:09 2023 -0500
20318: Add disk-backed block cache.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/api.go b/sdk/go/arvados/api.go
index f4ac1ab3c4..2a527567e5 100644
--- a/sdk/go/arvados/api.go
+++ b/sdk/go/arvados/api.go
@@ -240,11 +240,16 @@ type LogoutOptions struct {
ReturnTo string `json:"return_to"` // Redirect to this URL after logging out
}
+type BlockReadOptions struct {
+ Locator string
+ WriteTo io.Writer
+}
+
type BlockWriteOptions struct {
Hash string
Data []byte
- Reader io.Reader
- DataSize int // Must be set if Data is nil.
+ Reader io.Reader // Must be set if Data is nil.
+ DataSize int // Must be set if Data is nil.
RequestID string
StorageClasses []string
Replicas int
diff --git a/sdk/go/arvados/keep_cache.go b/sdk/go/arvados/keep_cache.go
new file mode 100644
index 0000000000..b9c7fea4b0
--- /dev/null
+++ b/sdk/go/arvados/keep_cache.go
@@ -0,0 +1,414 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package arvados
+
+import (
+ "bytes"
+ "context"
+ "crypto/md5"
+ "errors"
+ "fmt"
+ "io"
+ "io/fs"
+ "os"
+ "path/filepath"
+ "sort"
+ "strconv"
+ "strings"
+ "sync/atomic"
+ "syscall"
+ "time"
+
+ "github.com/sirupsen/logrus"
+ "golang.org/x/sys/unix"
+)
+
+type KeepGateway interface {
+ ReadAt(locator string, dst []byte, offset int) (int, error)
+ BlockRead(ctx context.Context, opts BlockReadOptions) (int, error)
+ BlockWrite(ctx context.Context, opts BlockWriteOptions) (BlockWriteResponse, error)
+ LocalLocator(locator string) (string, error)
+}
+
+// DiskCache wraps KeepGateway, adding a disk-based cache layer.
+type DiskCache struct {
+ KeepGateway
+ Dir string
+ MaxSize int64
+ Logger logrus.FieldLogger
+
+ tidying int32 // see tidy()
+ tidyHoldUntil time.Time
+ defaultMaxSize int64
+}
+
+var (
+ cacheFileSuffix = ".keepcacheblock"
+ tmpFileSuffix = ".tmp"
+)
+
+func (cache *DiskCache) cacheFile(locator string) string {
+ hash := locator
+ if i := strings.Index(hash, "+"); i > 0 {
+ hash = hash[:i]
+ }
+ return filepath.Join(cache.Dir, hash[:3], hash+cacheFileSuffix)
+}
+
+// Open a cache file, creating the parent dir if necessary.
+func (cache *DiskCache) openFile(name string, flags int) (*os.File, error) {
+ f, err := os.OpenFile(name, flags, 0600)
+ if os.IsNotExist(err) {
+ // Create the parent dir and try again. (We could have
+ // checked/created the parent dir before, but that
+ // would be less efficient in the much more common
+ // situation where it already exists.)
+ parent, _ := filepath.Split(name)
+ os.Mkdir(parent, 0700)
+ f, err = os.OpenFile(name, flags, 0600)
+ }
+ return f, err
+}
+
+// Rename a file, creating the new path's parent dir if necessary.
+func (cache *DiskCache) rename(old, new string) error {
+ if nil == os.Rename(old, new) {
+ return nil
+ }
+ parent, _ := filepath.Split(new)
+ os.Mkdir(parent, 0700)
+ return os.Rename(old, new)
+}
+
+func (cache *DiskCache) debugf(format string, args ...interface{}) {
+ logger := cache.Logger
+ if logger == nil {
+ return
+ }
+ logger.Debugf(format, args...)
+}
+
+// BlockWrite writes through to the wrapped KeepGateway, and (if
+// possible) retains a copy of the written block in the cache.
+func (cache *DiskCache) BlockWrite(ctx context.Context, opts BlockWriteOptions) (BlockWriteResponse, error) {
+ cache.gotidy()
+ unique := fmt.Sprintf("%x.%p%s", os.Getpid(), &opts, tmpFileSuffix)
+ tmpfilename := filepath.Join(cache.Dir, "tmp", unique)
+ tmpfile, err := cache.openFile(tmpfilename, os.O_CREATE|os.O_EXCL|os.O_RDWR)
+ if err != nil {
+ cache.debugf("BlockWrite: open(%s) failed: %s", tmpfilename, err)
+ return cache.KeepGateway.BlockWrite(ctx, opts)
+ }
+
+ ctx, cancel := context.WithCancel(ctx)
+ defer cancel()
+ copyerr := make(chan error, 1)
+
+ // Start a goroutine to copy the caller's source data to
+ // tmpfile, a hash checker, and (via pipe) the wrapped
+ // KeepGateway.
+ pipereader, pipewriter := io.Pipe()
+ defer pipereader.Close()
+ go func() {
+ defer tmpfile.Close()
+ defer os.Remove(tmpfilename)
+ defer pipewriter.Close()
+
+ // Copy from opts.Data or opts.Reader, depending on
+ // which was provided.
+ var src io.Reader
+ if opts.Data != nil {
+ src = bytes.NewReader(opts.Data)
+ } else {
+ src = opts.Reader
+ }
+
+ hashcheck := md5.New()
+ n, err := io.Copy(io.MultiWriter(tmpfile, pipewriter, hashcheck), src)
+ if err != nil {
+ copyerr <- err
+ cancel()
+ return
+ } else if opts.DataSize > 0 && opts.DataSize != int(n) {
+ copyerr <- fmt.Errorf("block size %d did not match provided size %d", n, opts.DataSize)
+ cancel()
+ return
+ }
+ err = tmpfile.Close()
+ if err != nil {
+ // Don't rename tmpfile into place, but allow
+ // the BlockWrite call to succeed if nothing
+ // else goes wrong.
+ return
+ }
+ hash := fmt.Sprintf("%x", hashcheck.Sum(nil))
+ if opts.Hash != "" && opts.Hash != hash {
+ // Even if the wrapped KeepGateway doesn't
+ // notice a problem, this should count as an
+ // error.
+ copyerr <- fmt.Errorf("block hash %s did not match provided hash %s", hash, opts.Hash)
+ cancel()
+ return
+ }
+ cachefilename := cache.cacheFile(hash)
+ err = cache.rename(tmpfilename, cachefilename)
+ if err != nil {
+ cache.debugf("BlockWrite: rename(%s, %s) failed: %s", tmpfilename, cachefilename, err)
+ }
+ }()
+
+ // Write through to the wrapped KeepGateway from the pipe,
+ // instead of the original reader.
+ newopts := opts
+ if newopts.DataSize == 0 {
+ newopts.DataSize = len(newopts.Data)
+ }
+ newopts.Reader = pipereader
+ newopts.Data = nil
+
+ resp, err := cache.KeepGateway.BlockWrite(ctx, newopts)
+ if len(copyerr) > 0 {
+ // If the copy-to-pipe goroutine failed, that error
+ // will be more helpful than the resulting "context
+ // canceled" or "read [from pipereader] failed" error
+ // seen by the wrapped KeepGateway.
+ //
+ // If the wrapped KeepGateway encounters an error
+ // before all the data is copied into the pipe, it
+ // stops reading from the pipe, which causes the
+ // io.Copy() in the goroutine to block until our
+ // deferred pipereader.Close() call runs. In that case
+ // len(copyerr)==0 here, so the wrapped KeepGateway
+ // error is the one we return to our caller.
+ err = <-copyerr
+ }
+ return resp, err
+}
+
+// ReadAt reads the entire block from the wrapped KeepGateway into the
+// cache if needed, and copies the requested portion into the provided
+// slice.
+func (cache *DiskCache) ReadAt(locator string, dst []byte, offset int) (int, error) {
+ cache.gotidy()
+ cachefilename := cache.cacheFile(locator)
+ f, err := cache.openFile(cachefilename, os.O_CREATE|os.O_RDWR)
+ if err != nil {
+ cache.debugf("ReadAt: open(%s) failed: %s", cachefilename, err)
+ return cache.KeepGateway.ReadAt(locator, dst, offset)
+ }
+ defer f.Close()
+
+ err = syscall.Flock(int(f.Fd()), syscall.LOCK_SH)
+ if err != nil {
+ return 0, fmt.Errorf("flock(%s, lock_sh) failed: %w", cachefilename, err)
+ }
+
+ size, err := f.Seek(0, io.SeekEnd)
+ if err != nil {
+ return 0, fmt.Errorf("seek(%s, seek_end) failed: %w", cachefilename, err)
+ }
+ if size < int64(len(dst)+offset) {
+ // The cache file seems to be truncated or empty
+ // (possibly because we just created it). Wait for an
+ // exclusive lock, then check again (in case someone
+ // else is doing the same thing) before trying to
+ // retrieve the entire block.
+ err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
+ if err != nil {
+ return 0, fmt.Errorf("flock(%s, lock_ex) failed: %w", cachefilename, err)
+ }
+ }
+ size, err = f.Seek(0, io.SeekEnd)
+ if err != nil {
+ return 0, fmt.Errorf("seek(%s, seek_end) failed: %w", cachefilename, err)
+ }
+ if size < int64(len(dst)+offset) {
+ // The cache file is truncated or empty, and we own it
+ // now. Fill it.
+ _, err = f.Seek(0, io.SeekStart)
+ if err != nil {
+ return 0, fmt.Errorf("seek(%s, seek_start) failed: %w", cachefilename, err)
+ }
+ n, err := cache.KeepGateway.BlockRead(context.Background(), BlockReadOptions{Locator: locator, WriteTo: f})
+ if err != nil {
+ return 0, err
+ }
+ f.Truncate(int64(n))
+ }
+ return f.ReadAt(dst, int64(offset))
+}
+
+// ReadAt reads the entire block from the wrapped KeepGateway into the
+// cache if needed, and writes it to the provided writer.
+func (cache *DiskCache) BlockRead(ctx context.Context, opts BlockReadOptions) (int, error) {
+ cache.gotidy()
+ cachefilename := cache.cacheFile(opts.Locator)
+ f, err := cache.openFile(cachefilename, os.O_CREATE|os.O_RDWR)
+ if err != nil {
+ cache.debugf("BlockRead: open(%s) failed: %s", cachefilename, err)
+ return cache.KeepGateway.BlockRead(ctx, opts)
+ }
+ defer f.Close()
+
+ i := strings.Index(opts.Locator, "+")
+ if i < 0 || i >= len(opts.Locator) {
+ return 0, errors.New("invalid block locator: no size hint")
+ }
+ sizestr := opts.Locator[i+1:]
+ i = strings.Index(sizestr, "+")
+ if i > 0 {
+ sizestr = sizestr[:i]
+ }
+ blocksize, err := strconv.ParseInt(sizestr, 10, 32)
+ if err != nil || blocksize < 0 {
+ return 0, errors.New("invalid block locator: invalid size hint")
+ }
+
+ err = syscall.Flock(int(f.Fd()), syscall.LOCK_SH)
+ if err != nil {
+ return 0, err
+ }
+ filesize, err := f.Seek(0, io.SeekEnd)
+ if err != nil {
+ return 0, err
+ }
+ _, err = f.Seek(0, io.SeekStart)
+ if err != nil {
+ return 0, err
+ }
+ if filesize == blocksize {
+ n, err := io.Copy(opts.WriteTo, f)
+ return int(n), err
+ }
+ err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
+ if err != nil {
+ return 0, err
+ }
+ opts.WriteTo = io.MultiWriter(f, opts.WriteTo)
+ n, err := cache.KeepGateway.BlockRead(ctx, opts)
+ if err != nil {
+ return int(n), err
+ }
+ f.Truncate(int64(n))
+ return n, nil
+}
+
+// Start a tidy() goroutine, unless one is already running / recently
+// finished.
+func (cache *DiskCache) gotidy() {
+ // Return quickly if another tidy goroutine is running in this process.
+ n := atomic.AddInt32(&cache.tidying, 1)
+ if n != 1 || time.Now().Before(cache.tidyHoldUntil) {
+ atomic.AddInt32(&cache.tidying, -1)
+ return
+ }
+ go func() {
+ cache.tidy()
+ cache.tidyHoldUntil = time.Now().Add(10 * time.Second)
+ atomic.AddInt32(&cache.tidying, -1)
+ }()
+}
+
+// Delete cache files as needed to control disk usage.
+func (cache *DiskCache) tidy() {
+ maxsize := cache.MaxSize
+ if maxsize < 1 {
+ if maxsize = atomic.LoadInt64(&cache.defaultMaxSize); maxsize == 0 {
+ var stat unix.Statfs_t
+ if nil == unix.Statfs(cache.Dir, &stat) {
+ maxsize = int64(stat.Bavail) * stat.Bsize / 10
+ }
+ atomic.StoreInt64(&cache.defaultMaxSize, maxsize)
+ }
+ }
+
+ // Bail if a tidy goroutine is running in a different process.
+ lockfile, err := cache.openFile(filepath.Join(cache.Dir, "tmp", "tidy.lock"), os.O_CREATE|os.O_WRONLY)
+ if err != nil {
+ return
+ }
+ defer lockfile.Close()
+ err = syscall.Flock(int(lockfile.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
+ if err != nil {
+ return
+ }
+
+ type entT struct {
+ path string
+ atime time.Time
+ size int64
+ }
+ var ents []entT
+ var totalsize int64
+ filepath.Walk(cache.Dir, func(path string, info fs.FileInfo, err error) error {
+ if err != nil {
+ cache.debugf("tidy: skipping dir %s: %s", path, err)
+ return nil
+ }
+ if info.IsDir() {
+ return nil
+ }
+ if !strings.HasSuffix(path, cacheFileSuffix) && !strings.HasSuffix(path, tmpFileSuffix) {
+ return nil
+ }
+ var atime time.Time
+ if stat, ok := info.Sys().(*syscall.Stat_t); ok {
+ // Access time is available (hopefully the
+ // filesystem is not mounted with noatime)
+ atime = time.Unix(stat.Atim.Sec, stat.Atim.Nsec)
+ } else {
+ // If access time isn't available we fall back
+ // to sorting by modification time.
+ atime = info.ModTime()
+ }
+ ents = append(ents, entT{path, atime, info.Size()})
+ totalsize += info.Size()
+ return nil
+ })
+ if cache.Logger != nil {
+ cache.Logger.WithFields(logrus.Fields{
+ "totalsize": totalsize,
+ "maxsize": maxsize,
+ }).Debugf("DiskCache: checked current cache usage")
+ }
+
+ // If MaxSize wasn't specified and we failed to come up with a
+ // defaultSize above, use the larger of {current cache size, 1
+ // GiB} as the defaultSize for subsequent tidy() operations.
+ if maxsize == 0 {
+ if totalsize < 1<<30 {
+ atomic.StoreInt64(&cache.defaultMaxSize, 1<<30)
+ } else {
+ atomic.StoreInt64(&cache.defaultMaxSize, totalsize)
+ }
+ cache.debugf("found initial size %d, setting defaultMaxSize %d", totalsize, cache.defaultMaxSize)
+ return
+ }
+
+ if totalsize <= maxsize {
+ return
+ }
+
+ // Delete oldest entries until totalsize < maxsize.
+ sort.Slice(ents, func(i, j int) bool {
+ return ents[i].atime.Before(ents[j].atime)
+ })
+ deleted := 0
+ for _, ent := range ents {
+ os.Remove(ent.path)
+ deleted++
+ totalsize -= ent.size
+ if totalsize <= maxsize {
+ break
+ }
+ }
+
+ if cache.Logger != nil {
+ cache.Logger.WithFields(logrus.Fields{
+ "deleted": deleted,
+ "totalsize": totalsize,
+ }).Debugf("DiskCache: remaining cache usage after deleting")
+ }
+}
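One detail of ReadAt above worth calling out: a shared flock suffices for any number of concurrent readers of a complete cache file, and a reader only escalates to an exclusive flock when the file looks short, re-checking the size afterwards in case another process filled it while we waited. A compressed sketch of that pattern, with size() and fill() as hypothetical stand-ins for the seek-to-end size check and the upstream fetch:

    f, _ := os.OpenFile(cachefilename, os.O_CREATE|os.O_RDWR, 0600)
    defer f.Close()
    syscall.Flock(int(f.Fd()), syscall.LOCK_SH) // readers can share
    if size(f) < wanted {
        syscall.Flock(int(f.Fd()), syscall.LOCK_EX) // writer excludes everyone
        if size(f) < wanted {
            fill(f) // we own it now: fetch the whole block from upstream
        }
    }
    f.ReadAt(dst, int64(offset))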
diff --git a/sdk/go/keepclient/block_cache.go b/sdk/go/keepclient/block_cache.go
index 89eecc6e27..37eee4de20 100644
--- a/sdk/go/keepclient/block_cache.go
+++ b/sdk/go/keepclient/block_cache.go
@@ -5,13 +5,16 @@
package keepclient
import (
- "fmt"
+ "bytes"
+ "context"
"io"
"sort"
"strconv"
"strings"
"sync"
"time"
+
+ "git.arvados.org/arvados.git/sdk/go/arvados"
)
var DefaultBlockCache = &BlockCache{}
@@ -54,8 +57,8 @@ func (c *BlockCache) Sweep() {
// ReadAt returns data from the cache, first retrieving it from Keep if
// necessary.
-func (c *BlockCache) ReadAt(kc *KeepClient, locator string, p []byte, off int) (int, error) {
- buf, err := c.Get(kc, locator)
+func (c *BlockCache) ReadAt(upstream arvados.KeepGateway, locator string, p []byte, off int) (int, error) {
+ buf, err := c.get(upstream, locator)
if err != nil {
return 0, err
}
@@ -65,9 +68,9 @@ func (c *BlockCache) ReadAt(kc *KeepClient, locator string, p []byte, off int) (
return copy(p, buf[off:]), nil
}
-// Get returns data from the cache, first retrieving it from Keep if
+// Get a block from the cache, first retrieving it from Keep if
// necessary.
-func (c *BlockCache) Get(kc *KeepClient, locator string) ([]byte, error) {
+func (c *BlockCache) get(upstream arvados.KeepGateway, locator string) ([]byte, error) {
cacheKey := locator[:32]
bufsize := BLOCKSIZE
if parts := strings.SplitN(locator, "+", 3); len(parts) >= 2 {
@@ -88,21 +91,10 @@ func (c *BlockCache) Get(kc *KeepClient, locator string) ([]byte, error) {
}
c.cache[cacheKey] = b
go func() {
- rdr, size, _, err := kc.Get(locator)
- var data []byte
- if err == nil {
- data = make([]byte, size, bufsize)
- _, err = io.ReadFull(rdr, data)
- err2 := rdr.Close()
- if err == nil && err2 != nil {
- err = fmt.Errorf("close(): %w", err2)
- }
- if err != nil {
- err = fmt.Errorf("Get %s: %w", locator, err)
- }
- }
+ buf := bytes.NewBuffer(make([]byte, 0, bufsize))
+ _, err := upstream.BlockRead(context.Background(), arvados.BlockReadOptions{Locator: locator, WriteTo: buf})
c.mtx.Lock()
- b.data, b.err = data, err
+ b.data, b.err = buf.Bytes(), err
c.mtx.Unlock()
close(b.fetched)
go c.Sweep()
diff --git a/sdk/go/keepclient/collectionreader_test.go b/sdk/go/keepclient/collectionreader_test.go
index 75603f1baa..65dcd9ac8a 100644
--- a/sdk/go/keepclient/collectionreader_test.go
+++ b/sdk/go/keepclient/collectionreader_test.go
@@ -237,7 +237,15 @@ func (s *CollectionReaderUnit) TestCollectionReaderManyBlocks(c *check.C) {
}
func (s *CollectionReaderUnit) TestCollectionReaderCloseEarly(c *check.C) {
+ // Disable disk cache
+ defer func(home string) {
+ os.Setenv("HOME", home)
+ }(os.Getenv("HOME"))
+ os.Setenv("HOME", "")
+
+ // Disable memory cache
s.kc.BlockCache = &BlockCache{}
+
s.kc.PutB([]byte("foo"))
s.kc.PutB([]byte("bar"))
s.kc.PutB([]byte("baz"))
diff --git a/sdk/go/keepclient/gateway_shim.go b/sdk/go/keepclient/gateway_shim.go
new file mode 100644
index 0000000000..262d612640
--- /dev/null
+++ b/sdk/go/keepclient/gateway_shim.go
@@ -0,0 +1,67 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: Apache-2.0
+
+package keepclient
+
+import (
+ "context"
+ "io"
+
+ "git.arvados.org/arvados.git/sdk/go/arvados"
+)
+
+// keepViaHTTP implements arvados.KeepGateway by using a KeepClient to
+// do upstream requests to keepstore and keepproxy.
+type keepViaHTTP struct {
+ *KeepClient
+}
+
+func (kvh *keepViaHTTP) ReadAt(locator string, dst []byte, offset int) (int, error) {
+ rdr, _, _, _, err := kvh.getOrHead("GET", locator, nil)
+ if err != nil {
+ return 0, err
+ }
+ defer rdr.Close()
+ _, err = io.CopyN(io.Discard, rdr, int64(offset))
+ if err != nil {
+ return 0, err
+ }
+ n, err := rdr.Read(dst)
+ return int(n), err
+}
+
+func (kvh *keepViaHTTP) BlockRead(ctx context.Context, opts arvados.BlockReadOptions) (int, error) {
+ rdr, _, _, _, err := kvh.getOrHead("GET", opts.Locator, nil)
+ if err != nil {
+ return 0, err
+ }
+ defer rdr.Close()
+ n, err := io.Copy(opts.WriteTo, rdr)
+ return int(n), err
+}
+
+// keepViaBlockCache implements arvados.KeepGateway by using the given
+// KeepClient's BlockCache with the wrapped KeepGateway.
+//
+// Note the whole KeepClient gets passed in instead of just its
+// cache. This ensures the new BlockCache gets used if it changes
+// after keepViaBlockCache is initialized.
+type keepViaBlockCache struct {
+ kc *KeepClient
+ arvados.KeepGateway
+}
+
+func (kvbc *keepViaBlockCache) ReadAt(locator string, dst []byte, offset int) (int, error) {
+ return kvbc.kc.cache().ReadAt(kvbc.KeepGateway, locator, dst, offset)
+}
+
+func (kvbc *keepViaBlockCache) BlockRead(ctx context.Context, opts arvados.BlockReadOptions) (int, error) {
+ rdr, _, _, _, err := kvbc.kc.getOrHead("GET", opts.Locator, nil)
+ if err != nil {
+ return 0, err
+ }
+ defer rdr.Close()
+ n, err := io.Copy(opts.WriteTo, rdr)
+ return int(n), err
+}
diff --git a/sdk/go/keepclient/keepclient.go b/sdk/go/keepclient/keepclient.go
index 68ac886ddd..18614a77eb 100644
--- a/sdk/go/keepclient/keepclient.go
+++ b/sdk/go/keepclient/keepclient.go
@@ -16,6 +16,8 @@ import (
"io/ioutil"
"net"
"net/http"
+ "os"
+ "path/filepath"
"regexp"
"strconv"
"strings"
@@ -118,6 +120,8 @@ type KeepClient struct {
// Disable automatic discovery of keep services
disableDiscovery bool
+
+ gatewayStack arvados.KeepGateway
}
func (kc *KeepClient) loadDefaultClasses() error {
@@ -332,6 +336,45 @@ func (kc *KeepClient) getOrHead(method string, locator string, header http.Heade
return nil, 0, "", nil, err
}
+// upstreamGateway creates/returns the KeepGateway stack used to read
+// and write data: a memory cache, a disk-backed cache if available,
+// and an http backend.
+func (kc *KeepClient) upstreamGateway() arvados.KeepGateway {
+ kc.lock.Lock()
+ defer kc.lock.Unlock()
+ if kc.gatewayStack != nil {
+ return kc.gatewayStack
+ }
+ var stack arvados.KeepGateway = &keepViaHTTP{kc}
+
+ // Wrap with a disk cache, if cache dir is writable
+ home := os.Getenv("HOME")
+ if fi, err := os.Stat(home); home != "" && err == nil && fi.IsDir() {
+ var err error
+ dir := home
+ for _, part := range []string{".cache", "arvados", "keep"} {
+ dir = filepath.Join(dir, part)
+ err = os.Mkdir(dir, 0700)
+ }
+ if err == nil || os.IsExist(err) {
+ os.Mkdir(filepath.Join(dir, "tmp"), 0700)
+ err = os.WriteFile(filepath.Join(dir, "tmp", "check.tmp"), nil, 0600)
+ if err == nil {
+ stack = &arvados.DiskCache{
+ Dir: dir,
+ KeepGateway: stack,
+ }
+ }
+ }
+ }
+ stack = &keepViaBlockCache{
+ kc: kc,
+ KeepGateway: stack,
+ }
+ kc.gatewayStack = stack
+ return stack
+}
+
// LocalLocator returns a locator equivalent to the one supplied, but
// with a valid signature from the local cluster. If the given locator
// already has a local signature, it is returned unchanged.
@@ -369,7 +412,7 @@ func (kc *KeepClient) Get(locator string) (io.ReadCloser, int64, string, error)
// ReadAt retrieves a portion of block from the cache if it's
// present, otherwise from the network.
func (kc *KeepClient) ReadAt(locator string, p []byte, off int) (int, error) {
- return kc.cache().ReadAt(kc, locator, p, off)
+ return kc.upstreamGateway().ReadAt(locator, p, off)
}
// Ask verifies that a block with the given hash is available and
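Taken together, this initial commit of the series (listed last here) establishes the read path used by everything above it: callers stream blocks to an io.Writer via BlockReadOptions instead of receiving a buffer. A hedged end-to-end example, assuming gw is any arvados.KeepGateway (a bare keepViaHTTP, or a DiskCache wrapping one):

    var buf bytes.Buffer
    n, err := gw.BlockRead(context.Background(), arvados.BlockReadOptions{
        Locator: locator, // e.g. "acbd18db4cc2f85cedef654fccc4a4d8+3"
        WriteTo: &buf,
    })
    if err != nil {
        // handle error
    }
    _ = n // number of bytes written to buf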
-----------------------------------------------------------------------
hooks/post-receive