[arvados] created: 2.7.0-6421-g2213910357
git repository hosting
git at public.arvados.org
Mon Apr 15 14:16:06 UTC 2024
at 2213910357aaccf3797b34e1f821e5ffab0679fe (commit)
commit 2213910357aaccf3797b34e1f821e5ffab0679fe
Author: Tom Clegg <tom at curii.com>
Date: Mon Apr 15 10:15:57 2024 -0400
21696: Add "large collection" test with large files.
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom at curii.com>
diff --git a/sdk/go/arvados/fs_collection_test.go b/sdk/go/arvados/fs_collection_test.go
index a29371b76c..5d9c52c860 100644
--- a/sdk/go/arvados/fs_collection_test.go
+++ b/sdk/go/arvados/fs_collection_test.go
@@ -1639,29 +1639,40 @@ type CollectionFSUnitSuite struct{}
var _ = check.Suite(&CollectionFSUnitSuite{})
// expect ~2 seconds to load a manifest with 256K files
-func (s *CollectionFSUnitSuite) TestLargeManifest(c *check.C) {
+func (s *CollectionFSUnitSuite) TestLargeManifest_ManyFiles(c *check.C) {
if testing.Short() {
c.Skip("slow")
}
+ s.testLargeManifest(c, 512, 512, 1)
+}
- const (
- dirCount = 512
- fileCount = 512
- )
+func (s *CollectionFSUnitSuite) TestLargeManifest_LargeFiles(c *check.C) {
+ if testing.Short() {
+ c.Skip("slow")
+ }
+ s.testLargeManifest(c, 1, 800, 1000)
+}
+func (s *CollectionFSUnitSuite) testLargeManifest(c *check.C, dirCount, filesPerDir, blocksPerFile int) {
+ const blksize = 1 << 26
+ c.Logf("%s building manifest with dirCount=%d filesPerDir=%d blocksPerFile=%d", time.Now(), dirCount, filesPerDir, blocksPerFile)
mb := bytes.NewBuffer(make([]byte, 0, 40000000))
+ blkid := 0
for i := 0; i < dirCount; i++ {
fmt.Fprintf(mb, "./dir%d", i)
- for j := 0; j <= fileCount; j++ {
- fmt.Fprintf(mb, " %032x+42+A%040x@%08x", j, j, j)
+ for j := 0; j < filesPerDir; j++ {
+ for k := 0; k < blocksPerFile; k++ {
+ blkid++
+ fmt.Fprintf(mb, " %032x+%d+A%040x@%08x", blkid, blksize, blkid, blkid)
+ }
}
- for j := 0; j < fileCount; j++ {
- fmt.Fprintf(mb, " %d:%d:dir%d/file%d", j*42+21, 42, j, j)
+ for j := 0; j < filesPerDir; j++ {
+ fmt.Fprintf(mb, " %d:%d:dir%d/file%d", j*blocksPerFile*blksize, blocksPerFile*blksize, j, j)
}
mb.Write([]byte{'\n'})
}
coll := Collection{ManifestText: mb.String()}
- c.Logf("%s built", time.Now())
+ c.Logf("%s built manifest size=%d", time.Now(), mb.Len())
var memstats runtime.MemStats
runtime.ReadMemStats(&memstats)
@@ -1670,17 +1681,27 @@ func (s *CollectionFSUnitSuite) TestLargeManifest(c *check.C) {
f, err := coll.FileSystem(NewClientFromEnv(), &keepClientStub{})
c.Check(err, check.IsNil)
c.Logf("%s loaded", time.Now())
- c.Check(f.Size(), check.Equals, int64(42*dirCount*fileCount))
+ c.Check(f.Size(), check.Equals, int64(dirCount*filesPerDir*blocksPerFile*blksize))
+ // Stat() and OpenFile() each file. This mimics the behavior
+ // of webdav propfind, which opens each file even when just
+ // listing directory entries.
for i := 0; i < dirCount; i++ {
- for j := 0; j < fileCount; j++ {
- f.Stat(fmt.Sprintf("./dir%d/dir%d/file%d", i, j, j))
+ for j := 0; j < filesPerDir; j++ {
+ fnm := fmt.Sprintf("./dir%d/dir%d/file%d", i, j, j)
+ fi, err := f.Stat(fnm)
+ c.Assert(err, check.IsNil)
+ c.Check(fi.IsDir(), check.Equals, false)
+ f, err := f.OpenFile(fnm, os.O_RDONLY, 0)
+ c.Assert(err, check.IsNil)
+ f.Close()
}
}
- c.Logf("%s Stat() x %d", time.Now(), dirCount*fileCount)
+ c.Logf("%s OpenFile() x %d", time.Now(), dirCount*filesPerDir)
runtime.ReadMemStats(&memstats)
c.Logf("%s Alloc=%d Sys=%d", time.Now(), memstats.Alloc, memstats.Sys)
+ c.Logf("%s MemorySize=%d", time.Now(), f.MemorySize())
}
// Gocheck boilerplate
-----------------------------------------------------------------------
hooks/post-receive
--
More information about the arvados-commits
mailing list