21696: Add "large collection" test with large files.
authorTom Clegg <tom@curii.com>
Mon, 15 Apr 2024 14:15:57 +0000 (10:15 -0400)
committerTom Clegg <tom@curii.com>
Mon, 15 Apr 2024 14:15:57 +0000 (10:15 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

sdk/go/arvados/fs_collection_test.go

index a29371b76c2aa29790b82cd8d8e1e38603dcf991..5d9c52c860d3e1fd59c9b7896741444e9ed24c04 100644 (file)
@@ -1639,29 +1639,40 @@ type CollectionFSUnitSuite struct{}
 var _ = check.Suite(&CollectionFSUnitSuite{})
 
 // expect ~2 seconds to load a manifest with 256K files
-func (s *CollectionFSUnitSuite) TestLargeManifest(c *check.C) {
+func (s *CollectionFSUnitSuite) TestLargeManifest_ManyFiles(c *check.C) {
        if testing.Short() {
                c.Skip("slow")
        }
+       s.testLargeManifest(c, 512, 512, 1)
+}
 
-       const (
-               dirCount  = 512
-               fileCount = 512
-       )
+func (s *CollectionFSUnitSuite) TestLargeManifest_LargeFiles(c *check.C) {
+       if testing.Short() {
+               c.Skip("slow")
+       }
+       s.testLargeManifest(c, 1, 800, 1000)
+}
 
+func (s *CollectionFSUnitSuite) testLargeManifest(c *check.C, dirCount, filesPerDir, blocksPerFile int) {
+       const blksize = 1 << 26
+       c.Logf("%s building manifest with dirCount=%d filesPerDir=%d blocksPerFile=%d", time.Now(), dirCount, filesPerDir, blocksPerFile)
        mb := bytes.NewBuffer(make([]byte, 0, 40000000))
+       blkid := 0
        for i := 0; i < dirCount; i++ {
                fmt.Fprintf(mb, "./dir%d", i)
-               for j := 0; j <= fileCount; j++ {
-                       fmt.Fprintf(mb, " %032x+42+A%040x@%08x", j, j, j)
+               for j := 0; j < filesPerDir; j++ {
+                       for k := 0; k < blocksPerFile; k++ {
+                               blkid++
+                               fmt.Fprintf(mb, " %032x+%d+A%040x@%08x", blkid, blksize, blkid, blkid)
+                       }
                }
-               for j := 0; j < fileCount; j++ {
-                       fmt.Fprintf(mb, " %d:%d:dir%d/file%d", j*42+21, 42, j, j)
+               for j := 0; j < filesPerDir; j++ {
+                       fmt.Fprintf(mb, " %d:%d:dir%d/file%d", j*blocksPerFile*blksize, blocksPerFile*blksize, j, j)
                }
                mb.Write([]byte{'\n'})
        }
        coll := Collection{ManifestText: mb.String()}
-       c.Logf("%s built", time.Now())
+       c.Logf("%s built manifest size=%d", time.Now(), mb.Len())
 
        var memstats runtime.MemStats
        runtime.ReadMemStats(&memstats)
@@ -1670,17 +1681,27 @@ func (s *CollectionFSUnitSuite) TestLargeManifest(c *check.C) {
        f, err := coll.FileSystem(NewClientFromEnv(), &keepClientStub{})
        c.Check(err, check.IsNil)
        c.Logf("%s loaded", time.Now())
-       c.Check(f.Size(), check.Equals, int64(42*dirCount*fileCount))
+       c.Check(f.Size(), check.Equals, int64(dirCount*filesPerDir*blocksPerFile*blksize))
 
+       // Stat() and OpenFile() each file. This mimics the behavior
+       // of webdav propfind, which opens each file even when just
+       // listing directory entries.
        for i := 0; i < dirCount; i++ {
-               for j := 0; j < fileCount; j++ {
-                       f.Stat(fmt.Sprintf("./dir%d/dir%d/file%d", i, j, j))
+               for j := 0; j < filesPerDir; j++ {
+                       fnm := fmt.Sprintf("./dir%d/dir%d/file%d", i, j, j)
+                       fi, err := f.Stat(fnm)
+                       c.Assert(err, check.IsNil)
+                       c.Check(fi.IsDir(), check.Equals, false)
+                       f, err := f.OpenFile(fnm, os.O_RDONLY, 0)
+                       c.Assert(err, check.IsNil)
+                       f.Close()
                }
        }
-       c.Logf("%s Stat() x %d", time.Now(), dirCount*fileCount)
+       c.Logf("%s OpenFile() x %d", time.Now(), dirCount*filesPerDir)
 
        runtime.ReadMemStats(&memstats)
        c.Logf("%s Alloc=%d Sys=%d", time.Now(), memstats.Alloc, memstats.Sys)
+       c.Logf("%s MemorySize=%d", time.Now(), f.MemorySize())
 }
 
 // Gocheck boilerplate