X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/44c93373e97da98645d41ae8f09c6eef6788bb26..HEAD:/sdk/go/arvados/fs_collection_test.go diff --git a/sdk/go/arvados/fs_collection_test.go b/sdk/go/arvados/fs_collection_test.go index b221aaa083..b57f9aa30f 100644 --- a/sdk/go/arvados/fs_collection_test.go +++ b/sdk/go/arvados/fs_collection_test.go @@ -124,6 +124,38 @@ func (s *CollectionFSSuite) SetUpTest(c *check.C) { c.Assert(err, check.IsNil) } +func (s *CollectionFSSuite) TestSyncNonCanonicalManifest(c *check.C) { + var coll Collection + err := s.client.RequestAndDecode(&coll, "GET", "arvados/v1/collections/"+fixtureFooAndBarFilesInDirUUID, nil, nil) + c.Assert(err, check.IsNil) + mtxt := strings.Replace(coll.ManifestText, "3:3:bar 0:3:foo", "0:3:foo 3:3:bar", -1) + c.Assert(mtxt, check.Not(check.Equals), coll.ManifestText) + err = s.client.RequestAndDecode(&coll, "POST", "arvados/v1/collections", nil, map[string]interface{}{ + "collection": map[string]interface{}{ + "manifest_text": mtxt}}) + c.Assert(err, check.IsNil) + // In order for the rest of the test to work as intended, the API server + // needs to retain the file ordering we set manually. We check that here. + // We can't check `mtxt == coll.ManifestText` because the API server + // might've returned new block signatures if the GET and POST happened in + // different seconds. + expectPattern := `\./dir1 \S+ 0:3:foo 3:3:bar\n` + c.Assert(coll.ManifestText, check.Matches, expectPattern) + + fs, err := coll.FileSystem(s.client, s.kc) + c.Assert(err, check.IsNil) + err = fs.Sync() + c.Check(err, check.IsNil) + + // fs had no local changes, so Sync should not have saved + // anything back to the API/database. (If it did, we would see + // the manifest rewritten in canonical order.) + var saved Collection + err = s.client.RequestAndDecode(&saved, "GET", "arvados/v1/collections/"+coll.UUID, nil, nil) + c.Assert(err, check.IsNil) + c.Check(saved.ManifestText, check.Matches, expectPattern) +} + func (s *CollectionFSSuite) TestHttpFileSystemInterface(c *check.C) { _, ok := s.fs.(http.FileSystem) c.Check(ok, check.Equals, true) @@ -1209,11 +1241,12 @@ func (s *CollectionFSSuite) TestFlushFullBlocksOnly(c *check.C) { } nDirs := int64(8) + nFiles := int64(67) megabyte := make([]byte, 1<<20) for i := int64(0); i < nDirs; i++ { dir := fmt.Sprintf("dir%d", i) fs.Mkdir(dir, 0755) - for j := 0; j < 67; j++ { + for j := int64(0); j < nFiles; j++ { f, err := fs.OpenFile(fmt.Sprintf("%s/file%d", dir, j), os.O_WRONLY|os.O_CREATE, 0) c.Assert(err, check.IsNil) defer f.Close() @@ -1221,7 +1254,8 @@ func (s *CollectionFSSuite) TestFlushFullBlocksOnly(c *check.C) { c.Assert(err, check.IsNil) } } - c.Check(fs.MemorySize(), check.Equals, int64(nDirs*67<<20)) + inodebytes := int64((nDirs*(nFiles+1) + 1) * 64) + c.Check(fs.MemorySize(), check.Equals, nDirs*nFiles*(1<<20+64)+inodebytes) c.Check(flushed, check.Equals, int64(0)) waitForFlush := func(expectUnflushed, expectFlushed int64) { @@ -1232,27 +1266,29 @@ func (s *CollectionFSSuite) TestFlushFullBlocksOnly(c *check.C) { } // Nothing flushed yet - waitForFlush((nDirs*67)<<20, 0) + waitForFlush(nDirs*nFiles*(1<<20+64)+inodebytes, 0) // Flushing a non-empty dir "/" is non-recursive and there are // no top-level files, so this has no effect fs.Flush("/", false) - waitForFlush((nDirs*67)<<20, 0) + waitForFlush(nDirs*nFiles*(1<<20+64)+inodebytes, 0) // Flush the full block in dir0 fs.Flush("dir0", false) - waitForFlush((nDirs*67-64)<<20, 64<<20) + bigloclen := int64(32 + 9 + 51 + 64) // md5 + "+" + "67xxxxxx" + "+Axxxxxx..." + 64 (see (storedSegment)memorySize) + waitForFlush((nDirs*nFiles-64)*(1<<20+64)+inodebytes+bigloclen*64, 64<<20) err = fs.Flush("dir-does-not-exist", false) c.Check(err, check.NotNil) // Flush full blocks in all dirs fs.Flush("", false) - waitForFlush(nDirs*3<<20, nDirs*64<<20) + waitForFlush(nDirs*3*(1<<20+64)+inodebytes+bigloclen*64*nDirs, nDirs*64<<20) // Flush non-full blocks, too fs.Flush("", true) - waitForFlush(0, nDirs*67<<20) + smallloclen := int64(32 + 8 + 51 + 64) // md5 + "+" + "3xxxxxx" + "+Axxxxxx..." + 64 (see (storedSegment)memorySize) + waitForFlush(inodebytes+bigloclen*64*nDirs+smallloclen*3*nDirs, nDirs*67<<20) } // Even when writing lots of files/dirs from different goroutines, as @@ -1603,48 +1639,101 @@ type CollectionFSUnitSuite struct{} var _ = check.Suite(&CollectionFSUnitSuite{}) // expect ~2 seconds to load a manifest with 256K files -func (s *CollectionFSUnitSuite) TestLargeManifest(c *check.C) { +func (s *CollectionFSUnitSuite) TestLargeManifest_ManyFiles(c *check.C) { if testing.Short() { c.Skip("slow") } + s.testLargeManifest(c, 512, 512, 1, 0) +} - const ( - dirCount = 512 - fileCount = 512 - ) +func (s *CollectionFSUnitSuite) TestLargeManifest_LargeFiles(c *check.C) { + if testing.Short() { + c.Skip("slow") + } + s.testLargeManifest(c, 1, 800, 1000, 0) +} + +func (s *CollectionFSUnitSuite) TestLargeManifest_InterleavedFiles(c *check.C) { + if testing.Short() { + c.Skip("slow") + } + // Timing figures here are from a dev host, (0)->(1)->(2)->(3) + // (0) no optimizations (main branch commit ea697fb1e8) + // (1) resolve streampos->blkidx with binary search + // (2) ...and rewrite PortableDataHash() without regexp + // (3) ...and use fnodeCache in loadManifest + s.testLargeManifest(c, 1, 800, 100, 4<<20) // 127s -> 12s -> 2.5s -> 1.5s + s.testLargeManifest(c, 1, 50, 1000, 4<<20) // 44s -> 10s -> 1.5s -> 0.8s + s.testLargeManifest(c, 1, 200, 100, 4<<20) // 13s -> 4s -> 0.6s -> 0.3s + s.testLargeManifest(c, 1, 200, 150, 4<<20) // 26s -> 4s -> 1s -> 0.5s + s.testLargeManifest(c, 1, 200, 200, 4<<20) // 38s -> 6s -> 1.3s -> 0.7s + s.testLargeManifest(c, 1, 200, 225, 4<<20) // 46s -> 7s -> 1.5s -> 1s + s.testLargeManifest(c, 1, 400, 400, 4<<20) // 477s -> 24s -> 5s -> 3s + // s.testLargeManifest(c, 1, 800, 1000, 4<<20) // timeout -> 186s -> 28s -> 17s +} +func (s *CollectionFSUnitSuite) testLargeManifest(c *check.C, dirCount, filesPerDir, blocksPerFile, interleaveChunk int) { + t0 := time.Now() + const blksize = 1 << 26 + c.Logf("%s building manifest with dirCount=%d filesPerDir=%d blocksPerFile=%d", time.Now(), dirCount, filesPerDir, blocksPerFile) mb := bytes.NewBuffer(make([]byte, 0, 40000000)) + blkid := 0 for i := 0; i < dirCount; i++ { fmt.Fprintf(mb, "./dir%d", i) - for j := 0; j <= fileCount; j++ { - fmt.Fprintf(mb, " %032x+42+A%040x@%08x", j, j, j) + for j := 0; j < filesPerDir; j++ { + for k := 0; k < blocksPerFile; k++ { + blkid++ + fmt.Fprintf(mb, " %032x+%d+A%040x@%08x", blkid, blksize, blkid, blkid) + } } - for j := 0; j < fileCount; j++ { - fmt.Fprintf(mb, " %d:%d:dir%d/file%d", j*42+21, 42, j, j) + for j := 0; j < filesPerDir; j++ { + if interleaveChunk == 0 { + fmt.Fprintf(mb, " %d:%d:dir%d/file%d", (filesPerDir-j-1)*blocksPerFile*blksize, blocksPerFile*blksize, j, j) + continue + } + for todo := int64(blocksPerFile) * int64(blksize); todo > 0; todo -= int64(interleaveChunk) { + size := int64(interleaveChunk) + if size > todo { + size = todo + } + offset := rand.Int63n(int64(blocksPerFile)*int64(blksize)*int64(filesPerDir) - size) + fmt.Fprintf(mb, " %d:%d:dir%d/file%d", offset, size, j, j) + } } mb.Write([]byte{'\n'}) } coll := Collection{ManifestText: mb.String()} - c.Logf("%s built", time.Now()) + c.Logf("%s built manifest size=%d", time.Now(), mb.Len()) var memstats runtime.MemStats runtime.ReadMemStats(&memstats) c.Logf("%s Alloc=%d Sys=%d", time.Now(), memstats.Alloc, memstats.Sys) - f, err := coll.FileSystem(nil, nil) + f, err := coll.FileSystem(NewClientFromEnv(), &keepClientStub{}) c.Check(err, check.IsNil) c.Logf("%s loaded", time.Now()) - c.Check(f.Size(), check.Equals, int64(42*dirCount*fileCount)) + c.Check(f.Size(), check.Equals, int64(dirCount*filesPerDir*blocksPerFile*blksize)) + // Stat() and OpenFile() each file. This mimics the behavior + // of webdav propfind, which opens each file even when just + // listing directory entries. for i := 0; i < dirCount; i++ { - for j := 0; j < fileCount; j++ { - f.Stat(fmt.Sprintf("./dir%d/dir%d/file%d", i, j, j)) + for j := 0; j < filesPerDir; j++ { + fnm := fmt.Sprintf("./dir%d/dir%d/file%d", i, j, j) + fi, err := f.Stat(fnm) + c.Assert(err, check.IsNil) + c.Check(fi.IsDir(), check.Equals, false) + f, err := f.OpenFile(fnm, os.O_RDONLY, 0) + c.Assert(err, check.IsNil) + f.Close() } } - c.Logf("%s Stat() x %d", time.Now(), dirCount*fileCount) + c.Logf("%s OpenFile() x %d", time.Now(), dirCount*filesPerDir) runtime.ReadMemStats(&memstats) c.Logf("%s Alloc=%d Sys=%d", time.Now(), memstats.Alloc, memstats.Sys) + c.Logf("%s MemorySize=%d", time.Now(), f.MemorySize()) + c.Logf("%s ... test duration %s", time.Now(), time.Now().Sub(t0)) } // Gocheck boilerplate