12430: Add test for avoiding loading collections excluded by globs.
[arvados.git] / lib / crunchrun / copier_test.go
index 777b715d76dd8bb57e9d5b34309ee70b356df888..a1fc81c716dd7466be75065cdc597ff0cc50da36 100644 (file)
@@ -5,30 +5,31 @@
 package crunchrun
 
 import (
+       "bytes"
        "io"
-       "io/ioutil"
+       "io/fs"
        "os"
+       "sort"
+       "syscall"
 
        "git.arvados.org/arvados.git/sdk/go/arvados"
-       "git.arvados.org/arvados.git/sdk/go/arvadosclient"
        "git.arvados.org/arvados.git/sdk/go/arvadostest"
+       "github.com/sirupsen/logrus"
        check "gopkg.in/check.v1"
 )
 
 var _ = check.Suite(&copierSuite{})
 
 type copierSuite struct {
-       cp copier
+       cp  copier
+       log bytes.Buffer
 }
 
 func (s *copierSuite) SetUpTest(c *check.C) {
-       tmpdir, err := ioutil.TempDir("", "crunch-run.test.")
-       c.Assert(err, check.IsNil)
-       api, err := arvadosclient.MakeArvadosClient()
-       c.Assert(err, check.IsNil)
+       tmpdir := c.MkDir()
+       s.log = bytes.Buffer{}
        s.cp = copier{
                client:        arvados.NewClientFromEnv(),
-               arvClient:     api,
                hostOutputDir: tmpdir,
                ctrOutputDir:  "/ctr/outdir",
                mounts: map[string]arvados.Mount{
@@ -37,13 +38,10 @@ func (s *copierSuite) SetUpTest(c *check.C) {
                secretMounts: map[string]arvados.Mount{
                        "/secret_text": {Kind: "text", Content: "xyzzy"},
                },
+               logger: &logrus.Logger{Out: &s.log, Formatter: &logrus.TextFormatter{}, Level: logrus.InfoLevel},
        }
 }
 
-func (s *copierSuite) TearDownTest(c *check.C) {
-       os.RemoveAll(s.cp.hostOutputDir)
-}
-
 func (s *copierSuite) TestEmptyOutput(c *check.C) {
        err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
        c.Check(err, check.IsNil)
@@ -59,6 +57,8 @@ func (s *copierSuite) TestRegularFilesAndDirs(c *check.C) {
        _, err = io.WriteString(f, "foo")
        c.Assert(err, check.IsNil)
        c.Assert(f.Close(), check.IsNil)
+       err = syscall.Mkfifo(s.cp.hostOutputDir+"/dir1/fifo", 0644)
+       c.Assert(err, check.IsNil)
 
        err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
        c.Check(err, check.IsNil)
@@ -67,6 +67,7 @@ func (s *copierSuite) TestRegularFilesAndDirs(c *check.C) {
                {src: os.DevNull, dst: "/dir1/dir2/dir3/.keep"},
                {src: s.cp.hostOutputDir + "/dir1/foo", dst: "/dir1/foo", size: 3},
        })
+       c.Check(s.log.String(), check.Matches, `.* msg="Skipping unsupported file type \(mode 200000644\) in output dir: \\"/ctr/outdir/dir1/fifo\\""\n`)
 }
 
 func (s *copierSuite) TestSymlinkCycle(c *check.C) {
@@ -115,9 +116,7 @@ func (s *copierSuite) TestSymlinkToMountedCollection(c *check.C) {
        }
 
        // simulate mounted writable collection
-       bindtmp, err := ioutil.TempDir("", "crunch-run.test.")
-       c.Assert(err, check.IsNil)
-       defer os.RemoveAll(bindtmp)
+       bindtmp := c.MkDir()
        f, err := os.OpenFile(bindtmp+"/.arvados#collection", os.O_CREATE|os.O_WRONLY, 0644)
        c.Assert(err, check.IsNil)
        _, err = io.WriteString(f, `{"manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n"}`)
@@ -128,7 +127,9 @@ func (s *copierSuite) TestSymlinkToMountedCollection(c *check.C) {
                PortableDataHash: arvadostest.FooCollectionPDH,
                Writable:         true,
        }
-       s.cp.binds = append(s.cp.binds, bindtmp+":/mnt-w")
+       s.cp.bindmounts = map[string]bindmount{
+               "/mnt-w": bindmount{HostPath: bindtmp, ReadOnly: false},
+       }
 
        c.Assert(os.Symlink("../../mnt", s.cp.hostOutputDir+"/l_dir"), check.IsNil)
        c.Assert(os.Symlink("/mnt/foo", s.cp.hostOutputDir+"/l_file"), check.IsNil)
@@ -213,6 +214,66 @@ func (s *copierSuite) TestWritableMountBelow(c *check.C) {
        })
 }
 
+func (s *copierSuite) TestMountBelowExcludedByGlob(c *check.C) {
+       bindtmp := c.MkDir()
+       s.cp.mounts["/ctr/outdir/include/includer"] = arvados.Mount{
+               Kind:             "collection",
+               PortableDataHash: arvadostest.FooCollectionPDH,
+       }
+       s.cp.mounts["/ctr/outdir/include/includew"] = arvados.Mount{
+               Kind:             "collection",
+               PortableDataHash: arvadostest.FooCollectionPDH,
+               Writable:         true,
+       }
+       s.cp.mounts["/ctr/outdir/exclude/excluder"] = arvados.Mount{
+               Kind:             "collection",
+               PortableDataHash: arvadostest.FooCollectionPDH,
+       }
+       s.cp.mounts["/ctr/outdir/exclude/excludew"] = arvados.Mount{
+               Kind:             "collection",
+               PortableDataHash: arvadostest.FooCollectionPDH,
+               Writable:         true,
+       }
+       s.cp.mounts["/ctr/outdir/nonexistent-collection"] = arvados.Mount{
+               // As extra assurance, plant a collection that will
+               // fail if copier attempts to load its manifest.  (For
+               // performance reasons it's important that copier
+               // doesn't try to load the manifest before deciding
+               // not to copy the contents.)
+               Kind:             "collection",
+               PortableDataHash: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+1234",
+       }
+       s.cp.globs = []string{
+               "?ncl*/*r",
+               "*/?ncl*",
+       }
+       c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/include/includer", 0755), check.IsNil)
+       c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/include/includew", 0755), check.IsNil)
+       c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/exclude/excluder", 0755), check.IsNil)
+       c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/exclude/excludew", 0755), check.IsNil)
+       s.writeFileInOutputDir(c, "include/includew/foo", "foo")
+       s.writeFileInOutputDir(c, "exclude/excludew/foo", "foo")
+       s.cp.bindmounts = map[string]bindmount{
+               "/ctr/outdir/include/includew": bindmount{HostPath: bindtmp, ReadOnly: false},
+       }
+       s.cp.bindmounts = map[string]bindmount{
+               "/ctr/outdir/include/excludew": bindmount{HostPath: bindtmp, ReadOnly: false},
+       }
+
+       err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
+       c.Check(err, check.IsNil)
+       c.Log(s.log.String())
+
+       c.Check(s.cp.dirs, check.DeepEquals, []string{"/include", "/include/includew"})
+       c.Check(s.cp.files, check.DeepEquals, []filetodo{
+               {src: s.cp.hostOutputDir + "/include/includew/foo", dst: "/include/includew/foo", size: 3},
+       })
+       c.Check(s.cp.manifest, check.Matches, `(?ms).*\./include/includer .*`)
+       c.Check(s.cp.manifest, check.Not(check.Matches), `(?ms).*exclude.*`)
+       c.Check(s.log.String(), check.Matches, `(?ms).*not copying \\"exclude/excluder\\".*`)
+       c.Check(s.log.String(), check.Matches, `(?ms).*not copying \\"exclude/excludew\\".*`)
+}
+
 func (s *copierSuite) writeFileInOutputDir(c *check.C, path, data string) {
        f, err := os.OpenFile(s.cp.hostOutputDir+"/"+path, os.O_CREATE|os.O_WRONLY, 0644)
        c.Assert(err, check.IsNil)
@@ -220,3 +281,184 @@ func (s *copierSuite) writeFileInOutputDir(c *check.C, path, data string) {
        c.Assert(err, check.IsNil)
        c.Assert(f.Close(), check.IsNil)
 }
+
+// TestApplyGlobsToFilesAndDirs runs applyGlobsToFilesAndDirs against
+// a small fixed list of dirs and files.  The glob-matching code is
+// the same one applyGlobsToCollectionFS uses, and its matching
+// behavior is covered by TestApplyGlobsToCollectionFS, so the cases
+// here only need to show that (a) the globs really do filter the
+// file list, and (b) a non-matching dir is kept if and only if it is
+// an ancestor of a matching entry.
+func (s *copierSuite) TestApplyGlobsToFilesAndDirs(c *check.C) {
+       type globTrial struct {
+               globs []string
+               dirs  []string
+               files []string
+       }
+       allDirs := []string{"dir1", "dir1/dir11", "dir1/dir12", "dir2"}
+       allFiles := []string{"dir1/file11", "dir1/dir11/file111", "dir2/file2"}
+       trials := []globTrial{
+               // No globs at all, or a match-everything glob: nothing is dropped.
+               {globs: []string{}, dirs: append([]string{}, allDirs...), files: append([]string{}, allFiles...)},
+               {globs: []string{"**"}, dirs: append([]string{}, allDirs...), files: append([]string{}, allFiles...)},
+               // A single matching file keeps only its ancestors.
+               {globs: []string{"**/file111"}, dirs: []string{"dir1", "dir1/dir11"}, files: []string{"dir1/dir11/file111"}},
+               // No match at all: everything is dropped.
+               {globs: []string{"nothing"}, dirs: nil, files: nil},
+               // Matching a dir keeps it and its ancestors, but no files.
+               {globs: []string{"**/dir12"}, dirs: []string{"dir1", "dir1/dir12"}, files: nil},
+               {globs: []string{"**/file*"}, dirs: []string{"dir1", "dir1/dir11", "dir2"}, files: append([]string{}, allFiles...)},
+               {globs: []string{"**/dir1[12]"}, dirs: []string{"dir1", "dir1/dir11", "dir1/dir12"}, files: nil},
+               {globs: []string{"**/dir1[^2]"}, dirs: []string{"dir1", "dir1/dir11"}, files: nil},
+               {globs: []string{"dir1/**"}, dirs: []string{"dir1", "dir1/dir11", "dir1/dir12"}, files: []string{"dir1/file11", "dir1/dir11/file111"}},
+       }
+       for _, tc := range trials {
+               c.Logf("=== globs: %q", tc.globs)
+               sub := copier{
+                       globs: tc.globs,
+                       dirs:  allDirs,
+               }
+               for i := range allFiles {
+                       sub.files = append(sub.files, filetodo{dst: allFiles[i]})
+               }
+               sub.applyGlobsToFilesAndDirs()
+               // Leave keptFiles nil when nothing survives, so it
+               // compares equal to a nil expectation.
+               var keptFiles []string
+               for _, todo := range sub.files {
+                       keptFiles = append(keptFiles, todo.dst)
+               }
+               c.Check(sub.dirs, check.DeepEquals, tc.dirs)
+               c.Check(keptFiles, check.DeepEquals, tc.files)
+       }
+}
+
+// TestApplyGlobsToCollectionFS builds a collection filesystem from a
+// manifest containing four empty files (foo, bar, baz/quux, and
+// baz/parent1/item1), applies each trial's globs with
+// applyGlobsToCollectionFS, and checks that the non-directory entries
+// left in the filesystem are exactly the expected ones (compared in
+// sorted order).
+func (s *copierSuite) TestApplyGlobsToCollectionFS(c *check.C) {
+       for _, trial := range []struct {
+               globs  []string
+               expect []string
+       }{
+               {
+                       globs:  nil,
+                       expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
+               },
+               {
+                       globs:  []string{"foo"},
+                       expect: []string{"foo"},
+               },
+               {
+                       globs:  []string{"baz/parent1/item1"},
+                       expect: []string{"baz/parent1/item1"},
+               },
+               {
+                       globs:  []string{"**"},
+                       expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
+               },
+               {
+                       globs:  []string{"**/*"},
+                       expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
+               },
+               {
+                       globs:  []string{"*"},
+                       expect: []string{"foo", "bar"},
+               },
+               {
+                       globs:  []string{"baz"},
+                       expect: nil,
+               },
+               {
+                       globs:  []string{"b*/**"},
+                       expect: []string{"baz/quux", "baz/parent1/item1"},
+               },
+               {
+                       globs:  []string{"baz/**"},
+                       expect: []string{"baz/quux", "baz/parent1/item1"},
+               },
+               {
+                       globs:  []string{"baz/*"},
+                       expect: []string{"baz/quux"},
+               },
+               {
+                       globs:  []string{"baz/**/*uu?"},
+                       expect: []string{"baz/quux"},
+               },
+               {
+                       globs:  []string{"**/*m1"},
+                       expect: []string{"baz/parent1/item1"},
+               },
+               {
+                       globs:  []string{"*/*/*/**/*1"},
+                       expect: nil,
+               },
+               {
+                       globs:  []string{"f*", "**/q*"},
+                       expect: []string{"foo", "baz/quux"},
+               },
+               {
+                       globs:  []string{"\\"}, // invalid pattern matches nothing
+                       expect: nil,
+               },
+               {
+                       globs:  []string{"\\", "foo"},
+                       expect: []string{"foo"},
+               },
+               {
+                       // foo is a file in the manifest, so there
+                       // is nothing below it to match.
+                       globs:  []string{"foo/**"},
+                       expect: nil,
+               },
+               {
+                       globs:  []string{"foo*/**"},
+                       expect: nil,
+               },
+       } {
+               c.Logf("=== globs: %q", trial.globs)
+               collfs, err := (&arvados.Collection{ManifestText: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo 0:0:bar 0:0:baz/quux 0:0:baz/parent1/item1\n"}).FileSystem(nil, nil)
+               c.Assert(err, check.IsNil)
+               cp := copier{globs: trial.globs}
+               err = cp.applyGlobsToCollectionFS(collfs)
+               if !c.Check(err, check.IsNil) {
+                       continue
+               }
+               var got []string
+               err = fs.WalkDir(arvados.FS(collfs), "", func(path string, ent fs.DirEntry, err error) error {
+                       // WalkDir can report an error with a nil
+                       // entry, so check err before touching ent.
+                       if err != nil {
+                               return err
+                       }
+                       if !ent.IsDir() {
+                               got = append(got, path)
+                       }
+                       return nil
+               })
+               // A failed walk would leave got incomplete; report
+               // it instead of comparing a partial listing.
+               c.Check(err, check.IsNil)
+               sort.Strings(got)
+               sort.Strings(trial.expect)
+               c.Check(got, check.DeepEquals, trial.expect)
+       }
+}