Merge branch 'main' into 22235-toolbar-access-fixes
[arvados.git] / lib / crunchrun / copier_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "bytes"
9         "encoding/json"
10         "fmt"
11         "io"
12         "io/fs"
13         "os"
14         "path"
15         "sort"
16         "syscall"
17
18         "git.arvados.org/arvados.git/sdk/go/arvados"
19         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
20         "git.arvados.org/arvados.git/sdk/go/arvadostest"
21         "git.arvados.org/arvados.git/sdk/go/keepclient"
22         "github.com/sirupsen/logrus"
23         check "gopkg.in/check.v1"
24 )
25
26 var _ = check.Suite(&copierSuite{})
27
28 type copierSuite struct {
29         cp  copier
30         log bytes.Buffer
31 }
32
33 func (s *copierSuite) SetUpTest(c *check.C) {
34         tmpdir := c.MkDir()
35         s.log = bytes.Buffer{}
36
37         cl, err := arvadosclient.MakeArvadosClient()
38         c.Assert(err, check.IsNil)
39         kc, err := keepclient.MakeKeepClient(cl)
40         c.Assert(err, check.IsNil)
41         collfs, err := (&arvados.Collection{}).FileSystem(arvados.NewClientFromEnv(), kc)
42         c.Assert(err, check.IsNil)
43
44         s.cp = copier{
45                 client:        arvados.NewClientFromEnv(),
46                 keepClient:    kc,
47                 hostOutputDir: tmpdir,
48                 ctrOutputDir:  "/ctr/outdir",
49                 mounts: map[string]arvados.Mount{
50                         "/ctr/outdir": {Kind: "tmp"},
51                 },
52                 secretMounts: map[string]arvados.Mount{
53                         "/secret_text": {Kind: "text", Content: "xyzzy"},
54                 },
55                 logger: &logrus.Logger{Out: &s.log, Formatter: &logrus.TextFormatter{}, Level: logrus.InfoLevel},
56                 staged: collfs,
57         }
58 }
59
60 func (s *copierSuite) TestEmptyOutput(c *check.C) {
61         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
62         c.Check(err, check.IsNil)
63         c.Check(s.cp.dirs, check.DeepEquals, []string(nil))
64         c.Check(len(s.cp.files), check.Equals, 0)
65 }
66
67 func (s *copierSuite) TestEmptyWritableMount(c *check.C) {
68         s.writeFileInOutputDir(c, ".arvados#collection", `{"manifest_text":""}`)
69         s.cp.mounts[s.cp.ctrOutputDir] = arvados.Mount{
70                 Kind:     "collection",
71                 Writable: true,
72         }
73
74         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
75         c.Assert(err, check.IsNil)
76         c.Check(s.cp.dirs, check.DeepEquals, []string(nil))
77         c.Check(len(s.cp.files), check.Equals, 0)
78         rootdir, err := s.cp.staged.Open(".")
79         c.Assert(err, check.IsNil)
80         defer rootdir.Close()
81         fis, err := rootdir.Readdir(-1)
82         c.Assert(err, check.IsNil)
83         c.Check(fis, check.HasLen, 0)
84 }
85
86 func (s *copierSuite) TestOutputCollectionWithOnlySubmounts(c *check.C) {
87         s.writeFileInOutputDir(c, ".arvados#collection", `{"manifest_text":""}`)
88         s.cp.mounts[s.cp.ctrOutputDir] = arvados.Mount{
89                 Kind:     "collection",
90                 Writable: true,
91         }
92         s.cp.mounts[path.Join(s.cp.ctrOutputDir, "foo")] = arvados.Mount{
93                 Kind:             "collection",
94                 Path:             "foo",
95                 PortableDataHash: arvadostest.FooCollectionPDH,
96         }
97
98         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
99         c.Assert(err, check.IsNil)
100
101         // s.cp.dirs and s.cp.files are empty, because nothing needs
102         // to be copied from disk.
103         c.Check(s.cp.dirs, check.DeepEquals, []string(nil))
104         c.Check(len(s.cp.files), check.Equals, 0)
105
106         // The "foo" file has already been copied from FooCollection
107         // to s.cp.staged via Snapshot+Splice.
108         rootdir, err := s.cp.staged.Open(".")
109         c.Assert(err, check.IsNil)
110         defer rootdir.Close()
111         fis, err := rootdir.Readdir(-1)
112         c.Assert(err, check.IsNil)
113         c.Assert(fis, check.HasLen, 1)
114         c.Check(fis[0].Size(), check.Equals, int64(3))
115 }
116
117 func (s *copierSuite) TestRegularFilesAndDirs(c *check.C) {
118         err := os.MkdirAll(s.cp.hostOutputDir+"/dir1/dir2/dir3", 0755)
119         c.Assert(err, check.IsNil)
120         f, err := os.OpenFile(s.cp.hostOutputDir+"/dir1/foo", os.O_CREATE|os.O_WRONLY, 0644)
121         c.Assert(err, check.IsNil)
122         _, err = io.WriteString(f, "foo")
123         c.Assert(err, check.IsNil)
124         c.Assert(f.Close(), check.IsNil)
125         err = syscall.Mkfifo(s.cp.hostOutputDir+"/dir1/fifo", 0644)
126         c.Assert(err, check.IsNil)
127
128         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
129         c.Check(err, check.IsNil)
130         c.Check(s.cp.dirs, check.DeepEquals, []string{"/dir1", "/dir1/dir2", "/dir1/dir2/dir3"})
131         c.Check(s.cp.files, check.DeepEquals, []filetodo{
132                 {src: os.DevNull, dst: "/dir1/dir2/dir3/.keep"},
133                 {src: s.cp.hostOutputDir + "/dir1/foo", dst: "/dir1/foo", size: 3},
134         })
135         c.Check(s.log.String(), check.Matches, `.* msg="Skipping unsupported file type \(mode 200000644\) in output dir: \\"/ctr/outdir/dir1/fifo\\""\n`)
136 }
137
138 func (s *copierSuite) TestSymlinkCycle(c *check.C) {
139         c.Assert(os.Mkdir(s.cp.hostOutputDir+"/dir1", 0755), check.IsNil)
140         c.Assert(os.Mkdir(s.cp.hostOutputDir+"/dir2", 0755), check.IsNil)
141         c.Assert(os.Symlink("../dir2", s.cp.hostOutputDir+"/dir1/l_dir2"), check.IsNil)
142         c.Assert(os.Symlink("../dir1", s.cp.hostOutputDir+"/dir2/l_dir1"), check.IsNil)
143         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
144         c.Check(err, check.ErrorMatches, `.*cycle.*`)
145 }
146
147 func (s *copierSuite) TestSymlinkTargetMissing(c *check.C) {
148         c.Assert(os.Symlink("./missing", s.cp.hostOutputDir+"/symlink"), check.IsNil)
149         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
150         c.Check(err, check.ErrorMatches, `.*/ctr/outdir/missing.*`)
151 }
152
153 func (s *copierSuite) TestSymlinkTargetNotMounted(c *check.C) {
154         c.Assert(os.Symlink("../boop", s.cp.hostOutputDir+"/symlink"), check.IsNil)
155         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
156         c.Check(err, check.ErrorMatches, `.*/ctr/boop.*`)
157 }
158
159 func (s *copierSuite) TestSymlinkToSecret(c *check.C) {
160         c.Assert(os.Symlink("/secret_text", s.cp.hostOutputDir+"/symlink"), check.IsNil)
161         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
162         c.Check(err, check.IsNil)
163         c.Check(len(s.cp.dirs), check.Equals, 0)
164         c.Check(len(s.cp.files), check.Equals, 0)
165 }
166
167 func (s *copierSuite) TestSecretInOutputDir(c *check.C) {
168         s.cp.secretMounts["/ctr/outdir/secret_text"] = s.cp.secretMounts["/secret_text"]
169         s.writeFileInOutputDir(c, "secret_text", "xyzzy")
170         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
171         c.Check(err, check.IsNil)
172         c.Check(len(s.cp.dirs), check.Equals, 0)
173         c.Check(len(s.cp.files), check.Equals, 0)
174 }
175
176 func (s *copierSuite) TestSymlinkToMountedCollection(c *check.C) {
177         // simulate mounted read-only collection
178         s.cp.mounts["/mnt"] = arvados.Mount{
179                 Kind:             "collection",
180                 PortableDataHash: arvadostest.FooCollectionPDH,
181         }
182
183         // simulate mounted writable collection
184         bindtmp := c.MkDir()
185         f, err := os.OpenFile(bindtmp+"/.arvados#collection", os.O_CREATE|os.O_WRONLY, 0644)
186         c.Assert(err, check.IsNil)
187         _, err = io.WriteString(f, `{"manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n"}`)
188         c.Assert(err, check.IsNil)
189         c.Assert(f.Close(), check.IsNil)
190         s.cp.mounts["/mnt-w"] = arvados.Mount{
191                 Kind:             "collection",
192                 PortableDataHash: arvadostest.FooCollectionPDH,
193                 Writable:         true,
194         }
195         s.cp.bindmounts = map[string]bindmount{
196                 "/mnt-w": bindmount{HostPath: bindtmp, ReadOnly: false},
197         }
198
199         c.Assert(os.Symlink("../../mnt", s.cp.hostOutputDir+"/l_dir"), check.IsNil)
200         c.Assert(os.Symlink("/mnt/foo", s.cp.hostOutputDir+"/l_file"), check.IsNil)
201         c.Assert(os.Symlink("/mnt-w/bar", s.cp.hostOutputDir+"/l_file_w"), check.IsNil)
202
203         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
204         c.Check(err, check.IsNil)
205         s.checkStagedFile(c, "l_dir/foo", 3)
206         s.checkStagedFile(c, "l_file", 3)
207         s.checkStagedFile(c, "l_file_w", 3)
208 }
209
210 func (s *copierSuite) checkStagedFile(c *check.C, path string, size int64) {
211         fi, err := s.cp.staged.Stat(path)
212         if c.Check(err, check.IsNil) {
213                 c.Check(fi.Size(), check.Equals, size)
214         }
215 }
216
217 func (s *copierSuite) TestSymlink(c *check.C) {
218         hostfile := s.cp.hostOutputDir + "/dir1/file"
219
220         err := os.MkdirAll(s.cp.hostOutputDir+"/dir1/dir2/dir3", 0755)
221         c.Assert(err, check.IsNil)
222         s.writeFileInOutputDir(c, "dir1/file", "file")
223         for _, err := range []error{
224                 os.Symlink(s.cp.ctrOutputDir+"/dir1/file", s.cp.hostOutputDir+"/l_abs_file"),
225                 os.Symlink(s.cp.ctrOutputDir+"/dir1/dir2", s.cp.hostOutputDir+"/l_abs_dir2"),
226                 os.Symlink("../../dir1/file", s.cp.hostOutputDir+"/dir1/dir2/l_rel_file"),
227                 os.Symlink("dir1/file", s.cp.hostOutputDir+"/l_rel_file"),
228                 os.MkdirAll(s.cp.hostOutputDir+"/morelinks", 0755),
229                 os.Symlink("../dir1/dir2", s.cp.hostOutputDir+"/morelinks/l_rel_dir2"),
230                 os.Symlink("dir1/dir2/dir3", s.cp.hostOutputDir+"/l_rel_dir3"),
231                 // rel. symlink -> rel. symlink -> regular file
232                 os.Symlink("../dir1/dir2/l_rel_file", s.cp.hostOutputDir+"/morelinks/l_rel_l_rel_file"),
233         } {
234                 c.Assert(err, check.IsNil)
235         }
236
237         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
238         c.Check(err, check.IsNil)
239         c.Check(s.cp.dirs, check.DeepEquals, []string{
240                 "/dir1", "/dir1/dir2", "/dir1/dir2/dir3",
241                 "/l_abs_dir2", "/l_abs_dir2/dir3",
242                 "/l_rel_dir3",
243                 "/morelinks", "/morelinks/l_rel_dir2", "/morelinks/l_rel_dir2/dir3",
244         })
245         c.Check(s.cp.files, check.DeepEquals, []filetodo{
246                 {dst: "/dir1/dir2/dir3/.keep", src: os.DevNull},
247                 {dst: "/dir1/dir2/l_rel_file", src: hostfile, size: 4},
248                 {dst: "/dir1/file", src: hostfile, size: 4},
249                 {dst: "/l_abs_dir2/dir3/.keep", src: os.DevNull},
250                 {dst: "/l_abs_dir2/l_rel_file", src: hostfile, size: 4},
251                 {dst: "/l_abs_file", src: hostfile, size: 4},
252                 {dst: "/l_rel_dir3/.keep", src: os.DevNull},
253                 {dst: "/l_rel_file", src: hostfile, size: 4},
254                 {dst: "/morelinks/l_rel_dir2/dir3/.keep", src: os.DevNull},
255                 {dst: "/morelinks/l_rel_dir2/l_rel_file", src: hostfile, size: 4},
256                 {dst: "/morelinks/l_rel_l_rel_file", src: hostfile, size: 4},
257         })
258 }
259
260 func (s *copierSuite) TestUnsupportedOutputMount(c *check.C) {
261         s.cp.mounts["/ctr/outdir"] = arvados.Mount{Kind: "waz"}
262         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
263         c.Check(err, check.NotNil)
264 }
265
266 func (s *copierSuite) TestUnsupportedMountKindBelow(c *check.C) {
267         s.cp.mounts["/ctr/outdir/dirk"] = arvados.Mount{Kind: "waz"}
268         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
269         c.Check(err, check.NotNil)
270 }
271
272 func (s *copierSuite) TestWritableMountBelow(c *check.C) {
273         s.cp.mounts["/ctr/outdir/mount"] = arvados.Mount{
274                 Kind:             "collection",
275                 PortableDataHash: arvadostest.FooCollectionPDH,
276                 Writable:         true,
277         }
278         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/mount", 0755), check.IsNil)
279         s.writeFileInOutputDir(c, "file", "file")
280         s.writeFileInOutputDir(c, "mount/foo", "foo")
281
282         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
283         c.Check(err, check.IsNil)
284         c.Check(s.cp.dirs, check.DeepEquals, []string{"/mount"})
285         c.Check(s.cp.files, check.DeepEquals, []filetodo{
286                 {src: s.cp.hostOutputDir + "/file", dst: "/file", size: 4},
287                 {src: s.cp.hostOutputDir + "/mount/foo", dst: "/mount/foo", size: 3},
288         })
289 }
290
291 // Check some glob-matching edge cases. In particular, check that
292 // patterns like "foo/**" do not match regular files named "foo"
293 // (unless of course they are inside a directory named "foo").
294 func (s *copierSuite) TestMatchGlobs(c *check.C) {
295         s.cp.globs = []string{"foo*/**"}
296         c.Check(s.cp.matchGlobs("foo", true), check.Equals, true)
297         c.Check(s.cp.matchGlobs("food", true), check.Equals, true)
298         c.Check(s.cp.matchGlobs("foo", false), check.Equals, false)
299         c.Check(s.cp.matchGlobs("food", false), check.Equals, false)
300         c.Check(s.cp.matchGlobs("foo/bar", false), check.Equals, true)
301         c.Check(s.cp.matchGlobs("food/bar", false), check.Equals, true)
302         c.Check(s.cp.matchGlobs("foo/bar", true), check.Equals, true)
303         c.Check(s.cp.matchGlobs("food/bar", true), check.Equals, true)
304
305         s.cp.globs = []string{"ba[!/]/foo*/**"}
306         c.Check(s.cp.matchGlobs("bar/foo", true), check.Equals, true)
307         c.Check(s.cp.matchGlobs("bar/food", true), check.Equals, true)
308         c.Check(s.cp.matchGlobs("bar/foo", false), check.Equals, false)
309         c.Check(s.cp.matchGlobs("bar/food", false), check.Equals, false)
310         c.Check(s.cp.matchGlobs("bar/foo/z\\[", true), check.Equals, true)
311         c.Check(s.cp.matchGlobs("bar/food/z\\[", true), check.Equals, true)
312         c.Check(s.cp.matchGlobs("bar/foo/z\\[", false), check.Equals, true)
313         c.Check(s.cp.matchGlobs("bar/food/z\\[", false), check.Equals, true)
314
315         s.cp.globs = []string{"waz/**/foo*/**"}
316         c.Check(s.cp.matchGlobs("waz/quux/foo", true), check.Equals, true)
317         c.Check(s.cp.matchGlobs("waz/quux/food", true), check.Equals, true)
318         c.Check(s.cp.matchGlobs("waz/quux/foo", false), check.Equals, false)
319         c.Check(s.cp.matchGlobs("waz/quux/food", false), check.Equals, false)
320         c.Check(s.cp.matchGlobs("waz/quux/foo/foo", true), check.Equals, true)
321         c.Check(s.cp.matchGlobs("waz/quux/food/foo", true), check.Equals, true)
322         c.Check(s.cp.matchGlobs("waz/quux/foo/foo", false), check.Equals, true)
323         c.Check(s.cp.matchGlobs("waz/quux/food/foo", false), check.Equals, true)
324
325         s.cp.globs = []string{"foo/**/*"}
326         c.Check(s.cp.matchGlobs("foo", false), check.Equals, false)
327         c.Check(s.cp.matchGlobs("foo/bar", false), check.Equals, true)
328         c.Check(s.cp.matchGlobs("foo/bar/baz", false), check.Equals, true)
329         c.Check(s.cp.matchGlobs("foo/bar/baz/waz", false), check.Equals, true)
330 }
331
332 func (s *copierSuite) TestSubtreeCouldMatch(c *check.C) {
333         for _, trial := range []struct {
334                 mount string // relative to output dir
335                 glob  string
336                 could bool
337         }{
338                 {mount: "abc", glob: "*"},
339                 {mount: "abc", glob: "abc/*", could: true},
340                 {mount: "abc", glob: "a*/**", could: true},
341                 {mount: "abc", glob: "**", could: true},
342                 {mount: "abc", glob: "*/*", could: true},
343                 {mount: "abc", glob: "**/*.txt", could: true},
344                 {mount: "abc/def", glob: "*"},
345                 {mount: "abc/def", glob: "*/*"},
346                 {mount: "abc/def", glob: "*/*.txt"},
347                 {mount: "abc/def", glob: "*/*/*", could: true},
348                 {mount: "abc/def", glob: "**", could: true},
349                 {mount: "abc/def", glob: "**/bar", could: true},
350                 {mount: "abc/def", glob: "abc/**", could: true},
351                 {mount: "abc/def/ghi", glob: "*c/**/bar", could: true},
352                 {mount: "abc/def/ghi", glob: "*c/*f/bar"},
353                 {mount: "abc/def/ghi", glob: "abc/d[^/]f/ghi/*", could: true},
354         } {
355                 c.Logf("=== %+v", trial)
356                 got := (&copier{
357                         globs: []string{trial.glob},
358                 }).subtreeCouldMatch(trial.mount)
359                 c.Check(got, check.Equals, trial.could)
360         }
361 }
362
363 func (s *copierSuite) TestCopyFromLargeCollection_Readonly(c *check.C) {
364         s.testCopyFromLargeCollection(c, false)
365 }
366
367 func (s *copierSuite) TestCopyFromLargeCollection_Writable(c *check.C) {
368         s.testCopyFromLargeCollection(c, true)
369 }
370
371 func (s *copierSuite) testCopyFromLargeCollection(c *check.C, writable bool) {
372         bindtmp := c.MkDir()
373         mtxt := arvadostest.FakeManifest(100, 100, 2, 4<<20)
374         pdh := arvados.PortableDataHash(mtxt)
375         json, err := json.Marshal(arvados.Collection{ManifestText: mtxt, PortableDataHash: pdh})
376         c.Assert(err, check.IsNil)
377         err = os.WriteFile(bindtmp+"/.arvados#collection", json, 0644)
378         // This symlink tricks walkHostFS into calling walkMount on
379         // the fakecollection dir. If we did the obvious thing instead
380         // (i.e., mount a collection under the output dir) walkMount
381         // would see that our fakecollection dir is actually a regular
382         // directory, conclude that the mount has been deleted and
383         // replaced by a regular directory tree, and process the tree
384         // as regular files, bypassing the manifest-copying code path
385         // we're trying to test.
386         err = os.Symlink("/fakecollection", s.cp.hostOutputDir+"/fakecollection")
387         c.Assert(err, check.IsNil)
388         s.cp.mounts["/fakecollection"] = arvados.Mount{
389                 Kind:             "collection",
390                 PortableDataHash: pdh,
391                 Writable:         writable,
392         }
393         s.cp.bindmounts = map[string]bindmount{
394                 "/fakecollection": bindmount{HostPath: bindtmp, ReadOnly: !writable},
395         }
396         s.cp.manifestCache = map[string]string{pdh: mtxt}
397         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
398         c.Check(err, check.IsNil)
399         c.Log(s.log.String())
400
401         // Check some files to ensure they were copied properly.
402         // Specifically, arbitrarily check every 17th file in every
403         // 13th dir.  (This is better than checking all of the files
404         // only in that it's less likely to show up as a distracting
405         // signal in CPU profiling.)
406         for i := 0; i < 100; i += 13 {
407                 for j := 0; j < 100; j += 17 {
408                         fnm := fmt.Sprintf("/fakecollection/dir%d/dir%d/file%d", i, j, j)
409                         _, err := s.cp.staged.Stat(fnm)
410                         c.Assert(err, check.IsNil, check.Commentf("%s", fnm))
411                 }
412         }
413 }
414
415 func (s *copierSuite) TestMountBelowExcludedByGlob(c *check.C) {
416         bindtmp := c.MkDir()
417         s.cp.mounts["/ctr/outdir/include/includer"] = arvados.Mount{
418                 Kind:             "collection",
419                 PortableDataHash: arvadostest.FooCollectionPDH,
420         }
421         s.cp.mounts["/ctr/outdir/include/includew"] = arvados.Mount{
422                 Kind:             "collection",
423                 PortableDataHash: arvadostest.FooCollectionPDH,
424                 Writable:         true,
425         }
426         s.cp.mounts["/ctr/outdir/exclude/excluder"] = arvados.Mount{
427                 Kind:             "collection",
428                 PortableDataHash: arvadostest.FooCollectionPDH,
429         }
430         s.cp.mounts["/ctr/outdir/exclude/excludew"] = arvados.Mount{
431                 Kind:             "collection",
432                 PortableDataHash: arvadostest.FooCollectionPDH,
433                 Writable:         true,
434         }
435         s.cp.mounts["/ctr/outdir/nonexistent/collection"] = arvados.Mount{
436                 // As extra assurance, plant a collection that will
437                 // fail if copier attempts to load its manifest.  (For
438                 // performance reasons it's important that copier
439                 // doesn't try to load the manifest before deciding
440                 // not to copy the contents.)
441                 Kind:             "collection",
442                 PortableDataHash: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+1234",
443         }
444         s.cp.globs = []string{
445                 "?ncl*/*r/*",
446                 "*/?ncl*/**",
447         }
448         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/include/includer", 0755), check.IsNil)
449         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/include/includew", 0755), check.IsNil)
450         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/exclude/excluder", 0755), check.IsNil)
451         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/exclude/excludew", 0755), check.IsNil)
452         s.writeFileInOutputDir(c, "include/includew/foo", "foo")
453         s.writeFileInOutputDir(c, "exclude/excludew/foo", "foo")
454         s.cp.bindmounts = map[string]bindmount{
455                 "/ctr/outdir/include/includew": bindmount{HostPath: bindtmp, ReadOnly: false},
456         }
457         s.cp.bindmounts = map[string]bindmount{
458                 "/ctr/outdir/include/excludew": bindmount{HostPath: bindtmp, ReadOnly: false},
459         }
460
461         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
462         c.Check(err, check.IsNil)
463         c.Log(s.log.String())
464
465         // Note it's OK that "/exclude" is not excluded by walkMount:
466         // it is just a local filesystem directory, not a mount point
467         // that's expensive to walk.  In real-life usage, it will be
468         // removed from cp.dirs before any copying happens.
469         c.Check(s.cp.dirs, check.DeepEquals, []string{"/exclude", "/include", "/include/includew"})
470         c.Check(s.cp.files, check.DeepEquals, []filetodo{
471                 {src: s.cp.hostOutputDir + "/include/includew/foo", dst: "/include/includew/foo", size: 3},
472         })
473         manifest, err := s.cp.staged.MarshalManifest(".")
474         c.Assert(err, check.IsNil)
475         c.Check(manifest, check.Matches, `(?ms).*\./include/includer .*`)
476         c.Check(manifest, check.Not(check.Matches), `(?ms).*exclude.*`)
477         c.Check(s.log.String(), check.Matches, `(?ms).*not copying \\"exclude/excluder\\".*`)
478         c.Check(s.log.String(), check.Matches, `(?ms).*not copying \\"nonexistent/collection\\".*`)
479 }
480
481 func (s *copierSuite) writeFileInOutputDir(c *check.C, path, data string) {
482         f, err := os.OpenFile(s.cp.hostOutputDir+"/"+path, os.O_CREATE|os.O_WRONLY, 0644)
483         c.Assert(err, check.IsNil)
484         _, err = io.WriteString(f, data)
485         c.Assert(err, check.IsNil)
486         c.Assert(f.Close(), check.IsNil)
487 }
488
489 // applyGlobsToFilesAndDirs uses the same glob-matching code as
490 // applyGlobsToStaged, so we don't need to test all of the same
491 // glob-matching behavior covered in TestApplyGlobsToCollectionFS.  We
492 // do need to check that (a) the glob is actually being used to filter
493 // out files, and (b) non-matching dirs still included if and only if
494 // they are ancestors of matching files.
495 func (s *copierSuite) TestApplyGlobsToFilesAndDirs(c *check.C) {
496         dirs := []string{"dir1", "dir1/dir11", "dir1/dir12", "dir2"}
497         files := []string{"dir1/file11", "dir1/dir11/file111", "dir2/file2"}
498         for _, trial := range []struct {
499                 globs []string
500                 dirs  []string
501                 files []string
502         }{
503                 {
504                         globs: []string{},
505                         dirs:  append([]string{}, dirs...),
506                         files: append([]string{}, files...),
507                 },
508                 {
509                         globs: []string{"**"},
510                         dirs:  append([]string{}, dirs...),
511                         files: append([]string{}, files...),
512                 },
513                 {
514                         globs: []string{"**/file111"},
515                         dirs:  []string{"dir1", "dir1/dir11"},
516                         files: []string{"dir1/dir11/file111"},
517                 },
518                 {
519                         globs: []string{"nothing"},
520                         dirs:  nil,
521                         files: nil,
522                 },
523                 {
524                         globs: []string{"**/dir12"},
525                         dirs:  []string{"dir1", "dir1/dir12"},
526                         files: nil,
527                 },
528                 {
529                         globs: []string{"**/file*"},
530                         dirs:  []string{"dir1", "dir1/dir11", "dir2"},
531                         files: append([]string{}, files...),
532                 },
533                 {
534                         globs: []string{"**/dir1[12]"},
535                         dirs:  []string{"dir1", "dir1/dir11", "dir1/dir12"},
536                         files: nil,
537                 },
538                 {
539                         globs: []string{"**/dir1[^2]"},
540                         dirs:  []string{"dir1", "dir1/dir11"},
541                         files: nil,
542                 },
543                 {
544                         globs: []string{"dir1/**"},
545                         dirs:  []string{"dir1", "dir1/dir11", "dir1/dir12"},
546                         files: []string{"dir1/file11", "dir1/dir11/file111"},
547                 },
548         } {
549                 c.Logf("=== globs: %q", trial.globs)
550                 cp := copier{
551                         globs: trial.globs,
552                         dirs:  dirs,
553                 }
554                 for _, path := range files {
555                         cp.files = append(cp.files, filetodo{dst: path})
556                 }
557                 cp.applyGlobsToFilesAndDirs()
558                 var gotFiles []string
559                 for _, file := range cp.files {
560                         gotFiles = append(gotFiles, file.dst)
561                 }
562                 c.Check(cp.dirs, check.DeepEquals, trial.dirs)
563                 c.Check(gotFiles, check.DeepEquals, trial.files)
564         }
565 }
566
567 func (s *copierSuite) TestApplyGlobsToCollectionFS(c *check.C) {
568         for _, trial := range []struct {
569                 globs  []string
570                 expect []string
571         }{
572                 {
573                         globs:  nil,
574                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
575                 },
576                 {
577                         globs:  []string{"foo"},
578                         expect: []string{"foo"},
579                 },
580                 {
581                         globs:  []string{"baz/parent1/item1"},
582                         expect: []string{"baz/parent1/item1"},
583                 },
584                 {
585                         globs:  []string{"**"},
586                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
587                 },
588                 {
589                         globs:  []string{"**/*"},
590                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
591                 },
592                 {
593                         globs:  []string{"*"},
594                         expect: []string{"foo", "bar"},
595                 },
596                 {
597                         globs:  []string{"baz"},
598                         expect: nil,
599                 },
600                 {
601                         globs:  []string{"b*/**"},
602                         expect: []string{"baz/quux", "baz/parent1/item1"},
603                 },
604                 {
605                         globs:  []string{"baz"},
606                         expect: nil,
607                 },
608                 {
609                         globs:  []string{"baz/**"},
610                         expect: []string{"baz/quux", "baz/parent1/item1"},
611                 },
612                 {
613                         globs:  []string{"baz/*"},
614                         expect: []string{"baz/quux"},
615                 },
616                 {
617                         globs:  []string{"baz/**/*uu?"},
618                         expect: []string{"baz/quux"},
619                 },
620                 {
621                         globs:  []string{"**/*m1"},
622                         expect: []string{"baz/parent1/item1"},
623                 },
624                 {
625                         globs:  []string{"*/*/*/**/*1"},
626                         expect: nil,
627                 },
628                 {
629                         globs:  []string{"f*", "**/q*"},
630                         expect: []string{"foo", "baz/quux"},
631                 },
632                 {
633                         globs:  []string{"\\"}, // invalid pattern matches nothing
634                         expect: nil,
635                 },
636                 {
637                         globs:  []string{"\\", "foo"},
638                         expect: []string{"foo"},
639                 },
640                 {
641                         globs:  []string{"foo/**"},
642                         expect: nil,
643                 },
644                 {
645                         globs:  []string{"foo*/**"},
646                         expect: nil,
647                 },
648         } {
649                 c.Logf("=== globs: %q", trial.globs)
650                 collfs, err := (&arvados.Collection{ManifestText: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo 0:0:bar 0:0:baz/quux 0:0:baz/parent1/item1\n"}).FileSystem(nil, nil)
651                 c.Assert(err, check.IsNil)
652                 cp := copier{globs: trial.globs, staged: collfs}
653                 err = cp.applyGlobsToStaged()
654                 if !c.Check(err, check.IsNil) {
655                         continue
656                 }
657                 var got []string
658                 fs.WalkDir(arvados.FS(collfs), "", func(path string, ent fs.DirEntry, err error) error {
659                         if !ent.IsDir() {
660                                 got = append(got, path)
661                         }
662                         return nil
663                 })
664                 sort.Strings(got)
665                 sort.Strings(trial.expect)
666                 c.Check(got, check.DeepEquals, trial.expect)
667         }
668 }