]> git.arvados.org - arvados.git/blob - lib/crunchrun/copier_test.go
22827: Test repetitive mounts under output directory.
[arvados.git] / lib / crunchrun / copier_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "bytes"
9         "encoding/json"
10         "fmt"
11         "io"
12         "io/fs"
13         "os"
14         "path"
15         "runtime"
16         "sort"
17         "syscall"
18
19         "git.arvados.org/arvados.git/sdk/go/arvados"
20         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
21         "git.arvados.org/arvados.git/sdk/go/arvadostest"
22         "git.arvados.org/arvados.git/sdk/go/keepclient"
23         "github.com/sirupsen/logrus"
24         check "gopkg.in/check.v1"
25 )
26
27 var _ = check.Suite(&copierSuite{})
28
29 type copierSuite struct {
30         cp  copier
31         log bytes.Buffer
32 }
33
34 func (s *copierSuite) SetUpTest(c *check.C) {
35         tmpdir := c.MkDir()
36         s.log = bytes.Buffer{}
37
38         cl, err := arvadosclient.MakeArvadosClient()
39         c.Assert(err, check.IsNil)
40         kc, err := keepclient.MakeKeepClient(cl)
41         c.Assert(err, check.IsNil)
42         collfs, err := (&arvados.Collection{}).FileSystem(arvados.NewClientFromEnv(), kc)
43         c.Assert(err, check.IsNil)
44
45         s.cp = copier{
46                 client:        arvados.NewClientFromEnv(),
47                 keepClient:    kc,
48                 hostOutputDir: tmpdir,
49                 ctrOutputDir:  "/ctr/outdir",
50                 mounts: map[string]arvados.Mount{
51                         "/ctr/outdir": {Kind: "tmp"},
52                 },
53                 secretMounts: map[string]arvados.Mount{
54                         "/secret_text": {Kind: "text", Content: "xyzzy"},
55                 },
56                 logger: &logrus.Logger{Out: &s.log, Formatter: &logrus.TextFormatter{}, Level: logrus.InfoLevel},
57                 staged: collfs,
58         }
59 }
60
61 func (s *copierSuite) TestEmptyOutput(c *check.C) {
62         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
63         c.Check(err, check.IsNil)
64         c.Check(s.cp.dirs, check.DeepEquals, []string(nil))
65         c.Check(len(s.cp.files), check.Equals, 0)
66 }
67
68 func (s *copierSuite) TestEmptyWritableMount(c *check.C) {
69         s.writeFileInOutputDir(c, ".arvados#collection", `{"manifest_text":""}`)
70         s.cp.mounts[s.cp.ctrOutputDir] = arvados.Mount{
71                 Kind:     "collection",
72                 Writable: true,
73         }
74
75         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
76         c.Assert(err, check.IsNil)
77         c.Check(s.cp.dirs, check.DeepEquals, []string(nil))
78         c.Check(len(s.cp.files), check.Equals, 0)
79         rootdir, err := s.cp.staged.Open(".")
80         c.Assert(err, check.IsNil)
81         defer rootdir.Close()
82         fis, err := rootdir.Readdir(-1)
83         c.Assert(err, check.IsNil)
84         c.Check(fis, check.HasLen, 0)
85 }
86
87 func (s *copierSuite) TestOutputCollectionWithOnlySubmounts(c *check.C) {
88         s.writeFileInOutputDir(c, ".arvados#collection", `{"manifest_text":""}`)
89         s.cp.mounts[s.cp.ctrOutputDir] = arvados.Mount{
90                 Kind:     "collection",
91                 Writable: true,
92         }
93         s.cp.mounts[path.Join(s.cp.ctrOutputDir, "foo")] = arvados.Mount{
94                 Kind:             "collection",
95                 Path:             "foo",
96                 PortableDataHash: arvadostest.FooCollectionPDH,
97         }
98
99         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
100         c.Assert(err, check.IsNil)
101
102         // s.cp.dirs and s.cp.files are empty, because nothing needs
103         // to be copied from disk.
104         c.Check(s.cp.dirs, check.DeepEquals, []string(nil))
105         c.Check(len(s.cp.files), check.Equals, 0)
106
107         // The "foo" file has already been copied from FooCollection
108         // to s.cp.staged via Snapshot+Splice.
109         rootdir, err := s.cp.staged.Open(".")
110         c.Assert(err, check.IsNil)
111         defer rootdir.Close()
112         fis, err := rootdir.Readdir(-1)
113         c.Assert(err, check.IsNil)
114         c.Assert(fis, check.HasLen, 1)
115         c.Check(fis[0].Size(), check.Equals, int64(3))
116 }
117
118 func (s *copierSuite) TestRepetitiveMountsInOutputDir(c *check.C) {
119         var memstats0 runtime.MemStats
120         runtime.ReadMemStats(&memstats0)
121
122         s.writeFileInOutputDir(c, ".arvados#collection", `{"manifest_text":""}`)
123         s.cp.mounts[s.cp.ctrOutputDir] = arvados.Mount{
124                 Kind:     "collection",
125                 Writable: true,
126         }
127         nmounts := 200
128         ncollections := 1
129         pdh := make([]string, ncollections)
130         s.cp.manifestCache = make(map[string]string)
131         for i := 0; i < ncollections; i++ {
132                 mtxt := arvadostest.FakeManifest(1, nmounts, 2, 4<<20)
133                 pdh[i] = arvados.PortableDataHash(mtxt)
134                 s.cp.manifestCache[pdh[i]] = mtxt
135         }
136         for i := 0; i < nmounts; i++ {
137                 filename := fmt.Sprintf("file%d", i)
138                 s.cp.mounts[path.Join(s.cp.ctrOutputDir, filename)] = arvados.Mount{
139                         Kind:             "collection",
140                         Path:             fmt.Sprintf("dir0/dir%d/file%d", i, i),
141                         PortableDataHash: pdh[i%ncollections],
142                 }
143         }
144         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
145         c.Assert(err, check.IsNil)
146
147         // Files mounted under output dir have been copied from the
148         // fake collections to s.cp.staged via Snapshot+Splice.
149         rootdir, err := s.cp.staged.Open(".")
150         c.Assert(err, check.IsNil)
151         defer rootdir.Close()
152         fis, err := rootdir.Readdir(-1)
153         c.Assert(err, check.IsNil)
154         c.Assert(fis, check.HasLen, nmounts)
155
156         var memstats runtime.MemStats
157         runtime.ReadMemStats(&memstats)
158         c.Logf("%s Alloc=%d Sys=%d", time.Now(), memstats.Alloc, memstats.Sys)
159 }
160
161 func (s *copierSuite) TestRegularFilesAndDirs(c *check.C) {
162         err := os.MkdirAll(s.cp.hostOutputDir+"/dir1/dir2/dir3", 0755)
163         c.Assert(err, check.IsNil)
164         f, err := os.OpenFile(s.cp.hostOutputDir+"/dir1/foo", os.O_CREATE|os.O_WRONLY, 0644)
165         c.Assert(err, check.IsNil)
166         _, err = io.WriteString(f, "foo")
167         c.Assert(err, check.IsNil)
168         c.Assert(f.Close(), check.IsNil)
169         err = syscall.Mkfifo(s.cp.hostOutputDir+"/dir1/fifo", 0644)
170         c.Assert(err, check.IsNil)
171
172         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
173         c.Check(err, check.IsNil)
174         c.Check(s.cp.dirs, check.DeepEquals, []string{"/dir1", "/dir1/dir2", "/dir1/dir2/dir3"})
175         c.Check(s.cp.files, check.DeepEquals, []filetodo{
176                 {src: os.DevNull, dst: "/dir1/dir2/dir3/.keep"},
177                 {src: s.cp.hostOutputDir + "/dir1/foo", dst: "/dir1/foo", size: 3},
178         })
179         c.Check(s.log.String(), check.Matches, `.* msg="Skipping unsupported file type \(mode 200000644\) in output dir: \\"/ctr/outdir/dir1/fifo\\""\n`)
180 }
181
182 func (s *copierSuite) TestSymlinkCycle(c *check.C) {
183         c.Assert(os.Mkdir(s.cp.hostOutputDir+"/dir1", 0755), check.IsNil)
184         c.Assert(os.Mkdir(s.cp.hostOutputDir+"/dir2", 0755), check.IsNil)
185         c.Assert(os.Symlink("../dir2", s.cp.hostOutputDir+"/dir1/l_dir2"), check.IsNil)
186         c.Assert(os.Symlink("../dir1", s.cp.hostOutputDir+"/dir2/l_dir1"), check.IsNil)
187         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
188         c.Check(err, check.ErrorMatches, `.*cycle.*`)
189 }
190
191 func (s *copierSuite) TestSymlinkTargetMissing(c *check.C) {
192         c.Assert(os.Symlink("./missing", s.cp.hostOutputDir+"/symlink"), check.IsNil)
193         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
194         c.Check(err, check.ErrorMatches, `.*/ctr/outdir/missing.*`)
195 }
196
197 func (s *copierSuite) TestSymlinkTargetNotMounted(c *check.C) {
198         c.Assert(os.Symlink("../boop", s.cp.hostOutputDir+"/symlink"), check.IsNil)
199         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
200         c.Check(err, check.ErrorMatches, `.*/ctr/boop.*`)
201 }
202
203 func (s *copierSuite) TestSymlinkToSecret(c *check.C) {
204         c.Assert(os.Symlink("/secret_text", s.cp.hostOutputDir+"/symlink"), check.IsNil)
205         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
206         c.Check(err, check.IsNil)
207         c.Check(len(s.cp.dirs), check.Equals, 0)
208         c.Check(len(s.cp.files), check.Equals, 0)
209 }
210
211 func (s *copierSuite) TestSecretInOutputDir(c *check.C) {
212         s.cp.secretMounts["/ctr/outdir/secret_text"] = s.cp.secretMounts["/secret_text"]
213         s.writeFileInOutputDir(c, "secret_text", "xyzzy")
214         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
215         c.Check(err, check.IsNil)
216         c.Check(len(s.cp.dirs), check.Equals, 0)
217         c.Check(len(s.cp.files), check.Equals, 0)
218 }
219
220 func (s *copierSuite) TestSymlinkToMountedCollection(c *check.C) {
221         // simulate mounted read-only collection
222         s.cp.mounts["/mnt"] = arvados.Mount{
223                 Kind:             "collection",
224                 PortableDataHash: arvadostest.FooCollectionPDH,
225         }
226
227         // simulate mounted writable collection
228         bindtmp := c.MkDir()
229         f, err := os.OpenFile(bindtmp+"/.arvados#collection", os.O_CREATE|os.O_WRONLY, 0644)
230         c.Assert(err, check.IsNil)
231         _, err = io.WriteString(f, `{"manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n"}`)
232         c.Assert(err, check.IsNil)
233         c.Assert(f.Close(), check.IsNil)
234         s.cp.mounts["/mnt-w"] = arvados.Mount{
235                 Kind:             "collection",
236                 PortableDataHash: arvadostest.FooCollectionPDH,
237                 Writable:         true,
238         }
239         s.cp.bindmounts = map[string]bindmount{
240                 "/mnt-w": bindmount{HostPath: bindtmp, ReadOnly: false},
241         }
242
243         c.Assert(os.Symlink("../../mnt", s.cp.hostOutputDir+"/l_dir"), check.IsNil)
244         c.Assert(os.Symlink("/mnt/foo", s.cp.hostOutputDir+"/l_file"), check.IsNil)
245         c.Assert(os.Symlink("/mnt-w/bar", s.cp.hostOutputDir+"/l_file_w"), check.IsNil)
246
247         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
248         c.Check(err, check.IsNil)
249         s.checkStagedFile(c, "l_dir/foo", 3)
250         s.checkStagedFile(c, "l_file", 3)
251         s.checkStagedFile(c, "l_file_w", 3)
252 }
253
254 func (s *copierSuite) checkStagedFile(c *check.C, path string, size int64) {
255         fi, err := s.cp.staged.Stat(path)
256         if c.Check(err, check.IsNil) {
257                 c.Check(fi.Size(), check.Equals, size)
258         }
259 }
260
261 func (s *copierSuite) TestSymlink(c *check.C) {
262         hostfile := s.cp.hostOutputDir + "/dir1/file"
263
264         err := os.MkdirAll(s.cp.hostOutputDir+"/dir1/dir2/dir3", 0755)
265         c.Assert(err, check.IsNil)
266         s.writeFileInOutputDir(c, "dir1/file", "file")
267         for _, err := range []error{
268                 os.Symlink(s.cp.ctrOutputDir+"/dir1/file", s.cp.hostOutputDir+"/l_abs_file"),
269                 os.Symlink(s.cp.ctrOutputDir+"/dir1/dir2", s.cp.hostOutputDir+"/l_abs_dir2"),
270                 os.Symlink("../../dir1/file", s.cp.hostOutputDir+"/dir1/dir2/l_rel_file"),
271                 os.Symlink("dir1/file", s.cp.hostOutputDir+"/l_rel_file"),
272                 os.MkdirAll(s.cp.hostOutputDir+"/morelinks", 0755),
273                 os.Symlink("../dir1/dir2", s.cp.hostOutputDir+"/morelinks/l_rel_dir2"),
274                 os.Symlink("dir1/dir2/dir3", s.cp.hostOutputDir+"/l_rel_dir3"),
275                 // rel. symlink -> rel. symlink -> regular file
276                 os.Symlink("../dir1/dir2/l_rel_file", s.cp.hostOutputDir+"/morelinks/l_rel_l_rel_file"),
277         } {
278                 c.Assert(err, check.IsNil)
279         }
280
281         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
282         c.Check(err, check.IsNil)
283         c.Check(s.cp.dirs, check.DeepEquals, []string{
284                 "/dir1", "/dir1/dir2", "/dir1/dir2/dir3",
285                 "/l_abs_dir2", "/l_abs_dir2/dir3",
286                 "/l_rel_dir3",
287                 "/morelinks", "/morelinks/l_rel_dir2", "/morelinks/l_rel_dir2/dir3",
288         })
289         c.Check(s.cp.files, check.DeepEquals, []filetodo{
290                 {dst: "/dir1/dir2/dir3/.keep", src: os.DevNull},
291                 {dst: "/dir1/dir2/l_rel_file", src: hostfile, size: 4},
292                 {dst: "/dir1/file", src: hostfile, size: 4},
293                 {dst: "/l_abs_dir2/dir3/.keep", src: os.DevNull},
294                 {dst: "/l_abs_dir2/l_rel_file", src: hostfile, size: 4},
295                 {dst: "/l_abs_file", src: hostfile, size: 4},
296                 {dst: "/l_rel_dir3/.keep", src: os.DevNull},
297                 {dst: "/l_rel_file", src: hostfile, size: 4},
298                 {dst: "/morelinks/l_rel_dir2/dir3/.keep", src: os.DevNull},
299                 {dst: "/morelinks/l_rel_dir2/l_rel_file", src: hostfile, size: 4},
300                 {dst: "/morelinks/l_rel_l_rel_file", src: hostfile, size: 4},
301         })
302 }
303
304 func (s *copierSuite) TestUnsupportedOutputMount(c *check.C) {
305         s.cp.mounts["/ctr/outdir"] = arvados.Mount{Kind: "waz"}
306         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
307         c.Check(err, check.NotNil)
308 }
309
310 func (s *copierSuite) TestUnsupportedMountKindBelow(c *check.C) {
311         s.cp.mounts["/ctr/outdir/dirk"] = arvados.Mount{Kind: "waz"}
312         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
313         c.Check(err, check.NotNil)
314 }
315
316 func (s *copierSuite) TestWritableMountBelow(c *check.C) {
317         s.cp.mounts["/ctr/outdir/mount"] = arvados.Mount{
318                 Kind:             "collection",
319                 PortableDataHash: arvadostest.FooCollectionPDH,
320                 Writable:         true,
321         }
322         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/mount", 0755), check.IsNil)
323         s.writeFileInOutputDir(c, "file", "file")
324         s.writeFileInOutputDir(c, "mount/foo", "foo")
325
326         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
327         c.Check(err, check.IsNil)
328         c.Check(s.cp.dirs, check.DeepEquals, []string{"/mount"})
329         c.Check(s.cp.files, check.DeepEquals, []filetodo{
330                 {src: s.cp.hostOutputDir + "/file", dst: "/file", size: 4},
331                 {src: s.cp.hostOutputDir + "/mount/foo", dst: "/mount/foo", size: 3},
332         })
333 }
334
335 // Check some glob-matching edge cases. In particular, check that
336 // patterns like "foo/**" do not match regular files named "foo"
337 // (unless of course they are inside a directory named "foo").
338 func (s *copierSuite) TestMatchGlobs(c *check.C) {
339         s.cp.globs = []string{"foo*/**"}
340         c.Check(s.cp.matchGlobs("foo", true), check.Equals, true)
341         c.Check(s.cp.matchGlobs("food", true), check.Equals, true)
342         c.Check(s.cp.matchGlobs("foo", false), check.Equals, false)
343         c.Check(s.cp.matchGlobs("food", false), check.Equals, false)
344         c.Check(s.cp.matchGlobs("foo/bar", false), check.Equals, true)
345         c.Check(s.cp.matchGlobs("food/bar", false), check.Equals, true)
346         c.Check(s.cp.matchGlobs("foo/bar", true), check.Equals, true)
347         c.Check(s.cp.matchGlobs("food/bar", true), check.Equals, true)
348
349         s.cp.globs = []string{"ba[!/]/foo*/**"}
350         c.Check(s.cp.matchGlobs("bar/foo", true), check.Equals, true)
351         c.Check(s.cp.matchGlobs("bar/food", true), check.Equals, true)
352         c.Check(s.cp.matchGlobs("bar/foo", false), check.Equals, false)
353         c.Check(s.cp.matchGlobs("bar/food", false), check.Equals, false)
354         c.Check(s.cp.matchGlobs("bar/foo/z\\[", true), check.Equals, true)
355         c.Check(s.cp.matchGlobs("bar/food/z\\[", true), check.Equals, true)
356         c.Check(s.cp.matchGlobs("bar/foo/z\\[", false), check.Equals, true)
357         c.Check(s.cp.matchGlobs("bar/food/z\\[", false), check.Equals, true)
358
359         s.cp.globs = []string{"waz/**/foo*/**"}
360         c.Check(s.cp.matchGlobs("waz/quux/foo", true), check.Equals, true)
361         c.Check(s.cp.matchGlobs("waz/quux/food", true), check.Equals, true)
362         c.Check(s.cp.matchGlobs("waz/quux/foo", false), check.Equals, false)
363         c.Check(s.cp.matchGlobs("waz/quux/food", false), check.Equals, false)
364         c.Check(s.cp.matchGlobs("waz/quux/foo/foo", true), check.Equals, true)
365         c.Check(s.cp.matchGlobs("waz/quux/food/foo", true), check.Equals, true)
366         c.Check(s.cp.matchGlobs("waz/quux/foo/foo", false), check.Equals, true)
367         c.Check(s.cp.matchGlobs("waz/quux/food/foo", false), check.Equals, true)
368
369         s.cp.globs = []string{"foo/**/*"}
370         c.Check(s.cp.matchGlobs("foo", false), check.Equals, false)
371         c.Check(s.cp.matchGlobs("foo/bar", false), check.Equals, true)
372         c.Check(s.cp.matchGlobs("foo/bar/baz", false), check.Equals, true)
373         c.Check(s.cp.matchGlobs("foo/bar/baz/waz", false), check.Equals, true)
374 }
375
376 func (s *copierSuite) TestSubtreeCouldMatch(c *check.C) {
377         for _, trial := range []struct {
378                 mount string // relative to output dir
379                 glob  string
380                 could bool
381         }{
382                 {mount: "abc", glob: "*"},
383                 {mount: "abc", glob: "abc/*", could: true},
384                 {mount: "abc", glob: "a*/**", could: true},
385                 {mount: "abc", glob: "**", could: true},
386                 {mount: "abc", glob: "*/*", could: true},
387                 {mount: "abc", glob: "**/*.txt", could: true},
388                 {mount: "abc/def", glob: "*"},
389                 {mount: "abc/def", glob: "*/*"},
390                 {mount: "abc/def", glob: "*/*.txt"},
391                 {mount: "abc/def", glob: "*/*/*", could: true},
392                 {mount: "abc/def", glob: "**", could: true},
393                 {mount: "abc/def", glob: "**/bar", could: true},
394                 {mount: "abc/def", glob: "abc/**", could: true},
395                 {mount: "abc/def/ghi", glob: "*c/**/bar", could: true},
396                 {mount: "abc/def/ghi", glob: "*c/*f/bar"},
397                 {mount: "abc/def/ghi", glob: "abc/d[^/]f/ghi/*", could: true},
398         } {
399                 c.Logf("=== %+v", trial)
400                 got := (&copier{
401                         globs: []string{trial.glob},
402                 }).subtreeCouldMatch(trial.mount)
403                 c.Check(got, check.Equals, trial.could)
404         }
405 }
406
407 func (s *copierSuite) TestCopyFromLargeCollection_Readonly(c *check.C) {
408         s.testCopyFromLargeCollection(c, false)
409 }
410
411 func (s *copierSuite) TestCopyFromLargeCollection_Writable(c *check.C) {
412         s.testCopyFromLargeCollection(c, true)
413 }
414
415 func (s *copierSuite) testCopyFromLargeCollection(c *check.C, writable bool) {
416         bindtmp := c.MkDir()
417         mtxt := arvadostest.FakeManifest(100, 100, 2, 4<<20)
418         pdh := arvados.PortableDataHash(mtxt)
419         json, err := json.Marshal(arvados.Collection{ManifestText: mtxt, PortableDataHash: pdh})
420         c.Assert(err, check.IsNil)
421         err = os.WriteFile(bindtmp+"/.arvados#collection", json, 0644)
422         // This symlink tricks walkHostFS into calling walkMount on
423         // the fakecollection dir. If we did the obvious thing instead
424         // (i.e., mount a collection under the output dir) walkMount
425         // would see that our fakecollection dir is actually a regular
426         // directory, conclude that the mount has been deleted and
427         // replaced by a regular directory tree, and process the tree
428         // as regular files, bypassing the manifest-copying code path
429         // we're trying to test.
430         err = os.Symlink("/fakecollection", s.cp.hostOutputDir+"/fakecollection")
431         c.Assert(err, check.IsNil)
432         s.cp.mounts["/fakecollection"] = arvados.Mount{
433                 Kind:             "collection",
434                 PortableDataHash: pdh,
435                 Writable:         writable,
436         }
437         s.cp.bindmounts = map[string]bindmount{
438                 "/fakecollection": bindmount{HostPath: bindtmp, ReadOnly: !writable},
439         }
440         s.cp.manifestCache = map[string]string{pdh: mtxt}
441         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
442         c.Check(err, check.IsNil)
443         c.Log(s.log.String())
444
445         // Check some files to ensure they were copied properly.
446         // Specifically, arbitrarily check every 17th file in every
447         // 13th dir.  (This is better than checking all of the files
448         // only in that it's less likely to show up as a distracting
449         // signal in CPU profiling.)
450         for i := 0; i < 100; i += 13 {
451                 for j := 0; j < 100; j += 17 {
452                         fnm := fmt.Sprintf("/fakecollection/dir%d/dir%d/file%d", i, j, j)
453                         _, err := s.cp.staged.Stat(fnm)
454                         c.Assert(err, check.IsNil, check.Commentf("%s", fnm))
455                 }
456         }
457 }
458
459 func (s *copierSuite) TestMountBelowExcludedByGlob(c *check.C) {
460         bindtmp := c.MkDir()
461         s.cp.mounts["/ctr/outdir/include/includer"] = arvados.Mount{
462                 Kind:             "collection",
463                 PortableDataHash: arvadostest.FooCollectionPDH,
464         }
465         s.cp.mounts["/ctr/outdir/include/includew"] = arvados.Mount{
466                 Kind:             "collection",
467                 PortableDataHash: arvadostest.FooCollectionPDH,
468                 Writable:         true,
469         }
470         s.cp.mounts["/ctr/outdir/exclude/excluder"] = arvados.Mount{
471                 Kind:             "collection",
472                 PortableDataHash: arvadostest.FooCollectionPDH,
473         }
474         s.cp.mounts["/ctr/outdir/exclude/excludew"] = arvados.Mount{
475                 Kind:             "collection",
476                 PortableDataHash: arvadostest.FooCollectionPDH,
477                 Writable:         true,
478         }
479         s.cp.mounts["/ctr/outdir/nonexistent/collection"] = arvados.Mount{
480                 // As extra assurance, plant a collection that will
481                 // fail if copier attempts to load its manifest.  (For
482                 // performance reasons it's important that copier
483                 // doesn't try to load the manifest before deciding
484                 // not to copy the contents.)
485                 Kind:             "collection",
486                 PortableDataHash: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+1234",
487         }
488         s.cp.globs = []string{
489                 "?ncl*/*r/*",
490                 "*/?ncl*/**",
491         }
492         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/include/includer", 0755), check.IsNil)
493         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/include/includew", 0755), check.IsNil)
494         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/exclude/excluder", 0755), check.IsNil)
495         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/exclude/excludew", 0755), check.IsNil)
496         s.writeFileInOutputDir(c, "include/includew/foo", "foo")
497         s.writeFileInOutputDir(c, "exclude/excludew/foo", "foo")
498         s.cp.bindmounts = map[string]bindmount{
499                 "/ctr/outdir/include/includew": bindmount{HostPath: bindtmp, ReadOnly: false},
500         }
501         s.cp.bindmounts = map[string]bindmount{
502                 "/ctr/outdir/include/excludew": bindmount{HostPath: bindtmp, ReadOnly: false},
503         }
504
505         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
506         c.Check(err, check.IsNil)
507         c.Log(s.log.String())
508
509         // Note it's OK that "/exclude" is not excluded by walkMount:
510         // it is just a local filesystem directory, not a mount point
511         // that's expensive to walk.  In real-life usage, it will be
512         // removed from cp.dirs before any copying happens.
513         c.Check(s.cp.dirs, check.DeepEquals, []string{"/exclude", "/include", "/include/includew"})
514         c.Check(s.cp.files, check.DeepEquals, []filetodo{
515                 {src: s.cp.hostOutputDir + "/include/includew/foo", dst: "/include/includew/foo", size: 3},
516         })
517         manifest, err := s.cp.staged.MarshalManifest(".")
518         c.Assert(err, check.IsNil)
519         c.Check(manifest, check.Matches, `(?ms).*\./include/includer .*`)
520         c.Check(manifest, check.Not(check.Matches), `(?ms).*exclude.*`)
521         c.Check(s.log.String(), check.Matches, `(?ms).*not copying \\"exclude/excluder\\".*`)
522         c.Check(s.log.String(), check.Matches, `(?ms).*not copying \\"nonexistent/collection\\".*`)
523 }
524
525 func (s *copierSuite) writeFileInOutputDir(c *check.C, path, data string) {
526         f, err := os.OpenFile(s.cp.hostOutputDir+"/"+path, os.O_CREATE|os.O_WRONLY, 0644)
527         c.Assert(err, check.IsNil)
528         _, err = io.WriteString(f, data)
529         c.Assert(err, check.IsNil)
530         c.Assert(f.Close(), check.IsNil)
531 }
532
533 // applyGlobsToFilesAndDirs uses the same glob-matching code as
534 // applyGlobsToStaged, so we don't need to test all of the same
535 // glob-matching behavior covered in TestApplyGlobsToCollectionFS.  We
536 // do need to check that (a) the glob is actually being used to filter
537 // out files, and (b) non-matching dirs still included if and only if
538 // they are ancestors of matching files.
539 func (s *copierSuite) TestApplyGlobsToFilesAndDirs(c *check.C) {
540         dirs := []string{"dir1", "dir1/dir11", "dir1/dir12", "dir2"}
541         files := []string{"dir1/file11", "dir1/dir11/file111", "dir2/file2"}
542         for _, trial := range []struct {
543                 globs []string
544                 dirs  []string
545                 files []string
546         }{
547                 {
548                         globs: []string{},
549                         dirs:  append([]string{}, dirs...),
550                         files: append([]string{}, files...),
551                 },
552                 {
553                         globs: []string{"**"},
554                         dirs:  append([]string{}, dirs...),
555                         files: append([]string{}, files...),
556                 },
557                 {
558                         globs: []string{"**/file111"},
559                         dirs:  []string{"dir1", "dir1/dir11"},
560                         files: []string{"dir1/dir11/file111"},
561                 },
562                 {
563                         globs: []string{"nothing"},
564                         dirs:  nil,
565                         files: nil,
566                 },
567                 {
568                         globs: []string{"**/dir12"},
569                         dirs:  []string{"dir1", "dir1/dir12"},
570                         files: nil,
571                 },
572                 {
573                         globs: []string{"**/file*"},
574                         dirs:  []string{"dir1", "dir1/dir11", "dir2"},
575                         files: append([]string{}, files...),
576                 },
577                 {
578                         globs: []string{"**/dir1[12]"},
579                         dirs:  []string{"dir1", "dir1/dir11", "dir1/dir12"},
580                         files: nil,
581                 },
582                 {
583                         globs: []string{"**/dir1[^2]"},
584                         dirs:  []string{"dir1", "dir1/dir11"},
585                         files: nil,
586                 },
587                 {
588                         globs: []string{"dir1/**"},
589                         dirs:  []string{"dir1", "dir1/dir11", "dir1/dir12"},
590                         files: []string{"dir1/file11", "dir1/dir11/file111"},
591                 },
592         } {
593                 c.Logf("=== globs: %q", trial.globs)
594                 cp := copier{
595                         globs: trial.globs,
596                         dirs:  dirs,
597                 }
598                 for _, path := range files {
599                         cp.files = append(cp.files, filetodo{dst: path})
600                 }
601                 cp.applyGlobsToFilesAndDirs()
602                 var gotFiles []string
603                 for _, file := range cp.files {
604                         gotFiles = append(gotFiles, file.dst)
605                 }
606                 c.Check(cp.dirs, check.DeepEquals, trial.dirs)
607                 c.Check(gotFiles, check.DeepEquals, trial.files)
608         }
609 }
610
611 func (s *copierSuite) TestApplyGlobsToCollectionFS(c *check.C) {
612         for _, trial := range []struct {
613                 globs  []string
614                 expect []string
615         }{
616                 {
617                         globs:  nil,
618                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
619                 },
620                 {
621                         globs:  []string{"foo"},
622                         expect: []string{"foo"},
623                 },
624                 {
625                         globs:  []string{"baz/parent1/item1"},
626                         expect: []string{"baz/parent1/item1"},
627                 },
628                 {
629                         globs:  []string{"**"},
630                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
631                 },
632                 {
633                         globs:  []string{"**/*"},
634                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
635                 },
636                 {
637                         globs:  []string{"*"},
638                         expect: []string{"foo", "bar"},
639                 },
640                 {
641                         globs:  []string{"baz"},
642                         expect: nil,
643                 },
644                 {
645                         globs:  []string{"b*/**"},
646                         expect: []string{"baz/quux", "baz/parent1/item1"},
647                 },
648                 {
649                         globs:  []string{"baz"},
650                         expect: nil,
651                 },
652                 {
653                         globs:  []string{"baz/**"},
654                         expect: []string{"baz/quux", "baz/parent1/item1"},
655                 },
656                 {
657                         globs:  []string{"baz/*"},
658                         expect: []string{"baz/quux"},
659                 },
660                 {
661                         globs:  []string{"baz/**/*uu?"},
662                         expect: []string{"baz/quux"},
663                 },
664                 {
665                         globs:  []string{"**/*m1"},
666                         expect: []string{"baz/parent1/item1"},
667                 },
668                 {
669                         globs:  []string{"*/*/*/**/*1"},
670                         expect: nil,
671                 },
672                 {
673                         globs:  []string{"f*", "**/q*"},
674                         expect: []string{"foo", "baz/quux"},
675                 },
676                 {
677                         globs:  []string{"\\"}, // invalid pattern matches nothing
678                         expect: nil,
679                 },
680                 {
681                         globs:  []string{"\\", "foo"},
682                         expect: []string{"foo"},
683                 },
684                 {
685                         globs:  []string{"foo/**"},
686                         expect: nil,
687                 },
688                 {
689                         globs:  []string{"foo*/**"},
690                         expect: nil,
691                 },
692         } {
693                 c.Logf("=== globs: %q", trial.globs)
694                 collfs, err := (&arvados.Collection{ManifestText: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo 0:0:bar 0:0:baz/quux 0:0:baz/parent1/item1\n"}).FileSystem(nil, nil)
695                 c.Assert(err, check.IsNil)
696                 cp := copier{globs: trial.globs, staged: collfs}
697                 err = cp.applyGlobsToStaged()
698                 if !c.Check(err, check.IsNil) {
699                         continue
700                 }
701                 var got []string
702                 fs.WalkDir(arvados.FS(collfs), "", func(path string, ent fs.DirEntry, err error) error {
703                         if !ent.IsDir() {
704                                 got = append(got, path)
705                         }
706                         return nil
707                 })
708                 sort.Strings(got)
709                 sort.Strings(trial.expect)
710                 c.Check(got, check.DeepEquals, trial.expect)
711         }
712 }