Merge branch '22124-keepstore-double-slash'
[arvados.git] / lib / crunchrun / copier_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "bytes"
9         "encoding/json"
10         "fmt"
11         "io"
12         "io/fs"
13         "os"
14         "sort"
15         "syscall"
16
17         "git.arvados.org/arvados.git/sdk/go/arvados"
18         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
19         "git.arvados.org/arvados.git/sdk/go/arvadostest"
20         "git.arvados.org/arvados.git/sdk/go/keepclient"
21         "github.com/sirupsen/logrus"
22         check "gopkg.in/check.v1"
23 )
24
25 var _ = check.Suite(&copierSuite{})
26
27 type copierSuite struct {
28         cp  copier
29         log bytes.Buffer
30 }
31
32 func (s *copierSuite) SetUpTest(c *check.C) {
33         tmpdir := c.MkDir()
34         s.log = bytes.Buffer{}
35
36         cl, err := arvadosclient.MakeArvadosClient()
37         c.Assert(err, check.IsNil)
38         kc, err := keepclient.MakeKeepClient(cl)
39         c.Assert(err, check.IsNil)
40         collfs, err := (&arvados.Collection{}).FileSystem(arvados.NewClientFromEnv(), kc)
41         c.Assert(err, check.IsNil)
42
43         s.cp = copier{
44                 client:        arvados.NewClientFromEnv(),
45                 keepClient:    kc,
46                 hostOutputDir: tmpdir,
47                 ctrOutputDir:  "/ctr/outdir",
48                 mounts: map[string]arvados.Mount{
49                         "/ctr/outdir": {Kind: "tmp"},
50                 },
51                 secretMounts: map[string]arvados.Mount{
52                         "/secret_text": {Kind: "text", Content: "xyzzy"},
53                 },
54                 logger: &logrus.Logger{Out: &s.log, Formatter: &logrus.TextFormatter{}, Level: logrus.InfoLevel},
55                 staged: collfs,
56         }
57 }
58
59 func (s *copierSuite) TestEmptyOutput(c *check.C) {
60         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
61         c.Check(err, check.IsNil)
62         c.Check(s.cp.dirs, check.DeepEquals, []string(nil))
63         c.Check(len(s.cp.files), check.Equals, 0)
64 }
65
66 func (s *copierSuite) TestRegularFilesAndDirs(c *check.C) {
67         err := os.MkdirAll(s.cp.hostOutputDir+"/dir1/dir2/dir3", 0755)
68         c.Assert(err, check.IsNil)
69         f, err := os.OpenFile(s.cp.hostOutputDir+"/dir1/foo", os.O_CREATE|os.O_WRONLY, 0644)
70         c.Assert(err, check.IsNil)
71         _, err = io.WriteString(f, "foo")
72         c.Assert(err, check.IsNil)
73         c.Assert(f.Close(), check.IsNil)
74         err = syscall.Mkfifo(s.cp.hostOutputDir+"/dir1/fifo", 0644)
75         c.Assert(err, check.IsNil)
76
77         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
78         c.Check(err, check.IsNil)
79         c.Check(s.cp.dirs, check.DeepEquals, []string{"/dir1", "/dir1/dir2", "/dir1/dir2/dir3"})
80         c.Check(s.cp.files, check.DeepEquals, []filetodo{
81                 {src: os.DevNull, dst: "/dir1/dir2/dir3/.keep"},
82                 {src: s.cp.hostOutputDir + "/dir1/foo", dst: "/dir1/foo", size: 3},
83         })
84         c.Check(s.log.String(), check.Matches, `.* msg="Skipping unsupported file type \(mode 200000644\) in output dir: \\"/ctr/outdir/dir1/fifo\\""\n`)
85 }
86
87 func (s *copierSuite) TestSymlinkCycle(c *check.C) {
88         c.Assert(os.Mkdir(s.cp.hostOutputDir+"/dir1", 0755), check.IsNil)
89         c.Assert(os.Mkdir(s.cp.hostOutputDir+"/dir2", 0755), check.IsNil)
90         c.Assert(os.Symlink("../dir2", s.cp.hostOutputDir+"/dir1/l_dir2"), check.IsNil)
91         c.Assert(os.Symlink("../dir1", s.cp.hostOutputDir+"/dir2/l_dir1"), check.IsNil)
92         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
93         c.Check(err, check.ErrorMatches, `.*cycle.*`)
94 }
95
96 func (s *copierSuite) TestSymlinkTargetMissing(c *check.C) {
97         c.Assert(os.Symlink("./missing", s.cp.hostOutputDir+"/symlink"), check.IsNil)
98         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
99         c.Check(err, check.ErrorMatches, `.*/ctr/outdir/missing.*`)
100 }
101
102 func (s *copierSuite) TestSymlinkTargetNotMounted(c *check.C) {
103         c.Assert(os.Symlink("../boop", s.cp.hostOutputDir+"/symlink"), check.IsNil)
104         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
105         c.Check(err, check.ErrorMatches, `.*/ctr/boop.*`)
106 }
107
108 func (s *copierSuite) TestSymlinkToSecret(c *check.C) {
109         c.Assert(os.Symlink("/secret_text", s.cp.hostOutputDir+"/symlink"), check.IsNil)
110         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
111         c.Check(err, check.IsNil)
112         c.Check(len(s.cp.dirs), check.Equals, 0)
113         c.Check(len(s.cp.files), check.Equals, 0)
114 }
115
116 func (s *copierSuite) TestSecretInOutputDir(c *check.C) {
117         s.cp.secretMounts["/ctr/outdir/secret_text"] = s.cp.secretMounts["/secret_text"]
118         s.writeFileInOutputDir(c, "secret_text", "xyzzy")
119         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
120         c.Check(err, check.IsNil)
121         c.Check(len(s.cp.dirs), check.Equals, 0)
122         c.Check(len(s.cp.files), check.Equals, 0)
123 }
124
125 func (s *copierSuite) TestSymlinkToMountedCollection(c *check.C) {
126         // simulate mounted read-only collection
127         s.cp.mounts["/mnt"] = arvados.Mount{
128                 Kind:             "collection",
129                 PortableDataHash: arvadostest.FooCollectionPDH,
130         }
131
132         // simulate mounted writable collection
133         bindtmp := c.MkDir()
134         f, err := os.OpenFile(bindtmp+"/.arvados#collection", os.O_CREATE|os.O_WRONLY, 0644)
135         c.Assert(err, check.IsNil)
136         _, err = io.WriteString(f, `{"manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n"}`)
137         c.Assert(err, check.IsNil)
138         c.Assert(f.Close(), check.IsNil)
139         s.cp.mounts["/mnt-w"] = arvados.Mount{
140                 Kind:             "collection",
141                 PortableDataHash: arvadostest.FooCollectionPDH,
142                 Writable:         true,
143         }
144         s.cp.bindmounts = map[string]bindmount{
145                 "/mnt-w": bindmount{HostPath: bindtmp, ReadOnly: false},
146         }
147
148         c.Assert(os.Symlink("../../mnt", s.cp.hostOutputDir+"/l_dir"), check.IsNil)
149         c.Assert(os.Symlink("/mnt/foo", s.cp.hostOutputDir+"/l_file"), check.IsNil)
150         c.Assert(os.Symlink("/mnt-w/bar", s.cp.hostOutputDir+"/l_file_w"), check.IsNil)
151
152         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
153         c.Check(err, check.IsNil)
154         s.checkStagedFile(c, "l_dir/foo", 3)
155         s.checkStagedFile(c, "l_file", 3)
156         s.checkStagedFile(c, "l_file_w", 3)
157 }
158
159 func (s *copierSuite) checkStagedFile(c *check.C, path string, size int64) {
160         fi, err := s.cp.staged.Stat(path)
161         if c.Check(err, check.IsNil) {
162                 c.Check(fi.Size(), check.Equals, size)
163         }
164 }
165
166 func (s *copierSuite) TestSymlink(c *check.C) {
167         hostfile := s.cp.hostOutputDir + "/dir1/file"
168
169         err := os.MkdirAll(s.cp.hostOutputDir+"/dir1/dir2/dir3", 0755)
170         c.Assert(err, check.IsNil)
171         s.writeFileInOutputDir(c, "dir1/file", "file")
172         for _, err := range []error{
173                 os.Symlink(s.cp.ctrOutputDir+"/dir1/file", s.cp.hostOutputDir+"/l_abs_file"),
174                 os.Symlink(s.cp.ctrOutputDir+"/dir1/dir2", s.cp.hostOutputDir+"/l_abs_dir2"),
175                 os.Symlink("../../dir1/file", s.cp.hostOutputDir+"/dir1/dir2/l_rel_file"),
176                 os.Symlink("dir1/file", s.cp.hostOutputDir+"/l_rel_file"),
177                 os.MkdirAll(s.cp.hostOutputDir+"/morelinks", 0755),
178                 os.Symlink("../dir1/dir2", s.cp.hostOutputDir+"/morelinks/l_rel_dir2"),
179                 os.Symlink("dir1/dir2/dir3", s.cp.hostOutputDir+"/l_rel_dir3"),
180                 // rel. symlink -> rel. symlink -> regular file
181                 os.Symlink("../dir1/dir2/l_rel_file", s.cp.hostOutputDir+"/morelinks/l_rel_l_rel_file"),
182         } {
183                 c.Assert(err, check.IsNil)
184         }
185
186         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
187         c.Check(err, check.IsNil)
188         c.Check(s.cp.dirs, check.DeepEquals, []string{
189                 "/dir1", "/dir1/dir2", "/dir1/dir2/dir3",
190                 "/l_abs_dir2", "/l_abs_dir2/dir3",
191                 "/l_rel_dir3",
192                 "/morelinks", "/morelinks/l_rel_dir2", "/morelinks/l_rel_dir2/dir3",
193         })
194         c.Check(s.cp.files, check.DeepEquals, []filetodo{
195                 {dst: "/dir1/dir2/dir3/.keep", src: os.DevNull},
196                 {dst: "/dir1/dir2/l_rel_file", src: hostfile, size: 4},
197                 {dst: "/dir1/file", src: hostfile, size: 4},
198                 {dst: "/l_abs_dir2/dir3/.keep", src: os.DevNull},
199                 {dst: "/l_abs_dir2/l_rel_file", src: hostfile, size: 4},
200                 {dst: "/l_abs_file", src: hostfile, size: 4},
201                 {dst: "/l_rel_dir3/.keep", src: os.DevNull},
202                 {dst: "/l_rel_file", src: hostfile, size: 4},
203                 {dst: "/morelinks/l_rel_dir2/dir3/.keep", src: os.DevNull},
204                 {dst: "/morelinks/l_rel_dir2/l_rel_file", src: hostfile, size: 4},
205                 {dst: "/morelinks/l_rel_l_rel_file", src: hostfile, size: 4},
206         })
207 }
208
209 func (s *copierSuite) TestUnsupportedOutputMount(c *check.C) {
210         s.cp.mounts["/ctr/outdir"] = arvados.Mount{Kind: "waz"}
211         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
212         c.Check(err, check.NotNil)
213 }
214
215 func (s *copierSuite) TestUnsupportedMountKindBelow(c *check.C) {
216         s.cp.mounts["/ctr/outdir/dirk"] = arvados.Mount{Kind: "waz"}
217         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
218         c.Check(err, check.NotNil)
219 }
220
221 func (s *copierSuite) TestWritableMountBelow(c *check.C) {
222         s.cp.mounts["/ctr/outdir/mount"] = arvados.Mount{
223                 Kind:             "collection",
224                 PortableDataHash: arvadostest.FooCollectionPDH,
225                 Writable:         true,
226         }
227         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/mount", 0755), check.IsNil)
228         s.writeFileInOutputDir(c, "file", "file")
229         s.writeFileInOutputDir(c, "mount/foo", "foo")
230
231         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
232         c.Check(err, check.IsNil)
233         c.Check(s.cp.dirs, check.DeepEquals, []string{"/mount"})
234         c.Check(s.cp.files, check.DeepEquals, []filetodo{
235                 {src: s.cp.hostOutputDir + "/file", dst: "/file", size: 4},
236                 {src: s.cp.hostOutputDir + "/mount/foo", dst: "/mount/foo", size: 3},
237         })
238 }
239
240 // Check some glob-matching edge cases. In particular, check that
241 // patterns like "foo/**" do not match regular files named "foo"
242 // (unless of course they are inside a directory named "foo").
243 func (s *copierSuite) TestMatchGlobs(c *check.C) {
244         s.cp.globs = []string{"foo*/**"}
245         c.Check(s.cp.matchGlobs("foo", true), check.Equals, true)
246         c.Check(s.cp.matchGlobs("food", true), check.Equals, true)
247         c.Check(s.cp.matchGlobs("foo", false), check.Equals, false)
248         c.Check(s.cp.matchGlobs("food", false), check.Equals, false)
249         c.Check(s.cp.matchGlobs("foo/bar", false), check.Equals, true)
250         c.Check(s.cp.matchGlobs("food/bar", false), check.Equals, true)
251         c.Check(s.cp.matchGlobs("foo/bar", true), check.Equals, true)
252         c.Check(s.cp.matchGlobs("food/bar", true), check.Equals, true)
253
254         s.cp.globs = []string{"ba[!/]/foo*/**"}
255         c.Check(s.cp.matchGlobs("bar/foo", true), check.Equals, true)
256         c.Check(s.cp.matchGlobs("bar/food", true), check.Equals, true)
257         c.Check(s.cp.matchGlobs("bar/foo", false), check.Equals, false)
258         c.Check(s.cp.matchGlobs("bar/food", false), check.Equals, false)
259         c.Check(s.cp.matchGlobs("bar/foo/z\\[", true), check.Equals, true)
260         c.Check(s.cp.matchGlobs("bar/food/z\\[", true), check.Equals, true)
261         c.Check(s.cp.matchGlobs("bar/foo/z\\[", false), check.Equals, true)
262         c.Check(s.cp.matchGlobs("bar/food/z\\[", false), check.Equals, true)
263
264         s.cp.globs = []string{"waz/**/foo*/**"}
265         c.Check(s.cp.matchGlobs("waz/quux/foo", true), check.Equals, true)
266         c.Check(s.cp.matchGlobs("waz/quux/food", true), check.Equals, true)
267         c.Check(s.cp.matchGlobs("waz/quux/foo", false), check.Equals, false)
268         c.Check(s.cp.matchGlobs("waz/quux/food", false), check.Equals, false)
269         c.Check(s.cp.matchGlobs("waz/quux/foo/foo", true), check.Equals, true)
270         c.Check(s.cp.matchGlobs("waz/quux/food/foo", true), check.Equals, true)
271         c.Check(s.cp.matchGlobs("waz/quux/foo/foo", false), check.Equals, true)
272         c.Check(s.cp.matchGlobs("waz/quux/food/foo", false), check.Equals, true)
273
274         s.cp.globs = []string{"foo/**/*"}
275         c.Check(s.cp.matchGlobs("foo", false), check.Equals, false)
276         c.Check(s.cp.matchGlobs("foo/bar", false), check.Equals, true)
277         c.Check(s.cp.matchGlobs("foo/bar/baz", false), check.Equals, true)
278         c.Check(s.cp.matchGlobs("foo/bar/baz/waz", false), check.Equals, true)
279 }
280
281 func (s *copierSuite) TestSubtreeCouldMatch(c *check.C) {
282         for _, trial := range []struct {
283                 mount string // relative to output dir
284                 glob  string
285                 could bool
286         }{
287                 {mount: "abc", glob: "*"},
288                 {mount: "abc", glob: "abc/*", could: true},
289                 {mount: "abc", glob: "a*/**", could: true},
290                 {mount: "abc", glob: "**", could: true},
291                 {mount: "abc", glob: "*/*", could: true},
292                 {mount: "abc", glob: "**/*.txt", could: true},
293                 {mount: "abc/def", glob: "*"},
294                 {mount: "abc/def", glob: "*/*"},
295                 {mount: "abc/def", glob: "*/*.txt"},
296                 {mount: "abc/def", glob: "*/*/*", could: true},
297                 {mount: "abc/def", glob: "**", could: true},
298                 {mount: "abc/def", glob: "**/bar", could: true},
299                 {mount: "abc/def", glob: "abc/**", could: true},
300                 {mount: "abc/def/ghi", glob: "*c/**/bar", could: true},
301                 {mount: "abc/def/ghi", glob: "*c/*f/bar"},
302                 {mount: "abc/def/ghi", glob: "abc/d[^/]f/ghi/*", could: true},
303         } {
304                 c.Logf("=== %+v", trial)
305                 got := (&copier{
306                         globs: []string{trial.glob},
307                 }).subtreeCouldMatch(trial.mount)
308                 c.Check(got, check.Equals, trial.could)
309         }
310 }
311
312 func (s *copierSuite) TestCopyFromLargeCollection_Readonly(c *check.C) {
313         s.testCopyFromLargeCollection(c, false)
314 }
315
316 func (s *copierSuite) TestCopyFromLargeCollection_Writable(c *check.C) {
317         s.testCopyFromLargeCollection(c, true)
318 }
319
320 func (s *copierSuite) testCopyFromLargeCollection(c *check.C, writable bool) {
321         bindtmp := c.MkDir()
322         mtxt := arvadostest.FakeManifest(100, 100, 2, 4<<20)
323         pdh := arvados.PortableDataHash(mtxt)
324         json, err := json.Marshal(arvados.Collection{ManifestText: mtxt, PortableDataHash: pdh})
325         c.Assert(err, check.IsNil)
326         err = os.WriteFile(bindtmp+"/.arvados#collection", json, 0644)
327         // This symlink tricks walkHostFS into calling walkMount on
328         // the fakecollection dir. If we did the obvious thing instead
329         // (i.e., mount a collection under the output dir) walkMount
330         // would see that our fakecollection dir is actually a regular
331         // directory, conclude that the mount has been deleted and
332         // replaced by a regular directory tree, and process the tree
333         // as regular files, bypassing the manifest-copying code path
334         // we're trying to test.
335         err = os.Symlink("/fakecollection", s.cp.hostOutputDir+"/fakecollection")
336         c.Assert(err, check.IsNil)
337         s.cp.mounts["/fakecollection"] = arvados.Mount{
338                 Kind:             "collection",
339                 PortableDataHash: pdh,
340                 Writable:         writable,
341         }
342         s.cp.bindmounts = map[string]bindmount{
343                 "/fakecollection": bindmount{HostPath: bindtmp, ReadOnly: !writable},
344         }
345         s.cp.manifestCache = map[string]string{pdh: mtxt}
346         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
347         c.Check(err, check.IsNil)
348         c.Log(s.log.String())
349
350         // Check some files to ensure they were copied properly.
351         // Specifically, arbitrarily check every 17th file in every
352         // 13th dir.  (This is better than checking all of the files
353         // only in that it's less likely to show up as a distracting
354         // signal in CPU profiling.)
355         for i := 0; i < 100; i += 13 {
356                 for j := 0; j < 100; j += 17 {
357                         fnm := fmt.Sprintf("/fakecollection/dir%d/dir%d/file%d", i, j, j)
358                         _, err := s.cp.staged.Stat(fnm)
359                         c.Assert(err, check.IsNil, check.Commentf("%s", fnm))
360                 }
361         }
362 }
363
364 func (s *copierSuite) TestMountBelowExcludedByGlob(c *check.C) {
365         bindtmp := c.MkDir()
366         s.cp.mounts["/ctr/outdir/include/includer"] = arvados.Mount{
367                 Kind:             "collection",
368                 PortableDataHash: arvadostest.FooCollectionPDH,
369         }
370         s.cp.mounts["/ctr/outdir/include/includew"] = arvados.Mount{
371                 Kind:             "collection",
372                 PortableDataHash: arvadostest.FooCollectionPDH,
373                 Writable:         true,
374         }
375         s.cp.mounts["/ctr/outdir/exclude/excluder"] = arvados.Mount{
376                 Kind:             "collection",
377                 PortableDataHash: arvadostest.FooCollectionPDH,
378         }
379         s.cp.mounts["/ctr/outdir/exclude/excludew"] = arvados.Mount{
380                 Kind:             "collection",
381                 PortableDataHash: arvadostest.FooCollectionPDH,
382                 Writable:         true,
383         }
384         s.cp.mounts["/ctr/outdir/nonexistent/collection"] = arvados.Mount{
385                 // As extra assurance, plant a collection that will
386                 // fail if copier attempts to load its manifest.  (For
387                 // performance reasons it's important that copier
388                 // doesn't try to load the manifest before deciding
389                 // not to copy the contents.)
390                 Kind:             "collection",
391                 PortableDataHash: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+1234",
392         }
393         s.cp.globs = []string{
394                 "?ncl*/*r/*",
395                 "*/?ncl*/**",
396         }
397         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/include/includer", 0755), check.IsNil)
398         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/include/includew", 0755), check.IsNil)
399         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/exclude/excluder", 0755), check.IsNil)
400         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/exclude/excludew", 0755), check.IsNil)
401         s.writeFileInOutputDir(c, "include/includew/foo", "foo")
402         s.writeFileInOutputDir(c, "exclude/excludew/foo", "foo")
403         s.cp.bindmounts = map[string]bindmount{
404                 "/ctr/outdir/include/includew": bindmount{HostPath: bindtmp, ReadOnly: false},
405         }
406         s.cp.bindmounts = map[string]bindmount{
407                 "/ctr/outdir/include/excludew": bindmount{HostPath: bindtmp, ReadOnly: false},
408         }
409
410         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
411         c.Check(err, check.IsNil)
412         c.Log(s.log.String())
413
414         // Note it's OK that "/exclude" is not excluded by walkMount:
415         // it is just a local filesystem directory, not a mount point
416         // that's expensive to walk.  In real-life usage, it will be
417         // removed from cp.dirs before any copying happens.
418         c.Check(s.cp.dirs, check.DeepEquals, []string{"/exclude", "/include", "/include/includew"})
419         c.Check(s.cp.files, check.DeepEquals, []filetodo{
420                 {src: s.cp.hostOutputDir + "/include/includew/foo", dst: "/include/includew/foo", size: 3},
421         })
422         manifest, err := s.cp.staged.MarshalManifest(".")
423         c.Assert(err, check.IsNil)
424         c.Check(manifest, check.Matches, `(?ms).*\./include/includer .*`)
425         c.Check(manifest, check.Not(check.Matches), `(?ms).*exclude.*`)
426         c.Check(s.log.String(), check.Matches, `(?ms).*not copying \\"exclude/excluder\\".*`)
427         c.Check(s.log.String(), check.Matches, `(?ms).*not copying \\"nonexistent/collection\\".*`)
428 }
429
430 func (s *copierSuite) writeFileInOutputDir(c *check.C, path, data string) {
431         f, err := os.OpenFile(s.cp.hostOutputDir+"/"+path, os.O_CREATE|os.O_WRONLY, 0644)
432         c.Assert(err, check.IsNil)
433         _, err = io.WriteString(f, data)
434         c.Assert(err, check.IsNil)
435         c.Assert(f.Close(), check.IsNil)
436 }
437
438 // applyGlobsToFilesAndDirs uses the same glob-matching code as
439 // applyGlobsToStaged, so we don't need to test all of the same
440 // glob-matching behavior covered in TestApplyGlobsToCollectionFS.  We
441 // do need to check that (a) the glob is actually being used to filter
442 // out files, and (b) non-matching dirs still included if and only if
443 // they are ancestors of matching files.
444 func (s *copierSuite) TestApplyGlobsToFilesAndDirs(c *check.C) {
445         dirs := []string{"dir1", "dir1/dir11", "dir1/dir12", "dir2"}
446         files := []string{"dir1/file11", "dir1/dir11/file111", "dir2/file2"}
447         for _, trial := range []struct {
448                 globs []string
449                 dirs  []string
450                 files []string
451         }{
452                 {
453                         globs: []string{},
454                         dirs:  append([]string{}, dirs...),
455                         files: append([]string{}, files...),
456                 },
457                 {
458                         globs: []string{"**"},
459                         dirs:  append([]string{}, dirs...),
460                         files: append([]string{}, files...),
461                 },
462                 {
463                         globs: []string{"**/file111"},
464                         dirs:  []string{"dir1", "dir1/dir11"},
465                         files: []string{"dir1/dir11/file111"},
466                 },
467                 {
468                         globs: []string{"nothing"},
469                         dirs:  nil,
470                         files: nil,
471                 },
472                 {
473                         globs: []string{"**/dir12"},
474                         dirs:  []string{"dir1", "dir1/dir12"},
475                         files: nil,
476                 },
477                 {
478                         globs: []string{"**/file*"},
479                         dirs:  []string{"dir1", "dir1/dir11", "dir2"},
480                         files: append([]string{}, files...),
481                 },
482                 {
483                         globs: []string{"**/dir1[12]"},
484                         dirs:  []string{"dir1", "dir1/dir11", "dir1/dir12"},
485                         files: nil,
486                 },
487                 {
488                         globs: []string{"**/dir1[^2]"},
489                         dirs:  []string{"dir1", "dir1/dir11"},
490                         files: nil,
491                 },
492                 {
493                         globs: []string{"dir1/**"},
494                         dirs:  []string{"dir1", "dir1/dir11", "dir1/dir12"},
495                         files: []string{"dir1/file11", "dir1/dir11/file111"},
496                 },
497         } {
498                 c.Logf("=== globs: %q", trial.globs)
499                 cp := copier{
500                         globs: trial.globs,
501                         dirs:  dirs,
502                 }
503                 for _, path := range files {
504                         cp.files = append(cp.files, filetodo{dst: path})
505                 }
506                 cp.applyGlobsToFilesAndDirs()
507                 var gotFiles []string
508                 for _, file := range cp.files {
509                         gotFiles = append(gotFiles, file.dst)
510                 }
511                 c.Check(cp.dirs, check.DeepEquals, trial.dirs)
512                 c.Check(gotFiles, check.DeepEquals, trial.files)
513         }
514 }
515
516 func (s *copierSuite) TestApplyGlobsToCollectionFS(c *check.C) {
517         for _, trial := range []struct {
518                 globs  []string
519                 expect []string
520         }{
521                 {
522                         globs:  nil,
523                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
524                 },
525                 {
526                         globs:  []string{"foo"},
527                         expect: []string{"foo"},
528                 },
529                 {
530                         globs:  []string{"baz/parent1/item1"},
531                         expect: []string{"baz/parent1/item1"},
532                 },
533                 {
534                         globs:  []string{"**"},
535                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
536                 },
537                 {
538                         globs:  []string{"**/*"},
539                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
540                 },
541                 {
542                         globs:  []string{"*"},
543                         expect: []string{"foo", "bar"},
544                 },
545                 {
546                         globs:  []string{"baz"},
547                         expect: nil,
548                 },
549                 {
550                         globs:  []string{"b*/**"},
551                         expect: []string{"baz/quux", "baz/parent1/item1"},
552                 },
553                 {
554                         globs:  []string{"baz"},
555                         expect: nil,
556                 },
557                 {
558                         globs:  []string{"baz/**"},
559                         expect: []string{"baz/quux", "baz/parent1/item1"},
560                 },
561                 {
562                         globs:  []string{"baz/*"},
563                         expect: []string{"baz/quux"},
564                 },
565                 {
566                         globs:  []string{"baz/**/*uu?"},
567                         expect: []string{"baz/quux"},
568                 },
569                 {
570                         globs:  []string{"**/*m1"},
571                         expect: []string{"baz/parent1/item1"},
572                 },
573                 {
574                         globs:  []string{"*/*/*/**/*1"},
575                         expect: nil,
576                 },
577                 {
578                         globs:  []string{"f*", "**/q*"},
579                         expect: []string{"foo", "baz/quux"},
580                 },
581                 {
582                         globs:  []string{"\\"}, // invalid pattern matches nothing
583                         expect: nil,
584                 },
585                 {
586                         globs:  []string{"\\", "foo"},
587                         expect: []string{"foo"},
588                 },
589                 {
590                         globs:  []string{"foo/**"},
591                         expect: nil,
592                 },
593                 {
594                         globs:  []string{"foo*/**"},
595                         expect: nil,
596                 },
597         } {
598                 c.Logf("=== globs: %q", trial.globs)
599                 collfs, err := (&arvados.Collection{ManifestText: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo 0:0:bar 0:0:baz/quux 0:0:baz/parent1/item1\n"}).FileSystem(nil, nil)
600                 c.Assert(err, check.IsNil)
601                 cp := copier{globs: trial.globs, staged: collfs}
602                 err = cp.applyGlobsToStaged()
603                 if !c.Check(err, check.IsNil) {
604                         continue
605                 }
606                 var got []string
607                 fs.WalkDir(arvados.FS(collfs), "", func(path string, ent fs.DirEntry, err error) error {
608                         if !ent.IsDir() {
609                                 got = append(got, path)
610                         }
611                         return nil
612                 })
613                 sort.Strings(got)
614                 sort.Strings(trial.expect)
615                 c.Check(got, check.DeepEquals, trial.expect)
616         }
617 }