22226: Add test to reproduce crunch-run output save panic
[arvados.git] / lib / crunchrun / copier_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "bytes"
9         "encoding/json"
10         "fmt"
11         "io"
12         "io/fs"
13         "os"
14         "path"
15         "sort"
16         "syscall"
17
18         "git.arvados.org/arvados.git/sdk/go/arvados"
19         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
20         "git.arvados.org/arvados.git/sdk/go/arvadostest"
21         "git.arvados.org/arvados.git/sdk/go/keepclient"
22         "github.com/sirupsen/logrus"
23         check "gopkg.in/check.v1"
24 )
25
26 var _ = check.Suite(&copierSuite{})
27
28 type copierSuite struct {
29         cp  copier
30         log bytes.Buffer
31 }
32
33 func (s *copierSuite) SetUpTest(c *check.C) {
34         tmpdir := c.MkDir()
35         s.log = bytes.Buffer{}
36
37         cl, err := arvadosclient.MakeArvadosClient()
38         c.Assert(err, check.IsNil)
39         kc, err := keepclient.MakeKeepClient(cl)
40         c.Assert(err, check.IsNil)
41         collfs, err := (&arvados.Collection{}).FileSystem(arvados.NewClientFromEnv(), kc)
42         c.Assert(err, check.IsNil)
43
44         s.cp = copier{
45                 client:        arvados.NewClientFromEnv(),
46                 keepClient:    kc,
47                 hostOutputDir: tmpdir,
48                 ctrOutputDir:  "/ctr/outdir",
49                 mounts: map[string]arvados.Mount{
50                         "/ctr/outdir": {Kind: "tmp"},
51                 },
52                 secretMounts: map[string]arvados.Mount{
53                         "/secret_text": {Kind: "text", Content: "xyzzy"},
54                 },
55                 logger: &logrus.Logger{Out: &s.log, Formatter: &logrus.TextFormatter{}, Level: logrus.InfoLevel},
56                 staged: collfs,
57         }
58 }
59
60 func (s *copierSuite) TestEmptyOutput(c *check.C) {
61         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
62         c.Check(err, check.IsNil)
63         c.Check(s.cp.dirs, check.DeepEquals, []string(nil))
64         c.Check(len(s.cp.files), check.Equals, 0)
65 }
66
67 func (s *copierSuite) TestEmptyWritableMount(c *check.C) {
68         s.writeFileInOutputDir(c, ".arvados#collection", `{"manifest_text":""}`)
69         s.cp.mounts[s.cp.ctrOutputDir] = arvados.Mount{
70                 Kind:     "collection",
71                 Writable: true,
72         }
73
74         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
75         c.Assert(err, check.IsNil)
76         c.Check(s.cp.dirs, check.DeepEquals, []string(nil))
77         c.Check(len(s.cp.files), check.Equals, 0)
78 }
79
80 func (s *copierSuite) TestOutputCollectionWithOnlySubmounts(c *check.C) {
81         s.writeFileInOutputDir(c, "foo", `foo`)
82         s.writeFileInOutputDir(c, ".arvados#collection", `{"manifest_text":". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n"}`)
83         s.cp.mounts[s.cp.ctrOutputDir] = arvados.Mount{
84                 Kind:     "collection",
85                 Writable: true,
86         }
87         s.cp.mounts[path.Join(s.cp.ctrOutputDir, "foo")] = arvados.Mount{
88                 Kind:             "collection",
89                 Path:             "foo",
90                 PortableDataHash: "1f4b0bc7583c2a7f9102c395f4ffc5e3+45",
91         }
92
93         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
94         c.Assert(err, check.IsNil)
95         c.Check(s.cp.dirs, check.DeepEquals, []string(nil))
96         c.Check(len(s.cp.files), check.Equals, 0)
97 }
98
99 func (s *copierSuite) TestRegularFilesAndDirs(c *check.C) {
100         err := os.MkdirAll(s.cp.hostOutputDir+"/dir1/dir2/dir3", 0755)
101         c.Assert(err, check.IsNil)
102         f, err := os.OpenFile(s.cp.hostOutputDir+"/dir1/foo", os.O_CREATE|os.O_WRONLY, 0644)
103         c.Assert(err, check.IsNil)
104         _, err = io.WriteString(f, "foo")
105         c.Assert(err, check.IsNil)
106         c.Assert(f.Close(), check.IsNil)
107         err = syscall.Mkfifo(s.cp.hostOutputDir+"/dir1/fifo", 0644)
108         c.Assert(err, check.IsNil)
109
110         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
111         c.Check(err, check.IsNil)
112         c.Check(s.cp.dirs, check.DeepEquals, []string{"/dir1", "/dir1/dir2", "/dir1/dir2/dir3"})
113         c.Check(s.cp.files, check.DeepEquals, []filetodo{
114                 {src: os.DevNull, dst: "/dir1/dir2/dir3/.keep"},
115                 {src: s.cp.hostOutputDir + "/dir1/foo", dst: "/dir1/foo", size: 3},
116         })
117         c.Check(s.log.String(), check.Matches, `.* msg="Skipping unsupported file type \(mode 200000644\) in output dir: \\"/ctr/outdir/dir1/fifo\\""\n`)
118 }
119
120 func (s *copierSuite) TestSymlinkCycle(c *check.C) {
121         c.Assert(os.Mkdir(s.cp.hostOutputDir+"/dir1", 0755), check.IsNil)
122         c.Assert(os.Mkdir(s.cp.hostOutputDir+"/dir2", 0755), check.IsNil)
123         c.Assert(os.Symlink("../dir2", s.cp.hostOutputDir+"/dir1/l_dir2"), check.IsNil)
124         c.Assert(os.Symlink("../dir1", s.cp.hostOutputDir+"/dir2/l_dir1"), check.IsNil)
125         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
126         c.Check(err, check.ErrorMatches, `.*cycle.*`)
127 }
128
129 func (s *copierSuite) TestSymlinkTargetMissing(c *check.C) {
130         c.Assert(os.Symlink("./missing", s.cp.hostOutputDir+"/symlink"), check.IsNil)
131         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
132         c.Check(err, check.ErrorMatches, `.*/ctr/outdir/missing.*`)
133 }
134
135 func (s *copierSuite) TestSymlinkTargetNotMounted(c *check.C) {
136         c.Assert(os.Symlink("../boop", s.cp.hostOutputDir+"/symlink"), check.IsNil)
137         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
138         c.Check(err, check.ErrorMatches, `.*/ctr/boop.*`)
139 }
140
141 func (s *copierSuite) TestSymlinkToSecret(c *check.C) {
142         c.Assert(os.Symlink("/secret_text", s.cp.hostOutputDir+"/symlink"), check.IsNil)
143         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
144         c.Check(err, check.IsNil)
145         c.Check(len(s.cp.dirs), check.Equals, 0)
146         c.Check(len(s.cp.files), check.Equals, 0)
147 }
148
149 func (s *copierSuite) TestSecretInOutputDir(c *check.C) {
150         s.cp.secretMounts["/ctr/outdir/secret_text"] = s.cp.secretMounts["/secret_text"]
151         s.writeFileInOutputDir(c, "secret_text", "xyzzy")
152         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
153         c.Check(err, check.IsNil)
154         c.Check(len(s.cp.dirs), check.Equals, 0)
155         c.Check(len(s.cp.files), check.Equals, 0)
156 }
157
158 func (s *copierSuite) TestSymlinkToMountedCollection(c *check.C) {
159         // simulate mounted read-only collection
160         s.cp.mounts["/mnt"] = arvados.Mount{
161                 Kind:             "collection",
162                 PortableDataHash: arvadostest.FooCollectionPDH,
163         }
164
165         // simulate mounted writable collection
166         bindtmp := c.MkDir()
167         f, err := os.OpenFile(bindtmp+"/.arvados#collection", os.O_CREATE|os.O_WRONLY, 0644)
168         c.Assert(err, check.IsNil)
169         _, err = io.WriteString(f, `{"manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n"}`)
170         c.Assert(err, check.IsNil)
171         c.Assert(f.Close(), check.IsNil)
172         s.cp.mounts["/mnt-w"] = arvados.Mount{
173                 Kind:             "collection",
174                 PortableDataHash: arvadostest.FooCollectionPDH,
175                 Writable:         true,
176         }
177         s.cp.bindmounts = map[string]bindmount{
178                 "/mnt-w": bindmount{HostPath: bindtmp, ReadOnly: false},
179         }
180
181         c.Assert(os.Symlink("../../mnt", s.cp.hostOutputDir+"/l_dir"), check.IsNil)
182         c.Assert(os.Symlink("/mnt/foo", s.cp.hostOutputDir+"/l_file"), check.IsNil)
183         c.Assert(os.Symlink("/mnt-w/bar", s.cp.hostOutputDir+"/l_file_w"), check.IsNil)
184
185         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
186         c.Check(err, check.IsNil)
187         s.checkStagedFile(c, "l_dir/foo", 3)
188         s.checkStagedFile(c, "l_file", 3)
189         s.checkStagedFile(c, "l_file_w", 3)
190 }
191
192 func (s *copierSuite) checkStagedFile(c *check.C, path string, size int64) {
193         fi, err := s.cp.staged.Stat(path)
194         if c.Check(err, check.IsNil) {
195                 c.Check(fi.Size(), check.Equals, size)
196         }
197 }
198
199 func (s *copierSuite) TestSymlink(c *check.C) {
200         hostfile := s.cp.hostOutputDir + "/dir1/file"
201
202         err := os.MkdirAll(s.cp.hostOutputDir+"/dir1/dir2/dir3", 0755)
203         c.Assert(err, check.IsNil)
204         s.writeFileInOutputDir(c, "dir1/file", "file")
205         for _, err := range []error{
206                 os.Symlink(s.cp.ctrOutputDir+"/dir1/file", s.cp.hostOutputDir+"/l_abs_file"),
207                 os.Symlink(s.cp.ctrOutputDir+"/dir1/dir2", s.cp.hostOutputDir+"/l_abs_dir2"),
208                 os.Symlink("../../dir1/file", s.cp.hostOutputDir+"/dir1/dir2/l_rel_file"),
209                 os.Symlink("dir1/file", s.cp.hostOutputDir+"/l_rel_file"),
210                 os.MkdirAll(s.cp.hostOutputDir+"/morelinks", 0755),
211                 os.Symlink("../dir1/dir2", s.cp.hostOutputDir+"/morelinks/l_rel_dir2"),
212                 os.Symlink("dir1/dir2/dir3", s.cp.hostOutputDir+"/l_rel_dir3"),
213                 // rel. symlink -> rel. symlink -> regular file
214                 os.Symlink("../dir1/dir2/l_rel_file", s.cp.hostOutputDir+"/morelinks/l_rel_l_rel_file"),
215         } {
216                 c.Assert(err, check.IsNil)
217         }
218
219         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
220         c.Check(err, check.IsNil)
221         c.Check(s.cp.dirs, check.DeepEquals, []string{
222                 "/dir1", "/dir1/dir2", "/dir1/dir2/dir3",
223                 "/l_abs_dir2", "/l_abs_dir2/dir3",
224                 "/l_rel_dir3",
225                 "/morelinks", "/morelinks/l_rel_dir2", "/morelinks/l_rel_dir2/dir3",
226         })
227         c.Check(s.cp.files, check.DeepEquals, []filetodo{
228                 {dst: "/dir1/dir2/dir3/.keep", src: os.DevNull},
229                 {dst: "/dir1/dir2/l_rel_file", src: hostfile, size: 4},
230                 {dst: "/dir1/file", src: hostfile, size: 4},
231                 {dst: "/l_abs_dir2/dir3/.keep", src: os.DevNull},
232                 {dst: "/l_abs_dir2/l_rel_file", src: hostfile, size: 4},
233                 {dst: "/l_abs_file", src: hostfile, size: 4},
234                 {dst: "/l_rel_dir3/.keep", src: os.DevNull},
235                 {dst: "/l_rel_file", src: hostfile, size: 4},
236                 {dst: "/morelinks/l_rel_dir2/dir3/.keep", src: os.DevNull},
237                 {dst: "/morelinks/l_rel_dir2/l_rel_file", src: hostfile, size: 4},
238                 {dst: "/morelinks/l_rel_l_rel_file", src: hostfile, size: 4},
239         })
240 }
241
242 func (s *copierSuite) TestUnsupportedOutputMount(c *check.C) {
243         s.cp.mounts["/ctr/outdir"] = arvados.Mount{Kind: "waz"}
244         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
245         c.Check(err, check.NotNil)
246 }
247
248 func (s *copierSuite) TestUnsupportedMountKindBelow(c *check.C) {
249         s.cp.mounts["/ctr/outdir/dirk"] = arvados.Mount{Kind: "waz"}
250         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
251         c.Check(err, check.NotNil)
252 }
253
254 func (s *copierSuite) TestWritableMountBelow(c *check.C) {
255         s.cp.mounts["/ctr/outdir/mount"] = arvados.Mount{
256                 Kind:             "collection",
257                 PortableDataHash: arvadostest.FooCollectionPDH,
258                 Writable:         true,
259         }
260         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/mount", 0755), check.IsNil)
261         s.writeFileInOutputDir(c, "file", "file")
262         s.writeFileInOutputDir(c, "mount/foo", "foo")
263
264         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
265         c.Check(err, check.IsNil)
266         c.Check(s.cp.dirs, check.DeepEquals, []string{"/mount"})
267         c.Check(s.cp.files, check.DeepEquals, []filetodo{
268                 {src: s.cp.hostOutputDir + "/file", dst: "/file", size: 4},
269                 {src: s.cp.hostOutputDir + "/mount/foo", dst: "/mount/foo", size: 3},
270         })
271 }
272
273 // Check some glob-matching edge cases. In particular, check that
274 // patterns like "foo/**" do not match regular files named "foo"
275 // (unless of course they are inside a directory named "foo").
276 func (s *copierSuite) TestMatchGlobs(c *check.C) {
277         s.cp.globs = []string{"foo*/**"}
278         c.Check(s.cp.matchGlobs("foo", true), check.Equals, true)
279         c.Check(s.cp.matchGlobs("food", true), check.Equals, true)
280         c.Check(s.cp.matchGlobs("foo", false), check.Equals, false)
281         c.Check(s.cp.matchGlobs("food", false), check.Equals, false)
282         c.Check(s.cp.matchGlobs("foo/bar", false), check.Equals, true)
283         c.Check(s.cp.matchGlobs("food/bar", false), check.Equals, true)
284         c.Check(s.cp.matchGlobs("foo/bar", true), check.Equals, true)
285         c.Check(s.cp.matchGlobs("food/bar", true), check.Equals, true)
286
287         s.cp.globs = []string{"ba[!/]/foo*/**"}
288         c.Check(s.cp.matchGlobs("bar/foo", true), check.Equals, true)
289         c.Check(s.cp.matchGlobs("bar/food", true), check.Equals, true)
290         c.Check(s.cp.matchGlobs("bar/foo", false), check.Equals, false)
291         c.Check(s.cp.matchGlobs("bar/food", false), check.Equals, false)
292         c.Check(s.cp.matchGlobs("bar/foo/z\\[", true), check.Equals, true)
293         c.Check(s.cp.matchGlobs("bar/food/z\\[", true), check.Equals, true)
294         c.Check(s.cp.matchGlobs("bar/foo/z\\[", false), check.Equals, true)
295         c.Check(s.cp.matchGlobs("bar/food/z\\[", false), check.Equals, true)
296
297         s.cp.globs = []string{"waz/**/foo*/**"}
298         c.Check(s.cp.matchGlobs("waz/quux/foo", true), check.Equals, true)
299         c.Check(s.cp.matchGlobs("waz/quux/food", true), check.Equals, true)
300         c.Check(s.cp.matchGlobs("waz/quux/foo", false), check.Equals, false)
301         c.Check(s.cp.matchGlobs("waz/quux/food", false), check.Equals, false)
302         c.Check(s.cp.matchGlobs("waz/quux/foo/foo", true), check.Equals, true)
303         c.Check(s.cp.matchGlobs("waz/quux/food/foo", true), check.Equals, true)
304         c.Check(s.cp.matchGlobs("waz/quux/foo/foo", false), check.Equals, true)
305         c.Check(s.cp.matchGlobs("waz/quux/food/foo", false), check.Equals, true)
306
307         s.cp.globs = []string{"foo/**/*"}
308         c.Check(s.cp.matchGlobs("foo", false), check.Equals, false)
309         c.Check(s.cp.matchGlobs("foo/bar", false), check.Equals, true)
310         c.Check(s.cp.matchGlobs("foo/bar/baz", false), check.Equals, true)
311         c.Check(s.cp.matchGlobs("foo/bar/baz/waz", false), check.Equals, true)
312 }
313
314 func (s *copierSuite) TestSubtreeCouldMatch(c *check.C) {
315         for _, trial := range []struct {
316                 mount string // relative to output dir
317                 glob  string
318                 could bool
319         }{
320                 {mount: "abc", glob: "*"},
321                 {mount: "abc", glob: "abc/*", could: true},
322                 {mount: "abc", glob: "a*/**", could: true},
323                 {mount: "abc", glob: "**", could: true},
324                 {mount: "abc", glob: "*/*", could: true},
325                 {mount: "abc", glob: "**/*.txt", could: true},
326                 {mount: "abc/def", glob: "*"},
327                 {mount: "abc/def", glob: "*/*"},
328                 {mount: "abc/def", glob: "*/*.txt"},
329                 {mount: "abc/def", glob: "*/*/*", could: true},
330                 {mount: "abc/def", glob: "**", could: true},
331                 {mount: "abc/def", glob: "**/bar", could: true},
332                 {mount: "abc/def", glob: "abc/**", could: true},
333                 {mount: "abc/def/ghi", glob: "*c/**/bar", could: true},
334                 {mount: "abc/def/ghi", glob: "*c/*f/bar"},
335                 {mount: "abc/def/ghi", glob: "abc/d[^/]f/ghi/*", could: true},
336         } {
337                 c.Logf("=== %+v", trial)
338                 got := (&copier{
339                         globs: []string{trial.glob},
340                 }).subtreeCouldMatch(trial.mount)
341                 c.Check(got, check.Equals, trial.could)
342         }
343 }
344
345 func (s *copierSuite) TestCopyFromLargeCollection_Readonly(c *check.C) {
346         s.testCopyFromLargeCollection(c, false)
347 }
348
349 func (s *copierSuite) TestCopyFromLargeCollection_Writable(c *check.C) {
350         s.testCopyFromLargeCollection(c, true)
351 }
352
353 func (s *copierSuite) testCopyFromLargeCollection(c *check.C, writable bool) {
354         bindtmp := c.MkDir()
355         mtxt := arvadostest.FakeManifest(100, 100, 2, 4<<20)
356         pdh := arvados.PortableDataHash(mtxt)
357         json, err := json.Marshal(arvados.Collection{ManifestText: mtxt, PortableDataHash: pdh})
358         c.Assert(err, check.IsNil)
359         err = os.WriteFile(bindtmp+"/.arvados#collection", json, 0644)
360         // This symlink tricks walkHostFS into calling walkMount on
361         // the fakecollection dir. If we did the obvious thing instead
362         // (i.e., mount a collection under the output dir) walkMount
363         // would see that our fakecollection dir is actually a regular
364         // directory, conclude that the mount has been deleted and
365         // replaced by a regular directory tree, and process the tree
366         // as regular files, bypassing the manifest-copying code path
367         // we're trying to test.
368         err = os.Symlink("/fakecollection", s.cp.hostOutputDir+"/fakecollection")
369         c.Assert(err, check.IsNil)
370         s.cp.mounts["/fakecollection"] = arvados.Mount{
371                 Kind:             "collection",
372                 PortableDataHash: pdh,
373                 Writable:         writable,
374         }
375         s.cp.bindmounts = map[string]bindmount{
376                 "/fakecollection": bindmount{HostPath: bindtmp, ReadOnly: !writable},
377         }
378         s.cp.manifestCache = map[string]string{pdh: mtxt}
379         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
380         c.Check(err, check.IsNil)
381         c.Log(s.log.String())
382
383         // Check some files to ensure they were copied properly.
384         // Specifically, arbitrarily check every 17th file in every
385         // 13th dir.  (This is better than checking all of the files
386         // only in that it's less likely to show up as a distracting
387         // signal in CPU profiling.)
388         for i := 0; i < 100; i += 13 {
389                 for j := 0; j < 100; j += 17 {
390                         fnm := fmt.Sprintf("/fakecollection/dir%d/dir%d/file%d", i, j, j)
391                         _, err := s.cp.staged.Stat(fnm)
392                         c.Assert(err, check.IsNil, check.Commentf("%s", fnm))
393                 }
394         }
395 }
396
397 func (s *copierSuite) TestMountBelowExcludedByGlob(c *check.C) {
398         bindtmp := c.MkDir()
399         s.cp.mounts["/ctr/outdir/include/includer"] = arvados.Mount{
400                 Kind:             "collection",
401                 PortableDataHash: arvadostest.FooCollectionPDH,
402         }
403         s.cp.mounts["/ctr/outdir/include/includew"] = arvados.Mount{
404                 Kind:             "collection",
405                 PortableDataHash: arvadostest.FooCollectionPDH,
406                 Writable:         true,
407         }
408         s.cp.mounts["/ctr/outdir/exclude/excluder"] = arvados.Mount{
409                 Kind:             "collection",
410                 PortableDataHash: arvadostest.FooCollectionPDH,
411         }
412         s.cp.mounts["/ctr/outdir/exclude/excludew"] = arvados.Mount{
413                 Kind:             "collection",
414                 PortableDataHash: arvadostest.FooCollectionPDH,
415                 Writable:         true,
416         }
417         s.cp.mounts["/ctr/outdir/nonexistent/collection"] = arvados.Mount{
418                 // As extra assurance, plant a collection that will
419                 // fail if copier attempts to load its manifest.  (For
420                 // performance reasons it's important that copier
421                 // doesn't try to load the manifest before deciding
422                 // not to copy the contents.)
423                 Kind:             "collection",
424                 PortableDataHash: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+1234",
425         }
426         s.cp.globs = []string{
427                 "?ncl*/*r/*",
428                 "*/?ncl*/**",
429         }
430         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/include/includer", 0755), check.IsNil)
431         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/include/includew", 0755), check.IsNil)
432         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/exclude/excluder", 0755), check.IsNil)
433         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/exclude/excludew", 0755), check.IsNil)
434         s.writeFileInOutputDir(c, "include/includew/foo", "foo")
435         s.writeFileInOutputDir(c, "exclude/excludew/foo", "foo")
436         s.cp.bindmounts = map[string]bindmount{
437                 "/ctr/outdir/include/includew": bindmount{HostPath: bindtmp, ReadOnly: false},
438         }
439         s.cp.bindmounts = map[string]bindmount{
440                 "/ctr/outdir/include/excludew": bindmount{HostPath: bindtmp, ReadOnly: false},
441         }
442
443         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
444         c.Check(err, check.IsNil)
445         c.Log(s.log.String())
446
447         // Note it's OK that "/exclude" is not excluded by walkMount:
448         // it is just a local filesystem directory, not a mount point
449         // that's expensive to walk.  In real-life usage, it will be
450         // removed from cp.dirs before any copying happens.
451         c.Check(s.cp.dirs, check.DeepEquals, []string{"/exclude", "/include", "/include/includew"})
452         c.Check(s.cp.files, check.DeepEquals, []filetodo{
453                 {src: s.cp.hostOutputDir + "/include/includew/foo", dst: "/include/includew/foo", size: 3},
454         })
455         manifest, err := s.cp.staged.MarshalManifest(".")
456         c.Assert(err, check.IsNil)
457         c.Check(manifest, check.Matches, `(?ms).*\./include/includer .*`)
458         c.Check(manifest, check.Not(check.Matches), `(?ms).*exclude.*`)
459         c.Check(s.log.String(), check.Matches, `(?ms).*not copying \\"exclude/excluder\\".*`)
460         c.Check(s.log.String(), check.Matches, `(?ms).*not copying \\"nonexistent/collection\\".*`)
461 }
462
463 func (s *copierSuite) writeFileInOutputDir(c *check.C, path, data string) {
464         f, err := os.OpenFile(s.cp.hostOutputDir+"/"+path, os.O_CREATE|os.O_WRONLY, 0644)
465         c.Assert(err, check.IsNil)
466         _, err = io.WriteString(f, data)
467         c.Assert(err, check.IsNil)
468         c.Assert(f.Close(), check.IsNil)
469 }
470
471 // applyGlobsToFilesAndDirs uses the same glob-matching code as
472 // applyGlobsToStaged, so we don't need to test all of the same
473 // glob-matching behavior covered in TestApplyGlobsToCollectionFS.  We
474 // do need to check that (a) the glob is actually being used to filter
475 // out files, and (b) non-matching dirs still included if and only if
476 // they are ancestors of matching files.
477 func (s *copierSuite) TestApplyGlobsToFilesAndDirs(c *check.C) {
478         dirs := []string{"dir1", "dir1/dir11", "dir1/dir12", "dir2"}
479         files := []string{"dir1/file11", "dir1/dir11/file111", "dir2/file2"}
480         for _, trial := range []struct {
481                 globs []string
482                 dirs  []string
483                 files []string
484         }{
485                 {
486                         globs: []string{},
487                         dirs:  append([]string{}, dirs...),
488                         files: append([]string{}, files...),
489                 },
490                 {
491                         globs: []string{"**"},
492                         dirs:  append([]string{}, dirs...),
493                         files: append([]string{}, files...),
494                 },
495                 {
496                         globs: []string{"**/file111"},
497                         dirs:  []string{"dir1", "dir1/dir11"},
498                         files: []string{"dir1/dir11/file111"},
499                 },
500                 {
501                         globs: []string{"nothing"},
502                         dirs:  nil,
503                         files: nil,
504                 },
505                 {
506                         globs: []string{"**/dir12"},
507                         dirs:  []string{"dir1", "dir1/dir12"},
508                         files: nil,
509                 },
510                 {
511                         globs: []string{"**/file*"},
512                         dirs:  []string{"dir1", "dir1/dir11", "dir2"},
513                         files: append([]string{}, files...),
514                 },
515                 {
516                         globs: []string{"**/dir1[12]"},
517                         dirs:  []string{"dir1", "dir1/dir11", "dir1/dir12"},
518                         files: nil,
519                 },
520                 {
521                         globs: []string{"**/dir1[^2]"},
522                         dirs:  []string{"dir1", "dir1/dir11"},
523                         files: nil,
524                 },
525                 {
526                         globs: []string{"dir1/**"},
527                         dirs:  []string{"dir1", "dir1/dir11", "dir1/dir12"},
528                         files: []string{"dir1/file11", "dir1/dir11/file111"},
529                 },
530         } {
531                 c.Logf("=== globs: %q", trial.globs)
532                 cp := copier{
533                         globs: trial.globs,
534                         dirs:  dirs,
535                 }
536                 for _, path := range files {
537                         cp.files = append(cp.files, filetodo{dst: path})
538                 }
539                 cp.applyGlobsToFilesAndDirs()
540                 var gotFiles []string
541                 for _, file := range cp.files {
542                         gotFiles = append(gotFiles, file.dst)
543                 }
544                 c.Check(cp.dirs, check.DeepEquals, trial.dirs)
545                 c.Check(gotFiles, check.DeepEquals, trial.files)
546         }
547 }
548
549 func (s *copierSuite) TestApplyGlobsToCollectionFS(c *check.C) {
550         for _, trial := range []struct {
551                 globs  []string
552                 expect []string
553         }{
554                 {
555                         globs:  nil,
556                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
557                 },
558                 {
559                         globs:  []string{"foo"},
560                         expect: []string{"foo"},
561                 },
562                 {
563                         globs:  []string{"baz/parent1/item1"},
564                         expect: []string{"baz/parent1/item1"},
565                 },
566                 {
567                         globs:  []string{"**"},
568                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
569                 },
570                 {
571                         globs:  []string{"**/*"},
572                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
573                 },
574                 {
575                         globs:  []string{"*"},
576                         expect: []string{"foo", "bar"},
577                 },
578                 {
579                         globs:  []string{"baz"},
580                         expect: nil,
581                 },
582                 {
583                         globs:  []string{"b*/**"},
584                         expect: []string{"baz/quux", "baz/parent1/item1"},
585                 },
586                 {
587                         globs:  []string{"baz"},
588                         expect: nil,
589                 },
590                 {
591                         globs:  []string{"baz/**"},
592                         expect: []string{"baz/quux", "baz/parent1/item1"},
593                 },
594                 {
595                         globs:  []string{"baz/*"},
596                         expect: []string{"baz/quux"},
597                 },
598                 {
599                         globs:  []string{"baz/**/*uu?"},
600                         expect: []string{"baz/quux"},
601                 },
602                 {
603                         globs:  []string{"**/*m1"},
604                         expect: []string{"baz/parent1/item1"},
605                 },
606                 {
607                         globs:  []string{"*/*/*/**/*1"},
608                         expect: nil,
609                 },
610                 {
611                         globs:  []string{"f*", "**/q*"},
612                         expect: []string{"foo", "baz/quux"},
613                 },
614                 {
615                         globs:  []string{"\\"}, // invalid pattern matches nothing
616                         expect: nil,
617                 },
618                 {
619                         globs:  []string{"\\", "foo"},
620                         expect: []string{"foo"},
621                 },
622                 {
623                         globs:  []string{"foo/**"},
624                         expect: nil,
625                 },
626                 {
627                         globs:  []string{"foo*/**"},
628                         expect: nil,
629                 },
630         } {
631                 c.Logf("=== globs: %q", trial.globs)
632                 collfs, err := (&arvados.Collection{ManifestText: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo 0:0:bar 0:0:baz/quux 0:0:baz/parent1/item1\n"}).FileSystem(nil, nil)
633                 c.Assert(err, check.IsNil)
634                 cp := copier{globs: trial.globs, staged: collfs}
635                 err = cp.applyGlobsToStaged()
636                 if !c.Check(err, check.IsNil) {
637                         continue
638                 }
639                 var got []string
640                 fs.WalkDir(arvados.FS(collfs), "", func(path string, ent fs.DirEntry, err error) error {
641                         if !ent.IsDir() {
642                                 got = append(got, path)
643                         }
644                         return nil
645                 })
646                 sort.Strings(got)
647                 sort.Strings(trial.expect)
648                 c.Check(got, check.DeepEquals, trial.expect)
649         }
650 }