]> git.arvados.org - arvados.git/blob - lib/crunchrun/copier_test.go
Merge branch '23009-multiselect-bug' into main. Closes #23009
[arvados.git] / lib / crunchrun / copier_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "bytes"
9         "encoding/json"
10         "fmt"
11         "io"
12         "io/fs"
13         "os"
14         "path"
15         "runtime"
16         "sort"
17         "syscall"
18
19         "git.arvados.org/arvados.git/sdk/go/arvados"
20         "git.arvados.org/arvados.git/sdk/go/arvadosclient"
21         "git.arvados.org/arvados.git/sdk/go/arvadostest"
22         "git.arvados.org/arvados.git/sdk/go/keepclient"
23         "github.com/sirupsen/logrus"
24         check "gopkg.in/check.v1"
25 )
26
27 var _ = check.Suite(&copierSuite{})
28
29 type copierSuite struct {
30         cp  copier
31         log bytes.Buffer
32 }
33
34 func (s *copierSuite) SetUpTest(c *check.C) {
35         tmpdir := c.MkDir()
36         s.log = bytes.Buffer{}
37
38         cl, err := arvadosclient.MakeArvadosClient()
39         c.Assert(err, check.IsNil)
40         kc, err := keepclient.MakeKeepClient(cl)
41         c.Assert(err, check.IsNil)
42         collfs, err := (&arvados.Collection{}).FileSystem(arvados.NewClientFromEnv(), kc)
43         c.Assert(err, check.IsNil)
44
45         s.cp = copier{
46                 client:        arvados.NewClientFromEnv(),
47                 keepClient:    kc,
48                 hostOutputDir: tmpdir,
49                 ctrOutputDir:  "/ctr/outdir",
50                 mounts: map[string]arvados.Mount{
51                         "/ctr/outdir": {Kind: "tmp"},
52                 },
53                 secretMounts: map[string]arvados.Mount{
54                         "/secret_text": {Kind: "text", Content: "xyzzy"},
55                 },
56                 logger: &logrus.Logger{Out: &s.log, Formatter: &logrus.TextFormatter{}, Level: logrus.InfoLevel},
57                 staged: collfs,
58         }
59 }
60
61 func (s *copierSuite) TestEmptyOutput(c *check.C) {
62         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
63         c.Check(err, check.IsNil)
64         c.Check(s.cp.dirs, check.DeepEquals, []string(nil))
65         c.Check(len(s.cp.files), check.Equals, 0)
66 }
67
68 func (s *copierSuite) TestEmptyWritableMount(c *check.C) {
69         s.writeFileInOutputDir(c, ".arvados#collection", `{"manifest_text":""}`)
70         s.cp.mounts[s.cp.ctrOutputDir] = arvados.Mount{
71                 Kind:     "collection",
72                 Writable: true,
73         }
74
75         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
76         c.Assert(err, check.IsNil)
77         c.Check(s.cp.dirs, check.DeepEquals, []string(nil))
78         c.Check(len(s.cp.files), check.Equals, 0)
79         rootdir, err := s.cp.staged.Open(".")
80         c.Assert(err, check.IsNil)
81         defer rootdir.Close()
82         fis, err := rootdir.Readdir(-1)
83         c.Assert(err, check.IsNil)
84         c.Check(fis, check.HasLen, 0)
85 }
86
87 func (s *copierSuite) TestOutputCollectionWithOnlySubmounts(c *check.C) {
88         s.writeFileInOutputDir(c, ".arvados#collection", `{"manifest_text":""}`)
89         s.cp.mounts[s.cp.ctrOutputDir] = arvados.Mount{
90                 Kind:     "collection",
91                 Writable: true,
92         }
93         s.cp.mounts[path.Join(s.cp.ctrOutputDir, "foo")] = arvados.Mount{
94                 Kind:             "collection",
95                 Path:             "foo",
96                 PortableDataHash: arvadostest.FooCollectionPDH,
97         }
98
99         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
100         c.Assert(err, check.IsNil)
101
102         // s.cp.dirs and s.cp.files are empty, because nothing needs
103         // to be copied from disk.
104         c.Check(s.cp.dirs, check.DeepEquals, []string(nil))
105         c.Check(len(s.cp.files), check.Equals, 0)
106
107         // The "foo" file has already been copied from FooCollection
108         // to s.cp.staged via Snapshot+Splice.
109         rootdir, err := s.cp.staged.Open(".")
110         c.Assert(err, check.IsNil)
111         defer rootdir.Close()
112         fis, err := rootdir.Readdir(-1)
113         c.Assert(err, check.IsNil)
114         c.Assert(fis, check.HasLen, 1)
115         c.Check(fis[0].Size(), check.Equals, int64(3))
116 }
117
118 func (s *copierSuite) TestRepetitiveMountsInOutputDir(c *check.C) {
119         var memstats0 runtime.MemStats
120         runtime.ReadMemStats(&memstats0)
121
122         s.writeFileInOutputDir(c, ".arvados#collection", `{"manifest_text":""}`)
123         s.cp.mounts[s.cp.ctrOutputDir] = arvados.Mount{
124                 Kind:     "collection",
125                 Writable: true,
126         }
127         nmounts := 200
128         ncollections := 1
129         pdh := make([]string, ncollections)
130         s.cp.manifestCache = make(map[string]string)
131         for i := 0; i < ncollections; i++ {
132                 mtxt := arvadostest.FakeManifest(1, nmounts, 2, 4<<20)
133                 pdh[i] = arvados.PortableDataHash(mtxt)
134                 s.cp.manifestCache[pdh[i]] = mtxt
135         }
136         for i := 0; i < nmounts; i++ {
137                 filename := fmt.Sprintf("file%d", i)
138                 s.cp.mounts[path.Join(s.cp.ctrOutputDir, filename)] = arvados.Mount{
139                         Kind:             "collection",
140                         Path:             fmt.Sprintf("dir0/dir%d/file%d", i, i),
141                         PortableDataHash: pdh[i%ncollections],
142                 }
143         }
144         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
145         c.Assert(err, check.IsNil)
146
147         // Files mounted under output dir have been copied from the
148         // fake collections to s.cp.staged via Snapshot+Splice.
149         rootdir, err := s.cp.staged.Open(".")
150         c.Assert(err, check.IsNil)
151         defer rootdir.Close()
152         fis, err := rootdir.Readdir(-1)
153         c.Assert(err, check.IsNil)
154         c.Assert(fis, check.HasLen, nmounts)
155
156         // nmounts -- Δalloc before -> Δalloc after fixing #22827
157         // 500 -- 1542 MB -> 15 MB
158         // 200 -- 254 MB -> 5 MB
159         var memstats runtime.MemStats
160         runtime.ReadMemStats(&memstats)
161         delta := (int64(memstats.Alloc) - int64(memstats0.Alloc)) / 1000000
162         c.Logf("Δalloc %d MB", delta)
163         c.Check(delta < 40, check.Equals, true, check.Commentf("Δalloc %d MB is suspiciously high, expect ~ 5 MB", delta))
164 }
165
166 func (s *copierSuite) TestRegularFilesAndDirs(c *check.C) {
167         err := os.MkdirAll(s.cp.hostOutputDir+"/dir1/dir2/dir3", 0755)
168         c.Assert(err, check.IsNil)
169         f, err := os.OpenFile(s.cp.hostOutputDir+"/dir1/foo", os.O_CREATE|os.O_WRONLY, 0644)
170         c.Assert(err, check.IsNil)
171         _, err = io.WriteString(f, "foo")
172         c.Assert(err, check.IsNil)
173         c.Assert(f.Close(), check.IsNil)
174         err = syscall.Mkfifo(s.cp.hostOutputDir+"/dir1/fifo", 0644)
175         c.Assert(err, check.IsNil)
176
177         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
178         c.Check(err, check.IsNil)
179         c.Check(s.cp.dirs, check.DeepEquals, []string{"/dir1", "/dir1/dir2", "/dir1/dir2/dir3"})
180         c.Check(s.cp.files, check.DeepEquals, []filetodo{
181                 {src: os.DevNull, dst: "/dir1/dir2/dir3/.keep"},
182                 {src: s.cp.hostOutputDir + "/dir1/foo", dst: "/dir1/foo", size: 3},
183         })
184         c.Check(s.log.String(), check.Matches, `.* msg="Skipping unsupported file type \(mode 200000644\) in output dir: \\"/ctr/outdir/dir1/fifo\\""\n`)
185 }
186
187 func (s *copierSuite) TestSymlinkCycle(c *check.C) {
188         c.Assert(os.Mkdir(s.cp.hostOutputDir+"/dir1", 0755), check.IsNil)
189         c.Assert(os.Mkdir(s.cp.hostOutputDir+"/dir2", 0755), check.IsNil)
190         c.Assert(os.Symlink("../dir2", s.cp.hostOutputDir+"/dir1/l_dir2"), check.IsNil)
191         c.Assert(os.Symlink("../dir1", s.cp.hostOutputDir+"/dir2/l_dir1"), check.IsNil)
192         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
193         c.Check(err, check.ErrorMatches, `.*cycle.*`)
194 }
195
196 func (s *copierSuite) TestSymlinkTargetMissing(c *check.C) {
197         c.Assert(os.Symlink("./missing", s.cp.hostOutputDir+"/symlink"), check.IsNil)
198         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
199         c.Check(err, check.ErrorMatches, `.*/ctr/outdir/missing.*`)
200 }
201
202 func (s *copierSuite) TestSymlinkTargetNotMounted(c *check.C) {
203         c.Assert(os.Symlink("../boop", s.cp.hostOutputDir+"/symlink"), check.IsNil)
204         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
205         c.Check(err, check.ErrorMatches, `.*/ctr/boop.*`)
206 }
207
208 func (s *copierSuite) TestSymlinkToSecret(c *check.C) {
209         c.Assert(os.Symlink("/secret_text", s.cp.hostOutputDir+"/symlink"), check.IsNil)
210         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
211         c.Check(err, check.IsNil)
212         c.Check(len(s.cp.dirs), check.Equals, 0)
213         c.Check(len(s.cp.files), check.Equals, 0)
214 }
215
216 func (s *copierSuite) TestSecretInOutputDir(c *check.C) {
217         s.cp.secretMounts["/ctr/outdir/secret_text"] = s.cp.secretMounts["/secret_text"]
218         s.writeFileInOutputDir(c, "secret_text", "xyzzy")
219         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
220         c.Check(err, check.IsNil)
221         c.Check(len(s.cp.dirs), check.Equals, 0)
222         c.Check(len(s.cp.files), check.Equals, 0)
223 }
224
225 func (s *copierSuite) TestSymlinkToMountedCollection(c *check.C) {
226         // simulate mounted read-only collection
227         s.cp.mounts["/mnt"] = arvados.Mount{
228                 Kind:             "collection",
229                 PortableDataHash: arvadostest.FooCollectionPDH,
230         }
231
232         // simulate mounted writable collection
233         bindtmp := c.MkDir()
234         f, err := os.OpenFile(bindtmp+"/.arvados#collection", os.O_CREATE|os.O_WRONLY, 0644)
235         c.Assert(err, check.IsNil)
236         _, err = io.WriteString(f, `{"manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n"}`)
237         c.Assert(err, check.IsNil)
238         c.Assert(f.Close(), check.IsNil)
239         s.cp.mounts["/mnt-w"] = arvados.Mount{
240                 Kind:             "collection",
241                 PortableDataHash: arvadostest.FooCollectionPDH,
242                 Writable:         true,
243         }
244         s.cp.bindmounts = map[string]bindmount{
245                 "/mnt-w": bindmount{HostPath: bindtmp, ReadOnly: false},
246         }
247
248         c.Assert(os.Symlink("../../mnt", s.cp.hostOutputDir+"/l_dir"), check.IsNil)
249         c.Assert(os.Symlink("/mnt/foo", s.cp.hostOutputDir+"/l_file"), check.IsNil)
250         c.Assert(os.Symlink("/mnt-w/bar", s.cp.hostOutputDir+"/l_file_w"), check.IsNil)
251
252         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
253         c.Check(err, check.IsNil)
254         s.checkStagedFile(c, "l_dir/foo", 3)
255         s.checkStagedFile(c, "l_file", 3)
256         s.checkStagedFile(c, "l_file_w", 3)
257 }
258
259 func (s *copierSuite) checkStagedFile(c *check.C, path string, size int64) {
260         fi, err := s.cp.staged.Stat(path)
261         if c.Check(err, check.IsNil) {
262                 c.Check(fi.Size(), check.Equals, size)
263         }
264 }
265
266 func (s *copierSuite) TestSymlink(c *check.C) {
267         hostfile := s.cp.hostOutputDir + "/dir1/file"
268
269         err := os.MkdirAll(s.cp.hostOutputDir+"/dir1/dir2/dir3", 0755)
270         c.Assert(err, check.IsNil)
271         s.writeFileInOutputDir(c, "dir1/file", "file")
272         for _, err := range []error{
273                 os.Symlink(s.cp.ctrOutputDir+"/dir1/file", s.cp.hostOutputDir+"/l_abs_file"),
274                 os.Symlink(s.cp.ctrOutputDir+"/dir1/dir2", s.cp.hostOutputDir+"/l_abs_dir2"),
275                 os.Symlink("../../dir1/file", s.cp.hostOutputDir+"/dir1/dir2/l_rel_file"),
276                 os.Symlink("dir1/file", s.cp.hostOutputDir+"/l_rel_file"),
277                 os.MkdirAll(s.cp.hostOutputDir+"/morelinks", 0755),
278                 os.Symlink("../dir1/dir2", s.cp.hostOutputDir+"/morelinks/l_rel_dir2"),
279                 os.Symlink("dir1/dir2/dir3", s.cp.hostOutputDir+"/l_rel_dir3"),
280                 // rel. symlink -> rel. symlink -> regular file
281                 os.Symlink("../dir1/dir2/l_rel_file", s.cp.hostOutputDir+"/morelinks/l_rel_l_rel_file"),
282         } {
283                 c.Assert(err, check.IsNil)
284         }
285
286         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
287         c.Check(err, check.IsNil)
288         c.Check(s.cp.dirs, check.DeepEquals, []string{
289                 "/dir1", "/dir1/dir2", "/dir1/dir2/dir3",
290                 "/l_abs_dir2", "/l_abs_dir2/dir3",
291                 "/l_rel_dir3",
292                 "/morelinks", "/morelinks/l_rel_dir2", "/morelinks/l_rel_dir2/dir3",
293         })
294         c.Check(s.cp.files, check.DeepEquals, []filetodo{
295                 {dst: "/dir1/dir2/dir3/.keep", src: os.DevNull},
296                 {dst: "/dir1/dir2/l_rel_file", src: hostfile, size: 4},
297                 {dst: "/dir1/file", src: hostfile, size: 4},
298                 {dst: "/l_abs_dir2/dir3/.keep", src: os.DevNull},
299                 {dst: "/l_abs_dir2/l_rel_file", src: hostfile, size: 4},
300                 {dst: "/l_abs_file", src: hostfile, size: 4},
301                 {dst: "/l_rel_dir3/.keep", src: os.DevNull},
302                 {dst: "/l_rel_file", src: hostfile, size: 4},
303                 {dst: "/morelinks/l_rel_dir2/dir3/.keep", src: os.DevNull},
304                 {dst: "/morelinks/l_rel_dir2/l_rel_file", src: hostfile, size: 4},
305                 {dst: "/morelinks/l_rel_l_rel_file", src: hostfile, size: 4},
306         })
307 }
308
309 func (s *copierSuite) TestUnsupportedOutputMount(c *check.C) {
310         s.cp.mounts["/ctr/outdir"] = arvados.Mount{Kind: "waz"}
311         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
312         c.Check(err, check.NotNil)
313 }
314
315 func (s *copierSuite) TestUnsupportedMountKindBelow(c *check.C) {
316         s.cp.mounts["/ctr/outdir/dirk"] = arvados.Mount{Kind: "waz"}
317         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
318         c.Check(err, check.NotNil)
319 }
320
321 func (s *copierSuite) TestWritableMountBelow(c *check.C) {
322         s.cp.mounts["/ctr/outdir/mount"] = arvados.Mount{
323                 Kind:             "collection",
324                 PortableDataHash: arvadostest.FooCollectionPDH,
325                 Writable:         true,
326         }
327         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/mount", 0755), check.IsNil)
328         s.writeFileInOutputDir(c, "file", "file")
329         s.writeFileInOutputDir(c, "mount/foo", "foo")
330
331         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
332         c.Check(err, check.IsNil)
333         c.Check(s.cp.dirs, check.DeepEquals, []string{"/mount"})
334         c.Check(s.cp.files, check.DeepEquals, []filetodo{
335                 {src: s.cp.hostOutputDir + "/file", dst: "/file", size: 4},
336                 {src: s.cp.hostOutputDir + "/mount/foo", dst: "/mount/foo", size: 3},
337         })
338 }
339
340 // Check some glob-matching edge cases. In particular, check that
341 // patterns like "foo/**" do not match regular files named "foo"
342 // (unless of course they are inside a directory named "foo").
343 func (s *copierSuite) TestMatchGlobs(c *check.C) {
344         s.cp.globs = []string{"foo*/**"}
345         c.Check(s.cp.matchGlobs("foo", true), check.Equals, true)
346         c.Check(s.cp.matchGlobs("food", true), check.Equals, true)
347         c.Check(s.cp.matchGlobs("foo", false), check.Equals, false)
348         c.Check(s.cp.matchGlobs("food", false), check.Equals, false)
349         c.Check(s.cp.matchGlobs("foo/bar", false), check.Equals, true)
350         c.Check(s.cp.matchGlobs("food/bar", false), check.Equals, true)
351         c.Check(s.cp.matchGlobs("foo/bar", true), check.Equals, true)
352         c.Check(s.cp.matchGlobs("food/bar", true), check.Equals, true)
353
354         s.cp.globs = []string{"ba[!/]/foo*/**"}
355         c.Check(s.cp.matchGlobs("bar/foo", true), check.Equals, true)
356         c.Check(s.cp.matchGlobs("bar/food", true), check.Equals, true)
357         c.Check(s.cp.matchGlobs("bar/foo", false), check.Equals, false)
358         c.Check(s.cp.matchGlobs("bar/food", false), check.Equals, false)
359         c.Check(s.cp.matchGlobs("bar/foo/z\\[", true), check.Equals, true)
360         c.Check(s.cp.matchGlobs("bar/food/z\\[", true), check.Equals, true)
361         c.Check(s.cp.matchGlobs("bar/foo/z\\[", false), check.Equals, true)
362         c.Check(s.cp.matchGlobs("bar/food/z\\[", false), check.Equals, true)
363
364         s.cp.globs = []string{"waz/**/foo*/**"}
365         c.Check(s.cp.matchGlobs("waz/quux/foo", true), check.Equals, true)
366         c.Check(s.cp.matchGlobs("waz/quux/food", true), check.Equals, true)
367         c.Check(s.cp.matchGlobs("waz/quux/foo", false), check.Equals, false)
368         c.Check(s.cp.matchGlobs("waz/quux/food", false), check.Equals, false)
369         c.Check(s.cp.matchGlobs("waz/quux/foo/foo", true), check.Equals, true)
370         c.Check(s.cp.matchGlobs("waz/quux/food/foo", true), check.Equals, true)
371         c.Check(s.cp.matchGlobs("waz/quux/foo/foo", false), check.Equals, true)
372         c.Check(s.cp.matchGlobs("waz/quux/food/foo", false), check.Equals, true)
373
374         s.cp.globs = []string{"foo/**/*"}
375         c.Check(s.cp.matchGlobs("foo", false), check.Equals, false)
376         c.Check(s.cp.matchGlobs("foo/bar", false), check.Equals, true)
377         c.Check(s.cp.matchGlobs("foo/bar/baz", false), check.Equals, true)
378         c.Check(s.cp.matchGlobs("foo/bar/baz/waz", false), check.Equals, true)
379 }
380
381 func (s *copierSuite) TestSubtreeCouldMatch(c *check.C) {
382         for _, trial := range []struct {
383                 mount string // relative to output dir
384                 glob  string
385                 could bool
386         }{
387                 {mount: "abc", glob: "*"},
388                 {mount: "abc", glob: "abc/*", could: true},
389                 {mount: "abc", glob: "a*/**", could: true},
390                 {mount: "abc", glob: "**", could: true},
391                 {mount: "abc", glob: "*/*", could: true},
392                 {mount: "abc", glob: "**/*.txt", could: true},
393                 {mount: "abc/def", glob: "*"},
394                 {mount: "abc/def", glob: "*/*"},
395                 {mount: "abc/def", glob: "*/*.txt"},
396                 {mount: "abc/def", glob: "*/*/*", could: true},
397                 {mount: "abc/def", glob: "**", could: true},
398                 {mount: "abc/def", glob: "**/bar", could: true},
399                 {mount: "abc/def", glob: "abc/**", could: true},
400                 {mount: "abc/def/ghi", glob: "*c/**/bar", could: true},
401                 {mount: "abc/def/ghi", glob: "*c/*f/bar"},
402                 {mount: "abc/def/ghi", glob: "abc/d[^/]f/ghi/*", could: true},
403         } {
404                 c.Logf("=== %+v", trial)
405                 got := (&copier{
406                         globs: []string{trial.glob},
407                 }).subtreeCouldMatch(trial.mount)
408                 c.Check(got, check.Equals, trial.could)
409         }
410 }
411
412 func (s *copierSuite) TestCopyFromLargeCollection_Readonly(c *check.C) {
413         s.testCopyFromLargeCollection(c, false)
414 }
415
416 func (s *copierSuite) TestCopyFromLargeCollection_Writable(c *check.C) {
417         s.testCopyFromLargeCollection(c, true)
418 }
419
420 func (s *copierSuite) testCopyFromLargeCollection(c *check.C, writable bool) {
421         bindtmp := c.MkDir()
422         mtxt := arvadostest.FakeManifest(100, 100, 2, 4<<20)
423         pdh := arvados.PortableDataHash(mtxt)
424         json, err := json.Marshal(arvados.Collection{ManifestText: mtxt, PortableDataHash: pdh})
425         c.Assert(err, check.IsNil)
426         err = os.WriteFile(bindtmp+"/.arvados#collection", json, 0644)
427         // This symlink tricks walkHostFS into calling walkMount on
428         // the fakecollection dir. If we did the obvious thing instead
429         // (i.e., mount a collection under the output dir) walkMount
430         // would see that our fakecollection dir is actually a regular
431         // directory, conclude that the mount has been deleted and
432         // replaced by a regular directory tree, and process the tree
433         // as regular files, bypassing the manifest-copying code path
434         // we're trying to test.
435         err = os.Symlink("/fakecollection", s.cp.hostOutputDir+"/fakecollection")
436         c.Assert(err, check.IsNil)
437         s.cp.mounts["/fakecollection"] = arvados.Mount{
438                 Kind:             "collection",
439                 PortableDataHash: pdh,
440                 Writable:         writable,
441         }
442         s.cp.bindmounts = map[string]bindmount{
443                 "/fakecollection": bindmount{HostPath: bindtmp, ReadOnly: !writable},
444         }
445         s.cp.manifestCache = map[string]string{pdh: mtxt}
446         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
447         c.Check(err, check.IsNil)
448         c.Log(s.log.String())
449
450         // Check some files to ensure they were copied properly.
451         // Specifically, arbitrarily check every 17th file in every
452         // 13th dir.  (This is better than checking all of the files
453         // only in that it's less likely to show up as a distracting
454         // signal in CPU profiling.)
455         for i := 0; i < 100; i += 13 {
456                 for j := 0; j < 100; j += 17 {
457                         fnm := fmt.Sprintf("/fakecollection/dir%d/dir%d/file%d", i, j, j)
458                         _, err := s.cp.staged.Stat(fnm)
459                         c.Assert(err, check.IsNil, check.Commentf("%s", fnm))
460                 }
461         }
462 }
463
464 func (s *copierSuite) TestMountBelowExcludedByGlob(c *check.C) {
465         bindtmp := c.MkDir()
466         s.cp.mounts["/ctr/outdir/include/includer"] = arvados.Mount{
467                 Kind:             "collection",
468                 PortableDataHash: arvadostest.FooCollectionPDH,
469         }
470         s.cp.mounts["/ctr/outdir/include/includew"] = arvados.Mount{
471                 Kind:             "collection",
472                 PortableDataHash: arvadostest.FooCollectionPDH,
473                 Writable:         true,
474         }
475         s.cp.mounts["/ctr/outdir/exclude/excluder"] = arvados.Mount{
476                 Kind:             "collection",
477                 PortableDataHash: arvadostest.FooCollectionPDH,
478         }
479         s.cp.mounts["/ctr/outdir/exclude/excludew"] = arvados.Mount{
480                 Kind:             "collection",
481                 PortableDataHash: arvadostest.FooCollectionPDH,
482                 Writable:         true,
483         }
484         s.cp.mounts["/ctr/outdir/nonexistent/collection"] = arvados.Mount{
485                 // As extra assurance, plant a collection that will
486                 // fail if copier attempts to load its manifest.  (For
487                 // performance reasons it's important that copier
488                 // doesn't try to load the manifest before deciding
489                 // not to copy the contents.)
490                 Kind:             "collection",
491                 PortableDataHash: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+1234",
492         }
493         s.cp.globs = []string{
494                 "?ncl*/*r/*",
495                 "*/?ncl*/**",
496         }
497         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/include/includer", 0755), check.IsNil)
498         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/include/includew", 0755), check.IsNil)
499         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/exclude/excluder", 0755), check.IsNil)
500         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/exclude/excludew", 0755), check.IsNil)
501         s.writeFileInOutputDir(c, "include/includew/foo", "foo")
502         s.writeFileInOutputDir(c, "exclude/excludew/foo", "foo")
503         s.cp.bindmounts = map[string]bindmount{
504                 "/ctr/outdir/include/includew": bindmount{HostPath: bindtmp, ReadOnly: false},
505         }
506         s.cp.bindmounts = map[string]bindmount{
507                 "/ctr/outdir/include/excludew": bindmount{HostPath: bindtmp, ReadOnly: false},
508         }
509
510         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
511         c.Check(err, check.IsNil)
512         c.Log(s.log.String())
513
514         // Note it's OK that "/exclude" is not excluded by walkMount:
515         // it is just a local filesystem directory, not a mount point
516         // that's expensive to walk.  In real-life usage, it will be
517         // removed from cp.dirs before any copying happens.
518         c.Check(s.cp.dirs, check.DeepEquals, []string{"/exclude", "/include", "/include/includew"})
519         c.Check(s.cp.files, check.DeepEquals, []filetodo{
520                 {src: s.cp.hostOutputDir + "/include/includew/foo", dst: "/include/includew/foo", size: 3},
521         })
522         manifest, err := s.cp.staged.MarshalManifest(".")
523         c.Assert(err, check.IsNil)
524         c.Check(manifest, check.Matches, `(?ms).*\./include/includer .*`)
525         c.Check(manifest, check.Not(check.Matches), `(?ms).*exclude.*`)
526         c.Check(s.log.String(), check.Matches, `(?ms).*not copying \\"exclude/excluder\\".*`)
527         c.Check(s.log.String(), check.Matches, `(?ms).*not copying \\"nonexistent/collection\\".*`)
528 }
529
530 func (s *copierSuite) writeFileInOutputDir(c *check.C, path, data string) {
531         f, err := os.OpenFile(s.cp.hostOutputDir+"/"+path, os.O_CREATE|os.O_WRONLY, 0644)
532         c.Assert(err, check.IsNil)
533         _, err = io.WriteString(f, data)
534         c.Assert(err, check.IsNil)
535         c.Assert(f.Close(), check.IsNil)
536 }
537
538 // applyGlobsToFilesAndDirs uses the same glob-matching code as
539 // applyGlobsToStaged, so we don't need to test all of the same
540 // glob-matching behavior covered in TestApplyGlobsToCollectionFS.  We
541 // do need to check that (a) the glob is actually being used to filter
542 // out files, and (b) non-matching dirs still included if and only if
543 // they are ancestors of matching files.
544 func (s *copierSuite) TestApplyGlobsToFilesAndDirs(c *check.C) {
545         dirs := []string{"dir1", "dir1/dir11", "dir1/dir12", "dir2"}
546         files := []string{"dir1/file11", "dir1/dir11/file111", "dir2/file2"}
547         for _, trial := range []struct {
548                 globs []string
549                 dirs  []string
550                 files []string
551         }{
552                 {
553                         globs: []string{},
554                         dirs:  append([]string{}, dirs...),
555                         files: append([]string{}, files...),
556                 },
557                 {
558                         globs: []string{"**"},
559                         dirs:  append([]string{}, dirs...),
560                         files: append([]string{}, files...),
561                 },
562                 {
563                         globs: []string{"**/file111"},
564                         dirs:  []string{"dir1", "dir1/dir11"},
565                         files: []string{"dir1/dir11/file111"},
566                 },
567                 {
568                         globs: []string{"nothing"},
569                         dirs:  nil,
570                         files: nil,
571                 },
572                 {
573                         globs: []string{"**/dir12"},
574                         dirs:  []string{"dir1", "dir1/dir12"},
575                         files: nil,
576                 },
577                 {
578                         globs: []string{"**/file*"},
579                         dirs:  []string{"dir1", "dir1/dir11", "dir2"},
580                         files: append([]string{}, files...),
581                 },
582                 {
583                         globs: []string{"**/dir1[12]"},
584                         dirs:  []string{"dir1", "dir1/dir11", "dir1/dir12"},
585                         files: nil,
586                 },
587                 {
588                         globs: []string{"**/dir1[^2]"},
589                         dirs:  []string{"dir1", "dir1/dir11"},
590                         files: nil,
591                 },
592                 {
593                         globs: []string{"dir1/**"},
594                         dirs:  []string{"dir1", "dir1/dir11", "dir1/dir12"},
595                         files: []string{"dir1/file11", "dir1/dir11/file111"},
596                 },
597         } {
598                 c.Logf("=== globs: %q", trial.globs)
599                 cp := copier{
600                         globs: trial.globs,
601                         dirs:  dirs,
602                 }
603                 for _, path := range files {
604                         cp.files = append(cp.files, filetodo{dst: path})
605                 }
606                 cp.applyGlobsToFilesAndDirs()
607                 var gotFiles []string
608                 for _, file := range cp.files {
609                         gotFiles = append(gotFiles, file.dst)
610                 }
611                 c.Check(cp.dirs, check.DeepEquals, trial.dirs)
612                 c.Check(gotFiles, check.DeepEquals, trial.files)
613         }
614 }
615
616 func (s *copierSuite) TestApplyGlobsToCollectionFS(c *check.C) {
617         for _, trial := range []struct {
618                 globs  []string
619                 expect []string
620         }{
621                 {
622                         globs:  nil,
623                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
624                 },
625                 {
626                         globs:  []string{"foo"},
627                         expect: []string{"foo"},
628                 },
629                 {
630                         globs:  []string{"baz/parent1/item1"},
631                         expect: []string{"baz/parent1/item1"},
632                 },
633                 {
634                         globs:  []string{"**"},
635                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
636                 },
637                 {
638                         globs:  []string{"**/*"},
639                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
640                 },
641                 {
642                         globs:  []string{"*"},
643                         expect: []string{"foo", "bar"},
644                 },
645                 {
646                         globs:  []string{"baz"},
647                         expect: nil,
648                 },
649                 {
650                         globs:  []string{"b*/**"},
651                         expect: []string{"baz/quux", "baz/parent1/item1"},
652                 },
653                 {
654                         globs:  []string{"baz"},
655                         expect: nil,
656                 },
657                 {
658                         globs:  []string{"baz/**"},
659                         expect: []string{"baz/quux", "baz/parent1/item1"},
660                 },
661                 {
662                         globs:  []string{"baz/*"},
663                         expect: []string{"baz/quux"},
664                 },
665                 {
666                         globs:  []string{"baz/**/*uu?"},
667                         expect: []string{"baz/quux"},
668                 },
669                 {
670                         globs:  []string{"**/*m1"},
671                         expect: []string{"baz/parent1/item1"},
672                 },
673                 {
674                         globs:  []string{"*/*/*/**/*1"},
675                         expect: nil,
676                 },
677                 {
678                         globs:  []string{"f*", "**/q*"},
679                         expect: []string{"foo", "baz/quux"},
680                 },
681                 {
682                         globs:  []string{"\\"}, // invalid pattern matches nothing
683                         expect: nil,
684                 },
685                 {
686                         globs:  []string{"\\", "foo"},
687                         expect: []string{"foo"},
688                 },
689                 {
690                         globs:  []string{"foo/**"},
691                         expect: nil,
692                 },
693                 {
694                         globs:  []string{"foo*/**"},
695                         expect: nil,
696                 },
697         } {
698                 c.Logf("=== globs: %q", trial.globs)
699                 collfs, err := (&arvados.Collection{ManifestText: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo 0:0:bar 0:0:baz/quux 0:0:baz/parent1/item1\n"}).FileSystem(nil, nil)
700                 c.Assert(err, check.IsNil)
701                 cp := copier{globs: trial.globs, staged: collfs}
702                 err = cp.applyGlobsToStaged()
703                 if !c.Check(err, check.IsNil) {
704                         continue
705                 }
706                 var got []string
707                 fs.WalkDir(arvados.FS(collfs), "", func(path string, ent fs.DirEntry, err error) error {
708                         if !ent.IsDir() {
709                                 got = append(got, path)
710                         }
711                         return nil
712                 })
713                 sort.Strings(got)
714                 sort.Strings(trial.expect)
715                 c.Check(got, check.DeepEquals, trial.expect)
716         }
717 }