1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
15 "git.arvados.org/arvados.git/sdk/go/arvados"
16 "git.arvados.org/arvados.git/sdk/go/arvadostest"
17 "github.com/sirupsen/logrus"
18 check "gopkg.in/check.v1"
// Register copierSuite with the gocheck runner so that its Test* methods
// are executed.
21 var _ = check.Suite(&copierSuite{})
// copierSuite carries the per-test fixture for the tests below: methods in
// this file reach the object under test through s.cp and read captured
// logger output from s.log.
23 type copierSuite struct {
// SetUpTest is the gocheck per-test fixture hook. It resets the captured
// log and configures the fixture with "/ctr/outdir" as the container
// output directory (declared as a "tmp" mount), a single secret text mount
// at "/secret_text", and a logger that writes text-formatted entries at
// Info level and above into s.log.
28 func (s *copierSuite) SetUpTest(c *check.C) {
// Start from an empty buffer so log assertions only see output produced
// by the current test.
30 s.log = bytes.Buffer{}
32 client: arvados.NewClientFromEnv(),
// NOTE(review): tmpdir is presumably a fresh per-test temp directory --
// confirm against its definition.
33 hostOutputDir: tmpdir,
34 ctrOutputDir: "/ctr/outdir",
35 mounts: map[string]arvados.Mount{
36 "/ctr/outdir": {Kind: "tmp"},
// Secret mount referenced by TestSymlinkToSecret and
// TestSecretInOutputDir.
38 secretMounts: map[string]arvados.Mount{
39 "/secret_text": {Kind: "text", Content: "xyzzy"},
// Route log output into s.log so tests can match on log messages.
41 logger: &logrus.Logger{Out: &s.log, Formatter: &logrus.TextFormatter{}, Level: logrus.InfoLevel},
// TestEmptyOutput walks the output directory without creating anything in
// it first, and expects success with no directories and no files recorded.
45 func (s *copierSuite) TestEmptyOutput(c *check.C) {
46 err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
47 c.Check(err, check.IsNil)
// DeepEquals against a typed nil slice: dirs must be nil, not merely
// empty.
48 c.Check(s.cp.dirs, check.DeepEquals, []string(nil))
49 c.Check(len(s.cp.files), check.Equals, 0)
// TestRegularFilesAndDirs populates the host output directory with nested
// directories, one regular 3-byte file and one FIFO, then checks what the
// walk records: all three directories, the regular file with its size, a
// ".keep" entry sourced from os.DevNull for the otherwise empty leaf
// directory, and a log message saying the FIFO was skipped.
52 func (s *copierSuite) TestRegularFilesAndDirs(c *check.C) {
53 err := os.MkdirAll(s.cp.hostOutputDir+"/dir1/dir2/dir3", 0755)
54 c.Assert(err, check.IsNil)
55 f, err := os.OpenFile(s.cp.hostOutputDir+"/dir1/foo", os.O_CREATE|os.O_WRONLY, 0644)
56 c.Assert(err, check.IsNil)
57 _, err = io.WriteString(f, "foo")
58 c.Assert(err, check.IsNil)
59 c.Assert(f.Close(), check.IsNil)
// A named pipe is neither a regular file, a directory nor a symlink; it
// is the "unsupported file type" case asserted on below.
60 err = syscall.Mkfifo(s.cp.hostOutputDir+"/dir1/fifo", 0644)
61 c.Assert(err, check.IsNil)
63 err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
64 c.Check(err, check.IsNil)
65 c.Check(s.cp.dirs, check.DeepEquals, []string{"/dir1", "/dir1/dir2", "/dir1/dir2/dir3"})
66 c.Check(s.cp.files, check.DeepEquals, []filetodo{
67 {src: os.DevNull, dst: "/dir1/dir2/dir3/.keep"},
68 {src: s.cp.hostOutputDir + "/dir1/foo", dst: "/dir1/foo", size: 3},
// The logged path is the container-side path (/ctr/outdir/...), not the
// host path. Mode 200000644 is the named-pipe mode bit plus 0644.
70 c.Check(s.log.String(), check.Matches, `.* msg="Skipping unsupported file type \(mode 200000644\) in output dir: \\"/ctr/outdir/dir1/fifo\\""\n`)
// TestSymlinkCycle creates two directories that each contain a symlink to
// the other and expects the walk to fail with an error mentioning "cycle".
73 func (s *copierSuite) TestSymlinkCycle(c *check.C) {
74 c.Assert(os.Mkdir(s.cp.hostOutputDir+"/dir1", 0755), check.IsNil)
75 c.Assert(os.Mkdir(s.cp.hostOutputDir+"/dir2", 0755), check.IsNil)
// dir1/l_dir2 -> dir2 and dir2/l_dir1 -> dir1 form the loop.
76 c.Assert(os.Symlink("../dir2", s.cp.hostOutputDir+"/dir1/l_dir2"), check.IsNil)
77 c.Assert(os.Symlink("../dir1", s.cp.hostOutputDir+"/dir2/l_dir1"), check.IsNil)
78 err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
79 c.Check(err, check.ErrorMatches, `.*cycle.*`)
// TestSymlinkTargetMissing places a symlink to a nonexistent sibling in the
// output directory and expects an error that names the target by its
// container-side path, "/ctr/outdir/missing".
82 func (s *copierSuite) TestSymlinkTargetMissing(c *check.C) {
83 c.Assert(os.Symlink("./missing", s.cp.hostOutputDir+"/symlink"), check.IsNil)
84 err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
85 c.Check(err, check.ErrorMatches, `.*/ctr/outdir/missing.*`)
// TestSymlinkTargetNotMounted places a relative symlink that points above
// the output directory ("../boop", i.e. "/ctr/boop" inside the container),
// a path for which the fixture declares no mount, and expects an error that
// names "/ctr/boop".
88 func (s *copierSuite) TestSymlinkTargetNotMounted(c *check.C) {
89 c.Assert(os.Symlink("../boop", s.cp.hostOutputDir+"/symlink"), check.IsNil)
90 err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
91 c.Check(err, check.ErrorMatches, `.*/ctr/boop.*`)
// TestSymlinkToSecret places a symlink to "/secret_text" (the secret mount
// configured in SetUpTest) in the output directory and expects the walk to
// succeed while recording nothing to copy.
94 func (s *copierSuite) TestSymlinkToSecret(c *check.C) {
95 c.Assert(os.Symlink("/secret_text", s.cp.hostOutputDir+"/symlink"), check.IsNil)
96 err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
97 c.Check(err, check.IsNil)
98 c.Check(len(s.cp.dirs), check.Equals, 0)
99 c.Check(len(s.cp.files), check.Equals, 0)
// TestSecretInOutputDir declares a secret mount located inside the output
// directory, creates the corresponding file on the host side, and expects
// the walk to succeed while recording nothing to copy.
102 func (s *copierSuite) TestSecretInOutputDir(c *check.C) {
// Reuse the "/secret_text" mount definition under a path below
// /ctr/outdir.
103 s.cp.secretMounts["/ctr/outdir/secret_text"] = s.cp.secretMounts["/secret_text"]
104 s.writeFileInOutputDir(c, "secret_text", "xyzzy")
105 err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
106 c.Check(err, check.IsNil)
107 c.Check(len(s.cp.dirs), check.Equals, 0)
108 c.Check(len(s.cp.files), check.Equals, 0)
// TestSymlinkToMountedCollection checks symlinks from the output directory
// into collection mounts: a directory link and a file link into the mount
// at "/mnt", and a file link into the bind-mounted "/mnt-w". It asserts on
// the resulting s.cp.manifest text rather than on s.cp.files.
111 func (s *copierSuite) TestSymlinkToMountedCollection(c *check.C) {
112 // simulate mounted read-only collection
113 s.cp.mounts["/mnt"] = arvados.Mount{
115 PortableDataHash: arvadostest.FooCollectionPDH,
118 // simulate mounted writable collection
// The ".arvados#collection" file in the bind-mounted host directory
// holds a JSON document whose manifest_text lists a single 3-byte file
// "bar".
// NOTE(review): bindtmp is presumably a fresh temp directory -- confirm
// against its definition.
120 f, err := os.OpenFile(bindtmp+"/.arvados#collection", os.O_CREATE|os.O_WRONLY, 0644)
121 c.Assert(err, check.IsNil)
122 _, err = io.WriteString(f, `{"manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n"}`)
123 c.Assert(err, check.IsNil)
124 c.Assert(f.Close(), check.IsNil)
125 s.cp.mounts["/mnt-w"] = arvados.Mount{
127 PortableDataHash: arvadostest.FooCollectionPDH,
130 s.cp.bindmounts = map[string]bindmount{
131 "/mnt-w": bindmount{HostPath: bindtmp, ReadOnly: false},
// One relative link to the whole "/mnt" mount, one absolute link to a
// file inside it, and one absolute link to a file inside "/mnt-w".
134 c.Assert(os.Symlink("../../mnt", s.cp.hostOutputDir+"/l_dir"), check.IsNil)
135 c.Assert(os.Symlink("/mnt/foo", s.cp.hostOutputDir+"/l_file"), check.IsNil)
136 c.Assert(os.Symlink("/mnt-w/bar", s.cp.hostOutputDir+"/l_file_w"), check.IsNil)
138 err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
139 c.Check(err, check.IsNil)
// Expected manifest: "foo" under ./l_dir, "foo" again as ./l_file, and
// the "bar" block (37b5...) from the ".arvados#collection" manifest as
// ./l_file_w.
140 c.Check(s.cp.manifest, check.Matches, `(?ms)\./l_dir acbd\S+ 0:3:foo\n\. acbd\S+ 0:3:l_file\n\. 37b5\S+ 0:3:l_file_w\n`)
// TestSymlink checks symlinks whose targets are inside the output directory
// itself: absolute and relative links to a file, absolute and relative
// links to directories, and a link to another link. Every linked file is
// expected to be recorded with the same host source file (hostfile), and
// linked directories are expected to be walked as if they were real
// directories at the link's location.
143 func (s *copierSuite) TestSymlink(c *check.C) {
144 hostfile := s.cp.hostOutputDir + "/dir1/file"
146 err := os.MkdirAll(s.cp.hostOutputDir+"/dir1/dir2/dir3", 0755)
147 c.Assert(err, check.IsNil)
148 s.writeFileInOutputDir(c, "dir1/file", "file")
// All setup calls are evaluated while building the slice; the loop then
// asserts each returned error in turn.
149 for _, err := range []error{
// Absolute targets are written as container-side paths
// (ctrOutputDir), while the links themselves are created under the
// host output directory.
150 os.Symlink(s.cp.ctrOutputDir+"/dir1/file", s.cp.hostOutputDir+"/l_abs_file"),
151 os.Symlink(s.cp.ctrOutputDir+"/dir1/dir2", s.cp.hostOutputDir+"/l_abs_dir2"),
152 os.Symlink("../../dir1/file", s.cp.hostOutputDir+"/dir1/dir2/l_rel_file"),
153 os.Symlink("dir1/file", s.cp.hostOutputDir+"/l_rel_file"),
154 os.MkdirAll(s.cp.hostOutputDir+"/morelinks", 0755),
155 os.Symlink("../dir1/dir2", s.cp.hostOutputDir+"/morelinks/l_rel_dir2"),
156 os.Symlink("dir1/dir2/dir3", s.cp.hostOutputDir+"/l_rel_dir3"),
157 // rel. symlink -> rel. symlink -> regular file
158 os.Symlink("../dir1/dir2/l_rel_file", s.cp.hostOutputDir+"/morelinks/l_rel_l_rel_file"),
160 c.Assert(err, check.IsNil)
163 err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
164 c.Check(err, check.IsNil)
165 c.Check(s.cp.dirs, check.DeepEquals, []string{
166 "/dir1", "/dir1/dir2", "/dir1/dir2/dir3",
167 "/l_abs_dir2", "/l_abs_dir2/dir3",
169 "/morelinks", "/morelinks/l_rel_dir2", "/morelinks/l_rel_dir2/dir3",
// size is 4 everywhere because every link resolves to the 4-byte file
// written above; the ".keep" entries stand in for the empty dir3
// reached through each route.
171 c.Check(s.cp.files, check.DeepEquals, []filetodo{
172 {dst: "/dir1/dir2/dir3/.keep", src: os.DevNull},
173 {dst: "/dir1/dir2/l_rel_file", src: hostfile, size: 4},
174 {dst: "/dir1/file", src: hostfile, size: 4},
175 {dst: "/l_abs_dir2/dir3/.keep", src: os.DevNull},
176 {dst: "/l_abs_dir2/l_rel_file", src: hostfile, size: 4},
177 {dst: "/l_abs_file", src: hostfile, size: 4},
178 {dst: "/l_rel_dir3/.keep", src: os.DevNull},
179 {dst: "/l_rel_file", src: hostfile, size: 4},
180 {dst: "/morelinks/l_rel_dir2/dir3/.keep", src: os.DevNull},
181 {dst: "/morelinks/l_rel_dir2/l_rel_file", src: hostfile, size: 4},
182 {dst: "/morelinks/l_rel_l_rel_file", src: hostfile, size: 4},
// TestUnsupportedOutputMount replaces the output directory's own mount with
// one of kind "waz" and expects the walk to return an error.
186 func (s *copierSuite) TestUnsupportedOutputMount(c *check.C) {
187 s.cp.mounts["/ctr/outdir"] = arvados.Mount{Kind: "waz"}
188 err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
189 c.Check(err, check.NotNil)
// TestUnsupportedMountKindBelow declares a mount of kind "waz" at a path
// below the output directory and expects the walk to return an error.
192 func (s *copierSuite) TestUnsupportedMountKindBelow(c *check.C) {
193 s.cp.mounts["/ctr/outdir/dirk"] = arvados.Mount{Kind: "waz"}
194 err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
195 c.Check(err, check.NotNil)
// TestWritableMountBelow declares a collection mount at "/ctr/outdir/mount"
// and creates real files both beside it and inside the corresponding host
// directory. The walk is expected to record "/mount" as a directory and to
// take both files from the host output directory.
// NOTE(review): the test name implies the mount is declared writable;
// confirm against the full mount definition.
198 func (s *copierSuite) TestWritableMountBelow(c *check.C) {
199 s.cp.mounts["/ctr/outdir/mount"] = arvados.Mount{
201 PortableDataHash: arvadostest.FooCollectionPDH,
204 c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/mount", 0755), check.IsNil)
205 s.writeFileInOutputDir(c, "file", "file")
206 s.writeFileInOutputDir(c, "mount/foo", "foo")
208 err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
209 c.Check(err, check.IsNil)
210 c.Check(s.cp.dirs, check.DeepEquals, []string{"/mount"})
// The expected source for "/mount/foo" is the host file written above
// (size 3).
211 c.Check(s.cp.files, check.DeepEquals, []filetodo{
212 {src: s.cp.hostOutputDir + "/file", dst: "/file", size: 4},
213 {src: s.cp.hostOutputDir + "/mount/foo", dst: "/mount/foo", size: 3},
217 // Check some glob-matching edge cases. In particular, check that
218 // patterns like "foo/**" do not match regular files named "foo"
219 // (unless of course they are inside a directory named "foo").
//
// NOTE(review): judging by the expectations below, the second argument
// to matchGlobs appears to mean "path is a directory" -- confirm against
// the matchGlobs definition.
220 func (s *copierSuite) TestMatchGlobs(c *check.C) {
// A trailing "/**" after a wildcard name component: the name alone
// matches only when passed true.
221 s.cp.globs = []string{"foo*/**"}
222 c.Check(s.cp.matchGlobs("foo", true), check.Equals, true)
223 c.Check(s.cp.matchGlobs("food", true), check.Equals, true)
224 c.Check(s.cp.matchGlobs("foo", false), check.Equals, false)
225 c.Check(s.cp.matchGlobs("food", false), check.Equals, false)
226 c.Check(s.cp.matchGlobs("foo/bar", false), check.Equals, true)
227 c.Check(s.cp.matchGlobs("food/bar", false), check.Equals, true)
228 c.Check(s.cp.matchGlobs("foo/bar", true), check.Equals, true)
229 c.Check(s.cp.matchGlobs("food/bar", true), check.Equals, true)
// Same shape one level down, with a negated character class in the
// first component and glob metacharacters ("\\[") in the candidate
// paths.
231 s.cp.globs = []string{"ba[!/]/foo*/**"}
232 c.Check(s.cp.matchGlobs("bar/foo", true), check.Equals, true)
233 c.Check(s.cp.matchGlobs("bar/food", true), check.Equals, true)
234 c.Check(s.cp.matchGlobs("bar/foo", false), check.Equals, false)
235 c.Check(s.cp.matchGlobs("bar/food", false), check.Equals, false)
236 c.Check(s.cp.matchGlobs("bar/foo/z\\[", true), check.Equals, true)
237 c.Check(s.cp.matchGlobs("bar/food/z\\[", true), check.Equals, true)
238 c.Check(s.cp.matchGlobs("bar/foo/z\\[", false), check.Equals, true)
239 c.Check(s.cp.matchGlobs("bar/food/z\\[", false), check.Equals, true)
// "**" in the middle of the pattern as well as at the end.
241 s.cp.globs = []string{"waz/**/foo*/**"}
242 c.Check(s.cp.matchGlobs("waz/quux/foo", true), check.Equals, true)
243 c.Check(s.cp.matchGlobs("waz/quux/food", true), check.Equals, true)
244 c.Check(s.cp.matchGlobs("waz/quux/foo", false), check.Equals, false)
245 c.Check(s.cp.matchGlobs("waz/quux/food", false), check.Equals, false)
246 c.Check(s.cp.matchGlobs("waz/quux/foo/foo", true), check.Equals, true)
247 c.Check(s.cp.matchGlobs("waz/quux/food/foo", true), check.Equals, true)
248 c.Check(s.cp.matchGlobs("waz/quux/foo/foo", false), check.Equals, true)
249 c.Check(s.cp.matchGlobs("waz/quux/food/foo", false), check.Equals, true)
// "foo/**/*" matches entries at any depth below foo, but not "foo"
// itself.
251 s.cp.globs = []string{"foo/**/*"}
252 c.Check(s.cp.matchGlobs("foo", false), check.Equals, false)
253 c.Check(s.cp.matchGlobs("foo/bar", false), check.Equals, true)
254 c.Check(s.cp.matchGlobs("foo/bar/baz", false), check.Equals, true)
255 c.Check(s.cp.matchGlobs("foo/bar/baz/waz", false), check.Equals, true)
// TestSubtreeCouldMatch is a table-driven test of subtreeCouldMatch: for
// each (mount, glob) pair it builds a copier with that single glob and
// compares the result for the mount path with the trial's "could" value.
// Rows that omit "could" expect false, the field's zero value.
258 func (s *copierSuite) TestSubtreeCouldMatch(c *check.C) {
259 for _, trial := range []struct {
260 mount string // relative to output dir
// "*" matches "abc" itself but cannot match anything below it.
264 {mount: "abc", glob: "*"},
265 {mount: "abc", glob: "abc/*", could: true},
266 {mount: "abc", glob: "a*/**", could: true},
267 {mount: "abc", glob: "**", could: true},
268 {mount: "abc", glob: "*/*", could: true},
269 {mount: "abc", glob: "**/*.txt", could: true},
// Two-component mount: patterns with at most two components and no
// "**" cannot reach below it.
270 {mount: "abc/def", glob: "*"},
271 {mount: "abc/def", glob: "*/*"},
272 {mount: "abc/def", glob: "*/*.txt"},
273 {mount: "abc/def", glob: "*/*/*", could: true},
274 {mount: "abc/def", glob: "**", could: true},
275 {mount: "abc/def", glob: "**/bar", could: true},
276 {mount: "abc/def", glob: "abc/**", could: true},
277 {mount: "abc/def/ghi", glob: "*c/**/bar", could: true},
// Third component "bar" does not match "ghi", and there is no "**" to
// absorb it.
278 {mount: "abc/def/ghi", glob: "*c/*f/bar"},
279 {mount: "abc/def/ghi", glob: "abc/d[^/]f/ghi/*", could: true},
// Log the trial so a failing check can be attributed to its row.
281 c.Logf("=== %+v", trial)
283 globs: []string{trial.glob},
284 }).subtreeCouldMatch(trial.mount)
285 c.Check(got, check.Equals, trial.could)
// TestMountBelowExcludedByGlob declares several collection mounts below the
// output directory, under "include/", "exclude/" and "nonexistent/", sets
// s.cp.globs, and walks the output directory. It then checks that only
// content under "include/" shows up in files and in the manifest, and that
// the log reports "not copying" for the excluded mounts.
289 func (s *copierSuite) TestMountBelowExcludedByGlob(c *check.C) {
291 s.cp.mounts["/ctr/outdir/include/includer"] = arvados.Mount{
293 PortableDataHash: arvadostest.FooCollectionPDH,
295 s.cp.mounts["/ctr/outdir/include/includew"] = arvados.Mount{
297 PortableDataHash: arvadostest.FooCollectionPDH,
300 s.cp.mounts["/ctr/outdir/exclude/excluder"] = arvados.Mount{
302 PortableDataHash: arvadostest.FooCollectionPDH,
304 s.cp.mounts["/ctr/outdir/exclude/excludew"] = arvados.Mount{
306 PortableDataHash: arvadostest.FooCollectionPDH,
309 s.cp.mounts["/ctr/outdir/nonexistent/collection"] = arvados.Mount{
310 // As extra assurance, plant a collection that will
311 // fail if copier attempts to load its manifest. (For
312 // performance reasons it's important that copier
313 // doesn't try to load the manifest before deciding
314 // not to copy the contents.)
316 PortableDataHash: "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+1234",
318 s.cp.globs = []string{
// Host-side directories backing the four mounts; only the two
// "...w" directories get a file written into them.
322 c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/include/includer", 0755), check.IsNil)
323 c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/include/includew", 0755), check.IsNil)
324 c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/exclude/excluder", 0755), check.IsNil)
325 c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/exclude/excludew", 0755), check.IsNil)
326 s.writeFileInOutputDir(c, "include/includew/foo", "foo")
327 s.writeFileInOutputDir(c, "exclude/excludew/foo", "foo")
328 s.cp.bindmounts = map[string]bindmount{
329 "/ctr/outdir/include/includew": bindmount{HostPath: bindtmp, ReadOnly: false},
// NOTE(review): this second assignment replaces the map assigned just
// above instead of adding to it, so only the entry below remains. Its
// key is under "include/" although the excludew mount is registered
// under "/ctr/outdir/exclude/excludew". Confirm whether a single map
// with both entries (and an "exclude/" key) was intended.
331 s.cp.bindmounts = map[string]bindmount{
332 "/ctr/outdir/include/excludew": bindmount{HostPath: bindtmp, ReadOnly: false},
335 err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
336 c.Check(err, check.IsNil)
// Dump the captured log into the test output to help diagnose
// failures.
337 c.Log(s.log.String())
339 // Note it's OK that "/exclude" is not excluded by walkMount:
340 // it is just a local filesystem directory, not a mount point
341 // that's expensive to walk. In real-life usage, it will be
342 // removed from cp.dirs before any copying happens.
343 c.Check(s.cp.dirs, check.DeepEquals, []string{"/exclude", "/include", "/include/includew"})
344 c.Check(s.cp.files, check.DeepEquals, []filetodo{
345 {src: s.cp.hostOutputDir + "/include/includew/foo", dst: "/include/includew/foo", size: 3},
// The manifest must include the includer mount and must not mention
// anything containing "exclude".
347 c.Check(s.cp.manifest, check.Matches, `(?ms).*\./include/includer .*`)
348 c.Check(s.cp.manifest, check.Not(check.Matches), `(?ms).*exclude.*`)
349 c.Check(s.log.String(), check.Matches, `(?ms).*not copying \\"exclude/excluder\\".*`)
350 c.Check(s.log.String(), check.Matches, `(?ms).*not copying \\"nonexistent/collection\\".*`)
// writeFileInOutputDir is a test helper that writes data to the file at
// path, taken relative to the host output directory, and aborts the test
// (c.Assert) if opening, writing or closing fails. The parent directory
// must already exist.
353 func (s *copierSuite) writeFileInOutputDir(c *check.C, path, data string) {
// O_TRUNC is not set: if the file already exists with longer content,
// the bytes beyond len(data) are left in place.
354 f, err := os.OpenFile(s.cp.hostOutputDir+"/"+path, os.O_CREATE|os.O_WRONLY, 0644)
355 c.Assert(err, check.IsNil)
356 _, err = io.WriteString(f, data)
357 c.Assert(err, check.IsNil)
358 c.Assert(f.Close(), check.IsNil)
361 // applyGlobsToFilesAndDirs uses the same glob-matching code as
362 // applyGlobsToCollectionFS, so we don't need to test all of the same
363 // glob-matching behavior covered in TestApplyGlobsToCollectionFS. We
364 // do need to check that (a) the glob is actually being used to filter
365 // out files, and (b) non-matching dirs are still included if and only if
366 // they are ancestors of matching files.
//
// Each trial starts from the same fixed dirs/files lists, applies the
// trial's globs, and compares the surviving cp.dirs and file destinations
// with the trial's expectations.
367 func (s *copierSuite) TestApplyGlobsToFilesAndDirs(c *check.C) {
368 dirs := []string{"dir1", "dir1/dir11", "dir1/dir12", "dir2"}
369 files := []string{"dir1/file11", "dir1/dir11/file111", "dir2/file2"}
370 for _, trial := range []struct {
// append([]string{}, x...) copies the fixture slice, so the
// expectation does not share a backing array with dirs/files.
377 dirs: append([]string{}, dirs...),
378 files: append([]string{}, files...),
381 globs: []string{"**"},
382 dirs: append([]string{}, dirs...),
383 files: append([]string{}, files...),
// Only one file matches; its ancestor dirs are kept, everything else
// is dropped.
386 globs: []string{"**/file111"},
387 dirs: []string{"dir1", "dir1/dir11"},
388 files: []string{"dir1/dir11/file111"},
391 globs: []string{"nothing"},
// A glob that matches a directory keeps that directory and its
// ancestors.
396 globs: []string{"**/dir12"},
397 dirs: []string{"dir1", "dir1/dir12"},
// All files match; "dir1/dir12" is dropped because it contains none of
// them.
401 globs: []string{"**/file*"},
402 dirs: []string{"dir1", "dir1/dir11", "dir2"},
403 files: append([]string{}, files...),
406 globs: []string{"**/dir1[12]"},
407 dirs: []string{"dir1", "dir1/dir11", "dir1/dir12"},
411 globs: []string{"**/dir1[^2]"},
412 dirs: []string{"dir1", "dir1/dir11"},
416 globs: []string{"dir1/**"},
417 dirs: []string{"dir1", "dir1/dir11", "dir1/dir12"},
418 files: []string{"dir1/file11", "dir1/dir11/file111"},
421 c.Logf("=== globs: %q", trial.globs)
// Only dst is populated on each filetodo; it is also the only field
// read back after filtering.
426 for _, path := range files {
427 cp.files = append(cp.files, filetodo{dst: path})
429 cp.applyGlobsToFilesAndDirs()
// Collect the surviving destinations so they can be compared as plain
// strings.
430 var gotFiles []string
431 for _, file := range cp.files {
432 gotFiles = append(gotFiles, file.dst)
434 c.Check(cp.dirs, check.DeepEquals, trial.dirs)
435 c.Check(gotFiles, check.DeepEquals, trial.files)
// TestApplyGlobsToCollectionFS is a table-driven test of
// applyGlobsToCollectionFS. Every trial builds a collection filesystem from
// the same manifest (empty files "foo", "bar", "baz/quux" and
// "baz/parent1/item1"), applies the trial's globs, walks the filesystem
// collecting paths into got, and compares got with the trial's expect list
// after sorting expect.
439 func (s *copierSuite) TestApplyGlobsToCollectionFS(c *check.C) {
440 for _, trial := range []struct {
446 expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
449 globs: []string{"foo"},
450 expect: []string{"foo"},
453 globs: []string{"baz/parent1/item1"},
454 expect: []string{"baz/parent1/item1"},
457 globs: []string{"**"},
458 expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
461 globs: []string{"**/*"},
462 expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
// A single "*" does not cross directory boundaries: only the top-level
// files are expected.
465 globs: []string{"*"},
466 expect: []string{"foo", "bar"},
469 globs: []string{"baz"},
473 globs: []string{"b*/**"},
474 expect: []string{"baz/quux", "baz/parent1/item1"},
// NOTE(review): this appears to repeat the {"baz"} trial a few rows
// above -- confirm whether a different pattern was intended.
477 globs: []string{"baz"},
481 globs: []string{"baz/**"},
482 expect: []string{"baz/quux", "baz/parent1/item1"},
485 globs: []string{"baz/*"},
486 expect: []string{"baz/quux"},
489 globs: []string{"baz/**/*uu?"},
490 expect: []string{"baz/quux"},
493 globs: []string{"**/*m1"},
494 expect: []string{"baz/parent1/item1"},
497 globs: []string{"*/*/*/**/*1"},
501 globs: []string{"f*", "**/q*"},
502 expect: []string{"foo", "baz/quux"},
505 globs: []string{"\\"}, // invalid pattern matches nothing
// An invalid pattern alongside a valid one: the valid one still
// applies.
509 globs: []string{"\\", "foo"},
510 expect: []string{"foo"},
// "foo" is a regular file in the manifest, so patterns that require
// something below "foo" have no expect entry listed.
513 globs: []string{"foo/**"},
517 globs: []string{"foo*/**"},
521 c.Logf("=== globs: %q", trial.globs)
// A fresh filesystem per trial keeps one trial's filtering from
// affecting the next.
522 collfs, err := (&arvados.Collection{ManifestText: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo 0:0:bar 0:0:baz/quux 0:0:baz/parent1/item1\n"}).FileSystem(nil, nil)
523 c.Assert(err, check.IsNil)
524 cp := copier{globs: trial.globs}
525 err = cp.applyGlobsToCollectionFS(collfs)
526 if !c.Check(err, check.IsNil) {
530 fs.WalkDir(arvados.FS(collfs), "", func(path string, ent fs.DirEntry, err error) error {
532 got = append(got, path)
// The expect lists above are not written in sorted order; sort before
// comparing with got.
537 sort.Strings(trial.expect)
538 c.Check(got, check.DeepEquals, trial.expect)