12430: Drop non-matching files from output if output_glob specified.
[arvados.git] / lib / crunchrun / copier_test.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package crunchrun
6
7 import (
8         "bytes"
9         "io"
10         "io/fs"
11         "io/ioutil"
12         "os"
13         "sort"
14         "syscall"
15
16         "git.arvados.org/arvados.git/sdk/go/arvados"
17         "git.arvados.org/arvados.git/sdk/go/arvadostest"
18         "github.com/sirupsen/logrus"
19         check "gopkg.in/check.v1"
20 )
21
// Register copierSuite with the gocheck runner.
var _ = check.Suite(&copierSuite{})
23
// copierSuite is the test fixture for copier: it holds the copier
// under test and a buffer that captures the copier's log output.
// Both fields are reinitialized by SetUpTest.
type copierSuite struct {
	cp  copier       // copier under test
	log bytes.Buffer // destination of cp's logger output
}
28
29 func (s *copierSuite) SetUpTest(c *check.C) {
30         tmpdir := c.MkDir()
31         s.log = bytes.Buffer{}
32         s.cp = copier{
33                 client:        arvados.NewClientFromEnv(),
34                 hostOutputDir: tmpdir,
35                 ctrOutputDir:  "/ctr/outdir",
36                 mounts: map[string]arvados.Mount{
37                         "/ctr/outdir": {Kind: "tmp"},
38                 },
39                 secretMounts: map[string]arvados.Mount{
40                         "/secret_text": {Kind: "text", Content: "xyzzy"},
41                 },
42                 logger: &logrus.Logger{Out: &s.log, Formatter: &logrus.TextFormatter{}, Level: logrus.InfoLevel},
43         }
44 }
45
46 func (s *copierSuite) TestEmptyOutput(c *check.C) {
47         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
48         c.Check(err, check.IsNil)
49         c.Check(s.cp.dirs, check.DeepEquals, []string(nil))
50         c.Check(len(s.cp.files), check.Equals, 0)
51 }
52
53 func (s *copierSuite) TestRegularFilesAndDirs(c *check.C) {
54         err := os.MkdirAll(s.cp.hostOutputDir+"/dir1/dir2/dir3", 0755)
55         c.Assert(err, check.IsNil)
56         f, err := os.OpenFile(s.cp.hostOutputDir+"/dir1/foo", os.O_CREATE|os.O_WRONLY, 0644)
57         c.Assert(err, check.IsNil)
58         _, err = io.WriteString(f, "foo")
59         c.Assert(err, check.IsNil)
60         c.Assert(f.Close(), check.IsNil)
61         err = syscall.Mkfifo(s.cp.hostOutputDir+"/dir1/fifo", 0644)
62         c.Assert(err, check.IsNil)
63
64         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
65         c.Check(err, check.IsNil)
66         c.Check(s.cp.dirs, check.DeepEquals, []string{"/dir1", "/dir1/dir2", "/dir1/dir2/dir3"})
67         c.Check(s.cp.files, check.DeepEquals, []filetodo{
68                 {src: os.DevNull, dst: "/dir1/dir2/dir3/.keep"},
69                 {src: s.cp.hostOutputDir + "/dir1/foo", dst: "/dir1/foo", size: 3},
70         })
71         c.Check(s.log.String(), check.Matches, `.* msg="Skipping unsupported file type \(mode 200000644\) in output dir: \\"/ctr/outdir/dir1/fifo\\""\n`)
72 }
73
74 func (s *copierSuite) TestSymlinkCycle(c *check.C) {
75         c.Assert(os.Mkdir(s.cp.hostOutputDir+"/dir1", 0755), check.IsNil)
76         c.Assert(os.Mkdir(s.cp.hostOutputDir+"/dir2", 0755), check.IsNil)
77         c.Assert(os.Symlink("../dir2", s.cp.hostOutputDir+"/dir1/l_dir2"), check.IsNil)
78         c.Assert(os.Symlink("../dir1", s.cp.hostOutputDir+"/dir2/l_dir1"), check.IsNil)
79         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
80         c.Check(err, check.ErrorMatches, `.*cycle.*`)
81 }
82
83 func (s *copierSuite) TestSymlinkTargetMissing(c *check.C) {
84         c.Assert(os.Symlink("./missing", s.cp.hostOutputDir+"/symlink"), check.IsNil)
85         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
86         c.Check(err, check.ErrorMatches, `.*/ctr/outdir/missing.*`)
87 }
88
89 func (s *copierSuite) TestSymlinkTargetNotMounted(c *check.C) {
90         c.Assert(os.Symlink("../boop", s.cp.hostOutputDir+"/symlink"), check.IsNil)
91         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
92         c.Check(err, check.ErrorMatches, `.*/ctr/boop.*`)
93 }
94
95 func (s *copierSuite) TestSymlinkToSecret(c *check.C) {
96         c.Assert(os.Symlink("/secret_text", s.cp.hostOutputDir+"/symlink"), check.IsNil)
97         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
98         c.Check(err, check.IsNil)
99         c.Check(len(s.cp.dirs), check.Equals, 0)
100         c.Check(len(s.cp.files), check.Equals, 0)
101 }
102
103 func (s *copierSuite) TestSecretInOutputDir(c *check.C) {
104         s.cp.secretMounts["/ctr/outdir/secret_text"] = s.cp.secretMounts["/secret_text"]
105         s.writeFileInOutputDir(c, "secret_text", "xyzzy")
106         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
107         c.Check(err, check.IsNil)
108         c.Check(len(s.cp.dirs), check.Equals, 0)
109         c.Check(len(s.cp.files), check.Equals, 0)
110 }
111
112 func (s *copierSuite) TestSymlinkToMountedCollection(c *check.C) {
113         // simulate mounted read-only collection
114         s.cp.mounts["/mnt"] = arvados.Mount{
115                 Kind:             "collection",
116                 PortableDataHash: arvadostest.FooCollectionPDH,
117         }
118
119         // simulate mounted writable collection
120         bindtmp, err := ioutil.TempDir("", "crunch-run.test.")
121         c.Assert(err, check.IsNil)
122         defer os.RemoveAll(bindtmp)
123         f, err := os.OpenFile(bindtmp+"/.arvados#collection", os.O_CREATE|os.O_WRONLY, 0644)
124         c.Assert(err, check.IsNil)
125         _, err = io.WriteString(f, `{"manifest_text":". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n"}`)
126         c.Assert(err, check.IsNil)
127         c.Assert(f.Close(), check.IsNil)
128         s.cp.mounts["/mnt-w"] = arvados.Mount{
129                 Kind:             "collection",
130                 PortableDataHash: arvadostest.FooCollectionPDH,
131                 Writable:         true,
132         }
133         s.cp.bindmounts = map[string]bindmount{
134                 "/mnt-w": bindmount{HostPath: bindtmp, ReadOnly: false},
135         }
136
137         c.Assert(os.Symlink("../../mnt", s.cp.hostOutputDir+"/l_dir"), check.IsNil)
138         c.Assert(os.Symlink("/mnt/foo", s.cp.hostOutputDir+"/l_file"), check.IsNil)
139         c.Assert(os.Symlink("/mnt-w/bar", s.cp.hostOutputDir+"/l_file_w"), check.IsNil)
140
141         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
142         c.Check(err, check.IsNil)
143         c.Check(s.cp.manifest, check.Matches, `(?ms)\./l_dir acbd\S+ 0:3:foo\n\. acbd\S+ 0:3:l_file\n\. 37b5\S+ 0:3:l_file_w\n`)
144 }
145
146 func (s *copierSuite) TestSymlink(c *check.C) {
147         hostfile := s.cp.hostOutputDir + "/dir1/file"
148
149         err := os.MkdirAll(s.cp.hostOutputDir+"/dir1/dir2/dir3", 0755)
150         c.Assert(err, check.IsNil)
151         s.writeFileInOutputDir(c, "dir1/file", "file")
152         for _, err := range []error{
153                 os.Symlink(s.cp.ctrOutputDir+"/dir1/file", s.cp.hostOutputDir+"/l_abs_file"),
154                 os.Symlink(s.cp.ctrOutputDir+"/dir1/dir2", s.cp.hostOutputDir+"/l_abs_dir2"),
155                 os.Symlink("../../dir1/file", s.cp.hostOutputDir+"/dir1/dir2/l_rel_file"),
156                 os.Symlink("dir1/file", s.cp.hostOutputDir+"/l_rel_file"),
157                 os.MkdirAll(s.cp.hostOutputDir+"/morelinks", 0755),
158                 os.Symlink("../dir1/dir2", s.cp.hostOutputDir+"/morelinks/l_rel_dir2"),
159                 os.Symlink("dir1/dir2/dir3", s.cp.hostOutputDir+"/l_rel_dir3"),
160                 // rel. symlink -> rel. symlink -> regular file
161                 os.Symlink("../dir1/dir2/l_rel_file", s.cp.hostOutputDir+"/morelinks/l_rel_l_rel_file"),
162         } {
163                 c.Assert(err, check.IsNil)
164         }
165
166         err = s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
167         c.Check(err, check.IsNil)
168         c.Check(s.cp.dirs, check.DeepEquals, []string{
169                 "/dir1", "/dir1/dir2", "/dir1/dir2/dir3",
170                 "/l_abs_dir2", "/l_abs_dir2/dir3",
171                 "/l_rel_dir3",
172                 "/morelinks", "/morelinks/l_rel_dir2", "/morelinks/l_rel_dir2/dir3",
173         })
174         c.Check(s.cp.files, check.DeepEquals, []filetodo{
175                 {dst: "/dir1/dir2/dir3/.keep", src: os.DevNull},
176                 {dst: "/dir1/dir2/l_rel_file", src: hostfile, size: 4},
177                 {dst: "/dir1/file", src: hostfile, size: 4},
178                 {dst: "/l_abs_dir2/dir3/.keep", src: os.DevNull},
179                 {dst: "/l_abs_dir2/l_rel_file", src: hostfile, size: 4},
180                 {dst: "/l_abs_file", src: hostfile, size: 4},
181                 {dst: "/l_rel_dir3/.keep", src: os.DevNull},
182                 {dst: "/l_rel_file", src: hostfile, size: 4},
183                 {dst: "/morelinks/l_rel_dir2/dir3/.keep", src: os.DevNull},
184                 {dst: "/morelinks/l_rel_dir2/l_rel_file", src: hostfile, size: 4},
185                 {dst: "/morelinks/l_rel_l_rel_file", src: hostfile, size: 4},
186         })
187 }
188
189 func (s *copierSuite) TestUnsupportedOutputMount(c *check.C) {
190         s.cp.mounts["/ctr/outdir"] = arvados.Mount{Kind: "waz"}
191         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
192         c.Check(err, check.NotNil)
193 }
194
195 func (s *copierSuite) TestUnsupportedMountKindBelow(c *check.C) {
196         s.cp.mounts["/ctr/outdir/dirk"] = arvados.Mount{Kind: "waz"}
197         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
198         c.Check(err, check.NotNil)
199 }
200
201 func (s *copierSuite) TestWritableMountBelow(c *check.C) {
202         s.cp.mounts["/ctr/outdir/mount"] = arvados.Mount{
203                 Kind:             "collection",
204                 PortableDataHash: arvadostest.FooCollectionPDH,
205                 Writable:         true,
206         }
207         c.Assert(os.MkdirAll(s.cp.hostOutputDir+"/mount", 0755), check.IsNil)
208         s.writeFileInOutputDir(c, "file", "file")
209         s.writeFileInOutputDir(c, "mount/foo", "foo")
210
211         err := s.cp.walkMount("", s.cp.ctrOutputDir, 10, true)
212         c.Check(err, check.IsNil)
213         c.Check(s.cp.dirs, check.DeepEquals, []string{"/mount"})
214         c.Check(s.cp.files, check.DeepEquals, []filetodo{
215                 {src: s.cp.hostOutputDir + "/file", dst: "/file", size: 4},
216                 {src: s.cp.hostOutputDir + "/mount/foo", dst: "/mount/foo", size: 3},
217         })
218 }
219
220 func (s *copierSuite) writeFileInOutputDir(c *check.C, path, data string) {
221         f, err := os.OpenFile(s.cp.hostOutputDir+"/"+path, os.O_CREATE|os.O_WRONLY, 0644)
222         c.Assert(err, check.IsNil)
223         _, err = io.WriteString(f, data)
224         c.Assert(err, check.IsNil)
225         c.Assert(f.Close(), check.IsNil)
226 }
227
228 func (s *copierSuite) TestApplyGlobsToFilesAndDirs(c *check.C) {
229         dirs := []string{"dir1", "dir1/dir11", "dir1/dir12", "dir2"}
230         files := []string{"dir1/file11", "dir1/dir11/file111", "dir2/file2"}
231         for _, trial := range []struct {
232                 globs []string
233                 dirs  []string
234                 files []string
235         }{
236                 {
237                         globs: []string{},
238                         dirs:  append([]string{}, dirs...),
239                         files: append([]string{}, files...),
240                 },
241                 {
242                         globs: []string{"**"},
243                         dirs:  append([]string{}, dirs...),
244                         files: append([]string{}, files...),
245                 },
246                 {
247                         globs: []string{"**/file111"},
248                         dirs:  []string{"dir1", "dir1/dir11"},
249                         files: []string{"dir1/dir11/file111"},
250                 },
251                 {
252                         globs: []string{"nothing"},
253                         dirs:  nil,
254                         files: nil,
255                 },
256                 {
257                         globs: []string{"**/dir12"},
258                         dirs:  []string{"dir1", "dir1/dir12"},
259                         files: nil,
260                 },
261                 {
262                         globs: []string{"**/file*"},
263                         dirs:  []string{"dir1", "dir1/dir11", "dir2"},
264                         files: append([]string{}, files...),
265                 },
266                 {
267                         globs: []string{"**/dir1[12]"},
268                         dirs:  []string{"dir1", "dir1/dir11", "dir1/dir12"},
269                         files: nil,
270                 },
271                 {
272                         globs: []string{"**/dir1[^2]"},
273                         dirs:  []string{"dir1", "dir1/dir11"},
274                         files: nil,
275                 },
276         } {
277                 c.Logf("=== globs: %q", trial.globs)
278                 cp := copier{
279                         globs: trial.globs,
280                         dirs:  dirs,
281                 }
282                 for _, path := range files {
283                         cp.files = append(cp.files, filetodo{dst: path})
284                 }
285                 cp.applyGlobsToFilesAndDirs()
286                 var gotFiles []string
287                 for _, file := range cp.files {
288                         gotFiles = append(gotFiles, file.dst)
289                 }
290                 c.Check(cp.dirs, check.DeepEquals, trial.dirs)
291                 c.Check(gotFiles, check.DeepEquals, trial.files)
292         }
293 }
294
295 func (s *copierSuite) TestApplyGlobsToCollectionFS(c *check.C) {
296         for _, trial := range []struct {
297                 globs  []string
298                 expect []string
299         }{
300                 {
301                         globs:  nil,
302                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
303                 },
304                 {
305                         globs:  []string{"foo"},
306                         expect: []string{"foo"},
307                 },
308                 {
309                         globs:  []string{"baz/parent1/item1"},
310                         expect: []string{"baz/parent1/item1"},
311                 },
312                 {
313                         globs:  []string{"**"},
314                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
315                 },
316                 {
317                         globs:  []string{"**/*"},
318                         expect: []string{"foo", "bar", "baz/quux", "baz/parent1/item1"},
319                 },
320                 {
321                         globs:  []string{"*"},
322                         expect: []string{"foo", "bar"},
323                 },
324                 {
325                         globs:  []string{"baz"},
326                         expect: nil,
327                 },
328                 {
329                         globs:  []string{"b*/**"},
330                         expect: []string{"baz/quux", "baz/parent1/item1"},
331                 },
332                 {
333                         globs:  []string{"baz"},
334                         expect: nil,
335                 },
336                 {
337                         globs:  []string{"baz/**"},
338                         expect: []string{"baz/quux", "baz/parent1/item1"},
339                 },
340                 {
341                         globs:  []string{"baz/*"},
342                         expect: []string{"baz/quux"},
343                 },
344                 {
345                         globs:  []string{"baz/**/*uu?"},
346                         expect: []string{"baz/quux"},
347                 },
348                 {
349                         globs:  []string{"**/*m1"},
350                         expect: []string{"baz/parent1/item1"},
351                 },
352                 {
353                         globs:  []string{"*/*/*/**/*1"},
354                         expect: nil,
355                 },
356                 {
357                         globs:  []string{"f*", "**/q*"},
358                         expect: []string{"foo", "baz/quux"},
359                 },
360                 {
361                         globs:  []string{"\\"}, // invalid pattern matches nothing
362                         expect: nil,
363                 },
364                 {
365                         globs:  []string{"\\", "foo"},
366                         expect: []string{"foo"},
367                 },
368                 {
369                         globs:  []string{"foo/**"},
370                         expect: nil,
371                 },
372                 {
373                         globs:  []string{"foo*/**"},
374                         expect: nil,
375                 },
376         } {
377                 c.Logf("=== globs: %q", trial.globs)
378                 collfs, err := (&arvados.Collection{ManifestText: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo 0:0:bar 0:0:baz/quux 0:0:baz/parent1/item1\n"}).FileSystem(nil, nil)
379                 c.Assert(err, check.IsNil)
380                 cp := copier{globs: trial.globs}
381                 err = cp.applyGlobsToCollectionFS(collfs)
382                 if !c.Check(err, check.IsNil) {
383                         continue
384                 }
385                 var got []string
386                 fs.WalkDir(arvados.FS(collfs), "", func(path string, ent fs.DirEntry, err error) error {
387                         if !ent.IsDir() {
388                                 got = append(got, path)
389                         }
390                         return nil
391                 })
392                 sort.Strings(got)
393                 sort.Strings(trial.expect)
394                 c.Check(got, check.DeepEquals, trial.expect)
395         }
396 }