1 // Copyright (C) The Arvados Authors. All rights reserved.
3 // SPDX-License-Identifier: AGPL-3.0
17 "git.arvados.org/arvados.git/sdk/go/arvados"
18 "git.arvados.org/arvados.git/sdk/go/keepclient"
19 "git.arvados.org/arvados.git/sdk/go/manifest"
// printfer is the minimal logging interface used in this file: any
// value with a Printf method of this shape. cp.logger is called
// through such a method (see copyFile and walkHostFS).
22 type printfer interface {
23 Printf(string, ...interface{})
// errTooManySymlinks is returned by walkHostFS from its symlink
// branch, which tracks a maxSymlinks budget.
26 var errTooManySymlinks = errors.New("too many symlinks, or symlink cycle")
// limitFollowSymlinks is the initial maxSymlinks value Copy passes to
// walkMount. walkHostFS passes maxSymlinks-1 each time it follows a
// symlink.
28 const limitFollowSymlinks = 10
// filetodo is one entry in cp.files, i.e., one file to be copied by
// copyFile: f.src is opened on the host with os.Open, f.dst is the
// path created in the output collection filesystem, and f.size is
// the byte count reported in the log message.
30 type filetodo struct {
36 // copier copies data from a finished container's output path to a new
37 // Arvados collection.
39 // Regular files (and symlinks to regular files) in hostOutputDir are
40 // copied from the local filesystem.
42 // Symlinks to mounted collections, and any collections mounted under
43 // ctrOutputDir, are copied by transforming the relevant parts of the
44 // existing manifests, without moving any data around.
46 // Symlinks to other parts of the container's filesystem result in
// errors (walkMount rejects any path that is not inside a mount).
51 // manifest, err := (&copier{...}).Copy()
// client and keepClient are passed to Collection.FileSystem in Copy.
53 client *arvados.Client
// arvClient is used by getManifest to fetch collection records.
54 arvClient IArvadosClient
55 keepClient IKeepClient
// mounts and secretMounts are keyed by mount point, i.e., an absolute
// path in the container's filesystem; walkMount matches them against
// src by path prefix.
59 mounts map[string]arvados.Mount
60 secretMounts map[string]arvados.Mount
// manifestCache maps a portable data hash to the manifest.Manifest
// built from its collection record. It is created lazily by
// getManifest.
67 manifestCache map[string]*manifest.Manifest
70 // Copy copies data as needed, and returns a new manifest.
//
// It first walks the container's output directory with walkMount,
// which appends to cp.manifest, cp.dirs and cp.files. It then opens a
// collection filesystem on cp.manifest, creates each directory in
// cp.dirs, copies each file in cp.files, and returns the result of
// marshalling the filesystem back to a manifest.
71 func (cp *copier) Copy() (string, error) {
72 err := cp.walkMount("", cp.ctrOutputDir, limitFollowSymlinks, true)
74 return "", fmt.Errorf("error scanning files to copy to output: %v", err)
76 fs, err := (&arvados.Collection{ManifestText: cp.manifest}).FileSystem(cp.client, cp.keepClient)
78 return "", fmt.Errorf("error creating Collection.FileSystem: %v", err)
80 for _, d := range cp.dirs {
81 err = fs.Mkdir(d, 0777)
// os.ErrExist from Mkdir is tolerated: the directory may already be
// present in the filesystem built from cp.manifest.
82 if err != nil && err != os.ErrExist {
83 return "", fmt.Errorf("error making directory %q in output collection: %v", d, err)
87 var lastparentdir string
88 for _, f := range cp.files {
89 // If a dir has just had its last file added, do a
90 // full Flush. Otherwise, do a partial Flush (write
91 // full-size blocks, but leave the last short block
92 // open so f's data can be packed with it).
93 dir, _ := filepath.Split(f.dst)
// The second argument to Flush is true only when the parent
// directory changed; when it is false, this branch was entered
// because unflushed exceeded keepclient.BLOCKSIZE.
94 if dir != lastparentdir || unflushed > keepclient.BLOCKSIZE {
95 if err := fs.Flush("/"+lastparentdir, dir != lastparentdir); err != nil {
96 return "", fmt.Errorf("error flushing output collection file data: %v", err)
// copyFile returns the number of bytes it copied.
102 n, err := cp.copyFile(fs, f)
104 return "", fmt.Errorf("error copying file %q into output collection: %v", f, err)
108 return fs.MarshalManifest(".")
// copyFile copies the host file f.src to f.dst in the collection
// filesystem fs, creating f.dst if needed and opening it write-only.
// It returns the number of bytes copied by io.Copy; on the success
// path the returned error is the result of closing the destination
// file.
111 func (cp *copier) copyFile(fs arvados.CollectionFileSystem, f filetodo) (int64, error) {
112 cp.logger.Printf("copying %q (%d bytes)", f.dst, f.size)
113 dst, err := fs.OpenFile(f.dst, os.O_CREATE|os.O_WRONLY, 0666)
117 src, err := os.Open(f.src)
123 n, err := io.Copy(dst, src)
// Return the error from Close so it is not silently dropped.
128 return n, dst.Close()
131 // Append to cp.manifest, cp.files, and cp.dirs so as to copy src (an
132 // absolute path in the container's filesystem) to dest (an absolute
133 // path in the output collection, or "" for output root).
135 // src must be (or be a descendant of) a readonly "collection" mount,
136 // a writable collection mounted at ctrOutputDir, or a "tmp" mount.
138 // If walkMountsBelow is true, include contents of any collection
139 // mounted below src as well.
140 func (cp *copier) walkMount(dest, src string, maxSymlinks int, walkMountsBelow bool) error {
141 // srcRoot, srcMount indicate the innermost mount that
// contains src, i.e., the longest mount point that is a path prefix
// of src.
144 var srcMount arvados.Mount
145 for root, mnt := range cp.mounts {
// Comparing with a trailing "/" on both sides ensures that only whole
// path components match (e.g., "/mnt1" does not match "/mnt10").
146 if len(root) > len(srcRoot) && strings.HasPrefix(src+"/", root+"/") {
147 srcRoot, srcMount = root, mnt
150 for root := range cp.secretMounts {
// A secret mount matters only if it is at least as specific as the
// best regular mount found above.
151 if len(root) > len(srcRoot) && strings.HasPrefix(src+"/", root+"/") {
152 // Silently omit secrets, and symlinks to
// secrets.
158 return fmt.Errorf("cannot output file %q: not in any mount", src)
161 // srcRelPath is the path to the file/dir we are trying to
162 // copy, relative to its mount point -- ".", "foo.txt", ...
// (filepath.Join cleans its result, so there is no leading "./".)
// The mount's own Path is included, so srcRelPath is relative to the
// root of the mounted collection.
163 srcRelPath := filepath.Join(".", srcMount.Path, src[len(srcRoot):])
// No statements are visible for the ExcludeFromOutput case in this
// view.
166 case srcMount.ExcludeFromOutput:
167 case srcMount.Kind == "tmp":
168 // Handle by walking the host filesystem.
169 return cp.walkHostFS(dest, src, maxSymlinks, walkMountsBelow)
170 case srcMount.Kind != "collection":
171 return fmt.Errorf("%q: unsupported mount %q in output (kind is %q)", src, srcRoot, srcMount.Kind)
172 case !srcMount.Writable:
// Read-only collection: look up its manifest by portable data hash
// and append the part under srcRelPath, relocated to dest.
173 mft, err := cp.getManifest(srcMount.PortableDataHash)
177 cp.manifest += mft.Extract(srcRelPath, dest).Text
// Remaining case (a writable collection mount, given the cases
// above): read the collection record as JSON from the special
// ".arvados#collection" file under the mount's host directory, and
// extract from that manifest text instead.
179 hostRoot, err := cp.hostRoot(srcRoot)
183 f, err := os.Open(filepath.Join(hostRoot, ".arvados#collection"))
188 var coll arvados.Collection
189 err = json.NewDecoder(f).Decode(&coll)
193 mft := manifest.Manifest{Text: coll.ManifestText}
194 cp.manifest += mft.Extract(srcRelPath, dest).Text
// NOTE(review): the guard around this call is not visible in this
// view; per the doc comment it presumably depends on the
// walkMountsBelow argument.
197 return cp.walkMountsBelow(dest, src)
// walkMountsBelow calls walkMount for mounts in cp.mounts whose mount
// point is strictly below src, mapping each one to the corresponding
// path below dest. Mounts for which copyRegularFiles is true are
// treated differently (see the comment in the loop).
202 func (cp *copier) walkMountsBelow(dest, src string) error {
203 for mnt, mntinfo := range cp.mounts {
// Only consider mount points strictly below src.
204 if !strings.HasPrefix(mnt, src+"/") {
207 if cp.copyRegularFiles(mntinfo) {
208 // These got copied into the nearest parent
209 // mount as regular files during setup, so
210 // they get copied as regular files when we
211 // process the parent. Output will reflect any
212 // changes and deletions done by the
// container.
216 // Example: we are processing dest=/foo src=/mnt1/dir1
217 // (perhaps we followed a symlink /outdir/foo ->
218 // /mnt1/dir1). Caller has already processed the
219 // collection mounted at /mnt1, but now we find that
220 // /mnt1/dir1/mnt2 is also a mount, so we need to copy
221 // src=/mnt1/dir1/mnt2 to dest=/foo/mnt2.
223 // We handle all descendants of /mnt1/dir1 in this
224 // loop instead of using recursion:
225 // /mnt1/dir1/mnt2/mnt3 is a child of both /mnt1 and
226 // /mnt1/dir1/mnt2, but we only want to walk it
227 // once. (This simplification is safe because mounted
228 // collections cannot contain symlinks.)
// mnt[len(src):] is the mount point's path relative to src, with its
// leading "/". walkMountsBelow=false prevents the nested walk from
// visiting deeper mounts a second time.
229 err := cp.walkMount(dest+mnt[len(src):], mnt, 0, false)
237 // Add entries to cp.dirs and cp.files so as to copy src (an absolute
238 // path in the container's filesystem which corresponds to a real file
239 // or directory in cp.hostOutputDir) to dest (an absolute path in the
240 // output collection, or "" for output root).
242 // Always follow symlinks.
244 // If includeMounts is true, include mounts at and below src.
245 // Otherwise, skip them.
246 func (cp *copier) walkHostFS(dest, src string, maxSymlinks int, includeMounts bool) error {
// NOTE(review): the guard around this call is not visible in this
// view; per the doc comment it presumably depends on includeMounts.
248 err := cp.walkMountsBelow(dest, src)
// Translate the container path src into the corresponding host path
// by replacing the ctrOutputDir prefix with hostOutputDir.
254 hostsrc := cp.hostOutputDir + src[len(cp.ctrOutputDir):]
256 // If src is a symlink, walk its target.
// Lstat (not Stat) is used so that a symlink is reported as a symlink
// rather than as its target.
257 fi, err := os.Lstat(hostsrc)
259 return fmt.Errorf("lstat %q: %s", src, err)
261 if fi.Mode()&os.ModeSymlink != 0 {
263 return errTooManySymlinks
265 target, err := os.Readlink(hostsrc)
267 return fmt.Errorf("readlink %q: %s", src, err)
// A relative target is resolved against the directory containing src,
// in the container's path namespace (not the host's).
269 if !strings.HasPrefix(target, "/") {
270 target = filepath.Join(filepath.Dir(src), target)
// The target goes back through walkMount because it may be inside a
// different mount than src; one unit of the symlink budget is spent.
272 return cp.walkMount(dest, target, maxSymlinks-1, true)
275 // If src is a regular directory, append it to cp.dirs and
276 // walk each of its children. (If there are no children,
277 // create an empty file "dest/.keep".)
278 if fi.Mode().IsDir() {
280 cp.dirs = append(cp.dirs, dest)
282 dir, err := os.Open(hostsrc)
284 return fmt.Errorf("open %q: %s", src, err)
// A negative count asks Readdirnames for all entries at once.
286 names, err := dir.Readdirnames(-1)
289 return fmt.Errorf("readdirnames %q: %s", src, err)
293 cp.files = append(cp.files, filetodo{
295 dst: dest + "/.keep",
301 for _, name := range names {
// dest and src are shadowed here with the child's paths.
302 dest, src := dest+"/"+name, src+"/"+name
303 if _, isSecret := cp.secretMounts[src]; isSecret {
306 if mntinfo, isMount := cp.mounts[src]; isMount && !cp.copyRegularFiles(mntinfo) {
307 // If a regular file/dir somehow
308 // exists at a path that's also a
309 // mount target, ignore the file --
310 // the mount has already been included
311 // with walkMountsBelow().
313 // (...except mount types that are
314 // handled as regular files.)
// Recurse with includeMounts=false; the comment above explains that
// mounts have already been included by walkMountsBelow.
317 err = cp.walkHostFS(dest, src, maxSymlinks, false)
325 // If src is a regular file, append it to cp.files.
326 if fi.Mode().IsRegular() {
327 cp.files = append(cp.files, filetodo{
// Anything else (not a symlink, directory, or regular file) is
// logged as skipped.
334 cp.logger.Printf("Skipping unsupported file type (mode %o) in output dir: %q", fi.Mode(), src)
338 // Return the host path that was mounted at the given path in the
// container.
//
// The output directory is handled specially: ctrOutputDir maps to
// hostOutputDir. Any other path is looked up in cp.binds; an error is
// returned if no bind entry has ctrRoot as its container path.
340 func (cp *copier) hostRoot(ctrRoot string) (string, error) {
341 if ctrRoot == cp.ctrOutputDir {
342 return cp.hostOutputDir, nil
344 for _, bind := range cp.binds {
// Each bind entry is split on ":"; the first token is used as the
// host path and the second as the container path. Any further tokens
// are ignored here.
// NOTE(review): a host path that itself contains ":" would be split
// incorrectly -- confirm that this cannot occur.
345 tokens := strings.Split(bind, ":")
346 if len(tokens) >= 2 && tokens[1] == ctrRoot {
347 return tokens[0], nil
350 return "", fmt.Errorf("not bind-mounted: %q", ctrRoot)
// copyRegularFiles reports whether mount m is one of the kinds that
// this file handles as regular files rather than as a mount: "text",
// "json", or a writable "collection".
353 func (cp *copier) copyRegularFiles(m arvados.Mount) bool {
354 return m.Kind == "text" || m.Kind == "json" || (m.Kind == "collection" && m.Writable)
// getManifest returns the manifest of the collection identified by
// pdh. Results are cached in cp.manifestCache; on a cache miss the
// collection record is fetched with cp.arvClient and a Manifest built
// from its ManifestText is stored in the cache.
357 func (cp *copier) getManifest(pdh string) (*manifest.Manifest, error) {
358 if mft, ok := cp.manifestCache[pdh]; ok {
361 var coll arvados.Collection
362 err := cp.arvClient.Get("collections", pdh, nil, &coll)
364 return nil, fmt.Errorf("error retrieving collection record for %q: %s", pdh, err)
366 mft := &manifest.Manifest{Text: coll.ManifestText}
// The cache map is created on first use, since writing to a nil map
// would panic.
367 if cp.manifestCache == nil {
368 cp.manifestCache = map[string]*manifest.Manifest{pdh: mft}
370 cp.manifestCache[pdh] = mft