1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: Apache-2.0
8 import arvados.commands.run
11 # Implements "Virtual Working Directory"
12 # Provides a way of emulating a shared writable directory in Keep based
13 # on a "check out, edit, check in, merge" model.
# At the moment, this only permits adding new files; applications
# cannot modify or delete existing files.
# Create a symlink tree rooted at target_dir mirroring arv-mounted
# source_collection. target_dir must be empty, and will be created if it
# does not exist.
def checkout(source_collection, target_dir, keepmount=None):
    """Build a symlink tree in `target_dir` mirroring a mounted collection.

    Every directory found under ``keepmount/source_collection`` is created
    as a real directory under `target_dir`, and every file is represented
    by a symlink pointing at the file inside the mount.

    Arguments:

    * source_collection: path of the collection relative to `keepmount`.
    * target_dir: directory to populate.  It is created if it does not
      exist and must be empty.
    * keepmount: root of the mount.  When None, the value of the
      ``TASK_KEEPMOUNT`` environment variable is used (KeyError if unset).

    Raises Exception if `target_dir` already contains entries.
    """
    if keepmount is None:
        keepmount = os.environ['TASK_KEEPMOUNT']

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    existing = os.listdir(target_dir)
    if existing:
        raise Exception("target_dir must be empty before checkout, contains %s" % existing)

    stem = os.path.join(keepmount, source_collection)
    # topdown=True guarantees a parent directory is created in target_dir
    # before os.walk descends into it.
    for root, dirs, files in os.walk(stem, topdown=True):
        # Use relpath instead of slicing by len(stem)+1: slicing drops the
        # first character of each sub-directory when stem ends with a
        # separator (e.g. source_collection given as "coll/").
        rel = os.path.relpath(root, stem)
        if rel == os.curdir:
            rel = ''
        for d in dirs:
            os.mkdir(os.path.join(target_dir, rel, d))
        for f in files:
            os.symlink(os.path.join(root, f), os.path.join(target_dir, rel, f))
def checkin(target_dir):
    """Write files in `target_dir` to Keep.

    Regular files or symlinks to files outside the keep mount are written to
    Keep as normal files (Keep does not support symlinks).

    Symlinks to files in the keep mount will result in files in the new
    collection which reference existing Keep blocks, no data copying necessary.

    An IOError or OSError raised while processing a single file is logged
    on the "arvados" logger and remembered; the walk then continues with
    the next file.

    Returns a tuple ``(outputcollection, last_error)``.  `outputcollection`
    is the new Collection object; this function does not save its record,
    that is left to the caller.  `last_error` is the most recent
    IOError/OSError encountered, or None if every file was processed.
    """
    outputcollection = arvados.collection.Collection(num_retries=5)

    # Guarantee a trailing slash so that root[len(target_dir):] below is a
    # path relative to target_dir with no leading separator.
    if target_dir[-1:] != '/':
        target_dir += '/'

    # CollectionReader objects already loaded, keyed by the identifier
    # returned by is_in_collection, so each source collection is fetched once.
    collections = {}

    logger = logging.getLogger("arvados")

    last_error = None
    for root, dirs, files in os.walk(target_dir):
        reldir = root[len(target_dir):]
        for f in files:
            path = os.path.join(root, f)
            try:
                # lstat, not stat: symlinks must be told apart from
                # regular files.
                s = os.lstat(path)

                write_it = False

                if stat.S_ISREG(s.st_mode):
                    write_it = True
                elif stat.S_ISLNK(s.st_mode):
                    # 1. check if it is a link into a collection
                    real = os.path.split(os.path.realpath(path))
                    (pdh, branch) = arvados.commands.run.is_in_collection(real[0], real[1])
                    # NOTE(review): assumes is_in_collection returns None as
                    # pdh when the path is not inside a collection -- confirm.
                    if pdh is not None:
                        # 2. load collection
                        if pdh not in collections:
                            # 2.1 make sure it is flushed (see #5787 note 11)
                            fd = os.open(real[0], os.O_RDONLY)
                            try:
                                os.fsync(fd)
                            finally:
                                # Close the descriptor even if fsync fails.
                                os.close(fd)

                            # 2.2 get collection from API server
                            # NOTE(review): num_retries mirrors the output
                            # collection above -- confirm.
                            collections[pdh] = arvados.collection.CollectionReader(
                                pdh,
                                api_client=outputcollection._my_api(),
                                keep_client=outputcollection._my_keep(),
                                num_retries=5)
                        # 3. copy arvfile to new collection
                        outputcollection.copy(branch, os.path.join(reldir, f), source_collection=collections[pdh])
                    else:
                        # Link points outside any collection: store the
                        # data it refers to as a normal file.
                        write_it = True

                if write_it:
                    with outputcollection.open(os.path.join(reldir, f), "wb") as writer:
                        with open(path, "rb") as reader:
                            # Stream in 64 KiB chunks so large files are
                            # never held in memory whole.
                            dat = reader.read(64*1024)
                            while dat:
                                writer.write(dat)
                                dat = reader.read(64*1024)
            except (IOError, OSError) as e:
                logger.error(e)
                last_error = e

    return (outputcollection, last_error)