4 import arvados.commands.run
7 # Implements "Virtual Working Directory"
8 # Provides a way of emulating a shared writable directory in Keep based
9 # on a "check out, edit, check in, merge" model.
# At the moment, this only permits adding new files; applications
# cannot modify or delete existing files.
# Create a symlink tree rooted at target_dir mirroring arv-mounted
# source_collection. target_dir must be empty; it is created if missing.
def checkout(source_collection, target_dir, keepmount=None):
    """Create a symlink tree under `target_dir` mirroring a mounted collection.

    Walks `<keepmount>/<source_collection>`, recreates every directory
    found there beneath `target_dir`, and symlinks every file to its path
    inside the mount.

    Arguments:
    * source_collection: path of the collection, relative to `keepmount`.
    * target_dir: directory to populate.  It is created if it does not
      exist and must be empty.
    * keepmount: root of the Keep mount.  When None, the value of the
      TASK_KEEPMOUNT environment variable is used.

    Raises Exception if `target_dir` already contains entries, and
    KeyError if `keepmount` is None and TASK_KEEPMOUNT is not set.
    """
    if keepmount is None:
        keepmount = os.environ['TASK_KEEPMOUNT']

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    existing = os.listdir(target_dir)
    if existing:
        raise Exception("target_dir must be empty before checkout, contains %s" % existing)

    stem = os.path.join(keepmount, source_collection)
    for root, dirs, files in os.walk(stem, topdown=True):
        # relpath (rather than slicing off len(stem)+1 characters) stays
        # correct when source_collection carries a trailing separator.
        rel = os.path.relpath(root, stem)
        if rel == os.curdir:
            rel = ""
        for d in dirs:
            os.mkdir(os.path.join(target_dir, rel, d))
        for f in files:
            os.symlink(os.path.join(root, f), os.path.join(target_dir, rel, f))
def checkin(target_dir):
    """Write files in `target_dir` to Keep.

    Regular files or symlinks to files outside the keep mount are written to
    Keep as normal files (Keep does not support symlinks).

    Symlinks to files in the keep mount will result in files in the new
    collection which reference existing Keep blocks, no data copying necessary.

    Returns a tuple `(collection, last_error)`.  `collection` is the new
    Collection object; its record is not saved by this function.
    `last_error` is the most recent IOError/OSError raised while handling a
    file, or None if there was none.  Such errors are logged and the
    remaining files are still processed.
    """
    outputcollection = arvados.collection.Collection(num_retries=5)

    # Paths inside the new collection are computed as root[len(target_dir):],
    # which is only a relative path when target_dir ends with a separator.
    if target_dir[-1:] != '/':
        target_dir += '/'

    # Source collections already loaded, keyed by the first value returned
    # from is_in_collection().
    collections = {}

    logger = logging.getLogger("arvados")

    last_error = None
    for root, dirs, files in os.walk(target_dir):
        reldir = root[len(target_dir):]
        for f in files:
            path = os.path.join(root, f)
            try:
                s = os.lstat(path)

                write_it = False

                if stat.S_ISREG(s.st_mode):
                    write_it = True
                elif stat.S_ISLNK(s.st_mode):
                    # 1. check if it is a link into a collection
                    real = os.path.split(os.path.realpath(path))
                    (pdh, branch) = arvados.commands.run.is_in_collection(real[0], real[1])
                    # NOTE(review): presumably pdh is None when the link
                    # target is outside any mounted collection -- confirm
                    # against is_in_collection().
                    if pdh is not None:
                        # 2. load collection
                        if pdh not in collections:
                            # 2.1 make sure it is flushed (see #5787 note 11)
                            # NOTE(review): fsync on the containing directory
                            # is inferred from the comment above -- confirm.
                            fd = os.open(real[0], os.O_RDONLY)
                            try:
                                os.fsync(fd)
                            finally:
                                # Always release the descriptor, even if
                                # the fsync itself fails.
                                os.close(fd)

                            # 2.2 get collection from API server
                            # NOTE(review): num_retries mirrors the output
                            # collection's setting -- confirm.
                            collections[pdh] = arvados.collection.CollectionReader(
                                pdh,
                                api_client=outputcollection._my_api(),
                                keep_client=outputcollection._my_keep(),
                                num_retries=5)
                        # 3. copy arvfile to new collection
                        outputcollection.copy(branch, os.path.join(reldir, f), source_collection=collections[pdh])
                    else:
                        write_it = True

                if write_it:
                    # Stream the file contents into the new collection in
                    # 64 KiB chunks.
                    with outputcollection.open(os.path.join(reldir, f), "wb") as writer:
                        with open(path, "rb") as reader:
                            dat = reader.read(64*1024)
                            while dat:
                                writer.write(dat)
                                dat = reader.read(64*1024)
            except (IOError, OSError) as e:
                # Best effort: record the failure and move on to the next file.
                logger.error(e)
                last_error = e

    return (outputcollection, last_error)