# Copyright (C) The Arvados Authors. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

import arvados
import os
import stat
import arvados.commands.run
import logging

# Implements "Virtual Working Directory"
# Provides a way of emulating a shared writable directory in Keep based
# on a "check out, edit, check in, merge" model.
# At the moment, this only permits adding new files; applications
# cannot modify or delete existing files.


def checkout(source_collection, target_dir, keepmount=None):
    """Create a symlink tree in `target_dir` mirroring a mounted collection.

    Every directory found under `keepmount/source_collection` is recreated
    as a real directory under `target_dir`, and every file is represented
    by a symlink pointing back at the file in the mount.

    Arguments:

    * source_collection: path of the collection relative to `keepmount`.
    * target_dir: directory to populate.  It is created if it does not
      exist, and must be empty.
    * keepmount: root of the Keep mount.  When None, the value of the
      TASK_KEEPMOUNT environment variable is used.

    Raises Exception if `target_dir` already has entries, and KeyError if
    `keepmount` is None and TASK_KEEPMOUNT is not set.
    """
    if keepmount is None:
        keepmount = os.environ['TASK_KEEPMOUNT']

    if not os.path.exists(target_dir):
        os.makedirs(target_dir)

    existing = os.listdir(target_dir)
    if existing:
        raise Exception("target_dir must be empty before checkout, contains %s" % existing)

    stem = os.path.join(keepmount, source_collection)
    for root, dirs, files in os.walk(stem, topdown=True):
        # Path of `root` relative to `stem`.  Strip leading separators
        # instead of assuming exactly one: `stem` may or may not already end
        # with a separator (e.g. when source_collection is "" or ends in "/"),
        # and a fixed "+1" offset would then eat the first character of the
        # subdirectory name.
        rel = root[len(stem):].lstrip(os.sep)
        for d in dirs:
            os.mkdir(os.path.join(target_dir, rel, d))
        for f in files:
            os.symlink(os.path.join(root, f), os.path.join(target_dir, rel, f))


def checkin(target_dir):
    """Write files in `target_dir` to Keep.

    Regular files or symlinks to files outside the keep mount are written to
    Keep as normal files (Keep does not support symlinks).

    Symlinks to files in the keep mount (as determined by
    arvados.commands.run.is_in_collection) will result in files in the new
    collection which reference existing Keep blocks, no data copying
    necessary.

    IOError and OSError raised while processing an individual file are
    logged to the "arvados" logger and do not stop the walk; the remaining
    files are still processed.

    Returns a tuple `(collection, last_error)`.  `collection` is a new
    Collection object whose record has not been saved to the API.
    `last_error` is the most recent IOError/OSError encountered, or None if
    every file was processed without such an error.
    """

    outputcollection = arvados.collection.Collection(num_retries=5)

    # The relative paths below are computed by slicing off len(target_dir)
    # characters, so make sure the trailing separator is included.
    if target_dir[-1:] != '/':
        target_dir += '/'

    # Cache of CollectionReader objects, keyed by portable data hash.
    collections = {}

    logger = logging.getLogger("arvados")

    chunk_size = 64*1024
    last_error = None
    for root, dirs, files in os.walk(target_dir):
        reldir = root[len(target_dir):]
        for f in files:
            localpath = os.path.join(root, f)
            destpath = os.path.join(reldir, f)
            try:
                s = os.lstat(localpath)

                writeIt = False

                if stat.S_ISREG(s.st_mode):
                    writeIt = True
                elif stat.S_ISLNK(s.st_mode):
                    # 1. check if it is a link into a collection
                    real = os.path.split(os.path.realpath(localpath))
                    (pdh, branch) = arvados.commands.run.is_in_collection(real[0], real[1])
                    if pdh is not None:
                        # 2. load collection
                        if pdh not in collections:
                            # 2.1 make sure it is flushed (see #5787 note 11)
                            fd = os.open(real[0], os.O_RDONLY)
                            try:
                                os.fsync(fd)
                            finally:
                                # Always release the descriptor, even when
                                # fsync fails and the error is handled below.
                                os.close(fd)

                            # 2.2 get collection from API server
                            collections[pdh] = arvados.collection.CollectionReader(
                                pdh,
                                api_client=outputcollection._my_api(),
                                keep_client=outputcollection._my_keep(),
                                num_retries=5)
                        # 3. copy arvfile to new collection
                        outputcollection.copy(branch, destpath, source_collection=collections[pdh])
                    else:
                        writeIt = True

                if writeIt:
                    with outputcollection.open(destpath, "wb") as writer:
                        with open(localpath, "rb") as reader:
                            # Stream in fixed-size chunks so large files are
                            # never held in memory all at once.
                            for dat in iter(lambda: reader.read(chunk_size), b''):
                                writer.write(dat)
            except (IOError, OSError) as e:
                logger.error(e)
                last_error = e

    return (outputcollection, last_error)