X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/3af38eca348413c6f11f6526b2ee2ca7cb53e348..081b65fb2c53f9faba40fedd2de9cb9f1a860016:/sdk/python/arvados/commands/put.py diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py index 54fa356d3a..a21719cabd 100644 --- a/sdk/python/arvados/commands/put.py +++ b/sdk/python/arvados/commands/put.py @@ -10,6 +10,7 @@ import argparse import arvados import arvados.collection import base64 +import ciso8601 import copy import datetime import errno @@ -154,6 +155,29 @@ On high latency installations, using a greater number will improve overall throughput. """) +upload_opts.add_argument('--exclude', metavar='PATTERN', default=[], + action='append', help=""" +Exclude files and directories whose names match the given glob pattern. When +using a path-like pattern like 'subdir/*.txt', all text files inside 'subdir' +directory, relative to the provided input dirs will be excluded. +When using a filename pattern like '*.txt', any text file will be excluded +no matter where it is placed. +For the special case of needing to exclude only files or dirs directly below +the given input directory, you can use a pattern like './exclude_this.gif'. +You can specify multiple patterns by using this argument more than once. +""") + +_group = upload_opts.add_mutually_exclusive_group() +_group.add_argument('--follow-links', action='store_true', default=True, + dest='follow_links', help=""" +Follow file and directory symlinks (default). +""") +_group.add_argument('--no-follow-links', action='store_false', dest='follow_links', + help=""" +Do not follow file and directory symlinks. +""") + + run_opts = argparse.ArgumentParser(add_help=False) run_opts.add_argument('--project-uuid', metavar='UUID', help=""" @@ -165,18 +189,6 @@ run_opts.add_argument('--name', help=""" Save the collection with the specified name. """) -run_opts.add_argument('--exclude', metavar='PATTERN', default=[], - action='append', help=""" -Exclude files and directories whose names match the given glob pattern. When -using a path-like pattern like 'subdir/*.txt', all text files inside 'subdir' -directory, relative to the provided input dirs will be excluded. -When using a filename pattern like '*.txt', any text file will be excluded -no matter where is placed. -For the special case of needing to exclude only files or dirs directly below -the given input directory, you can use a pattern like './exclude_this.gif'. -You can specify multiple patterns by using this argument more than once. -""") - _group = run_opts.add_mutually_exclusive_group() _group.add_argument('--progress', action='store_true', help=""" @@ -213,16 +225,6 @@ _group.add_argument('--no-resume', action='store_false', dest='resume', Do not continue interrupted uploads from cached state. """) -_group = run_opts.add_mutually_exclusive_group() -_group.add_argument('--follow-links', action='store_true', default=True, - dest='follow_links', help=""" -Follow file and directory symlinks (default). -""") -_group.add_argument('--no-follow-links', action='store_false', dest='follow_links', - help=""" -Do not follow file and directory symlinks. -""") - _group = run_opts.add_mutually_exclusive_group() _group.add_argument('--cache', action='store_true', dest='use_cache', default=True, help=""" @@ -233,6 +235,18 @@ _group.add_argument('--no-cache', action='store_false', dest='use_cache', Do not save upload state in a cache file for resuming. """) +_group = upload_opts.add_mutually_exclusive_group() +_group.add_argument('--trash-at', metavar='YYYY-MM-DD HH:MM', default=None, + help=""" +Set the trash date of the resulting collection to an absolute date in the future. +The accepted format is defined by the ISO 8601 standard. +""") +_group.add_argument('--trash-after', type=int, metavar='DAYS', default=None, + help=""" +Set the trash date of the resulting collection to an amount of days from the +date/time that the upload process finishes. +""") + arg_parser = argparse.ArgumentParser( description='Copy data from the local filesystem to Keep.', parents=[upload_opts, run_opts, arv_cmd.retry_opt]) @@ -429,7 +443,8 @@ class ArvPutUploadJob(object): put_threads=None, replication_desired=None, filename=None, update_time=60.0, update_collection=None, storage_classes=None, logger=logging.getLogger('arvados.arv_put'), dry_run=False, - follow_links=True, exclude_paths=[], exclude_names=None): + follow_links=True, exclude_paths=[], exclude_names=None, + trash_at=None): self.paths = paths self.resume = resume self.use_cache = use_cache @@ -469,6 +484,10 @@ class ArvPutUploadJob(object): self.follow_links = follow_links self.exclude_paths = exclude_paths self.exclude_names = exclude_names + self._trash_at = trash_at + + if self._trash_at is not None and type(self._trash_at) not in [datetime.datetime, datetime.timedelta]: + raise TypeError('trash_at should be None, datetime or timedelta') if not self.use_cache and self.resume: raise ArvPutArgumentConflict('resume cannot be True when use_cache is False') @@ -609,6 +628,17 @@ class ArvPutUploadJob(object): if self.use_cache: self._cache_file.close() + def _collection_trash_at(self): + """ + Returns the trash date that the collection should use at save time. + Takes into account absolute/relative trash_at values requested + by the user. + """ + if type(self._trash_at) == datetime.timedelta: + # Get an absolute datetime for trash_at + return datetime.datetime.utcnow() + self._trash_at + return self._trash_at + def save_collection(self): if self.update: # Check if files should be updated on the remote collection. @@ -624,7 +654,8 @@ class ArvPutUploadJob(object): # The file already exist on remote collection, skip it. pass self._remote_collection.save(storage_classes=self.storage_classes, - num_retries=self.num_retries) + num_retries=self.num_retries, + trash_at=self._collection_trash_at()) else: if self.storage_classes is None: self.storage_classes = ['default'] @@ -632,7 +663,8 @@ class ArvPutUploadJob(object): name=self.name, owner_uuid=self.owner_uuid, storage_classes=self.storage_classes, ensure_unique_name=self.ensure_unique_name, - num_retries=self.num_retries) + num_retries=self.num_retries, + trash_at=self._collection_trash_at()) def destroy_cache(self): if self.use_cache: @@ -1072,6 +1104,28 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr, if install_sig_handlers: arv_cmd.install_signal_handlers() + # Trash arguments validation + trash_at = None + if args.trash_at is not None: + try: + trash_at = ciso8601.parse_datetime(args.trash_at) + except: + logger.error("--trash-at argument format invalid, should be YYYY-MM-DDTHH:MM.") + sys.exit(1) + else: + if trash_at.tzinfo is not None: + # Timezone-aware datetime provided, convert to non-aware UTC + delta = trash_at.tzinfo.utcoffset(None) + trash_at = trash_at.replace(tzinfo=None) - delta + if trash_at <= datetime.datetime.utcnow(): + logger.error("--trash-at argument should be set in the future") + sys.exit(1) + if args.trash_after is not None: + if args.trash_after < 1: + logger.error("--trash-after argument should be >= 1") + sys.exit(1) + trash_at = datetime.timedelta(seconds=(args.trash_after * 24 * 60 * 60)) + # Determine the name to use if args.name: if args.stream or args.raw: @@ -1177,7 +1231,8 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr, dry_run=args.dry_run, follow_links=args.follow_links, exclude_paths=exclude_paths, - exclude_names=exclude_names) + exclude_names=exclude_names, + trash_at=trash_at) except ResumeCacheConflict: logger.error("\n".join([ "arv-put: Another process is already uploading this data.", @@ -1191,7 +1246,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr, " --no-resume to start a new resume cache.", " --no-cache to disable resume cache."])) sys.exit(1) - except CollectionUpdateError as error: + except (CollectionUpdateError, PathDoesNotExistError) as error: logger.error("\n".join([ "arv-put: %s" % str(error)])) sys.exit(1) @@ -1201,10 +1256,6 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr, except ArvPutUploadNotPending: # No files pending for upload sys.exit(0) - except PathDoesNotExistError as error: - logger.error("\n".join([ - "arv-put: %s" % str(error)])) - sys.exit(1) if not args.dry_run and not args.update_collection and args.resume and writer.bytes_written > 0: logger.warning("\n".join([