From 49bfee2a221bb050732935c240b981b7becd9aff Mon Sep 17 00:00:00 2001 From: Fuad Muhic Date: Mon, 21 May 2018 17:18:37 +0200 Subject: [PATCH] Added --storage-classes argument to arv-put. Arvados-DCO-1.1-Signed-off-by: Fuad Muhic --- sdk/python/arvados/collection.py | 11 +++++++++-- sdk/python/arvados/commands/put.py | 25 ++++++++++++++++++++++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/sdk/python/arvados/collection.py b/sdk/python/arvados/collection.py index 8fb90c9443..cce7d75685 100644 --- a/sdk/python/arvados/collection.py +++ b/sdk/python/arvados/collection.py @@ -1436,7 +1436,7 @@ class Collection(RichCollectionBase): @must_be_writable @synchronized @retry_method - def save(self, merge=True, num_retries=None): + def save(self, storage_classes=None, merge=True, num_retries=None): """Save collection to an existing collection record. Commit pending buffer blocks to Keep, merge with remote record (if @@ -1465,9 +1465,13 @@ class Collection(RichCollectionBase): self.update() text = self.manifest_text(strip=False) + body={'manifest_text': text} + if storage_classes: + body["storage_classes_desired"] = storage_classes + self._remember_api_response(self._my_api().collections().update( uuid=self._manifest_locator, - body={'manifest_text': text} + body=body ).execute( num_retries=num_retries)) self._manifest_text = self._api_response["manifest_text"] @@ -1483,6 +1487,7 @@ class Collection(RichCollectionBase): def save_new(self, name=None, create_collection_record=True, owner_uuid=None, + storage_classes=None, ensure_unique_name=False, num_retries=None): """Save collection to a new collection record. @@ -1525,6 +1530,8 @@ class Collection(RichCollectionBase): "replication_desired": self.replication_desired} if owner_uuid: body["owner_uuid"] = owner_uuid + if storage_classes: + body["storage_classes_desired"] = storage_classes self._remember_api_response(self._my_api().collections().create(ensure_unique_name=ensure_unique_name, body=body).execute(num_retries=num_retries)) text = self._api_response["manifest_text"] diff --git a/sdk/python/arvados/commands/put.py b/sdk/python/arvados/commands/put.py index 388d87b3a6..cba00c3c8c 100644 --- a/sdk/python/arvados/commands/put.py +++ b/sdk/python/arvados/commands/put.py @@ -140,6 +140,10 @@ physical storage devices (e.g., disks) should have a copy of each data block. Default is to use the server-provided default (if any) or 2. """) +upload_opts.add_argument('--storage-classes', help=""" +Specify comma separated list of storage classes to be used when saving data to Keep. +""") + upload_opts.add_argument('--threads', type=int, metavar='N', default=None, help=""" Set the number of upload threads to be used. Take into account that @@ -418,8 +422,8 @@ class ArvPutUploadJob(object): def __init__(self, paths, resume=True, use_cache=True, reporter=None, name=None, owner_uuid=None, api_client=None, ensure_unique_name=False, num_retries=None, - put_threads=None, replication_desired=None, - filename=None, update_time=60.0, update_collection=None, + put_threads=None, replication_desired=None, filename=None, + update_time=60.0, update_collection=None, storage_classes=None, logger=logging.getLogger('arvados.arv_put'), dry_run=False, follow_links=True, exclude_paths=[], exclude_names=None): self.paths = paths @@ -439,6 +443,7 @@ class ArvPutUploadJob(object): self.replication_desired = replication_desired self.put_threads = put_threads self.filename = filename + self.storage_classes = storage_classes self._api_client = api_client self._state_lock = threading.Lock() self._state = None # Previous run state (file list & manifest) @@ -614,10 +619,14 @@ class ArvPutUploadJob(object): else: # The file already exist on remote collection, skip it. pass - self._remote_collection.save(num_retries=self.num_retries) + self._remote_collection.save(storage_classes=self.storage_classes, + num_retries=self.num_retries) else: + if self.storage_classes is None: + self.storage_classes = ['default'] self._local_collection.save_new( name=self.name, owner_uuid=self.owner_uuid, + storage_classes=self.storage_classes, ensure_unique_name=self.ensure_unique_name, num_retries=self.num_retries) @@ -1045,6 +1054,15 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr, else: reporter = None + # Split storage-classes argument + storage_classes = None + if args.storage_classes: + storage_classes = args.storage_classes.strip().split(',') + if len(storage_classes) > 1: + logger.error("Multiple storage classes are not supported currently.") + sys.exit(1) + + # Setup exclude regex from all the --exclude arguments provided name_patterns = [] exclude_paths = [] @@ -1102,6 +1120,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr, owner_uuid = project_uuid, ensure_unique_name = True, update_collection = args.update_collection, + storage_classes=storage_classes, logger=logger, dry_run=args.dry_run, follow_links=args.follow_links, -- 2.30.2