Merge branch '13430-allow-storage-classes'
authorFuad Muhic <fmuhic@capeannenterprises.com>
Wed, 23 May 2018 13:07:39 +0000 (15:07 +0200)
committerFuad Muhic <fmuhic@capeannenterprises.com>
Wed, 23 May 2018 13:07:39 +0000 (15:07 +0200)
refs #13430

Arvados-DCO-1.1-Signed-off-by: Fuad Muhic <fmuhic@capeannenterprises.com>

sdk/python/arvados/collection.py
sdk/python/arvados/commands/put.py
sdk/python/tests/test_arv_put.py
sdk/python/tests/test_collections.py

index 8fb90c944396967e6863a38daee27ffe3cb8b9ec..e390a60a87a0977a018025306252d8f86f2e69cc 100644 (file)
@@ -1436,7 +1436,7 @@ class Collection(RichCollectionBase):
     @must_be_writable
     @synchronized
     @retry_method
-    def save(self, merge=True, num_retries=None):
+    def save(self, storage_classes=None, merge=True, num_retries=None):
         """Save collection to an existing collection record.
 
         Commit pending buffer blocks to Keep, merge with remote record (if
@@ -1447,6 +1447,9 @@ class Collection(RichCollectionBase):
         the API server.  If you want to save a manifest to Keep only, see
         `save_new()`.
 
+        :storage_classes:
+          Specify desirable storage classes to be used when writing data to Keep.
+
         :merge:
           Update and merge remote changes before saving.  Otherwise, any
           remote changes will be ignored and overwritten.
@@ -1455,6 +1458,9 @@ class Collection(RichCollectionBase):
           Retry count on API calls (if None,  use the collection default)
 
         """
+        if storage_classes and type(storage_classes) is not list:
+            raise errors.ArgumentError("storage_classes must be list type.")
+
         if not self.committed():
             if not self._has_collection_uuid():
                 raise AssertionError("Collection manifest_locator is not a collection uuid.  Use save_new() for new collections.")
@@ -1465,14 +1471,24 @@ class Collection(RichCollectionBase):
                 self.update()
 
             text = self.manifest_text(strip=False)
+            body={'manifest_text': text}
+            if storage_classes:
+                body["storage_classes_desired"] = storage_classes
+
             self._remember_api_response(self._my_api().collections().update(
                 uuid=self._manifest_locator,
-                body={'manifest_text': text}
+                body=body
                 ).execute(
                     num_retries=num_retries))
             self._manifest_text = self._api_response["manifest_text"]
             self._portable_data_hash = self._api_response["portable_data_hash"]
             self.set_committed(True)
+        elif storage_classes:
+            self._remember_api_response(self._my_api().collections().update(
+                uuid=self._manifest_locator,
+                body={"storage_classes_desired": storage_classes}
+                ).execute(
+                    num_retries=num_retries))
 
         return self._manifest_text
 
@@ -1483,6 +1499,7 @@ class Collection(RichCollectionBase):
     def save_new(self, name=None,
                  create_collection_record=True,
                  owner_uuid=None,
+                 storage_classes=None,
                  ensure_unique_name=False,
                  num_retries=None):
         """Save collection to a new collection record.
@@ -1503,6 +1520,9 @@ class Collection(RichCollectionBase):
           the user, or project uuid that will own this collection.
           If None, defaults to the current user.
 
+        :storage_classes:
+          Specify desirable storage classes to be used when writing data to Keep.
+
         :ensure_unique_name:
           If True, ask the API server to rename the collection
           if it conflicts with a collection with the same name and owner.  If
@@ -1525,6 +1545,10 @@ class Collection(RichCollectionBase):
                     "replication_desired": self.replication_desired}
             if owner_uuid:
                 body["owner_uuid"] = owner_uuid
+            if storage_classes:
+                if type(storage_classes) is not list:
+                    raise errors.ArgumentError("storage_classes must be list type.")
+                body["storage_classes_desired"] = storage_classes
 
             self._remember_api_response(self._my_api().collections().create(ensure_unique_name=ensure_unique_name, body=body).execute(num_retries=num_retries))
             text = self._api_response["manifest_text"]
index 388d87b3a6f99ce51dc6b39248fd6810394828f3..cba00c3c8cf153039de990d27867558d0dbc699a 100644 (file)
@@ -140,6 +140,10 @@ physical storage devices (e.g., disks) should have a copy of each data
 block. Default is to use the server-provided default (if any) or 2.
 """)
 
+upload_opts.add_argument('--storage-classes', help="""
+Specify comma separated list of storage classes to be used when saving data to Keep.
+""")
+
 upload_opts.add_argument('--threads', type=int, metavar='N', default=None,
                          help="""
 Set the number of upload threads to be used. Take into account that
@@ -418,8 +422,8 @@ class ArvPutUploadJob(object):
     def __init__(self, paths, resume=True, use_cache=True, reporter=None,
                  name=None, owner_uuid=None, api_client=None,
                  ensure_unique_name=False, num_retries=None,
-                 put_threads=None, replication_desired=None,
-                 filename=None, update_time=60.0, update_collection=None,
+                 put_threads=None, replication_desired=None, filename=None,
+                 update_time=60.0, update_collection=None, storage_classes=None,
                  logger=logging.getLogger('arvados.arv_put'), dry_run=False,
                  follow_links=True, exclude_paths=[], exclude_names=None):
         self.paths = paths
@@ -439,6 +443,7 @@ class ArvPutUploadJob(object):
         self.replication_desired = replication_desired
         self.put_threads = put_threads
         self.filename = filename
+        self.storage_classes = storage_classes
         self._api_client = api_client
         self._state_lock = threading.Lock()
         self._state = None # Previous run state (file list & manifest)
@@ -614,10 +619,14 @@ class ArvPutUploadJob(object):
                 else:
                     # The file already exist on remote collection, skip it.
                     pass
-            self._remote_collection.save(num_retries=self.num_retries)
+            self._remote_collection.save(storage_classes=self.storage_classes,
+                                         num_retries=self.num_retries)
         else:
+            if self.storage_classes is None:
+                self.storage_classes = ['default']
             self._local_collection.save_new(
                 name=self.name, owner_uuid=self.owner_uuid,
+                storage_classes=self.storage_classes,
                 ensure_unique_name=self.ensure_unique_name,
                 num_retries=self.num_retries)
 
@@ -1045,6 +1054,15 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr,
     else:
         reporter = None
 
+    #  Split storage-classes argument
+    storage_classes = None
+    if args.storage_classes:
+        storage_classes = args.storage_classes.strip().split(',')
+        if len(storage_classes) > 1:
+            logger.error("Multiple storage classes are not supported currently.")
+            sys.exit(1)
+
+
     # Setup exclude regex from all the --exclude arguments provided
     name_patterns = []
     exclude_paths = []
@@ -1102,6 +1120,7 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr,
                                  owner_uuid = project_uuid,
                                  ensure_unique_name = True,
                                  update_collection = args.update_collection,
+                                 storage_classes=storage_classes,
                                  logger=logger,
                                  dry_run=args.dry_run,
                                  follow_links=args.follow_links,
index 4b1f69477e5823502b2a5396a586db82a56e6ff7..93cfdc2a36c26389a3259222304e7ba1d5de7dff 100644 (file)
@@ -730,6 +730,11 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers,
                           self.call_main_with_args,
                           ['--project-uuid', self.Z_UUID, '--stream'])
 
+    def test_error_when_multiple_storage_classes_specified(self):
+        self.assertRaises(SystemExit,
+                          self.call_main_with_args,
+                          ['--storage-classes', 'hot,cold'])
+
     def test_error_when_excluding_absolute_path(self):
         tmpdir = self.make_tmpdir()
         self.assertRaises(SystemExit,
@@ -1061,6 +1066,18 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
                                        '--project-uuid', self.PROJECT_UUID])
         self.assertEqual(link_name, collection['name'])
 
+    def test_put_collection_with_storage_classes_specified(self):
+        collection = self.run_and_find_collection("", ['--storage-classes', 'hot'])
+
+        self.assertEqual(len(collection['storage_classes_desired']), 1)
+        self.assertEqual(collection['storage_classes_desired'][0], 'hot')
+
+    def test_put_collection_without_storage_classes_specified(self):
+        collection = self.run_and_find_collection("")
+
+        self.assertEqual(len(collection['storage_classes_desired']), 1)
+        self.assertEqual(collection['storage_classes_desired'][0], 'default')
+
     def test_exclude_filename_pattern(self):
         tmpdir = self.make_tmpdir()
         tmpsubdir = os.path.join(tmpdir, 'subdir')
index 49c00191bebe02cc8e267b397212a893a33f246a..a56d4f68f157e0e76534da441c9707a8670960e7 100644 (file)
@@ -1300,17 +1300,29 @@ class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
 
     def test_create_and_save(self):
         c = self.create_count_txt()
-        c.save()
+        c.save(storage_classes=['archive'])
+
         self.assertRegex(
             c.manifest_text(),
             r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
+        self.assertEqual(c.api_response()["storage_classes_desired"], ['archive'])
+
 
     def test_create_and_save_new(self):
         c = self.create_count_txt()
-        c.save_new()
+        c.save_new(storage_classes=['archive'])
+
         self.assertRegex(
             c.manifest_text(),
             r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
+        self.assertEqual(c.api_response()["storage_classes_desired"], ['archive'])
+
+    def test_update_storage_classes_desired_if_collection_is_commited(self):
+        c = self.create_count_txt()
+        c.save(storage_classes=['hot'])
+        c.save(storage_classes=['cold'])
+
+        self.assertEqual(c.api_response()["storage_classes_desired"], ['cold'])
 
     def test_create_diff_apply(self):
         c1 = self.create_count_txt()