17572: Merge branch 'master' into 17572-arvmount-storage-classes
authorLucas Di Pentima <lucas.dipentima@curii.com>
Mon, 21 Jun 2021 19:40:42 +0000 (16:40 -0300)
committerLucas Di Pentima <lucas.dipentima@curii.com>
Mon, 21 Jun 2021 19:40:42 +0000 (16:40 -0300)
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima@curii.com>

doc/user/topics/storage-classes.html.textile.liquid
services/fuse/arvados_fuse/__init__.py
services/fuse/arvados_fuse/command.py
services/fuse/arvados_fuse/fusedir.py
services/fuse/tests/test_mount.py
services/fuse/tests/test_tmp_collection.py

index 96c808306272add476bcf428d481160d0562baec..99556af10aecfce48a9ac07c37e07959c44e169a 100644 (file)
@@ -16,10 +16,18 @@ Names of storage classes are internal to the cluster and decided by the administ
 
 h3. arv-put
 
-You may specify the desired storage class for a collection uploaded using @arv-put@:
+You may specify one or more desired storage classes for a collection uploaded using @arv-put@:
 
 <pre>
-$ arv-put --storage-classes=hot myfile.txt
+$ arv-put --storage-classes=hot,archival myfile.txt
+</pre>
+
+h3. arv-mount
+
+You can ask @arv-mount@ to use specific storage classes when creating new collections:
+
+<pre>
+$ arv-mount --storage-classes=transient --mount-tmp=scratch keep
 </pre>
 
 h3. arvados-cwl-runner
@@ -46,8 +54,6 @@ h3. Storage class notes
 
 Collection blocks will be in the "default" storage class if not otherwise specified.
 
-Currently, a collection may only have one desired storage class.
-
 Any user with write access to a collection may set any storage class on that collection.
 
 Names of storage classes are internal to the cluster and decided by the administrator.  Aside from "default", Arvados currently does not define any standard storage class names.
index 3a0316cf9e1e48cb319792c1636c56c8fc1eabc2..1696f856a595616a86ed9e9a0bc1b8b05ff56af2 100644 (file)
@@ -61,24 +61,16 @@ from builtins import next
 from builtins import str
 from builtins import object
 import os
-import sys
 import llfuse
 import errno
 import stat
 import threading
 import arvados
-import pprint
 import arvados.events
-import re
-import apiclient
-import json
 import logging
 import time
-import _strptime
-import calendar
 import threading
 import itertools
-import ciso8601
 import collections
 import functools
 import arvados.keep
index 7bef8a269fd5a2aec7dcd93f272e5a0a5bd99d19..67a2aaa4da881891be106535d38e9bc4969220ab 100644 (file)
@@ -95,6 +95,7 @@ class ArgumentParser(argparse.ArgumentParser):
 
         self.add_argument('--read-only', action='store_false', help="Mount will be read only (default)", dest="enable_write", default=False)
         self.add_argument('--read-write', action='store_true', help="Mount will be read-write", dest="enable_write", default=False)
+        self.add_argument('--storage-classes', type=str, metavar='CLASSES', help="Specify comma separated list of storage classes to be used when saving data of new collections", default=None)
 
         self.add_argument('--crunchstat-interval', type=float, help="Write stats to stderr every N seconds (default disabled)", default=0)
 
@@ -246,6 +247,11 @@ class Mount(object):
         dir_args = [llfuse.ROOT_INODE, self.operations.inodes, self.api, self.args.retries]
         mount_readme = False
 
+        storage_classes = None
+        if self.args.storage_classes is not None:
+            storage_classes = self.args.storage_classes.replace(' ', '').split(',')
+            self.logger.info("Storage classes requested for new collections: {}".format(', '.join(storage_classes)))
+
         if self.args.collection is not None:
             # Set up the request handler with the collection at the root
             # First check that the collection is readable
@@ -295,7 +301,10 @@ class Mount(object):
             mount_readme = True
 
         if dir_class is not None:
-            ent = dir_class(*dir_args)
+            if dir_class in [TagsDirectory, CollectionDirectory]:
+                ent = dir_class(*dir_args)
+            else:
+                ent = dir_class(*dir_args, storage_classes=storage_classes)
             self.operations.inodes.add_entry(ent)
             self.listen_for_events = ent.want_event_subscribe()
             return
@@ -305,17 +314,17 @@ class Mount(object):
         dir_args[0] = e.inode
 
         for name in self.args.mount_by_id:
-            self._add_mount(e, name, MagicDirectory(*dir_args, pdh_only=False))
+            self._add_mount(e, name, MagicDirectory(*dir_args, pdh_only=False, storage_classes=storage_classes))
         for name in self.args.mount_by_pdh:
             self._add_mount(e, name, MagicDirectory(*dir_args, pdh_only=True))
         for name in self.args.mount_by_tag:
             self._add_mount(e, name, TagsDirectory(*dir_args))
         for name in self.args.mount_home:
-            self._add_mount(e, name, ProjectDirectory(*dir_args, project_object=usr, poll=True))
+            self._add_mount(e, name, ProjectDirectory(*dir_args, project_object=usr, poll=True, storage_classes=storage_classes))
         for name in self.args.mount_shared:
-            self._add_mount(e, name, SharedDirectory(*dir_args, exclude=usr, poll=True))
+            self._add_mount(e, name, SharedDirectory(*dir_args, exclude=usr, poll=True, storage_classes=storage_classes))
         for name in self.args.mount_tmp:
-            self._add_mount(e, name, TmpCollectionDirectory(*dir_args))
+            self._add_mount(e, name, TmpCollectionDirectory(*dir_args, storage_classes=storage_classes))
 
         if mount_readme:
             text = self._readme_text(
index a2e3ac139eca44a4b9c1513977181d744fb364be..78cbd0d8cfd06f1c638549151c56e74a32025237 100644 (file)
@@ -487,6 +487,8 @@ class CollectionDirectory(CollectionDirectoryBase):
                             new_collection_record["portable_data_hash"] = new_collection_record["uuid"]
                         if 'manifest_text' not in new_collection_record:
                             new_collection_record['manifest_text'] = coll_reader.manifest_text()
+                        if 'storage_classes_desired' not in new_collection_record:
+                            new_collection_record['storage_classes_desired'] = coll_reader.storage_classes_desired()
 
                         if self.collection_record is None or self.collection_record["portable_data_hash"] != new_collection_record.get("portable_data_hash"):
                             self.new_collection(new_collection_record, coll_reader)
@@ -571,11 +573,12 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
         def save_new(self):
             pass
 
-    def __init__(self, parent_inode, inodes, api_client, num_retries):
+    def __init__(self, parent_inode, inodes, api_client, num_retries, storage_classes=None):
         collection = self.UnsaveableCollection(
             api_client=api_client,
             keep_client=api_client.keep,
-            num_retries=num_retries)
+            num_retries=num_retries,
+            storage_classes_desired=storage_classes)
         super(TmpCollectionDirectory, self).__init__(
             parent_inode, inodes, api_client.config, collection)
         self.collection_record_file = None
@@ -595,6 +598,7 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
                 "uuid": None,
                 "manifest_text": self.collection.manifest_text(),
                 "portable_data_hash": self.collection.portable_data_hash(),
+                "storage_classes_desired": self.collection.storage_classes_desired(),
             }
 
     def __contains__(self, k):
@@ -653,11 +657,12 @@ and the directory will appear if it exists.
 
 """.lstrip()
 
-    def __init__(self, parent_inode, inodes, api, num_retries, pdh_only=False):
+    def __init__(self, parent_inode, inodes, api, num_retries, pdh_only=False, storage_classes=None):
         super(MagicDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
         self.pdh_only = pdh_only
+        self.storage_classes = storage_classes
 
     def __setattr__(self, name, value):
         super(MagicDirectory, self).__setattr__(name, value)
@@ -687,7 +692,8 @@ and the directory will appear if it exists.
                 if project[u'items_available'] == 0:
                     return False
                 e = self.inodes.add_entry(ProjectDirectory(
-                    self.inode, self.inodes, self.api, self.num_retries, project[u'items'][0]))
+                    self.inode, self.inodes, self.api, self.num_retries,
+                    project[u'items'][0], storage_classes=self.storage_classes))
             else:
                 e = self.inodes.add_entry(CollectionDirectory(
                         self.inode, self.inodes, self.api, self.num_retries, k))
@@ -811,7 +817,7 @@ class ProjectDirectory(Directory):
     """A special directory that contains the contents of a project."""
 
     def __init__(self, parent_inode, inodes, api, num_retries, project_object,
-                 poll=True, poll_time=3):
+                 poll=True, poll_time=3, storage_classes=None):
         super(ProjectDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
@@ -823,6 +829,7 @@ class ProjectDirectory(Directory):
         self._updating_lock = threading.Lock()
         self._current_user = None
         self._full_listing = False
+        self.storage_classes = storage_classes
 
     def want_event_subscribe(self):
         return True
@@ -831,7 +838,7 @@ class ProjectDirectory(Directory):
         if collection_uuid_pattern.match(i['uuid']):
             return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i)
         elif group_uuid_pattern.match(i['uuid']):
-            return ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i, self._poll, self._poll_time)
+            return ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i, self._poll, self._poll_time, self.storage_classes)
         elif link_uuid_pattern.match(i['uuid']):
             if i['head_kind'] == 'arvados#collection' or portable_data_hash_pattern.match(i['head_uuid']):
                 return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i['head_uuid'])
@@ -982,9 +989,16 @@ class ProjectDirectory(Directory):
     def mkdir(self, name):
         try:
             with llfuse.lock_released:
-                self.api.collections().create(body={"owner_uuid": self.project_uuid,
-                                                    "name": name,
-                                                    "manifest_text": ""}).execute(num_retries=self.num_retries)
+                c = {
+                    "owner_uuid": self.project_uuid,
+                    "name": name,
+                    "manifest_text": "" }
+                if self.storage_classes is not None:
+                    c["storage_classes_desired"] = self.storage_classes
+                try:
+                    self.api.collections().create(body=c).execute(num_retries=self.num_retries)
+                except Exception as e:
+                    raise
             self.invalidate()
         except apiclient_errors.Error as error:
             _logger.error(error)
@@ -1079,7 +1093,7 @@ class SharedDirectory(Directory):
     """A special directory that represents users or groups who have shared projects with me."""
 
     def __init__(self, parent_inode, inodes, api, num_retries, exclude,
-                 poll=False, poll_time=60):
+                 poll=False, poll_time=60, storage_classes=None):
         super(SharedDirectory, self).__init__(parent_inode, inodes, api.config)
         self.api = api
         self.num_retries = num_retries
@@ -1087,6 +1101,7 @@ class SharedDirectory(Directory):
         self._poll = True
         self._poll_time = poll_time
         self._updating_lock = threading.Lock()
+        self.storage_classes = storage_classes
 
     @use_counter
     def update(self):
@@ -1156,8 +1171,6 @@ class SharedDirectory(Directory):
                         obr = objects[r]
                         if obr.get("name"):
                             contents[obr["name"]] = obr
-                        #elif obr.get("username"):
-                        #    contents[obr["username"]] = obr
                         elif "first_name" in obr:
                             contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr
 
@@ -1172,7 +1185,7 @@ class SharedDirectory(Directory):
             self.merge(viewitems(contents),
                        lambda i: i[0],
                        lambda a, i: a.uuid() == i[1]['uuid'],
-                       lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time))
+                       lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time, storage_classes=self.storage_classes))
         except Exception:
             _logger.exception("arv-mount shared dir error")
         finally:
index 54316bb9a987cd5a2a771da901cf9d6db9da8c51..82e5c441eb18edacadd0c91cbb79122183b9d7b4 100644 (file)
@@ -22,6 +22,7 @@ from . import run_test_server
 
 from .integration_test import IntegrationTest
 from .mount_test_base import MountTestBase
+from .test_tmp_collection import storage_classes_desired
 
 logger = logging.getLogger('arvados.arv-mount')
 
@@ -1262,3 +1263,31 @@ class SlashSubstitutionTest(IntegrationTest):
     def _test_slash_substitution_conflict(self, tmpdir, fusename):
         with open(os.path.join(tmpdir, fusename, 'waz'), 'w') as f:
             f.write('foo')
+
+class StorageClassesTest(IntegrationTest):
+    mnt_args = [
+        '--read-write',
+        '--mount-home', 'homedir',
+    ]
+
+    def setUp(self):
+        super(StorageClassesTest, self).setUp()
+        self.api = arvados.safeapi.ThreadSafeApiCache(arvados.config.settings())
+
+    @IntegrationTest.mount(argv=mnt_args)
+    def test_collection_default_storage_classes(self):
+        coll_path = os.path.join(self.mnt, 'homedir', 'a_collection')
+        self.api.collections().create(body={'name':'a_collection'}).execute()
+        self.pool_test(coll_path)
+    @staticmethod
+    def _test_collection_default_storage_classes(self, coll):
+        self.assertEqual(storage_classes_desired(coll), ['default'])
+
+    @IntegrationTest.mount(argv=mnt_args+['--storage-classes', 'foo'])
+    def test_collection_custom_storage_classes(self):
+        coll_path = os.path.join(self.mnt, 'homedir', 'new_coll')
+        os.mkdir(coll_path)
+        self.pool_test(coll_path)
+    @staticmethod
+    def _test_collection_custom_storage_classes(self, coll):
+        self.assertEqual(storage_classes_desired(coll), ['foo'])
index 50075c96aed6563af73b04f3ed1e0724eaf01c7e..c59024267a4b628dee6948c8259ac7bcda8257e4 100644 (file)
@@ -58,6 +58,9 @@ def current_manifest(tmpdir):
     with open(os.path.join(tmpdir, '.arvados#collection')) as tmp:
         return json.load(tmp)['manifest_text']
 
+def storage_classes_desired(tmpdir):
+    with open(os.path.join(tmpdir, '.arvados#collection')) as tmp:
+        return json.load(tmp)['storage_classes_desired']
 
 class TmpCollectionTest(IntegrationTest):
     mnt_args = [
@@ -65,6 +68,13 @@ class TmpCollectionTest(IntegrationTest):
         '--mount-tmp', 'zzz',
     ]
 
+    @IntegrationTest.mount(argv=mnt_args+['--storage-classes', 'foo, bar'])
+    def test_storage_classes(self):
+        self.pool_test(os.path.join(self.mnt, 'zzz'))
+    @staticmethod
+    def _test_storage_classes(self, zzz):
+        self.assertEqual(storage_classes_desired(zzz), ['foo', 'bar'])
+
     @IntegrationTest.mount(argv=mnt_args+['--mount-tmp', 'yyy'])
     def test_two_tmp(self):
         self.pool_test(os.path.join(self.mnt, 'zzz'),