18941: FUSE set number of get threads based on cache size
author Peter Amstutz <peter.amstutz@curii.com>
Tue, 29 Mar 2022 16:58:05 +0000 (12:58 -0400)
committer Peter Amstutz <peter.amstutz@curii.com>
Tue, 29 Mar 2022 16:58:05 +0000 (12:58 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <peter.amstutz@curii.com>

sdk/python/arvados/commands/get.py
services/fuse/arvados_fuse/fusedir.py

index a377c149df15cc6e09ae6c2fdd56c97b5639cf40..c061c70f0eebbac2ed2025fdecd27865c27139b8 100755 (executable)
@@ -98,11 +98,11 @@ When getting a collection manifest, strip its access tokens before writing
 it.
 """)
 
-parser.add_argument('--threads', type=int, metavar='N', default=2,
+parser.add_argument('--threads', type=int, metavar='N', default=4,
                     help="""
 Set the number of download threads to be used. Take into account that
 using lots of threads will increase the RAM requirements. Default is
-to use 2 threads.
+to use 4 threads.
 On high latency installations, using a greater number will improve
 overall throughput.
 """)
index 7de95a0cb1b0d95bd1d67dcc58b5a3c406a863ff..bea7ed18c4fc4642e7c329f3245d093c688ff415 100644 (file)
@@ -529,11 +529,14 @@ class CollectionDirectory(CollectionDirectoryBase):
                         if uuid_pattern.match(self.collection_locator):
                             coll_reader = arvados.collection.Collection(
                                 self.collection_locator, self.api, self.api.keep,
-                                num_retries=self.num_retries)
+                                num_retries=self.num_retries,
+                                get_threads=(self.api.keep.block_cache.cache_max // 64 * 1024 * 1024)                            )
                         else:
                             coll_reader = arvados.collection.CollectionReader(
                                 self.collection_locator, self.api, self.api.keep,
-                                num_retries=self.num_retries)
+                                num_retries=self.num_retries,
+                                get_threads=(self.api.keep.block_cache.cache_max // 64 * 1024 * 1024)
+                            )
                         new_collection_record = coll_reader.api_response() or {}
                         # If the Collection only exists in Keep, there will be no API
                         # response.  Fill in the fields we need.