X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/4a832a93cd0baf253575936a79f83bcc4f666a82..6477e004fb67681b8b64bcdbca187dbc32c6cd60:/services/fuse/arvados_fuse/command.py

diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py
index e275825a61..610da477ca 100644
--- a/services/fuse/arvados_fuse/command.py
+++ b/services/fuse/arvados_fuse/command.py
@@ -117,7 +117,13 @@ class ArgumentParser(argparse.ArgumentParser):
         self.add_argument('--unmount-timeout',
                           type=float, default=2.0,
                           help="Time to wait for graceful shutdown after --exec program exits and filesystem is unmounted")
-
+        self.add_argument(
+            '--filters',
+            type=arv_cmd.JSONArgument(arv_cmd.validate_filters),
+            help="""Filters to apply to all project, shared, and tag directory
+contents. Pass filters as either a JSON string or a path to a JSON file.
+The JSON object should be a list of filters in Arvados API list filter syntax.
+""")
         self.add_argument('--exec', type=str, nargs=argparse.REMAINDER,
                             dest="exec_args", metavar=('command', 'args', '...', '--'),
                             help="""Mount, run a command, then unmount and exit""")
@@ -134,21 +140,40 @@ class Mount(object):
         if self.args.logfile:
             self.args.logfile = os.path.realpath(self.args.logfile)
 
+        try:
+            self._setup_logging()
+        except Exception as e:
+            self.logger.exception("exception during setup: %s", e)
+            exit(1)
+
         try:
             nofile_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
-            if nofile_limit[0] < 10240:
-                resource.setrlimit(resource.RLIMIT_NOFILE, (min(10240, nofile_limit[1]), nofile_limit[1]))
+
+            minlimit = 10240
+            if self.args.file_cache:
+                # Adjust the file handle limit so it can meet
+                # the desired cache size. Multiply by 8 because the
+                # number of 64 MiB cache slots that keepclient
+                # allocates is RLIMIT_NOFILE / 8
+                minlimit = int((self.args.file_cache/(64*1024*1024)) * 8)
+
+            if nofile_limit[0] < minlimit:
+                resource.setrlimit(resource.RLIMIT_NOFILE, (min(minlimit, nofile_limit[1]), nofile_limit[1]))
+
+            if minlimit > nofile_limit[1]:
+                self.logger.warning("file handles required to meet --file-cache (%s) exceeds hard file handle limit (%s), cache size will be smaller than requested", minlimit, nofile_limit[1])
+
         except Exception as e:
-            self.logger.warning("arv-mount: unable to adjust file handle limit: %s", e)
+            self.logger.warning("unable to adjust file handle limit: %s", e)
 
-        self.logger.debug("arv-mount: file handle limit is %s", resource.getrlimit(resource.RLIMIT_NOFILE))
+        nofile_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
+        self.logger.info("file cache capped at %s bytes or less based on available disk (RLIMIT_NOFILE is %s)", ((nofile_limit[0]//8)*64*1024*1024), nofile_limit)
 
         try:
-            self._setup_logging()
             self._setup_api()
             self._setup_mount()
         except Exception as e:
-            self.logger.exception("arv-mount: exception during setup: %s", e)
+            self.logger.exception("exception during setup: %s", e)
             exit(1)
 
     def __enter__(self):
@@ -228,14 +253,28 @@ class Mount(object):
 
     def _setup_api(self):
         try:
+            # The default value of file_cache is 0, which tells KeepBlockCache
+            # to choose a default based on whether disk_cache is enabled or not.
+
+            block_cache = arvados.keep.KeepBlockCache(cache_max=self.args.file_cache,
+                                                      disk_cache=self.args.disk_cache,
+                                                      disk_cache_dir=self.args.disk_cache_dir)
+
+            # If there are too many prefetch threads and the
+            # CPU is maxed out, delivering data to the FUSE
+            # layer actually ends up being slower.
+            # Experimentally, capping at 7 threads seems to
+            # be a sweet spot.
+            prefetch_threads = min(max((block_cache.cache_max // (64 * 1024 * 1024)) - 1, 1), 7)
+
             self.api = arvados.safeapi.ThreadSafeApiCache(
                 apiconfig=arvados.config.settings(),
-                # default value of file_cache is 0, this tells KeepBlockCache to
-                # choose a default based on whether disk_cache is enabled or not.
+                api_params={
+                    'num_retries': self.args.retries,
+                },
                 keep_params={
-                    'block_cache': arvados.keep.KeepBlockCache(cache_max=self.args.file_cache,
-                                                               disk_cache=self.args.disk_cache,
-                                                               disk_cache_dir=self.args.disk_cache_dir),
+                    'block_cache': block_cache,
+                    'num_prefetch_threads': prefetch_threads,
                     'num_retries': self.args.retries,
                 },
                 version='v1',
@@ -267,7 +306,14 @@ class Mount(object):
         usr = self.api.users().current().execute(num_retries=self.args.retries)
         now = time.time()
         dir_class = None
-        dir_args = [llfuse.ROOT_INODE, self.operations.inodes, self.api, self.args.retries, self.args.enable_write]
+        dir_args = [
+            llfuse.ROOT_INODE,
+            self.operations.inodes,
+            self.api,
+            self.args.retries,
+            self.args.enable_write,
+            self.args.filters,
+        ]
         mount_readme = False
 
         storage_classes = None
@@ -333,7 +379,12 @@ class Mount(object):
             return
 
         e = self.operations.inodes.add_entry(Directory(
-            llfuse.ROOT_INODE, self.operations.inodes, self.api.config, self.args.enable_write))
+            llfuse.ROOT_INODE,
+            self.operations.inodes,
+            self.api.config,
+            self.args.enable_write,
+            self.args.filters,
+        ))
         dir_args[0] = e.inode
 
         for name in self.args.mount_by_id: