X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/9f4fd542a9fc94e9f48387e90fd70b614458c1f2..1efba8f3b728a3b8aa3c64c5aa09f441318ff2a8:/services/fuse/arvados_fuse/command.py diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py index 5f0a1f80f6..9c607c7f0c 100644 --- a/services/fuse/arvados_fuse/command.py +++ b/services/fuse/arvados_fuse/command.py @@ -16,6 +16,7 @@ import signal import subprocess import sys import time +import resource import arvados.commands._util as arv_cmd from arvados_fuse import crunchstat @@ -83,13 +84,20 @@ class ArgumentParser(argparse.ArgumentParser): type=str, metavar='PATH', action='append', default=[], help="Create a new collection, mount it in read/write mode at mountpoint/PATH, and delete it when unmounting.") + self.add_argument('--debug', action='store_true', help="""Debug mode""") self.add_argument('--logfile', help="""Write debug logs and errors to the specified file (default stderr).""") self.add_argument('--foreground', action='store_true', help="""Run in foreground (default is to daemonize unless --exec specified)""", default=False) self.add_argument('--encoding', type=str, help="Character encoding to use for filesystem, default is utf-8 (see Python codec registry for list of available encodings)", default="utf-8") - self.add_argument('--file-cache', type=int, help="File data cache size, in bytes (default 256MiB)", default=256*1024*1024) - self.add_argument('--directory-cache', type=int, help="Directory data cache size, in bytes (default 128MiB)", default=128*1024*1024) + self.add_argument('--file-cache', type=int, help="File data cache size, in bytes (default 8 GiB for disk-based cache or 256 MiB with RAM-only cache)", default=0) + self.add_argument('--directory-cache', type=int, help="Directory data cache size, in bytes (default 128 MiB)", default=128*1024*1024) + + cachetype = self.add_mutually_exclusive_group() + cachetype.add_argument('--ram-cache', action='store_false', dest='disk_cache', help="Use in-memory caching only", default=True) + cachetype.add_argument('--disk-cache', action='store_true', dest='disk_cache', help="Use disk based caching (default)", default=True) + + self.add_argument('--disk-cache-dir', type=str, help="Disk cache location (default ~/.cache/arvados/keep)", default=None) self.add_argument('--disable-event-listening', action='store_true', help="Don't subscribe to events on the API server", dest="disable_event_listening", default=False) @@ -128,10 +136,38 @@ class Mount(object): try: self._setup_logging() + except Exception as e: + self.logger.exception("exception during setup: %s", e) + exit(1) + + try: + nofile_limit = resource.getrlimit(resource.RLIMIT_NOFILE) + + minlimit = 10240 + if self.args.file_cache: + # Adjust the file handle limit so it can meet + # the desired cache size. Multiply by 8 because the + # number of 64 MiB cache slots that keepclient + # allocates is RLIMIT_NOFILE / 8 + minlimit = int((self.args.file_cache/(64*1024*1024)) * 8) + + if nofile_limit[0] < minlimit: + resource.setrlimit(resource.RLIMIT_NOFILE, (min(minlimit, nofile_limit[1]), nofile_limit[1])) + + if minlimit > nofile_limit[1]: + self.logger.warning("file handles required to meet --file-cache (%s) exceeds hard file handle limit (%s), cache size will be smaller than requested", minlimit, nofile_limit[1]) + + except Exception as e: + self.logger.warning("unable to adjust file handle limit: %s", e) + + nofile_limit = resource.getrlimit(resource.RLIMIT_NOFILE) + self.logger.info("file cache capped at %s bytes or less based on available disk (RLIMIT_NOFILE is %s)", ((nofile_limit[0]//8)*64*1024*1024), nofile_limit) + + try: self._setup_api() self._setup_mount() except Exception as e: - self.logger.exception("arv-mount: exception during setup: %s", e) + self.logger.exception("exception during setup: %s", e) exit(1) def __enter__(self): @@ -211,12 +247,32 @@ class Mount(object): def _setup_api(self): try: + # default value of file_cache is 0, this tells KeepBlockCache to + # choose a default based on whether disk_cache is enabled or not. + + block_cache = arvados.keep.KeepBlockCache(cache_max=self.args.file_cache, + disk_cache=self.args.disk_cache, + disk_cache_dir=self.args.disk_cache_dir) + + # If there's too many prefetch threads and you + # max out the CPU, delivering data to the FUSE + # layer actually ends up being slower. + # Experimentally, capping 7 threads seems to + # be a sweet spot. + prefetch_threads = min(max((block_cache.cache_max // (64 * 1024 * 1024)) - 1, 1), 7) + self.api = arvados.safeapi.ThreadSafeApiCache( apiconfig=arvados.config.settings(), + api_params={ + 'num_retries': self.args.retries, + }, keep_params={ - 'block_cache': arvados.keep.KeepBlockCache(self.args.file_cache), + 'block_cache': block_cache, + 'num_prefetch_threads': prefetch_threads, 'num_retries': self.args.retries, - }) + }, + version='v1', + ) except KeyError as e: self.logger.error("Missing environment: %s", e) exit(1)