X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/4a832a93cd0baf253575936a79f83bcc4f666a82..bd04725444b7ab15ea81647c519a580ca50a94de:/services/fuse/arvados_fuse/command.py diff --git a/services/fuse/arvados_fuse/command.py b/services/fuse/arvados_fuse/command.py index e275825a61..4c8ae899e1 100644 --- a/services/fuse/arvados_fuse/command.py +++ b/services/fuse/arvados_fuse/command.py @@ -28,99 +28,327 @@ class ArgumentParser(argparse.ArgumentParser): def __init__(self): super(ArgumentParser, self).__init__( parents=[arv_cmd.retry_opt], - description='''Mount Keep data under the local filesystem. Default mode is --home''', - epilog=""" - Note: When using the --exec feature, you must either specify the - mountpoint before --exec, or mark the end of your --exec arguments - with "--". - """) - self.add_argument('--version', action='version', - version=u"%s %s" % (sys.argv[0], __version__), - help='Print version and exit.') - self.add_argument('mountpoint', type=str, help="""Mount point.""") - self.add_argument('--allow-other', action='store_true', - help="""Let other users read the mount""") - self.add_argument('--subtype', type=str, metavar='STRING', - help="""Report mounted filesystem type as "fuse.STRING", instead of just "fuse".""") - - mode = self.add_mutually_exclusive_group() - - mode.add_argument('--all', action='store_const', const='all', dest='mode', - help="""Mount a subdirectory for each mode: home, shared, by_tag, by_id (default if no --mount-* arguments are given).""") - mode.add_argument('--custom', action='store_const', const=None, dest='mode', - help="""Mount a top level meta-directory with subdirectories as specified by additional --mount-* arguments (default if any --mount-* arguments are given).""") - mode.add_argument('--home', action='store_const', const='home', dest='mode', - help="""Mount only the user's home project.""") - mode.add_argument('--shared', action='store_const', const='shared', dest='mode', - help="""Mount only list of projects shared with the user.""") - mode.add_argument('--by-tag', action='store_const', const='by_tag', dest='mode', - help="""Mount subdirectories listed by tag.""") - mode.add_argument('--by-id', action='store_const', const='by_id', dest='mode', - help="""Mount subdirectories listed by portable data hash or uuid.""") - mode.add_argument('--by-pdh', action='store_const', const='by_pdh', dest='mode', - help="""Mount subdirectories listed by portable data hash.""") - mode.add_argument('--project', type=str, metavar='UUID', - help="""Mount the specified project.""") - mode.add_argument('--collection', type=str, metavar='UUID_or_PDH', - help="""Mount only the specified collection.""") - - mounts = self.add_argument_group('Custom mount options') - mounts.add_argument('--mount-by-pdh', - type=str, metavar='PATH', action='append', default=[], - help="Mount each readable collection at mountpoint/PATH/P where P is the collection's portable data hash.") - mounts.add_argument('--mount-by-id', - type=str, metavar='PATH', action='append', default=[], - help="Mount each readable collection at mountpoint/PATH/UUID and mountpoint/PATH/PDH where PDH is the collection's portable data hash and UUID is its UUID.") - mounts.add_argument('--mount-by-tag', - type=str, metavar='PATH', action='append', default=[], - help="Mount all collections with tag TAG at mountpoint/PATH/TAG/UUID.") - mounts.add_argument('--mount-home', - type=str, metavar='PATH', action='append', default=[], - help="Mount the current user's home project at mountpoint/PATH.") - mounts.add_argument('--mount-shared', - type=str, metavar='PATH', action='append', default=[], - help="Mount projects shared with the current user at mountpoint/PATH.") - mounts.add_argument('--mount-tmp', - type=str, metavar='PATH', action='append', default=[], - help="Create a new collection, mount it in read/write mode at mountpoint/PATH, and delete it when unmounting.") - - - self.add_argument('--debug', action='store_true', help="""Debug mode""") - self.add_argument('--logfile', help="""Write debug logs and errors to the specified file (default stderr).""") - self.add_argument('--foreground', action='store_true', help="""Run in foreground (default is to daemonize unless --exec specified)""", default=False) - self.add_argument('--encoding', type=str, help="Character encoding to use for filesystem, default is utf-8 (see Python codec registry for list of available encodings)", default="utf-8") - - self.add_argument('--file-cache', type=int, help="File data cache size, in bytes (default 8 GiB for disk-based cache or 256 MiB with RAM-only cache)", default=0) - self.add_argument('--directory-cache', type=int, help="Directory data cache size, in bytes (default 128 MiB)", default=128*1024*1024) - - cachetype = self.add_mutually_exclusive_group() - cachetype.add_argument('--ram-cache', action='store_false', dest='disk_cache', help="Use in-memory caching only", default=True) - cachetype.add_argument('--disk-cache', action='store_true', dest='disk_cache', help="Use disk based caching (default)", default=True) - - self.add_argument('--disk-cache-dir', type=str, help="Disk cache location (default ~/.cache/arvados/keep)", default=None) - - self.add_argument('--disable-event-listening', action='store_true', help="Don't subscribe to events on the API server", dest="disable_event_listening", default=False) - - self.add_argument('--read-only', action='store_false', help="Mount will be read only (default)", dest="enable_write", default=False) - self.add_argument('--read-write', action='store_true', help="Mount will be read-write", dest="enable_write", default=False) - self.add_argument('--storage-classes', type=str, metavar='CLASSES', help="Specify comma separated list of storage classes to be used when saving data of new collections", default=None) - - self.add_argument('--crunchstat-interval', type=float, help="Write stats to stderr every N seconds (default disabled)", default=0) - - unmount = self.add_mutually_exclusive_group() - unmount.add_argument('--unmount', action='store_true', default=False, - help="Forcefully unmount the specified mountpoint (if it's a fuse mount) and exit. If --subtype is given, unmount only if the mount has the specified subtype. WARNING: This command can affect any kind of fuse mount, not just arv-mount.") - unmount.add_argument('--unmount-all', action='store_true', default=False, - help="Forcefully unmount every fuse mount at or below the specified path and exit. If --subtype is given, unmount only mounts that have the specified subtype. Exit non-zero if any other types of mounts are found at or below the given path. WARNING: This command can affect any kind of fuse mount, not just arv-mount.") - unmount.add_argument('--replace', action='store_true', default=False, - help="If a fuse mount is already present at mountpoint, forcefully unmount it before mounting") - self.add_argument('--unmount-timeout', - type=float, default=2.0, - help="Time to wait for graceful shutdown after --exec program exits and filesystem is unmounted") - - self.add_argument('--exec', type=str, nargs=argparse.REMAINDER, - dest="exec_args", metavar=('command', 'args', '...', '--'), - help="""Mount, run a command, then unmount and exit""") + description="Interact with Arvados data through a local filesystem", + ) + self.add_argument( + '--version', + action='version', + version=u"%s %s" % (sys.argv[0], __version__), + help="Print version and exit", + ) + self.add_argument( + 'mountpoint', + metavar='MOUNT_DIR', + help="Directory path to mount data", + ) + + mode_group = self.add_argument_group("Mount contents") + mode = mode_group.add_mutually_exclusive_group() + mode.add_argument( + '--all', + action='store_const', + const='all', + dest='mode', + help=""" +Mount a subdirectory for each mode: `home`, `shared`, `by_id`, and `by_tag` +(default if no `--mount-*` options are given) +""", + ) + mode.add_argument( + '--custom', + action='store_const', + const=None, + dest='mode', + help=""" +Mount a subdirectory for each mode specified by a `--mount-*` option +(default if any `--mount-*` options are given; +see "Mount custom layout and filtering" section) +""", + ) + mode.add_argument( + '--collection', + metavar='UUID_OR_PDH', + help="Mount the specified collection", + ) + mode.add_argument( + '--home', + action='store_const', + const='home', + dest='mode', + help="Mount your home project", + ) + mode.add_argument( + '--project', + metavar='UUID', + help="Mount the specified project", + ) + mode.add_argument( + '--shared', + action='store_const', + const='shared', + dest='mode', + help="Mount a subdirectory for each project shared with you", + ) + mode.add_argument( + '--by-id', + action='store_const', + const='by_id', + dest='mode', + help=""" +Mount a magic directory where collections and projects are accessible through +subdirectories named after their UUID or portable data hash +""", + ) + mode.add_argument( + '--by-pdh', + action='store_const', + const='by_pdh', + dest='mode', + help=""" +Mount a magic directory where collections are accessible through +subdirectories named after their portable data hash +""", + ) + mode.add_argument( + '--by-tag', + action='store_const', + const='by_tag', + dest='mode', + help="Mount a subdirectory for each tag attached to a collection or project", + ) + + mounts = self.add_argument_group("Mount custom layout and filtering") + mounts.add_argument( + '--filters', + type=arv_cmd.JSONArgument(arv_cmd.validate_filters), + help=""" +Filters to apply to all project, shared, and tag directory contents. +Pass filters as either a JSON string or a path to a JSON file. +The JSON object should be a list of filters in Arvados API list filter syntax. +""", + ) + mounts.add_argument( + '--mount-home', + metavar='PATH', + action='append', + default=[], + help="Make your home project available under the mount at `PATH`", + ) + mounts.add_argument( + '--mount-shared', + metavar='PATH', + action='append', + default=[], + help="Make projects shared with you available under the mount at `PATH`", + ) + mounts.add_argument( + '--mount-tmp', + metavar='PATH', + action='append', + default=[], + help=""" +Make a new temporary writable collection available under the mount at `PATH`. +This collection is deleted when the mount is unmounted. +""", + ) + mounts.add_argument( + '--mount-by-id', + metavar='PATH', + action='append', + default=[], + help=""" +Make a magic directory available under the mount at `PATH` where collections and +projects are accessible through subdirectories named after their UUID or +portable data hash +""", + ) + mounts.add_argument( + '--mount-by-pdh', + metavar='PATH', + action='append', + default=[], + help=""" +Make a magic directory available under the mount at `PATH` where collections +are accessible through subdirectories named after portable data hash +""", + ) + mounts.add_argument( + '--mount-by-tag', + metavar='PATH', + action='append', + default=[], + help=""" +Make a subdirectory for each tag attached to a collection or project available +under the mount at `PATH` +""" , + ) + + perms = self.add_argument_group("Mount access and permissions") + perms.add_argument( + '--allow-other', + action='store_true', + help="Let other users on this system read mounted data (default false)", + ) + perms.add_argument( + '--read-only', + action='store_false', + default=False, + dest='enable_write', + help="Mounted data cannot be modified from the mount (default)", + ) + perms.add_argument( + '--read-write', + action='store_true', + default=False, + dest='enable_write', + help="Mounted data can be modified from the mount", + ) + + lifecycle = self.add_argument_group("Mount lifecycle management") + lifecycle.add_argument( + '--exec', + nargs=argparse.REMAINDER, + dest="exec_args", + help=""" +Mount data, run the specified command, then unmount and exit. +`--exec` reads all remaining options as the command to run, +so it must be the last option you specify. +Either end your command arguments (and other options) with a `--` argument, +or specify `--exec` after your mount point. +""", + ) + lifecycle.add_argument( + '--foreground', + action='store_true', + default=False, + help="Run mount process in the foreground instead of daemonizing (default false)", + ) + lifecycle.add_argument( + '--subtype', + help="Set mounted filesystem type to `fuse.SUBTYPE` (default is just `fuse`)", + ) + unmount = lifecycle.add_mutually_exclusive_group() + unmount.add_argument( + '--replace', + action='store_true', + default=False, + help=""" +If a FUSE mount is already mounted at the given directory, +unmount it before mounting the requested data. +If `--subtype` is specified, unmount only if the mount has that subtype. +WARNING: This command can affect any kind of FUSE mount, not just arv-mount. +""", + ) + unmount.add_argument( + '--unmount', + action='store_true', + default=False, + help=""" +If a FUSE mount is already mounted at the given directory, unmount it and exit. +If `--subtype` is specified, unmount only if the mount has that subtype. +WARNING: This command can affect any kind of FUSE mount, not just arv-mount. +""", + ) + unmount.add_argument( + '--unmount-all', + action='store_true', + default=False, + help=""" +Unmount all FUSE mounts at or below the given directory, then exit. +If `--subtype` is specified, unmount only if the mount has that subtype. +WARNING: This command can affect any kind of FUSE mount, not just arv-mount. +""", + ) + lifecycle.add_argument( + '--unmount-timeout', + type=float, + default=2.0, + metavar='SECONDS', + help=""" +The number of seconds to wait for a clean unmount after an `--exec` command has +exited (default %(default).01f). +After this time, the mount will be forcefully unmounted. +""", + ) + + reporting = self.add_argument_group("Mount logging and statistics") + reporting.add_argument( + '--crunchstat-interval', + type=float, + default=0.0, + metavar='SECONDS', + help="Write stats to stderr every N seconds (default disabled)", + ) + reporting.add_argument( + '--debug', + action='store_true', + help="Log debug information", + ) + reporting.add_argument( + '--logfile', + help="Write debug logs and errors to the specified file (default stderr)", + ) + + cache = self.add_argument_group("Mount local cache setup") + cachetype = cache.add_mutually_exclusive_group() + cachetype.add_argument( + '--disk-cache', + action='store_true', + default=True, + dest='disk_cache', + help="Cache data on the local filesystem (default)", + ) + cachetype.add_argument( + '--ram-cache', + action='store_false', + default=True, + dest='disk_cache', + help="Cache data in memory", + ) + cache.add_argument( + '--disk-cache-dir', + metavar="DIRECTORY", + help="Filesystem cache location (default `~/.cache/arvados/keep`)", + ) + cache.add_argument( + '--directory-cache', + type=int, + default=128*1024*1024, + metavar='BYTES', + help="Size of directory data cache in bytes (default 128 MiB)", + ) + cache.add_argument( + '--file-cache', + type=int, + default=0, + metavar='BYTES', + help=""" +Size of file data cache in bytes +(default 8 GiB for filesystem cache, 256 MiB for memory cache) +""", + ) + + plumbing = self.add_argument_group("Mount interactions with Arvados and Linux") + plumbing.add_argument( + '--disable-event-listening', + action='store_true', + dest='disable_event_listening', + default=False, + help="Don't subscribe to events on the API server to update mount contents", + ) + plumbing.add_argument( + '--encoding', + default="utf-8", + help=""" +Filesystem character encoding +(default %(default)r; specify a name from the Python codec registry) +""", + ) + plumbing.add_argument( + '--storage-classes', + metavar='CLASSES', + help="Comma-separated list of storage classes to request for new collections", + ) class Mount(object): @@ -134,21 +362,40 @@ class Mount(object): if self.args.logfile: self.args.logfile = os.path.realpath(self.args.logfile) + try: + self._setup_logging() + except Exception as e: + self.logger.exception("exception during setup: %s", e) + exit(1) + try: nofile_limit = resource.getrlimit(resource.RLIMIT_NOFILE) - if nofile_limit[0] < 10240: - resource.setrlimit(resource.RLIMIT_NOFILE, (min(10240, nofile_limit[1]), nofile_limit[1])) + + minlimit = 10240 + if self.args.file_cache: + # Adjust the file handle limit so it can meet + # the desired cache size. Multiply by 8 because the + # number of 64 MiB cache slots that keepclient + # allocates is RLIMIT_NOFILE / 8 + minlimit = int((self.args.file_cache/(64*1024*1024)) * 8) + + if nofile_limit[0] < minlimit: + resource.setrlimit(resource.RLIMIT_NOFILE, (min(minlimit, nofile_limit[1]), nofile_limit[1])) + + if minlimit > nofile_limit[1]: + self.logger.warning("file handles required to meet --file-cache (%s) exceeds hard file handle limit (%s), cache size will be smaller than requested", minlimit, nofile_limit[1]) + except Exception as e: - self.logger.warning("arv-mount: unable to adjust file handle limit: %s", e) + self.logger.warning("unable to adjust file handle limit: %s", e) - self.logger.debug("arv-mount: file handle limit is %s", resource.getrlimit(resource.RLIMIT_NOFILE)) + nofile_limit = resource.getrlimit(resource.RLIMIT_NOFILE) + self.logger.info("file cache capped at %s bytes or less based on available disk (RLIMIT_NOFILE is %s)", ((nofile_limit[0]//8)*64*1024*1024), nofile_limit) try: - self._setup_logging() self._setup_api() self._setup_mount() except Exception as e: - self.logger.exception("arv-mount: exception during setup: %s", e) + self.logger.exception("exception during setup: %s", e) exit(1) def __enter__(self): @@ -228,14 +475,28 @@ class Mount(object): def _setup_api(self): try: + # default value of file_cache is 0, this tells KeepBlockCache to + # choose a default based on whether disk_cache is enabled or not. + + block_cache = arvados.keep.KeepBlockCache(cache_max=self.args.file_cache, + disk_cache=self.args.disk_cache, + disk_cache_dir=self.args.disk_cache_dir) + + # Profiling indicates that prefetching has more of a + # negative impact on the read() fast path (by requiring it + # to do more work and take additional locks) than benefit. + # Also, the kernel does some readahead itself, which has a + # similar effect. + prefetch_threads = 0 + self.api = arvados.safeapi.ThreadSafeApiCache( apiconfig=arvados.config.settings(), - # default value of file_cache is 0, this tells KeepBlockCache to - # choose a default based on whether disk_cache is enabled or not. + api_params={ + 'num_retries': self.args.retries, + }, keep_params={ - 'block_cache': arvados.keep.KeepBlockCache(cache_max=self.args.file_cache, - disk_cache=self.args.disk_cache, - disk_cache_dir=self.args.disk_cache_dir), + 'block_cache': block_cache, + 'num_prefetch_threads': prefetch_threads, 'num_retries': self.args.retries, }, version='v1', @@ -267,7 +528,14 @@ class Mount(object): usr = self.api.users().current().execute(num_retries=self.args.retries) now = time.time() dir_class = None - dir_args = [llfuse.ROOT_INODE, self.operations.inodes, self.api, self.args.retries, self.args.enable_write] + dir_args = [ + llfuse.ROOT_INODE, + self.operations.inodes, + self.api, + self.args.retries, + self.args.enable_write, + self.args.filters, + ] mount_readme = False storage_classes = None @@ -333,7 +601,12 @@ class Mount(object): return e = self.operations.inodes.add_entry(Directory( - llfuse.ROOT_INODE, self.operations.inodes, self.api.config, self.args.enable_write)) + llfuse.ROOT_INODE, + self.operations.inodes, + self.api.config, + self.args.enable_write, + self.args.filters, + )) dir_args[0] = e.inode for name in self.args.mount_by_id: