import subprocess
import sys
import time
+import resource
import arvados.commands._util as arv_cmd
from arvados_fuse import crunchstat
def __init__(self):
    """Build the command-line parser and register every supported option.

    The retry option is inherited from ``arv_cmd.retry_opt`` via ``parents``.
    Options are registered in this order: general options, the mutually
    exclusive mount-mode flags, the custom ``--mount-*`` options, logging,
    cache, write-mode, unmount, filter, and ``--exec`` options.
    """
    super(ArgumentParser, self).__init__(
        parents=[arv_cmd.retry_opt],
        description="""
Mount Keep data under the local filesystem. Default mode is --home
""",
        epilog="""
Note: When using the --exec feature, you must either specify the
mountpoint before --exec, or mark the end of your --exec arguments
with "--".
""")

    # --- General options -------------------------------------------------
    self.add_argument(
        '--version',
        action='version',
        version=u"%s %s" % (sys.argv[0], __version__),
        help='Print version and exit.',
    )
    self.add_argument(
        'mountpoint',
        help="""Mount point.""",
    )
    self.add_argument(
        '--allow-other',
        action='store_true',
        help="""Let other users read the mount""",
    )
    self.add_argument(
        '--subtype',
        metavar='STRING',
        help="""Report mounted filesystem type as "fuse.STRING", instead of just "fuse".""",
    )

    # --- Mount mode ------------------------------------------------------
    # At most one of these may be given.  The store_const flags all write
    # to ``mode``; --project and --collection store their value under
    # their own destination instead.
    mode = self.add_mutually_exclusive_group()
    mode.add_argument(
        '--all',
        action='store_const',
        const='all',
        dest='mode',
        help="""
Mount a subdirectory for each mode: home, shared, by_tag, by_id
(default if no --mount-* arguments are given).
""",
    )
    mode.add_argument(
        '--custom',
        action='store_const',
        const=None,
        dest='mode',
        help="""
Mount a top level meta-directory with subdirectories as specified by additional --mount-* arguments
(default if any --mount-* arguments are given).
""",
    )
    mode.add_argument(
        '--home',
        action='store_const',
        const='home',
        dest='mode',
        help="""Mount only the user's home project.""",
    )
    mode.add_argument(
        '--shared',
        action='store_const',
        const='shared',
        dest='mode',
        help="""Mount only list of projects shared with the user.""",
    )
    mode.add_argument(
        '--by-tag',
        action='store_const',
        const='by_tag',
        dest='mode',
        help="""Mount subdirectories listed by tag.""",
    )
    mode.add_argument(
        '--by-id',
        action='store_const',
        const='by_id',
        dest='mode',
        help="""Mount subdirectories listed by portable data hash or uuid.""",
    )
    mode.add_argument(
        '--by-pdh',
        action='store_const',
        const='by_pdh',
        dest='mode',
        help="""Mount subdirectories listed by portable data hash.""",
    )
    mode.add_argument(
        '--project',
        metavar='UUID',
        help="""Mount the specified project.""",
    )
    mode.add_argument(
        '--collection',
        metavar='UUID_or_PDH',
        help="""Mount only the specified collection.""",
    )

    # --- Custom mount options --------------------------------------------
    # Each option may be repeated; the given PATH values accumulate in a
    # list.
    mounts = self.add_argument_group('Custom mount options')
    mounts.add_argument(
        '--mount-by-pdh',
        metavar='PATH',
        action='append',
        default=[],
        help="""
Mount each readable collection at mountpoint/PATH/P
where P is the collection's portable data hash.
""",
    )
    mounts.add_argument(
        '--mount-by-id',
        metavar='PATH',
        action='append',
        default=[],
        help="""
Mount each readable collection at mountpoint/PATH/UUID and mountpoint/PATH/PDH
where PDH is the collection's portable data hash and UUID is its UUID.
""",
    )
    mounts.add_argument(
        '--mount-by-tag',
        metavar='PATH',
        action='append',
        default=[],
        help="Mount all collections with tag TAG at mountpoint/PATH/TAG/UUID.",
    )
    mounts.add_argument(
        '--mount-home',
        metavar='PATH',
        action='append',
        default=[],
        help="Mount the current user's home project at mountpoint/PATH.",
    )
    mounts.add_argument(
        '--mount-shared',
        metavar='PATH',
        action='append',
        default=[],
        help="Mount projects shared with the current user at mountpoint/PATH.",
    )
    mounts.add_argument(
        '--mount-tmp',
        metavar='PATH',
        action='append',
        default=[],
        help="""
Create a new collection, mount it in read/write mode at mountpoint/PATH,
and delete it when unmounting.
""",
    )

    # --- Logging and process behaviour -----------------------------------
    self.add_argument(
        '--debug',
        action='store_true',
        help="""Debug mode""",
    )
    self.add_argument(
        '--logfile',
        help="""Write debug logs and errors to the specified file (default stderr).""",
    )
    self.add_argument(
        '--foreground',
        action='store_true',
        default=False,
        help="""Run in foreground (default is to daemonize unless --exec specified)""",
    )
    self.add_argument(
        '--encoding',
        default="utf-8",
        # The help text previously ended with a stray double quote that
        # showed up in --help output; it has been removed.
        help="""
Character encoding to use for filesystem, default is utf-8
(see Python codec registry for list of available encodings)
""",
    )

    # --- Cache options ---------------------------------------------------
    # --file-cache defaults to 0; per the help text the effective default
    # depends on whether the disk or RAM cache is in use.
    self.add_argument(
        '--file-cache',
        type=int,
        default=0,
        help="""
File data cache size, in bytes
(default 8 GiB for disk-based cache or 256 MiB with RAM-only cache)
""",
    )
    self.add_argument(
        '--directory-cache',
        type=int,
        default=128*1024*1024,
        help="Directory data cache size, in bytes (default 128 MiB)",
    )

    # Both flags write to ``disk_cache`` (default True): --ram-cache
    # clears it, --disk-cache sets it.
    cachetype = self.add_mutually_exclusive_group()
    cachetype.add_argument(
        '--ram-cache',
        action='store_false',
        default=True,
        dest='disk_cache',
        help="Use in-memory caching only",
    )
    cachetype.add_argument(
        '--disk-cache',
        action='store_true',
        default=True,
        dest='disk_cache',
        help="Use disk based caching (default)",
    )

    self.add_argument(
        '--disk-cache-dir',
        help="Disk cache location (default ~/.cache/arvados/keep)",
    )

    self.add_argument(
        '--disable-event-listening',
        action='store_true',
        dest='disable_event_listening',
        default=False,
        help="Don't subscribe to events on the API server",
    )

    # --- Write mode ------------------------------------------------------
    # Both flags write to ``enable_write`` (default False, i.e. read-only).
    self.add_argument(
        '--read-only',
        action='store_false',
        default=False,
        dest='enable_write',
        help="Mount will be read only (default)",
    )
    self.add_argument(
        '--read-write',
        action='store_true',
        default=False,
        dest='enable_write',
        help="Mount will be read-write",
    )
    self.add_argument(
        '--storage-classes',
        metavar='CLASSES',
        help="Specify comma separated list of storage classes to be used when saving data of new collections",
    )

    self.add_argument(
        '--crunchstat-interval',
        type=float,
        default=0.0,
        help="Write stats to stderr every N seconds (default disabled)",
    )

    # --- Unmount options -------------------------------------------------
    unmount = self.add_mutually_exclusive_group()
    unmount.add_argument(
        '--unmount',
        action='store_true',
        default=False,
        help="""
Forcefully unmount the specified mountpoint (if it's a fuse mount) and exit.
If --subtype is given, unmount only if the mount has the specified subtype.
WARNING: This command can affect any kind of fuse mount, not just arv-mount.
""",
    )
    unmount.add_argument(
        '--unmount-all',
        action='store_true',
        default=False,
        help="""
Forcefully unmount every fuse mount at or below the specified path and exit.
If --subtype is given, unmount only mounts that have the specified subtype.
Exit non-zero if any other types of mounts are found at or below the given path.
WARNING: This command can affect any kind of fuse mount, not just arv-mount.
""",
    )
    unmount.add_argument(
        '--replace',
        action='store_true',
        default=False,
        help="""
If a fuse mount is already present at mountpoint, forcefully unmount it before mounting
""",
    )
    self.add_argument(
        '--unmount-timeout',
        type=float,
        default=2.0,
        help="""
Time to wait for graceful shutdown after --exec program exits and filesystem is unmounted
""",
    )

    # --- Filters and --exec ----------------------------------------------
    self.add_argument(
        '--filters',
        type=arv_cmd.JSONArgument(arv_cmd.validate_filters),
        help="""
Filters to apply to all project, shared, and tag directory contents.
Pass filters as either a JSON string or a path to a JSON file.
The JSON object should be a list of filters in Arvados API list filter syntax.
""",
    )
    # REMAINDER makes --exec swallow every argument that follows it, which
    # is why the epilog tells users to give the mountpoint first or to
    # terminate the command with "--".
    self.add_argument(
        '--exec',
        nargs=argparse.REMAINDER,
        dest="exec_args",
        metavar=('command', 'args', '...', '--'),
        help="""Mount, run a command, then unmount and exit""",
    )
class Mount(object):
try:
self._setup_logging()
+ except Exception as e:
+ self.logger.exception("exception during setup: %s", e)
+ exit(1)
+
+ try:
+ nofile_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
+
+ minlimit = 10240
+ if self.args.file_cache:
+ # Adjust the file handle limit so it can meet
+ # the desired cache size. Multiply by 8 because the
+ # number of 64 MiB cache slots that keepclient
+ # allocates is RLIMIT_NOFILE / 8
+ minlimit = int((self.args.file_cache/(64*1024*1024)) * 8)
+
+ if nofile_limit[0] < minlimit:
+ resource.setrlimit(resource.RLIMIT_NOFILE, (min(minlimit, nofile_limit[1]), nofile_limit[1]))
+
+ if minlimit > nofile_limit[1]:
+ self.logger.warning("file handles required to meet --file-cache (%s) exceeds hard file handle limit (%s), cache size will be smaller than requested", minlimit, nofile_limit[1])
+
+ except Exception as e:
+ self.logger.warning("unable to adjust file handle limit: %s", e)
+
+ nofile_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
+ self.logger.info("file cache capped at %s bytes or less based on available disk (RLIMIT_NOFILE is %s)", ((nofile_limit[0]//8)*64*1024*1024), nofile_limit)
+
+ try:
self._setup_api()
self._setup_mount()
except Exception as e:
- self.logger.exception("arv-mount: exception during setup: %s", e)
+ self.logger.exception("exception during setup: %s", e)
exit(1)
def __enter__(self):
def _setup_api(self):
    """Create the Arvados API client and store it on ``self.api``.

    A ``KeepBlockCache`` is built from the ``file_cache``, ``disk_cache``
    and ``disk_cache_dir`` arguments, the number of Keep prefetch threads
    is derived from the resulting cache size, and both are handed to
    ``ThreadSafeApiCache`` together with the configured retry count.

    If building the client raises ``KeyError``, the error is logged as
    missing environment and the process exits with status 1.
    """
    try:
        # file_cache defaults to 0, which tells KeepBlockCache to pick
        # its own default depending on whether disk_cache is enabled.
        block_cache = arvados.keep.KeepBlockCache(
            cache_max=self.args.file_cache,
            disk_cache=self.args.disk_cache,
            disk_cache_dir=self.args.disk_cache_dir,
        )

        # With too many prefetch threads the CPU gets maxed out and
        # delivering data to the FUSE layer actually ends up slower.
        # Experimentally, capping at 7 threads seems to be a sweet spot.
        # The count is one less than the number of 64 MiB units in the
        # cache, clamped to the range 1..7.
        prefetch_threads = min(max((block_cache.cache_max // (64 * 1024 * 1024)) - 1, 1), 7)

        self.api = arvados.safeapi.ThreadSafeApiCache(
            apiconfig=arvados.config.settings(),
            api_params={
                'num_retries': self.args.retries,
            },
            keep_params={
                'block_cache': block_cache,
                'num_prefetch_threads': prefetch_threads,
                'num_retries': self.args.retries,
            },
            version='v1',
        )
    except KeyError as e:
        self.logger.error("Missing environment: %s", e)
        # sys.exit rather than the site-provided exit() builtin, which is
        # meant for interactive use and is not always defined.
        sys.exit(1)
usr = self.api.users().current().execute(num_retries=self.args.retries)
now = time.time()
dir_class = None
- dir_args = [llfuse.ROOT_INODE, self.operations.inodes, self.api, self.args.retries]
+ dir_args = [
+ llfuse.ROOT_INODE,
+ self.operations.inodes,
+ self.api,
+ self.args.retries,
+ self.args.enable_write,
+ self.args.filters,
+ ]
mount_readme = False
storage_classes = None
return
e = self.operations.inodes.add_entry(Directory(
- llfuse.ROOT_INODE, self.operations.inodes, self.api.config))
+ llfuse.ROOT_INODE,
+ self.operations.inodes,
+ self.api.config,
+ self.args.enable_write,
+ self.args.filters,
+ ))
dir_args[0] = e.inode
for name in self.args.mount_by_id: