Merge branch '18842-arv-mount-disk-config' refs #18842
[arvados.git] / services / fuse / arvados_fuse / command.py
index 66f8a4d39319fb9bdc8f864962bc9c02a0d57ac3..994c998823905e4f2398b15eb911768de6e03aa5 100644 (file)
@@ -1,3 +1,10 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+from future.utils import native_str
+from builtins import range
+from builtins import object
 import argparse
 import arvados
 import daemon
@@ -13,6 +20,7 @@ import time
 import arvados.commands._util as arv_cmd
 from arvados_fuse import crunchstat
 from arvados_fuse import *
+from arvados_fuse.unmount import unmount
 from arvados_fuse._version import __version__
 
 class ArgumentParser(argparse.ArgumentParser):
@@ -26,7 +34,7 @@ class ArgumentParser(argparse.ArgumentParser):
     with "--".
             """)
         self.add_argument('--version', action='version',
-                          version="%s %s" % (sys.argv[0], __version__),
+                          version=u"%s %s" % (sys.argv[0], __version__),
                           help='Print version and exit.')
         self.add_argument('mountpoint', type=str, help="""Mount point.""")
         self.add_argument('--allow-other', action='store_true',
@@ -75,21 +83,36 @@ class ArgumentParser(argparse.ArgumentParser):
                             type=str, metavar='PATH', action='append', default=[],
                             help="Create a new collection, mount it in read/write mode at mountpoint/PATH, and delete it when unmounting.")
 
+
         self.add_argument('--debug', action='store_true', help="""Debug mode""")
         self.add_argument('--logfile', help="""Write debug logs and errors to the specified file (default stderr).""")
         self.add_argument('--foreground', action='store_true', help="""Run in foreground (default is to daemonize unless --exec specified)""", default=False)
         self.add_argument('--encoding', type=str, help="Character encoding to use for filesystem, default is utf-8 (see Python codec registry for list of available encodings)", default="utf-8")
 
-        self.add_argument('--file-cache', type=int, help="File data cache size, in bytes (default 256MiB)", default=256*1024*1024)
-        self.add_argument('--directory-cache', type=int, help="Directory data cache size, in bytes (default 128MiB)", default=128*1024*1024)
+        self.add_argument('--file-cache', type=int, help="File data cache size, in bytes (default 8 GiB for disk-based cache or 256 MiB with RAM-only cache)", default=0)
+        self.add_argument('--directory-cache', type=int, help="Directory data cache size, in bytes (default 128 MiB)", default=128*1024*1024)
+
+        cachetype = self.add_mutually_exclusive_group()
+        cachetype.add_argument('--ram-cache', action='store_false', dest='disk_cache', help="Use in-memory caching only", default=True)
+        cachetype.add_argument('--disk-cache', action='store_true', dest='disk_cache', help="Use disk based caching (default)", default=True)
+
+        self.add_argument('--disk-cache-dir', type=str, help="Disk cache location (default ~/.cache/arvados/keep)", default=None)
 
         self.add_argument('--disable-event-listening', action='store_true', help="Don't subscribe to events on the API server", dest="disable_event_listening", default=False)
 
         self.add_argument('--read-only', action='store_false', help="Mount will be read only (default)", dest="enable_write", default=False)
         self.add_argument('--read-write', action='store_true', help="Mount will be read-write", dest="enable_write", default=False)
+        self.add_argument('--storage-classes', type=str, metavar='CLASSES', help="Specify comma separated list of storage classes to be used when saving data of new collections", default=None)
 
         self.add_argument('--crunchstat-interval', type=float, help="Write stats to stderr every N seconds (default disabled)", default=0)
 
+        unmount = self.add_mutually_exclusive_group()
+        unmount.add_argument('--unmount', action='store_true', default=False,
+                             help="Forcefully unmount the specified mountpoint (if it's a fuse mount) and exit. If --subtype is given, unmount only if the mount has the specified subtype. WARNING: This command can affect any kind of fuse mount, not just arv-mount.")
+        unmount.add_argument('--unmount-all', action='store_true', default=False,
+                             help="Forcefully unmount every fuse mount at or below the specified path and exit. If --subtype is given, unmount only mounts that have the specified subtype. Exit non-zero if any other types of mounts are found at or below the given path. WARNING: This command can affect any kind of fuse mount, not just arv-mount.")
+        unmount.add_argument('--replace', action='store_true', default=False,
+                             help="If a fuse mount is already present at mountpoint, forcefully unmount it before mounting")
         self.add_argument('--unmount-timeout',
                           type=float, default=2.0,
                           help="Time to wait for graceful shutdown after --exec program exits and filesystem is unmounted")
@@ -101,6 +124,7 @@ class ArgumentParser(argparse.ArgumentParser):
 
 class Mount(object):
     def __init__(self, args, logger=logging.getLogger('arvados.arv-mount')):
+        self.daemon = False
         self.logger = logger
         self.args = args
         self.listen_for_events = False
@@ -118,7 +142,16 @@ class Mount(object):
             exit(1)
 
     def __enter__(self):
-        llfuse.init(self.operations, self.args.mountpoint, self._fuse_options())
+        if self.args.replace:
+            unmount(path=self.args.mountpoint,
+                    timeout=self.args.unmount_timeout)
+        llfuse.init(self.operations, native_str(self.args.mountpoint), self._fuse_options())
+        if self.daemon:
+            daemon.DaemonContext(
+                working_directory=os.path.dirname(self.args.mountpoint),
+                files_preserve=list(range(
+                    3, resource.getrlimit(resource.RLIMIT_NOFILE)[1]))
+            ).open()
         if self.listen_for_events and not self.args.disable_event_listening:
             self.operations.listen_for_events()
         self.llfuse_thread = threading.Thread(None, lambda: self._llfuse_main())
@@ -139,7 +172,12 @@ class Mount(object):
                                 self.args.unmount_timeout)
 
     def run(self):
-        if self.args.exec_args:
+        if self.args.unmount or self.args.unmount_all:
+            unmount(path=self.args.mountpoint,
+                    subtype=self.args.subtype,
+                    timeout=self.args.unmount_timeout,
+                    recursive=self.args.unmount_all)
+        elif self.args.exec_args:
             self._run_exec()
         else:
             self._run_standalone()
@@ -175,15 +213,24 @@ class Mount(object):
             logging.getLogger('arvados.collection').setLevel(logging.DEBUG)
             self.logger.debug("arv-mount debugging enabled")
 
+        self.logger.info("%s %s started", sys.argv[0], __version__)
         self.logger.info("enable write is %s", self.args.enable_write)
 
     def _setup_api(self):
-        self.api = arvados.safeapi.ThreadSafeApiCache(
-            apiconfig=arvados.config.settings(),
-            keep_params={
-                'block_cache': arvados.keep.KeepBlockCache(self.args.file_cache),
-                'num_retries': self.args.retries,
-            })
+        try:
+            self.api = arvados.safeapi.ThreadSafeApiCache(
+                apiconfig=arvados.config.settings(),
+                # The default value of file_cache is 0, which tells KeepBlockCache to
+                # choose a default based on whether disk_cache is enabled.
+                keep_params={
+                    'block_cache': arvados.keep.KeepBlockCache(cache_max=self.args.file_cache,
+                                                               disk_cache=self.args.disk_cache,
+                                                               disk_cache_dir=self.args.disk_cache_dir),
+                    'num_retries': self.args.retries,
+                })
+        except KeyError as e:
+            self.logger.error("Missing environment: %s", e)
+            exit(1)
         # Do a sanity check that we have a working arvados host + token.
         self.api.users().current().execute()
 
@@ -208,9 +255,14 @@ class Mount(object):
         usr = self.api.users().current().execute(num_retries=self.args.retries)
         now = time.time()
         dir_class = None
-        dir_args = [llfuse.ROOT_INODE, self.operations.inodes, self.api, self.args.retries]
+        dir_args = [llfuse.ROOT_INODE, self.operations.inodes, self.api, self.args.retries, self.args.enable_write]
         mount_readme = False
 
+        storage_classes = None
+        if self.args.storage_classes is not None:
+            storage_classes = self.args.storage_classes.replace(' ', '').split(',')
+            self.logger.info("Storage classes requested for new collections: {}".format(', '.join(storage_classes)))
+
         if self.args.collection is not None:
             # Set up the request handler with the collection at the root
             # First check that the collection is readable
@@ -260,27 +312,30 @@ class Mount(object):
             mount_readme = True
 
         if dir_class is not None:
-            ent = dir_class(*dir_args)
+            if dir_class in [TagsDirectory, CollectionDirectory]:
+                ent = dir_class(*dir_args)
+            else:
+                ent = dir_class(*dir_args, storage_classes=storage_classes)
             self.operations.inodes.add_entry(ent)
             self.listen_for_events = ent.want_event_subscribe()
             return
 
         e = self.operations.inodes.add_entry(Directory(
-            llfuse.ROOT_INODE, self.operations.inodes))
+            llfuse.ROOT_INODE, self.operations.inodes, self.api.config, self.args.enable_write))
         dir_args[0] = e.inode
 
         for name in self.args.mount_by_id:
-            self._add_mount(e, name, MagicDirectory(*dir_args, pdh_only=False))
+            self._add_mount(e, name, MagicDirectory(*dir_args, pdh_only=False, storage_classes=storage_classes))
         for name in self.args.mount_by_pdh:
             self._add_mount(e, name, MagicDirectory(*dir_args, pdh_only=True))
         for name in self.args.mount_by_tag:
             self._add_mount(e, name, TagsDirectory(*dir_args))
         for name in self.args.mount_home:
-            self._add_mount(e, name, ProjectDirectory(*dir_args, project_object=usr, poll=True))
+            self._add_mount(e, name, ProjectDirectory(*dir_args, project_object=usr, poll=True, storage_classes=storage_classes))
         for name in self.args.mount_shared:
-            self._add_mount(e, name, SharedDirectory(*dir_args, exclude=usr, poll=True))
+            self._add_mount(e, name, SharedDirectory(*dir_args, exclude=usr, poll=True, storage_classes=storage_classes))
         for name in self.args.mount_tmp:
-            self._add_mount(e, name, TmpCollectionDirectory(*dir_args))
+            self._add_mount(e, name, TmpCollectionDirectory(*dir_args, storage_classes=storage_classes))
 
         if mount_readme:
             text = self._readme_text(
@@ -338,20 +393,9 @@ From here, the following directories are available:
 
     def _run_standalone(self):
         try:
-            llfuse.init(self.operations, self.args.mountpoint, self._fuse_options())
-
-            if not self.args.foreground:
-                self.daemon_ctx = daemon.DaemonContext(
-                    working_directory=os.path.dirname(self.args.mountpoint),
-                    files_preserve=range(
-                        3, resource.getrlimit(resource.RLIMIT_NOFILE)[1]))
-                self.daemon_ctx.open()
-
-            # Subscribe to change events from API server
-            if self.listen_for_events and not self.args.disable_event_listening:
-                self.operations.listen_for_events()
-
-            self._llfuse_main()
+            self.daemon = not self.args.foreground
+            with self:
+                self.llfuse_thread.join(timeout=None)
         except Exception as e:
             self.logger.exception('arv-mount: exception during mount: %s', e)
             exit(getattr(e, 'errno', 1))