Merge branch 'master' into 13823-bionic
[arvados.git] / services / fuse / arvados_fuse / command.py
1 # Copyright (C) The Arvados Authors. All rights reserved.
2 #
3 # SPDX-License-Identifier: AGPL-3.0
4
5 import argparse
6 import arvados
7 import daemon
8 import llfuse
9 import logging
10 import os
11 import resource
12 import signal
13 import subprocess
14 import sys
15 import time
16
17 import arvados.commands._util as arv_cmd
18 from arvados_fuse import crunchstat
19 from arvados_fuse import *
20 from arvados_fuse.unmount import unmount
21 from arvados_fuse._version import __version__
22
class ArgumentParser(argparse.ArgumentParser):
    """Command-line argument parser for arv-mount.

    Inherits the retry option from ``arv_cmd.retry_opt`` and declares the
    positional mountpoint, the mutually exclusive mount-mode flags, the
    repeatable ``--mount-*`` custom mount options, logging/cache/write
    settings, the unmount flags and the ``--exec`` remainder.
    """

    def __init__(self):
        """Register every option understood by the command."""
        epilog_text = """
    Note: When using the --exec feature, you must either specify the
    mountpoint before --exec, or mark the end of your --exec arguments
    with "--".
            """
        super(ArgumentParser, self).__init__(
            parents=[arv_cmd.retry_opt],
            description='Mount Keep data under the local filesystem.  Default mode is --home',
            epilog=epilog_text)

        self.add_argument(
            '--version', action='version',
            version="%s %s" % (sys.argv[0], __version__),
            help='Print version and exit.')
        self.add_argument('mountpoint', type=str, help="Mount point.")
        self.add_argument(
            '--allow-other', action='store_true',
            help="Let other users read the mount")
        self.add_argument(
            '--subtype', type=str, metavar='STRING',
            help='Report mounted filesystem type as "fuse.STRING", instead of just "fuse".')

        # Mount modes: at most one of these may be given.  The flag-style
        # modes all store a constant into args.mode.
        mode_group = self.add_mutually_exclusive_group()
        mode_flags = (
            ('--all', 'all',
             "Mount a subdirectory for each mode: home, shared, by_tag, by_id (default if no --mount-* arguments are given)."),
            ('--custom', None,
             "Mount a top level meta-directory with subdirectories as specified by additional --mount-* arguments (default if any --mount-* arguments are given)."),
            ('--home', 'home',
             "Mount only the user's home project."),
            ('--shared', 'shared',
             "Mount only list of projects shared with the user."),
            ('--by-tag', 'by_tag',
             "Mount subdirectories listed by tag."),
            ('--by-id', 'by_id',
             "Mount subdirectories listed by portable data hash or uuid."),
            ('--by-pdh', 'by_pdh',
             "Mount subdirectories listed by portable data hash."),
        )
        for flag, mode_value, help_text in mode_flags:
            mode_group.add_argument(
                flag, action='store_const', const=mode_value, dest='mode',
                help=help_text)
        mode_group.add_argument(
            '--project', type=str, metavar='UUID',
            help="Mount the specified project.")
        mode_group.add_argument(
            '--collection', type=str, metavar='UUID_or_PDH',
            help="Mount only the specified collection.")

        # Custom mounts: each option may be repeated and collects PATHs.
        custom_group = self.add_argument_group('Custom mount options')
        custom_mounts = (
            ('--mount-by-pdh',
             "Mount each readable collection at mountpoint/PATH/P where P is the collection's portable data hash."),
            ('--mount-by-id',
             "Mount each readable collection at mountpoint/PATH/UUID and mountpoint/PATH/PDH where PDH is the collection's portable data hash and UUID is its UUID."),
            ('--mount-by-tag',
             "Mount all collections with tag TAG at mountpoint/PATH/TAG/UUID."),
            ('--mount-home',
             "Mount the current user's home project at mountpoint/PATH."),
            ('--mount-shared',
             "Mount projects shared with the current user at mountpoint/PATH."),
            ('--mount-tmp',
             "Create a new collection, mount it in read/write mode at mountpoint/PATH, and delete it when unmounting."),
        )
        for flag, help_text in custom_mounts:
            custom_group.add_argument(
                flag, type=str, metavar='PATH', action='append', default=[],
                help=help_text)

        self.add_argument('--debug', action='store_true', help="Debug mode")
        self.add_argument(
            '--logfile',
            help="Write debug logs and errors to the specified file (default stderr).")
        self.add_argument(
            '--foreground', action='store_true', default=False,
            help="Run in foreground (default is to daemonize unless --exec specified)")
        self.add_argument(
            '--encoding', type=str, default="utf-8",
            help="Character encoding to use for filesystem, default is utf-8 (see Python codec registry for list of available encodings)")

        self.add_argument(
            '--file-cache', type=int, default=256*1024*1024,
            help="File data cache size, in bytes (default 256MiB)")
        self.add_argument(
            '--directory-cache', type=int, default=128*1024*1024,
            help="Directory data cache size, in bytes (default 128MiB)")

        self.add_argument(
            '--disable-event-listening', action='store_true',
            dest="disable_event_listening", default=False,
            help="Don't subscribe to events on the API server")

        # Both switches write to args.enable_write; the default is read-only.
        self.add_argument(
            '--read-only', action='store_false',
            dest="enable_write", default=False,
            help="Mount will be read only (default)")
        self.add_argument(
            '--read-write', action='store_true',
            dest="enable_write", default=False,
            help="Mount will be read-write")

        self.add_argument(
            '--crunchstat-interval', type=float, default=0,
            help="Write stats to stderr every N seconds (default disabled)")

        unmount_group = self.add_mutually_exclusive_group()
        unmount_group.add_argument(
            '--unmount', action='store_true', default=False,
            help="Forcefully unmount the specified mountpoint (if it's a fuse mount) and exit. If --subtype is given, unmount only if the mount has the specified subtype. WARNING: This command can affect any kind of fuse mount, not just arv-mount.")
        unmount_group.add_argument(
            '--unmount-all', action='store_true', default=False,
            help="Forcefully unmount every fuse mount at or below the specified path and exit. If --subtype is given, unmount only mounts that have the specified subtype. Exit non-zero if any other types of mounts are found at or below the given path. WARNING: This command can affect any kind of fuse mount, not just arv-mount.")
        unmount_group.add_argument(
            '--replace', action='store_true', default=False,
            help="If a fuse mount is already present at mountpoint, forcefully unmount it before mounting")
        self.add_argument(
            '--unmount-timeout', type=float, default=2.0,
            help="Time to wait for graceful shutdown after --exec program exits and filesystem is unmounted")

        # Everything after --exec is collected verbatim into args.exec_args.
        self.add_argument(
            '--exec', type=str, nargs=argparse.REMAINDER,
            dest="exec_args", metavar=('command', 'args', '...', '--'),
            help="Mount, run a command, then unmount and exit")
112
113
114 class Mount(object):
115     def __init__(self, args, logger=logging.getLogger('arvados.arv-mount')):
116         self.daemon = False
117         self.logger = logger
118         self.args = args
119         self.listen_for_events = False
120
121         self.args.mountpoint = os.path.realpath(self.args.mountpoint)
122         if self.args.logfile:
123             self.args.logfile = os.path.realpath(self.args.logfile)
124
125         try:
126             self._setup_logging()
127             self._setup_api()
128             self._setup_mount()
129         except Exception as e:
130             self.logger.exception("arv-mount: exception during setup: %s", e)
131             exit(1)
132
133     def __enter__(self):
134         if self.args.replace:
135             unmount(path=self.args.mountpoint,
136                     timeout=self.args.unmount_timeout)
137         llfuse.init(self.operations, self.args.mountpoint, self._fuse_options())
138         if self.daemon:
139             daemon.DaemonContext(
140                 working_directory=os.path.dirname(self.args.mountpoint),
141                 files_preserve=range(
142                     3, resource.getrlimit(resource.RLIMIT_NOFILE)[1])
143             ).open()
144         if self.listen_for_events and not self.args.disable_event_listening:
145             self.operations.listen_for_events()
146         self.llfuse_thread = threading.Thread(None, lambda: self._llfuse_main())
147         self.llfuse_thread.daemon = True
148         self.llfuse_thread.start()
149         self.operations.initlock.wait()
150         return self
151
152     def __exit__(self, exc_type, exc_value, traceback):
153         if self.operations.events:
154             self.operations.events.close(timeout=self.args.unmount_timeout)
155         subprocess.call(["fusermount", "-u", "-z", self.args.mountpoint])
156         self.llfuse_thread.join(timeout=self.args.unmount_timeout)
157         if self.llfuse_thread.is_alive():
158             self.logger.warning("Mount.__exit__:"
159                                 " llfuse thread still alive %fs after umount"
160                                 " -- abandoning and exiting anyway",
161                                 self.args.unmount_timeout)
162
163     def run(self):
164         if self.args.unmount or self.args.unmount_all:
165             unmount(path=self.args.mountpoint,
166                     subtype=self.args.subtype,
167                     timeout=self.args.unmount_timeout,
168                     recursive=self.args.unmount_all)
169         elif self.args.exec_args:
170             self._run_exec()
171         else:
172             self._run_standalone()
173
174     def _fuse_options(self):
175         """FUSE mount options; see mount.fuse(8)"""
176         opts = [optname for optname in ['allow_other', 'debug']
177                 if getattr(self.args, optname)]
178         # Increase default read/write size from 4KiB to 128KiB
179         opts += ["big_writes", "max_read=131072"]
180         if self.args.subtype:
181             opts += ["subtype="+self.args.subtype]
182         return opts
183
184     def _setup_logging(self):
185         # Configure a log handler based on command-line switches.
186         if self.args.logfile:
187             log_handler = logging.FileHandler(self.args.logfile)
188             log_handler.setFormatter(logging.Formatter(
189                 '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
190                 '%Y-%m-%d %H:%M:%S'))
191         else:
192             log_handler = None
193
194         if log_handler is not None:
195             arvados.logger.removeHandler(arvados.log_handler)
196             arvados.logger.addHandler(log_handler)
197
198         if self.args.debug:
199             arvados.logger.setLevel(logging.DEBUG)
200             logging.getLogger('arvados.keep').setLevel(logging.DEBUG)
201             logging.getLogger('arvados.api').setLevel(logging.DEBUG)
202             logging.getLogger('arvados.collection').setLevel(logging.DEBUG)
203             self.logger.debug("arv-mount debugging enabled")
204
205         self.logger.info("%s %s started", sys.argv[0], __version__)
206         self.logger.info("enable write is %s", self.args.enable_write)
207
208     def _setup_api(self):
209         try:
210             self.api = arvados.safeapi.ThreadSafeApiCache(
211                 apiconfig=arvados.config.settings(),
212                 keep_params={
213                     'block_cache': arvados.keep.KeepBlockCache(self.args.file_cache),
214                     'num_retries': self.args.retries,
215                 })
216         except KeyError as e:
217             self.logger.error("Missing environment: %s", e)
218             exit(1)
219         # Do a sanity check that we have a working arvados host + token.
220         self.api.users().current().execute()
221
    def _setup_mount(self):
        """Create the Operations object and populate the root of the mount.

        Depending on the selected mode, the root inode is either a single
        directory object (collection, project, by_id, by_pdh, by_tag, shared
        or home) or a plain Directory holding one entry per --mount-* option.
        Mode 'all' is expressed as a preset list of custom mounts plus a
        README file.

        Sets self.operations and self.listen_for_events, and may rewrite
        self.args.mode and the self.args.mount_* lists.  Exits via sys.exit
        when a mode is combined with --mount-* options.
        """
        self.operations = Operations(
            os.getuid(),
            os.getgid(),
            api_client=self.api,
            encoding=self.args.encoding,
            inode_cache=InodeCache(cap=self.args.directory_cache),
            enable_write=self.args.enable_write)

        # Optional statistics reporter, run in a daemon thread so it never
        # keeps the process alive.
        if self.args.crunchstat_interval:
            statsthread = threading.Thread(
                target=crunchstat.statlogger,
                args=(self.args.crunchstat_interval,
                      self.api.keep,
                      self.operations))
            statsthread.daemon = True
            statsthread.start()

        usr = self.api.users().current().execute(num_retries=self.args.retries)
        now = time.time()
        # dir_class stays None when the root will be a container of custom
        # mounts; dir_args holds the leading positional arguments shared by
        # every directory class constructed below.
        dir_class = None
        dir_args = [llfuse.ROOT_INODE, self.operations.inodes, self.api, self.args.retries]
        mount_readme = False

        if self.args.collection is not None:
            # Set up the request handler with the collection at the root
            # First check that the collection is readable
            self.api.collections().get(uuid=self.args.collection).execute()
            self.args.mode = 'collection'
            dir_class = CollectionDirectory
            dir_args.append(self.args.collection)
        elif self.args.project is not None:
            self.args.mode = 'project'
            dir_class = ProjectDirectory
            dir_args.append(
                self.api.groups().get(uuid=self.args.project).execute(
                    num_retries=self.args.retries))

        if (self.args.mount_by_id or
            self.args.mount_by_pdh or
            self.args.mount_by_tag or
            self.args.mount_home or
            self.args.mount_shared or
            self.args.mount_tmp):
            # Custom mounts only make sense without an explicit mode
            # (--collection and --project have set a mode above).
            if self.args.mode is not None:
                sys.exit(
                    "Cannot combine '{}' mode with custom --mount-* options.".
                    format(self.args.mode))
        elif self.args.mode is None:
            # Neither a mode nor any --mount-* option was given: --all is the
            # default
            self.args.mode = 'all'

        if self.args.mode in ['by_id', 'by_pdh']:
            # Set up the request handler with the 'magic directory' at the root
            dir_class = MagicDirectory
            # Extra positional argument is True only for by_pdh; presumably
            # the pdh_only flag used by keyword below -- TODO confirm.
            dir_args.append(self.args.mode == 'by_pdh')
        elif self.args.mode == 'by_tag':
            dir_class = TagsDirectory
        elif self.args.mode == 'shared':
            dir_class = SharedDirectory
            dir_args.append(usr)
        elif self.args.mode == 'home':
            dir_class = ProjectDirectory
            dir_args.append(usr)
            # Presumably the poll flag (passed as poll=True in the custom
            # mounts below) -- TODO confirm against ProjectDirectory.
            dir_args.append(True)
        elif self.args.mode == 'all':
            # 'all' is implemented as a fixed set of custom mounts + README.
            self.args.mount_by_id = ['by_id']
            self.args.mount_by_tag = ['by_tag']
            self.args.mount_home = ['home']
            self.args.mount_shared = ['shared']
            mount_readme = True

        if dir_class is not None:
            # Single-directory modes: that directory is the root; done.
            ent = dir_class(*dir_args)
            self.operations.inodes.add_entry(ent)
            self.listen_for_events = ent.want_event_subscribe()
            return

        # Custom/all mode: the root is a plain Directory, and every mount
        # below is created with that directory's inode as first argument.
        e = self.operations.inodes.add_entry(Directory(
            llfuse.ROOT_INODE, self.operations.inodes))
        dir_args[0] = e.inode

        for name in self.args.mount_by_id:
            self._add_mount(e, name, MagicDirectory(*dir_args, pdh_only=False))
        for name in self.args.mount_by_pdh:
            self._add_mount(e, name, MagicDirectory(*dir_args, pdh_only=True))
        for name in self.args.mount_by_tag:
            self._add_mount(e, name, TagsDirectory(*dir_args))
        for name in self.args.mount_home:
            self._add_mount(e, name, ProjectDirectory(*dir_args, project_object=usr, poll=True))
        for name in self.args.mount_shared:
            self._add_mount(e, name, SharedDirectory(*dir_args, exclude=usr, poll=True))
        for name in self.args.mount_tmp:
            self._add_mount(e, name, TmpCollectionDirectory(*dir_args))

        if mount_readme:
            text = self._readme_text(
                arvados.config.get('ARVADOS_API_HOST'),
                usr['email'])
            self._add_mount(e, 'README', StringFile(e.inode, text, now))
322
323     def _add_mount(self, tld, name, ent):
324         if name in ['', '.', '..'] or '/' in name:
325             sys.exit("Mount point '{}' is not supported.".format(name))
326         tld._entries[name] = self.operations.inodes.add_entry(ent)
327         self.listen_for_events = (self.listen_for_events or ent.want_event_subscribe())
328
329     def _readme_text(self, api_host, user_email):
330         return '''
331 Welcome to Arvados!  This directory provides file system access to
332 files and objects available on the Arvados installation located at
333 '{}' using credentials for user '{}'.
334
335 From here, the following directories are available:
336
337   by_id/     Access to Keep collections by uuid or portable data hash (see by_id/README for details).
338   by_tag/    Access to Keep collections organized by tag.
339   home/      The contents of your home project.
340   shared/    Projects shared with you.
341
342 '''.format(api_host, user_email)
343
344     def _run_exec(self):
345         rc = 255
346         with self:
347             try:
348                 sp = subprocess.Popen(self.args.exec_args, shell=False)
349
350                 # forward signals to the process.
351                 signal.signal(signal.SIGINT, lambda signum, frame: sp.send_signal(signum))
352                 signal.signal(signal.SIGTERM, lambda signum, frame: sp.send_signal(signum))
353                 signal.signal(signal.SIGQUIT, lambda signum, frame: sp.send_signal(signum))
354
355                 # wait for process to complete.
356                 rc = sp.wait()
357
358                 # restore default signal handlers.
359                 signal.signal(signal.SIGINT, signal.SIG_DFL)
360                 signal.signal(signal.SIGTERM, signal.SIG_DFL)
361                 signal.signal(signal.SIGQUIT, signal.SIG_DFL)
362             except Exception as e:
363                 self.logger.exception(
364                     'arv-mount: exception during exec %s', self.args.exec_args)
365                 try:
366                     rc = e.errno
367                 except AttributeError:
368                     pass
369         exit(rc)
370
371     def _run_standalone(self):
372         try:
373             self.daemon = not self.args.foreground
374             with self:
375                 self.llfuse_thread.join(timeout=None)
376         except Exception as e:
377             self.logger.exception('arv-mount: exception during mount: %s', e)
378             exit(getattr(e, 'errno', 1))
379         exit(0)
380
381     def _llfuse_main(self):
382         try:
383             llfuse.main()
384         except:
385             llfuse.close(unmount=False)
386             raise
387         llfuse.close()