1 # Copyright (C) The Arvados Authors. All rights reserved.
3 # SPDX-License-Identifier: AGPL-3.0
5 from future.utils import native_str
6 from builtins import range
7 from builtins import object
21 import arvados.commands._util as arv_cmd
22 from arvados_fuse import crunchstat
23 from arvados_fuse import *
24 from arvados_fuse.unmount import unmount
25 from arvados_fuse._version import __version__
# NOTE(review): this region is a sampled/garbled fragment of the
# ArgumentParser subclass used by arv-mount.  Many add_argument() calls
# are missing their flag/option lines, so comments below are hedged
# observations only -- do not treat them as a complete option list.
27 class ArgumentParser(argparse.ArgumentParser):
# Presumably inside __init__ (the def line is missing from this view):
# inherits retry options shared by arvados command-line tools.
29 super(ArgumentParser, self).__init__(
30 parents=[arv_cmd.retry_opt],
32 Mount Keep data under the local filesystem. Default mode is --home
35 Note: When using the --exec feature, you must either specify the
36 mountpoint before --exec, or mark the end of your --exec arguments
42 version=u"%s %s" % (sys.argv[0], __version__),
43 help='Print version and exit.',
48 help="""Mount point.""",
# Mutually-exclusive "mount contents" modes (home/shared/by_tag/by_id/
# collection/project/custom) -- flag names largely missing from view.
51 mode_group = self.add_argument_group("Mount contents")
52 mode = mode_group.add_mutually_exclusive_group()
59 Mount a subdirectory for each mode: home, shared, by_tag, by_id
60 (default if no --mount-* arguments are given).
69 Mount a top level meta-directory with subdirectories as specified by additional --mount-* arguments
70 (default if any --mount-* arguments are given).
75 metavar='UUID_OR_PDH',
76 help="""Mount only the specified collection.""",
83 help="""Mount only the user's home project.""",
88 help="""Mount the specified project.""",
95 help="""Mount only list of projects shared with the user.""",
102 help="""Mount subdirectories listed by portable data hash or uuid.""",
106 action='store_const',
109 help="""Mount subdirectories listed by portable data hash.""",
113 action='store_const',
116 help="""Mount subdirectories listed by tag.""",
# Custom layout (--mount-*) and API-filter options.
119 mounts = self.add_argument_group("Mount custom layout and filtering")
122 type=arv_cmd.JSONArgument(arv_cmd.validate_filters),
124 Filters to apply to all project, shared, and tag directory contents.
125 Pass filters as either a JSON string or a path to a JSON file.
126 The JSON object should be a list of filters in Arvados API list filter syntax.
134 help="Mount the current user's home project at mountpoint/PATH.",
142 Mount each readable collection at mountpoint/PATH/UUID and mountpoint/PATH/PDH
143 where PDH is the collection's portable data hash and UUID is its UUID.
152 Mount each readable collection at mountpoint/PATH/P
153 where P is the collection's portable data hash.
161 help="Mount projects shared with the current user at mountpoint/PATH.",
168 help="Mount all collections with tag TAG at mountpoint/PATH/TAG/UUID.",
176 Create a new collection, mount it in read/write mode at mountpoint/PATH,
177 and delete it when unmounting.
# Access/permission flags (read-only is the documented default).
181 perms = self.add_argument_group("Mount access and permissions")
185 help="""Let other users read the mount""",
189 action='store_false',
192 help="Mount will be read only (default)",
199 help="Mount will be read-write",
# Lifecycle: --exec, foreground/daemonize, subtype tagging, and the
# mutually-exclusive unmount/replace/unmount-all group.
202 lifecycle = self.add_argument_group("Mount lifecycle management")
203 lifecycle.add_argument(
205 nargs=argparse.REMAINDER,
207 help="""Mount, run a command, then unmount and exit""",
209 lifecycle.add_argument(
213 help="""Run in foreground (default is to daemonize unless --exec specified)""",
215 lifecycle.add_argument(
217 help="""Report mounted filesystem type as "fuse.SUBTYPE", instead of just "fuse".""",
219 unmount = lifecycle.add_mutually_exclusive_group()
220 unmount.add_argument(
225 If a fuse mount is already present at mountpoint, forcefully unmount it before mounting
228 unmount.add_argument(
233 Forcefully unmount the specified mountpoint (if it's a fuse mount) and exit.
234 If --subtype is given, unmount only if the mount has the specified subtype.
235 WARNING: This command can affect any kind of fuse mount, not just arv-mount.
238 unmount.add_argument(
243 Forcefully unmount every fuse mount at or below the specified path and exit.
244 If --subtype is given, unmount only mounts that have the specified subtype.
245 Exit non-zero if any other types of mounts are found at or below the given path.
246 WARNING: This command can affect any kind of fuse mount, not just arv-mount.
249 lifecycle.add_argument(
255 Time to wait for graceful shutdown after --exec program exits and filesystem is unmounted
# Logging/statistics flags (crunchstat interval, debug, logfile).
259 reporting = self.add_argument_group("Mount logging and statistics")
260 reporting.add_argument(
261 '--crunchstat-interval',
265 help="Write stats to stderr every N seconds (default disabled)",
267 reporting.add_argument(
270 help="""Debug mode""",
272 reporting.add_argument(
274 help="""Write debug logs and errors to the specified file (default stderr).""",
# Local Keep cache configuration: disk vs RAM cache, location, sizes.
277 cache = self.add_argument_group("Mount local cache setup")
278 cachetype = cache.add_mutually_exclusive_group()
279 cachetype.add_argument(
284 help="Use disk based caching (default)",
286 cachetype.add_argument(
288 action='store_false',
291 help="Use in-memory caching only",
296 help="Disk cache location (default ~/.cache/arvados/keep)",
301 default=128*1024*1024,
303 help="Directory data cache size, in bytes (default 128 MiB)",
311 File data cache size, in bytes
312 (default 8 GiB for disk-based cache or 256 MiB with RAM-only cache)
# Plumbing flags: websocket event listening, filesystem encoding,
# storage classes for new collections.
316 plumbing = self.add_argument_group("Mount interactions with Arvados and Linux")
317 plumbing.add_argument(
318 '--disable-event-listening',
320 dest='disable_event_listening',
322 help="Don't subscribe to events on the API server",
324 plumbing.add_argument(
328 Character encoding to use for filesystem, default is utf-8
329 (see Python codec registry for list of available encodings)
332 plumbing.add_argument(
335 help="Specify comma separated list of storage classes to be used when saving data of new collections",
# NOTE(review): fragment of Mount.__init__ (orig 340-383); the `try`
# statements that pair with the visible `except` clauses are missing
# from this sampled view.
# Sets up logging, normalizes paths, and raises RLIMIT_NOFILE so the
# Keep block cache can open enough file handles for --file-cache.
340 def __init__(self, args, logger=logging.getLogger('arvados.arv-mount')):
341 self.daemon = False
# NOTE(review): the line above ("341 self.daemon = False") is not in
# the sampled view; kept lines continue below.
344 self.listen_for_events = False
# Canonicalize user-supplied paths before daemonizing changes cwd.
346 self.args.mountpoint = os.path.realpath(self.args.mountpoint)
347 if self.args.logfile:
348 self.args.logfile = os.path.realpath(self.args.logfile)
351 self._setup_logging()
352 except Exception as e:
353 self.logger.exception("exception during setup: %s", e)
357 nofile_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
360 if self.args.file_cache:
361 # Adjust the file handle limit so it can meet
362 # the desired cache size. Multiply by 8 because the
363 # number of 64 MiB cache slots that keepclient
364 # allocates is RLIMIT_NOFILE / 8
365 minlimit = int((self.args.file_cache/(64*1024*1024)) * 8)
367 if nofile_limit[0] < minlimit:
# Raise the soft limit toward minlimit, never past the hard limit.
368 resource.setrlimit(resource.RLIMIT_NOFILE, (min(minlimit, nofile_limit[1]), nofile_limit[1]))
370 if minlimit > nofile_limit[1]:
371 self.logger.warning("file handles required to meet --file-cache (%s) exceeds hard file handle limit (%s), cache size will be smaller than requested", minlimit, nofile_limit[1])
# Best-effort: a failure to raise the limit is logged, not fatal.
373 except Exception as e:
374 self.logger.warning("unable to adjust file handle limit: %s", e)
# Re-read the (possibly updated) limit to report the effective cap.
376 nofile_limit = resource.getrlimit(resource.RLIMIT_NOFILE)
377 self.logger.info("file cache capped at %s bytes or less based on available disk (RLIMIT_NOFILE is %s)", ((nofile_limit[0]//8)*64*1024*1024), nofile_limit)
382 except Exception as e:
383 self.logger.exception("exception during setup: %s", e)
# NOTE(review): fragment (orig 387-402), presumably the body that
# performs the actual FUSE mount and starts the llfuse main-loop thread.
# If --replace was given, clear any stale mount first.
387 if self.args.replace:
388 unmount(path=self.args.mountpoint,
389 timeout=self.args.unmount_timeout)
390 llfuse.init(self.operations, native_str(self.args.mountpoint), self._fuse_options())
# Daemonize: cwd moves to the mountpoint's parent; keep all fds >= 3
# open (llfuse/cache handles) across the fork.
392 daemon.DaemonContext(
393 working_directory=os.path.dirname(self.args.mountpoint),
394 files_preserve=list(range(
395 3, resource.getrlimit(resource.RLIMIT_NOFILE)[1]))
# Subscribe to API-server events unless disabled on the command line.
397 if self.listen_for_events and not self.args.disable_event_listening:
398 self.operations.listen_for_events()
# Run llfuse's main loop on a daemon thread; wait until the
# filesystem reports it is initialized before returning.
399 self.llfuse_thread = threading.Thread(None, lambda: self._llfuse_main())
400 self.llfuse_thread.daemon = True
401 self.llfuse_thread.start()
402 self.operations.initlock.wait()
def __exit__(self, exc_type, exc_value, traceback):
    """Tear the mount down: stop event delivery, unmount, reap the llfuse thread."""
    timeout = self.args.unmount_timeout
    # Close the websocket event subscription first so no more updates
    # arrive while the filesystem is being dismantled.
    if self.operations.events:
        self.operations.events.close(timeout=timeout)
    # Lazy unmount (-z): detach the mountpoint immediately even if it
    # is still busy; the kernel cleans up once it becomes free.
    subprocess.call(["fusermount", "-u", "-z", self.args.mountpoint])
    # Give the llfuse main loop a bounded chance to exit cleanly.
    self.llfuse_thread.join(timeout=timeout)
    if self.llfuse_thread.is_alive():
        self.logger.warning("Mount.__exit__:"
                            " llfuse thread still alive %fs after umount"
                            " -- abandoning and exiting anyway",
                            timeout)
# NOTE(review): fragment of the run() dispatcher (orig 417-425); the
# def line and the --exec branch body are missing from this view.
# --unmount / --unmount-all: just unmount and exit, no mounting.
417 if self.args.unmount or self.args.unmount_all:
418 unmount(path=self.args.mountpoint,
419 subtype=self.args.subtype,
420 timeout=self.args.unmount_timeout,
421 recursive=self.args.unmount_all)
# --exec: mount, run the command, unmount (body not visible here).
422 elif self.args.exec_args:
# Default: mount and stay resident until unmounted.
425 self._run_standalone()
427 def _fuse_options(self):
428 """FUSE mount options; see mount.fuse(8)"""
429 opts = [optname for optname in ['allow_other', 'debug']
430 if getattr(self.args, optname)]
431 # Increase default read/write size from 4KiB to 128KiB
432 opts += ["big_writes", "max_read=131072"]
433 if self.args.subtype:
434 opts += ["subtype="+self.args.subtype]
# NOTE(review): fragment (orig 437-459); the stderr/else branch and the
# debug-mode `if` line are missing from this sampled view.
437 def _setup_logging(self):
438 # Configure a log handler based on command-line switches.
439 if self.args.logfile:
440 log_handler = logging.FileHandler(self.args.logfile)
441 log_handler.setFormatter(logging.Formatter(
442 '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
443 '%Y-%m-%d %H:%M:%S'))
# Swap the arvados package's default handler for ours, if we made one.
447 if log_handler is not None:
448 arvados.logger.removeHandler(arvados.log_handler)
449 arvados.logger.addHandler(log_handler)
# Presumably guarded by `if self.args.debug:` (guard line not visible):
# crank up verbosity across the arvados client loggers.
452 arvados.logger.setLevel(logging.DEBUG)
453 logging.getLogger('arvados.keep').setLevel(logging.DEBUG)
454 logging.getLogger('arvados.api').setLevel(logging.DEBUG)
455 logging.getLogger('arvados.collection').setLevel(logging.DEBUG)
456 self.logger.debug("arv-mount debugging enabled")
458 self.logger.info("%s %s started", sys.argv[0], __version__)
459 self.logger.info("enable write is %s", self.args.enable_write)
# NOTE(review): fragment (orig 461-493); the `try` line and parts of the
# ThreadSafeApiCache keyword arguments are missing from this view.
461 def _setup_api(self):
463 # default value of file_cache is 0, this tells KeepBlockCache to
464 # choose a default based on whether disk_cache is enabled or not.
466 block_cache = arvados.keep.KeepBlockCache(cache_max=self.args.file_cache,
467 disk_cache=self.args.disk_cache,
468 disk_cache_dir=self.args.disk_cache_dir)
470 # If there's too many prefetch threads and you
471 # max out the CPU, delivering data to the FUSE
472 # layer actually ends up being slower.
473 # Experimentally, capping 7 threads seems to
# One prefetch thread per 64 MiB cache slot, minus one, clamped to 1..7.
475 prefetch_threads = min(max((block_cache.cache_max // (64 * 1024 * 1024)) - 1, 1), 7)
477 self.api = arvados.safeapi.ThreadSafeApiCache(
478 apiconfig=arvados.config.settings(),
480 'num_retries': self.args.retries,
483 'block_cache': block_cache,
484 'num_prefetch_threads': prefetch_threads,
485 'num_retries': self.args.retries,
# Missing ARVADOS_API_HOST / token config raises KeyError here.
489 except KeyError as e:
490 self.logger.error("Missing environment: %s", e)
492 # Do a sanity check that we have a working arvados host + token.
493 self.api.users().current().execute()
# NOTE(review): large sampled fragment (orig 495-614) of the directory
# tree construction.  Several lines are missing (Operations kwargs,
# dir_args initialization, `else:` branches, `now` assignment), so the
# comments below hedge where structure cannot be confirmed.
495 def _setup_mount(self):
496 self.operations = Operations(
500 encoding=self.args.encoding,
501 inode_cache=InodeCache(cap=self.args.directory_cache),
502 enable_write=self.args.enable_write)
# Optional crunchstat reporter thread (interval from command line).
504 if self.args.crunchstat_interval:
505 statsthread = threading.Thread(
506 target=crunchstat.statlogger,
507 args=(self.args.crunchstat_interval,
510 statsthread.daemon = True
513 usr = self.api.users().current().execute(num_retries=self.args.retries)
518 self.operations.inodes,
521 self.args.enable_write,
# --storage-classes: comma-separated list, whitespace stripped.
526 storage_classes = None
527 if self.args.storage_classes is not None:
528 storage_classes = self.args.storage_classes.replace(' ', '').split(',')
529 self.logger.info("Storage classes requested for new collections: {}".format(', '.join(storage_classes)))
# Single-root modes: --collection / --project pick the root dir class.
531 if self.args.collection is not None:
532 # Set up the request handler with the collection at the root
533 # First check that the collection is readable
534 self.api.collections().get(uuid=self.args.collection).execute()
535 self.args.mode = 'collection'
536 dir_class = CollectionDirectory
537 dir_args.append(self.args.collection)
538 elif self.args.project is not None:
539 self.args.mode = 'project'
540 dir_class = ProjectDirectory
542 self.api.groups().get(uuid=self.args.project).execute(
543 num_retries=self.args.retries))
# Custom --mount-* flags conflict with a named mode; otherwise the
# implicit default mode is 'all'.
545 if (self.args.mount_by_id or
546 self.args.mount_by_pdh or
547 self.args.mount_by_tag or
548 self.args.mount_home or
549 self.args.mount_shared or
550 self.args.mount_tmp):
551 if self.args.mode is not None:
553 "Cannot combine '{}' mode with custom --mount-* options.".
554 format(self.args.mode))
555 elif self.args.mode is None:
556 # If no --mount-custom or custom mount args, --all is the default
557 self.args.mode = 'all'
559 if self.args.mode in ['by_id', 'by_pdh']:
560 # Set up the request handler with the 'magic directory' at the root
561 dir_class = MagicDirectory
562 dir_args.append(self.args.mode == 'by_pdh')
563 elif self.args.mode == 'by_tag':
564 dir_class = TagsDirectory
565 elif self.args.mode == 'shared':
566 dir_class = SharedDirectory
568 elif self.args.mode == 'home':
569 dir_class = ProjectDirectory
571 dir_args.append(True)
# 'all' mode expands into the standard set of custom mounts below.
572 elif self.args.mode == 'all':
573 self.args.mount_by_id = ['by_id']
574 self.args.mount_by_tag = ['by_tag']
575 self.args.mount_home = ['home']
576 self.args.mount_shared = ['shared']
# Single-root case: instantiate the chosen class as the filesystem root.
# Tags/Collection directories do not take storage_classes.
579 if dir_class is not None:
580 if dir_class in [TagsDirectory, CollectionDirectory]:
581 ent = dir_class(*dir_args)
583 ent = dir_class(*dir_args, storage_classes=storage_classes)
584 self.operations.inodes.add_entry(ent)
585 self.listen_for_events = ent.want_event_subscribe()
# Multi-mount case: a plain Directory at the root, with one child per
# --mount-* argument, each wired up via _add_mount().
588 e = self.operations.inodes.add_entry(Directory(
590 self.operations.inodes,
592 self.args.enable_write,
595 dir_args[0] = e.inode
597 for name in self.args.mount_by_id:
598 self._add_mount(e, name, MagicDirectory(*dir_args, pdh_only=False, storage_classes=storage_classes))
599 for name in self.args.mount_by_pdh:
600 self._add_mount(e, name, MagicDirectory(*dir_args, pdh_only=True))
601 for name in self.args.mount_by_tag:
602 self._add_mount(e, name, TagsDirectory(*dir_args))
603 for name in self.args.mount_home:
604 self._add_mount(e, name, ProjectDirectory(*dir_args, project_object=usr, poll=True, storage_classes=storage_classes))
605 for name in self.args.mount_shared:
606 self._add_mount(e, name, SharedDirectory(*dir_args, exclude=usr, poll=True, storage_classes=storage_classes))
607 for name in self.args.mount_tmp:
608 self._add_mount(e, name, TmpCollectionDirectory(*dir_args, storage_classes=storage_classes))
# Top-level README describing the mounted layout.
611 text = self._readme_text(
612 arvados.config.get('ARVADOS_API_HOST'),
614 self._add_mount(e, 'README', StringFile(e.inode, text, now))
616 def _add_mount(self, tld, name, ent):
617 if name in ['', '.', '..'] or '/' in name:
618 sys.exit("Mount point '{}' is not supported.".format(name))
619 tld._entries[name] = self.operations.inodes.add_entry(ent)
620 self.listen_for_events = (self.listen_for_events or ent.want_event_subscribe())
622 def _readme_text(self, api_host, user_email):
624 Welcome to Arvados! This directory provides file system access to
625 files and objects available on the Arvados installation located at
626 '{}' using credentials for user '{}'.
628 From here, the following directories are available:
630 by_id/ Access to Keep collections by uuid or portable data hash (see by_id/README for details).
631 by_tag/ Access to Keep collections organized by tag.
632 home/ The contents of your home project.
633 shared/ Projects shared with you.
635 '''.format(api_host, user_email)
# NOTE(review): fragment of the --exec runner (orig 641-660); the def
# line, the sp.wait() call, and the exit-status handling are missing
# from this sampled view.
# Launch the user's command directly (no shell) while mounted.
641 sp = subprocess.Popen(self.args.exec_args, shell=False)
643 # forward signals to the process.
644 signal.signal(signal.SIGINT, lambda signum, frame: sp.send_signal(signum))
645 signal.signal(signal.SIGTERM, lambda signum, frame: sp.send_signal(signum))
646 signal.signal(signal.SIGQUIT, lambda signum, frame: sp.send_signal(signum))
648 # wait for process to complete.
651 # restore default signal handlers.
652 signal.signal(signal.SIGINT, signal.SIG_DFL)
653 signal.signal(signal.SIGTERM, signal.SIG_DFL)
654 signal.signal(signal.SIGQUIT, signal.SIG_DFL)
655 except Exception as e:
656 self.logger.exception(
657 'arv-mount: exception during exec %s', self.args.exec_args)
# Presumably recovering the child's exit code; the matching try body
# is not visible here.
660 except AttributeError:
# NOTE(review): fragments of _run_standalone (orig 664-671) and
# _llfuse_main (orig 674-678); the `with`/`try` lines and the llfuse
# main-loop call are missing from this sampled view.
664 def _run_standalone(self):
# Stay resident (daemonize unless --foreground) until unmounted.
666 self.daemon = not self.args.foreground
# Block forever on the llfuse thread; unmounting ends it.
668 self.llfuse_thread.join(timeout=None)
669 except Exception as e:
670 self.logger.exception('arv-mount: exception during mount: %s', e)
# Propagate the OS error code when available, else exit 1.
671 exit(getattr(e, 'errno', 1))
674 def _llfuse_main(self):
# Runs on the mount thread; close without a second unmount attempt
# (the kernel-side detach is handled elsewhere).
678 llfuse.close(unmount=False)