#!/usr/bin/env python

import argparse
import arvados
import daemon
import llfuse     # used directly for llfuse.init/main; the wildcard import below may also provide it
import logging
import os
import signal
import subprocess
import sys        # needed for sys.stderr in Stat.update()
import threading  # needed for the crunchstat and llfuse worker threads
import time

import arvados.commands._util as arv_cmd
from arvados_fuse import *
from arvados.safeapi import ThreadSafeApiCache
import arvados.keep

logger = logging.getLogger('arvados.arv-mount')


class Stat(object):
    """Track a pair of monotonically increasing counters and report deltas."""

    def __init__(self, prefix, interval, egr_name, ing_name, egr_func, ing_func):
        self.prefix = prefix
        self.interval = interval
        self.egr_name = egr_name
        self.ing_name = ing_name
        self.egress = egr_func
        self.ingress = ing_func
        self.egr_prev = self.egress()
        self.ing_prev = self.ingress()

    def update(self):
        egr = self.egress()
        ing = self.ingress()

        delta = " -- interval %.4f seconds %d %s %d %s" % (self.interval,
                                                           egr - self.egr_prev, self.egr_name,
                                                           ing - self.ing_prev, self.ing_name)

        sys.stderr.write("crunchstat: %s %d %s %d %s%s\n" % (self.prefix,
                                                             egr, self.egr_name,
                                                             ing, self.ing_name,
                                                             delta))

        self.egr_prev = egr
        self.ing_prev = ing


def statlogger(interval, keep, ops):
    """Write crunchstat-style Keep and FUSE statistics to stderr every `interval` seconds."""
    calls = Stat("keepcalls", interval, "put", "get",
                 keep.put_counter.get, keep.get_counter.get)
    net = Stat("net:keep0", interval, "tx", "rx",
               keep.upload_counter.get, keep.download_counter.get)
    cache = Stat("keepcache", interval, "hit", "miss",
                 keep.hits_counter.get, keep.misses_counter.get)
    fuseops = Stat("fuseops", interval, "write", "read",
                   ops.write_ops_counter.get, ops.read_ops_counter.get)
    blk = Stat("blkio:0:0", interval, "write", "read",
               ops.write_counter.get, ops.read_counter.get)
    while True:
        time.sleep(interval)
        calls.update()
        net.update()
        cache.update()
        fuseops.update()
        blk.update()


if __name__ == '__main__':
    # Handle command line parameters
    parser = argparse.ArgumentParser(
        parents=[arv_cmd.retry_opt],
        description='''Mount Keep data under the local filesystem.  Default mode is --all.''',
        epilog="""
Note: When using the --exec feature, you must either specify the
mountpoint before --exec, or mark the end of your --exec arguments
with "--".
""")
    parser.add_argument('mountpoint', type=str, help="""Mount point.""")
    parser.add_argument('--allow-other', action='store_true',
                        help="""Let other users read the mount""")

    mount_mode = parser.add_mutually_exclusive_group()
    mount_mode.add_argument('--all', action='store_true',
                            help="""Mount a subdirectory for each mode: home, shared, by_tag, by_id (default).""")
    mount_mode.add_argument('--home', action='store_true',
                            help="""Mount only the user's home project.""")
    mount_mode.add_argument('--shared', action='store_true',
                            help="""Mount only the list of projects shared with the user.""")
    mount_mode.add_argument('--by-tag', action='store_true',
                            help="""Mount subdirectories listed by tag.""")
    mount_mode.add_argument('--by-id', action='store_true',
                            help="""Mount subdirectories listed by portable data hash or uuid.""")
    mount_mode.add_argument('--by-pdh', action='store_true',
                            help="""Mount subdirectories listed by portable data hash.""")
    mount_mode.add_argument('--project', type=str,
                            help="""Mount a specific project.""")
    mount_mode.add_argument('--collection', type=str,
                            help="""Mount only the specified collection.""")

    parser.add_argument('--debug', action='store_true', help="""Debug mode""")
    parser.add_argument('--logfile',
                        help="""Write debug logs and errors to the specified file (default stderr).""")
    parser.add_argument('--foreground', action='store_true', default=False,
                        help="""Run in foreground (default is to daemonize unless --exec specified)""")
    parser.add_argument('--encoding', type=str, default="utf-8",
                        help="Character encoding to use for filesystem, default is utf-8 (see Python codec registry for list of available encodings)")
    parser.add_argument('--file-cache', type=int, default=256*1024*1024,
                        help="File data cache size, in bytes (default 256MiB)")
    parser.add_argument('--directory-cache', type=int, default=128*1024*1024,
                        help="Directory data cache size, in bytes (default 128MiB)")
    parser.add_argument('--read-only', action='store_false', dest="enable_write", default=False,
                        help="Mount will be read only (default)")
    parser.add_argument('--read-write', action='store_true', dest="enable_write", default=False,
                        help="Mount will be read-write")
    parser.add_argument('--crunchstat-interval', type=float, default=0,
                        help="Write stats to stderr every N seconds (default disabled)")
    parser.add_argument('--exec', type=str, nargs=argparse.REMAINDER,
                        dest="exec_args", metavar=('command', 'args', '...', '--'),
                        help="""Mount, run a command, then unmount and exit""")

    args = parser.parse_args()
    args.mountpoint = os.path.realpath(args.mountpoint)
    if args.logfile:
        args.logfile = os.path.realpath(args.logfile)

    # Daemonize as early as possible, so we don't accidentally close
    # file descriptors we're using.
    if not (args.exec_args or args.foreground):
        os.chdir(args.mountpoint)
        daemon_ctx = daemon.DaemonContext(working_directory='.')
        daemon_ctx.open()
    else:
        daemon_ctx = None

    # Configure a log handler based on command-line switches: --logfile sends
    # output to that file, a daemonized mount without --logfile discards it,
    # and otherwise the arvados SDK's default stderr handler stays in place.
    if args.logfile:
        log_handler = logging.FileHandler(args.logfile)
    elif daemon_ctx:
        log_handler = logging.NullHandler()
    else:
        log_handler = None

    if log_handler is not None:
        arvados.logger.removeHandler(arvados.log_handler)
        arvados.logger.addHandler(log_handler)

    if args.debug:
        arvados.logger.setLevel(logging.DEBUG)
        logger.debug("arv-mount debugging enabled")

    logger.info("enable write is %s", args.enable_write)

    try:
        # Create the request handler
        operations = Operations(os.getuid(),
                                os.getgid(),
                                encoding=args.encoding,
                                inode_cache=InodeCache(cap=args.directory_cache),
                                enable_write=args.enable_write)
        api = ThreadSafeApiCache(apiconfig=arvados.config.settings(),
                                 keep_params={"block_cache": arvados.keep.KeepBlockCache(args.file_cache)})

        if args.crunchstat_interval:
            statsthread = threading.Thread(target=statlogger,
                                           args=(args.crunchstat_interval, api.keep, operations))
            statsthread.daemon = True
            statsthread.start()

        usr = api.users().current().execute(num_retries=args.retries)
        now = time.time()
        dir_class = None
        dir_args = [llfuse.ROOT_INODE, operations.inodes, api, args.retries]
        if args.by_id or args.by_pdh:
            # Set up the request handler with the 'magic directory' at the root
            dir_class = MagicDirectory
            dir_args.append(args.by_pdh)
        elif args.by_tag:
            dir_class = TagsDirectory
        elif args.shared:
            dir_class = SharedDirectory
            dir_args.append(usr)
        elif args.home:
            dir_class = ProjectDirectory
            dir_args.append(usr)
            dir_args.append(True)
        elif args.collection is not None:
            # Set up the request handler with the collection at the root
            dir_class = CollectionDirectory
            dir_args.append(args.collection)
        elif args.project is not None:
            dir_class = ProjectDirectory
            dir_args.append(api.groups().get(uuid=args.project).execute(
                num_retries=args.retries))

        if dir_class is not None:
            operations.inodes.add_entry(dir_class(*dir_args))
        else:
            e = operations.inodes.add_entry(Directory(llfuse.ROOT_INODE, operations.inodes))
            dir_args[0] = e.inode

            e._entries['by_id'] = operations.inodes.add_entry(MagicDirectory(*dir_args))
            e._entries['by_tag'] = operations.inodes.add_entry(TagsDirectory(*dir_args))

            dir_args.append(usr)
            dir_args.append(True)
            e._entries['home'] = operations.inodes.add_entry(ProjectDirectory(*dir_args))
            e._entries['shared'] = operations.inodes.add_entry(SharedDirectory(*dir_args))

            text = '''
Welcome to Arvados!  This directory provides file system access to files and
objects available on the Arvados installation located at '{}'
using credentials for user '{}'.

From here, the following directories are available:

  by_id/     Access to Keep collections by uuid or portable data hash
             (see by_id/README for details).
  by_tag/    Access to Keep collections organized by tag.
  home/      The contents of your home project.
  shared/    Projects shared with you.
'''.format(arvados.config.get('ARVADOS_API_HOST'), usr['email'])

            e._entries["README"] = operations.inodes.add_entry(StringFile(e.inode, text, now))

    except Exception:
        logger.exception("arv-mount: exception during API setup")
        exit(1)

    # FUSE options, see mount.fuse(8)
    opts = [optname for optname in ['allow_other', 'debug']
            if getattr(args, optname)]

    # Increase default read/write size from 4KiB to 128KiB
    opts += ["big_writes", "max_read=131072"]

    if args.exec_args:
        # Initialize the fuse connection
        llfuse.init(operations, args.mountpoint, opts)

        # Subscribe to change events from API server
        if not args.by_pdh:
            operations.listen_for_events(api)

        t = threading.Thread(None, lambda: llfuse.main())
        t.start()

        # wait until the driver is finished initializing
        operations.initlock.wait()

        rc = 255
        try:
            sp = subprocess.Popen(args.exec_args, shell=False)

            # forward signals to the process.
            signal.signal(signal.SIGINT, lambda signum, frame: sp.send_signal(signum))
            signal.signal(signal.SIGTERM, lambda signum, frame: sp.send_signal(signum))
            signal.signal(signal.SIGQUIT, lambda signum, frame: sp.send_signal(signum))

            # wait for process to complete.
            rc = sp.wait()

            # restore default signal handlers.
            signal.signal(signal.SIGINT, signal.SIG_DFL)
            signal.signal(signal.SIGTERM, signal.SIG_DFL)
            signal.signal(signal.SIGQUIT, signal.SIG_DFL)
        except Exception as e:
            logger.exception('arv-mount: exception during exec %s', args.exec_args)
            try:
                rc = e.errno
            except AttributeError:
                pass
        finally:
            subprocess.call(["fusermount", "-u", "-z", args.mountpoint])
            operations.destroy()

        exit(rc)
    else:
        try:
            llfuse.init(operations, args.mountpoint, opts)

            # Subscribe to change events from API server
            operations.listen_for_events(api)

            llfuse.main()
        except Exception as e:
            logger.exception('arv-mount: exception during mount')
            exit(getattr(e, 'errno', 1))
        finally:
            operations.destroy()
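
# Hypothetical usage examples; the mount point, uuid, and command below are
# placeholders for illustration only, not part of this script:
#
#   arv-mount /mnt/arvados                        # default layout: by_id/ by_tag/ home/ shared/
#   arv-mount --home /mnt/arvados                 # mount only your home project
#   arv-mount --read-write --project <uuid> /mnt/arvados
#   arv-mount /mnt/arvados --exec ls /mnt/arvados/by_id
#
# A backgrounded mount can be detached later with "fusermount -u /mnt/arvados".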