From 04a8e038125c6ffa3c912ba6b1fd0b31af01e7ad Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 29 Jan 2015 17:13:25 -0500 Subject: [PATCH] 4904: Eliminate downtime switching collections. * Now re-uses CollectionDirectory and loads new collection contents instead of unmounting and remounting. Now sends sighup instead of restarting Docker if the image is the same. Almost no downtime switching collections unless Docker image changes as well. * Refactor into functions instead executing directly as a module. * Use arvados.arv-web logger --- docker/arv-web/apache2_foreground.sh | 3 +- docker/build_tools/Makefile | 6 + services/arv-web/arv-web.py | 222 +++++++++++------- .../arv-web/sample-static-page/docker_image | 2 +- services/fuse/arvados_fuse/__init__.py | 1 + 5 files changed, 143 insertions(+), 91 deletions(-) diff --git a/docker/arv-web/apache2_foreground.sh b/docker/arv-web/apache2_foreground.sh index fc6028ea83..76766a6ce3 100755 --- a/docker/arv-web/apache2_foreground.sh +++ b/docker/arv-web/apache2_foreground.sh @@ -1,7 +1,8 @@ #! /bin/bash read pid cmd state ppid pgrp session tty_nr tpgid rest < /proc/self/stat -trap "kill -TERM -$pgrp; exit" EXIT TERM KILL SIGKILL SIGTERM SIGQUIT +trap "kill -HUP -$pgrp" HUP +trap "kill -TERM -$pgrp; exit" EXIT TERM QUIT source /etc/apache2/envvars /usr/sbin/apache2 -D FOREGROUND diff --git a/docker/build_tools/Makefile b/docker/build_tools/Makefile index d92349c50b..9d93d2bdef 100644 --- a/docker/build_tools/Makefile +++ b/docker/build_tools/Makefile @@ -60,6 +60,8 @@ SLURM_DEPS = slurm/Dockerfile config.yml $(SLURM_GENERATED) JOBS_DEPS = jobs/Dockerfile +ARV_WEB_DEPS = arv-web/Dockerfile arv-web/apache2_foreground.sh arv-web/apache2_vhost + JAVA_BWA_SAMTOOLS_DEPS = java-bwa-samtools/Dockerfile API_DEPS = api/* config.yml $(API_GENERATED) @@ -209,6 +211,10 @@ sso-image: passenger-image $(SSO_DEPS) $(DOCKER_BUILD) -t arvados/sso sso date >sso-image +arv-web-image: passenger-image $(ARV_WEB_DEPS) + $(DOCKER_BUILD) -t arvados/arv-web arv-web + date >arv-web-image + # ============================================================ # The arvados/base image is the base Debian image plus packages # that are dependencies for every Arvados service. diff --git a/services/arv-web/arv-web.py b/services/arv-web/arv-web.py index c04b29eddc..6a4408b510 100755 --- a/services/arv-web/arv-web.py +++ b/services/arv-web/arv-web.py @@ -12,45 +12,27 @@ import argparse import logging import signal import sys +import functools -logging.basicConfig(level=logging.INFO) - -parser = argparse.ArgumentParser() -parser.add_argument('--project', type=str, required=True, help="Project to watch") -parser.add_argument('--port', type=int, default=8080, help="Local bind port") -parser.add_argument('--image', type=str, help="Docker image to run") - -args = parser.parse_args() - -api = SafeApi(arvados.config) -project = args.project -docker_image = args.image -port = args.port -evqueue = Queue.Queue() - -def run_fuse_mount(collection): - global api - +def run_fuse_mount(api, collection): mountdir = tempfile.mkdtemp() - operations = Operations(os.getuid(), os.getgid(), "utf-8", True) - operations.inodes.add_entry(CollectionDirectory(llfuse.ROOT_INODE, operations.inodes, api, 2, collection)) + operations = Operations(os.getuid(), os.getgid(), "utf-8") + cdir = CollectionDirectory(llfuse.ROOT_INODE, operations.inodes, api, 2, collection) + operations.inodes.add_entry(cdir) # Initialize the fuse connection llfuse.init(operations, mountdir, ['allow_other']) - t = threading.Thread(None, lambda: llfuse.main()) + t = threading.Thread(None, llfuse.main) t.start() # wait until the driver is finished initializing operations.initlock.wait() - return mountdir - -def on_message(ev): - global project - global evqueue + return (mountdir, cdir) +def on_message(project, evqueue, ev): if 'event_type' in ev: old_attr = None if 'old_attributes' in ev['properties'] and ev['properties']['old_attributes']: @@ -60,80 +42,142 @@ def on_message(ev): return et = ev['event_type'] - if ev['event_type'] == 'update' and ev['properties']['new_attributes']['owner_uuid'] != ev['properties']['old_attributes']['owner_uuid']: - if args.project == ev['properties']['new_attributes']['owner_uuid']: - et = 'add' - else: + if ev['event_type'] == 'update': + if ev['properties']['new_attributes']['owner_uuid'] != ev['properties']['old_attributes']['owner_uuid']: + if args.project_uuid == ev['properties']['new_attributes']['owner_uuid']: + et = 'add' + else: + et = 'remove' + if ev['properties']['new_attributes']['expires_at'] is not None: et = 'remove' evqueue.put((project, et, ev['object_uuid'])) -collection = api.collections().list(filters=[["owner_uuid", "=", project]], - limit=1, - order='modified_at desc').execute()['items'][0]['uuid'] +def main(argv): + logger = logging.getLogger('arvados.arv-web') + logger.setLevel(logging.INFO) -ws = arvados.events.subscribe(api, [["object_uuid", "is_a", "arvados#collection"]], on_message) + parser = argparse.ArgumentParser() + parser.add_argument('--project-uuid', type=str, required=True, help="Project uuid to watch") + parser.add_argument('--port', type=int, default=8080, help="Port to listen on (default 8080)") + parser.add_argument('--image', type=str, help="Docker image to run") -signal.signal(signal.SIGTERM, lambda signal, frame: sys.exit(0)) + args = parser.parse_args(argv) -loop = True -cid = None -while loop: - loop = False - logging.info("Mounting %s" % collection) - mountdir = run_fuse_mount(collection) + api = SafeApi(arvados.config) + project = args.project_uuid + docker_image = args.image + port = args.port + evqueue = Queue.Queue() + + collections = api.collections().list(filters=[["owner_uuid", "=", project]], + limit=1, + order='modified_at desc').execute()['items'] + newcollection = collections[0]['uuid'] if len(collections) > 0 else None + collection = None + + ws = arvados.events.subscribe(api, [["object_uuid", "is_a", "arvados#collection"]], functools.partial(on_message, project, evqueue)) + + signal.signal(signal.SIGTERM, lambda signal, frame: sys.exit(0)) + + loop = True + cid = None + prev_docker_image = None + mountdir = None try: - if not args.image: - if os.path.exists(os.path.join(mountdir, "docker_image")): - with open(os.path.join(mountdir, "docker_image")) as di: - docker_image = di.read().strip() - else: - logging.error("Collection must contain a file 'docker_image' or must specify --image on the command line.") - sys.exit(1) - - logging.info("Starting docker container %s" % docker_image) - cid = subprocess.check_output(["docker", "run", - "--detach=true", - "--publish=%i:80" % (port), - "--volume=%s:/mnt:ro" % mountdir, - docker_image]) - cid = cid.rstrip() - logging.info("Container id is %s" % cid) - - logging.info("Waiting for events") - running = True - loop = True - while running: + while loop: + loop = False + if newcollection != collection: + collection = newcollection + if not mountdir: + (mountdir, cdir) = run_fuse_mount(api, collection) + + with llfuse.lock: + cdir.clear() + if collection: + logger.info("Mounting %s", collection) + cdir.collection_locator = collection + cdir.collection_object = None + cdir.clear() + try: - eq = evqueue.get(True, 1) - logging.info("%s %s" % (eq[1], eq[2])) - newcollection = collection - if eq[1] in ('add', 'update', 'create'): - newcollection = eq[2] - elif eq[1] == 'remove': - newcollection = api.collections().list(filters=[["owner_uuid", "=", project]], - limit=1, - order='modified_at desc').execute()['items'][0]['uuid'] - if newcollection != collection: - logging.info("restarting web service") - collection = newcollection - running = False - except Queue.Empty: - pass - except (KeyboardInterrupt): - logging.info("Got keyboard interrupt") - ws.close() - loop = False - except Exception as e: - logging.exception(str(e)) - ws.close() - loop = False + try: + if collection: + if not args.image: + docker_image = None + while not docker_image and os.path.exists(os.path.join(mountdir, "docker_image")): + try: + with open(os.path.join(mountdir, "docker_image")) as di: + docker_image = di.read().strip() + except IOError as e: + pass + + if not docker_image: + logger.error("Collection must contain a file 'docker_image' or must specify --image on the command line.") + + if docker_image != prev_docker_image: + if cid: + logger.info("Stopping docker container") + subprocess.check_call(["docker", "stop", cid]) + + if docker_image: + logger.info("Starting docker container %s", docker_image) + cid = subprocess.check_output(["docker", "run", + "--detach=true", + "--publish=%i:80" % (port), + "--volume=%s:/mnt:ro" % mountdir, + docker_image]) + cid = cid.rstrip() + prev_docker_image = docker_image + logger.info("Container id is %s", cid) + elif cid: + subprocess.check_call(["docker", "kill", "--signal=HUP", cid]) + elif cid: + logger.info("Stopping docker container") + subprocess.call(["docker", "stop", cid]) + except subprocess.CalledProcessError: + cid = None + + if not cid: + logger.warning("No service running! Will wait for a new collection to appear in the project.") + else: + logger.info("Waiting for events") + running = True + loop = True + while running: + try: + eq = evqueue.get(True, 1) + logger.info("%s %s", eq[1], eq[2]) + newcollection = collection + if eq[1] in ('add', 'update', 'create'): + newcollection = eq[2] + elif eq[1] == 'remove': + collections = api.collections().list(filters=[["owner_uuid", "=", project]], + limit=1, + order='modified_at desc').execute()['items'] + newcollection = collections[0]['uuid'] if len(collections) > 0 else None + running = False + except Queue.Empty: + pass + + except (KeyboardInterrupt): + logger.info("Got keyboard interrupt") + ws.close() + loop = False + except Exception as e: + logger.exception(e) + ws.close() + loop = False finally: if cid: - logging.info("Stopping docker container") + logger.info("Stopping docker container") cid = subprocess.call(["docker", "stop", cid]) - logging.info("Unmounting") - subprocess.call(["fusermount", "-u", "-z", mountdir]) - os.rmdir(mountdir) + if mountdir: + logger.info("Unmounting") + subprocess.call(["fusermount", "-u", "-z", mountdir]) + os.rmdir(mountdir) + +if __name__ == '__main__': + main(sys.argv[1:]) diff --git a/services/arv-web/sample-static-page/docker_image b/services/arv-web/sample-static-page/docker_image index 57f344fcd7..d11310422e 100644 --- a/services/arv-web/sample-static-page/docker_image +++ b/services/arv-web/sample-static-page/docker_image @@ -1 +1 @@ -arvados/arv-web \ No newline at end of file +arvados/arv-job \ No newline at end of file diff --git a/services/fuse/arvados_fuse/__init__.py b/services/fuse/arvados_fuse/__init__.py index b68574c53d..26098a6714 100644 --- a/services/fuse/arvados_fuse/__init__.py +++ b/services/fuse/arvados_fuse/__init__.py @@ -280,6 +280,7 @@ class Directory(FreshBase): n.clear() llfuse.invalidate_entry(self.inode, str(n)) self.inodes.del_entry(oldentries[n]) + llfuse.invalidate_inode(self.inode) self.invalidate() def mtime(self): -- 2.39.5