Merge branch '8567-moar-docker' refs #8567
author Peter Amstutz <peter.amstutz@curoverse.com>
Wed, 12 Apr 2017 20:10:13 +0000 (16:10 -0400)
committer Peter Amstutz <peter.amstutz@curoverse.com>
Wed, 12 Apr 2017 20:10:13 +0000 (16:10 -0400)
docker/migrate-docker19/build.sh
docker/migrate-docker19/dnd.sh
docker/migrate-docker19/migrate.sh
sdk/python/arvados/commands/migrate19.py

index 3a36cd495ab870b66a0c8aa78fd44fdbffac5bc9..e563bfc7e018458c6c57f87a870866cd8ab7b37f 100755 (executable)
@@ -1,2 +1,2 @@
 #!/bin/sh
-exec docker build -t arvados/migrate-docker19 .
+exec docker build -t arvados/migrate-docker19:1.0 .
index ec6f1e3e12bbde76ecb1e2c5d3406c03d47ab8b2..f253f0be660e565880d085933395ad7197cf618a 100755 (executable)
@@ -96,4 +96,4 @@ rm -rf /var/run/docker.pid
 
 read pid cmd state ppid pgrp session tty_nr tpgid rest < /proc/self/stat
 
-exec docker daemon --storage-driver=vfs $DOCKER_DAEMON_ARGS
+exec docker daemon --storage-driver=$1 $DOCKER_DAEMON_ARGS
index 8e39be45678aa357632dd7e4dd0bb147bcdeebc3..857678bde86b856c2f1ce48e8716e94bcc56d0cc 100755 (executable)
@@ -1,9 +1,46 @@
 #!/bin/bash
 
-set -e
+# This script is called by arv-migrate-docker19 to perform the actual migration
+# of a single image.  This works by running Docker-in-Docker (dnd.sh) to
+# download the image using Docker 1.9 and then upgrading to Docker 1.13 and
+# uploading the converted image.
+
+# When bash runs as pid 1 and uses "trap ... EXIT",
+# it will sometimes go into a 100% CPU infinite loop.
+#
+# Using workaround from here:
+#
+# https://github.com/docker/docker/issues/4854
+if [ "$$" = 1 ]; then
+  $0 "$@"
+  exit $?
+fi
+
+# -x           print each command as it is executed
+# -e           exit on error
+# -o pipefail  pipeline fails if any command in it fails (status of the last failing command)
+set -x -e -o pipefail
 
+image_tar_keepref=$1
+image_id=$2
+image_repo=$3
+image_tag=$4
+project_uuid=$5
+graph_driver=$6
+
+if [[ "$image_repo" = "<none>" ]] ; then
+  image_repo=none
+  image_tag=latest
+fi
+
+# Print free space in /var/lib/docker
+function freespace() {
+    df -B1 /var/lib/docker | tail -n1 | sed 's/  */ /g' | cut -d' ' -f4
+}
+
+# Run docker-in-docker script and then wait for it to come up
 function start_docker {
-    /root/dnd.sh &
+    /root/dnd.sh $graph_driver &
     for i in $(seq 1 10) ; do
         if docker version >/dev/null 2>/dev/null ; then
             return
@@ -13,6 +50,7 @@ function start_docker {
     false
 }
 
+# Kill the docker daemon using its pid file, then wait for it to be down
 function kill_docker {
     if test -f /var/run/docker.pid ; then
         kill $(cat /var/run/docker.pid)
@@ -26,37 +64,47 @@ function kill_docker {
     false
 }
 
+# Ensure that we clean up docker graph and/or lingering cache files on exit
 function cleanup {
     kill_docker
     rm -rf /var/lib/docker/*
     rm -rf /root/.cache/arvados/docker/*
+    echo "Available space after cleanup is $(freespace)"
 }
 
 trap cleanup EXIT
 
 start_docker
 
-image_tar_keepref=$1
-image_id=$2
-image_repo=$3
-image_tag=$4
-project_uuid=$5
+echo "Initial available space is $(freespace)"
 
 arv-get $image_tar_keepref | docker load
 
+
 docker tag $image_id $image_repo:$image_tag
 
 docker images -a
 
 kill_docker
 
+echo "Available space after image load is $(freespace)"
+
 cd /root/pkgs
 dpkg -i libltdl7_2.4.2-1.11+b1_amd64.deb docker-engine_1.13.1-0~debian-jessie_amd64.deb
 
+echo "Available space after image upgrade is $(freespace)"
+
 start_docker
 
 docker images -a
 
+if [[ "$image_repo" = "none" ]] ; then
+  image_repo=$(docker images -a --no-trunc | sed 's/  */ /g' | grep ^none | cut -d' ' -f3)
+  image_tag=""
+fi
+
 UUID=$(arv-keepdocker --force-image-format --project-uuid=$project_uuid $image_repo $image_tag)
 
+echo "Available space after arv-keepdocker is $(freespace)"
+
 echo "Migrated uuid is $UUID"
index 4be99961ba0febcfcf36b254f582945427b133e8..9ff0b780b7dd6c5b2ec2cf6e290d70faeb74c17b 100644 (file)
@@ -56,6 +56,14 @@ def main(arguments=None):
     migrate19_parser.add_argument(
         '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
         help='Print version and exit.')
+    migrate19_parser.add_argument(
+        '--verbose', action="store_true", help="Print stdout/stderr even on success")
+    migrate19_parser.add_argument(
+        '--force', action="store_true", help="Try to migrate even if there isn't enough space")
+
+    migrate19_parser.add_argument(
+        '--storage-driver', type=str, default="overlay",
+        help="Docker storage driver, e.g. aufs, overlay, vfs")
 
     exgroup = migrate19_parser.add_mutually_exclusive_group()
     exgroup.add_argument(
@@ -74,6 +82,9 @@ def main(arguments=None):
     if args.tempdir:
         tempfile.tempdir = args.tempdir
 
+    if args.verbose:
+        logger.setLevel(logging.DEBUG)
+
     only_migrate = None
     if args.infile:
         only_migrate = set()
@@ -114,26 +125,47 @@ def main(arguments=None):
     uuid_to_collection = {i["uuid"]: i for i in items}
 
     need_migrate = {}
+    totalbytes = 0
     biggest = 0
+    biggest_pdh = None
     for img in old_images:
         i = uuid_to_collection[img["collection"]]
         pdh = i["portable_data_hash"]
-        if pdh not in already_migrated and (only_migrate is None or pdh in only_migrate):
+        if pdh not in already_migrated and pdh not in need_migrate and (only_migrate is None or pdh in only_migrate):
             need_migrate[pdh] = img
             with CollectionReader(i["manifest_text"]) as c:
                 if c.values()[0].size() > biggest:
                     biggest = c.values()[0].size()
+                    biggest_pdh = pdh
+                totalbytes += c.values()[0].size()
+
+
+    if args.storage_driver == "vfs":
+        will_need = (biggest*20)
+    else:
+        will_need = (biggest*2.5)
 
     if args.print_unmigrated:
         only_migrate = set()
         for pdh in need_migrate:
-            print pdh
+            print(pdh)
         return
 
     logger.info("Already migrated %i images", len(already_migrated))
     logger.info("Need to migrate %i images", len(need_migrate))
     logger.info("Using tempdir %s", tempfile.gettempdir())
-    logger.info("Biggest image is about %i MiB, tempdir needs at least %i MiB free", biggest/(2**20), (biggest*2)/(2**20))
+    logger.info("Biggest image %s is about %i MiB", biggest_pdh, biggest/(2**20))
+    logger.info("Total data to migrate about %i MiB", totalbytes/(2**20))
+
+    df_out = subprocess.check_output(["df", "-B1", tempfile.gettempdir()])
+    ln = df_out.splitlines()[1]
+    filesystem, blocks, used, available, use_pct, mounted = re.match(r"^([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+) *([^ ]+)", ln).groups(1)
+    if int(available) <= will_need:
+        logger.warn("Temp filesystem mounted at %s does not have enough space for biggest image (has %i MiB, needs %i MiB)", mounted, int(available)/(2**20), will_need/(2**20))
+        if not args.force:
+            exit(1)
+        else:
+            logger.warn("--force provided, will migrate anyway")
 
     if args.dry_run:
         return
@@ -157,10 +189,10 @@ def main(arguments=None):
         dockercache = tempfile.mkdtemp()
         try:
             with tempfile.NamedTemporaryFile() as envfile:
-                envfile.write("ARVADOS_API_HOST=%s\n" % (os.environ["ARVADOS_API_HOST"]))
-                envfile.write("ARVADOS_API_TOKEN=%s\n" % (os.environ["ARVADOS_API_TOKEN"]))
-                if "ARVADOS_API_HOST_INSECURE" in os.environ:
-                    envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (os.environ["ARVADOS_API_HOST_INSECURE"]))
+                envfile.write("ARVADOS_API_HOST=%s\n" % (arvados.config.get("ARVADOS_API_HOST")))
+                envfile.write("ARVADOS_API_TOKEN=%s\n" % (arvados.config.get("ARVADOS_API_TOKEN")))
+                if arvados.config.get("ARVADOS_API_HOST_INSECURE"):
+                    envfile.write("ARVADOS_API_HOST_INSECURE=%s\n" % (arvados.config.get("ARVADOS_API_HOST_INSECURE")))
                 envfile.flush()
 
                 dockercmd = ["docker", "run",
@@ -169,23 +201,51 @@ def main(arguments=None):
                              "--env-file", envfile.name,
                              "--volume", "%s:/var/lib/docker" % varlibdocker,
                              "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
-                             "arvados/migrate-docker19",
+                             "arvados/migrate-docker19:1.0",
                              "/root/migrate.sh",
                              "%s/%s" % (old_image["collection"], tarfile),
                              tarfile[0:40],
                              old_image["repo"],
                              old_image["tag"],
-                             uuid_to_collection[old_image["collection"]]["owner_uuid"]]
+                             uuid_to_collection[old_image["collection"]]["owner_uuid"],
+                             args.storage_driver]
 
                 proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
                 out, err = proc.communicate()
 
+                initial_space = re.search(r"Initial available space is (\d+)", out)
+                imgload_space = re.search(r"Available space after image load is (\d+)", out)
+                imgupgrade_space = re.search(r"Available space after image upgrade is (\d+)", out)
+                keepdocker_space = re.search(r"Available space after arv-keepdocker is (\d+)", out)
+                cleanup_space = re.search(r"Available space after cleanup is (\d+)", out)
+
+                if initial_space:
+                    isp = int(initial_space.group(1))
+                    logger.info("Available space initially: %i MiB", (isp)/(2**20))
+                    if imgload_space:
+                        sp = int(imgload_space.group(1))
+                        logger.debug("Used after load: %i MiB", (isp-sp)/(2**20))
+                    if imgupgrade_space:
+                        sp = int(imgupgrade_space.group(1))
+                        logger.debug("Used after upgrade: %i MiB", (isp-sp)/(2**20))
+                    if keepdocker_space:
+                        sp = int(keepdocker_space.group(1))
+                        logger.info("Used after upload: %i MiB", (isp-sp)/(2**20))
+
+                if cleanup_space:
+                    sp = int(cleanup_space.group(1))
+                    logger.debug("Available after cleanup: %i MiB", (sp)/(2**20))
+
                 if proc.returncode != 0:
                     logger.error("Failed with return code %i", proc.returncode)
                     logger.error("--- Stdout ---\n%s", out)
                     logger.error("--- Stderr ---\n%s", err)
                     raise MigrationFailed()
 
+                if args.verbose:
+                    logger.info("--- Stdout ---\n%s", out)
+                    logger.info("--- Stderr ---\n%s", err)
+
             migrated = re.search(r"Migrated uuid is ([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15})", out)
             if migrated:
                 newcol = CollectionReader(migrated.group(1))