Merge branch '14844-cdc-azure-fixes' closes #14844
authorPeter Amstutz <pamstutz@veritasgenetics.com>
Fri, 1 Mar 2019 14:55:24 +0000 (09:55 -0500)
committerPeter Amstutz <pamstutz@veritasgenetics.com>
Fri, 1 Mar 2019 14:55:24 +0000 (09:55 -0500)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

16 files changed:
.licenseignore
build/run-build-docker-jobs-image.sh
docker/jobs/1078ECD7.key [new file with mode: 0644]
docker/jobs/Dockerfile
docker/jobs/apt.arvados.org-dev.list [new file with mode: 0644]
docker/jobs/apt.arvados.org-stable.list [new file with mode: 0644]
docker/jobs/apt.arvados.org-testing.list [new file with mode: 0644]
docker/jobs/apt.arvados.org.list [deleted file]
lib/dispatchcloud/node_size.go
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvdocker.py
sdk/cwl/arvados_cwl/arvjob.py
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/arvados_cwl/fsaccess.py
sdk/cwl/arvados_cwl/runner.py

index 06519a98e8bc45afcebdad584198a6b6bb47bf71..45028bf888ff6a40f910f29197aaac1a8d29516f 100644 (file)
@@ -15,7 +15,8 @@ build/package-test-dockerfiles/ubuntu1604/etc-apt-preferences.d-arvados
 doc/fonts/*
 doc/user/cwl/federated/*
 */docker_image
-docker/jobs/apt.arvados.org.list
+docker/jobs/apt.arvados.org*.list
+docker/jobs/1078ECD7.key
 */en.bootstrap.yml
 *font-awesome.css
 *.gif
index 7186a2209129a08c9c6fbd6a094ce6f0a9dac3c0..7d7e1fc8abf9171df1d905c22478eabf23236299 100755 (executable)
@@ -5,21 +5,24 @@
 
 function usage {
     echo >&2
-    echo >&2 "usage: $0 [options]"
+    echo >&2 "usage: WORKSPACE=/path/to/arvados $0 [options]"
     echo >&2
     echo >&2 "$0 options:"
     echo >&2 "  -t, --tags                    version tag for docker"
+    echo >&2 "  -r, --repo                    Arvados package repot to use: dev, testing, stable (default: dev)"
     echo >&2 "  -u, --upload                  Upload the images (docker push)"
     echo >&2 "  --no-cache                    Don't use build cache"
     echo >&2 "  -h, --help                    Display this help and exit"
     echo >&2
-    echo >&2 "  If no options are given, just builds the images."
+    echo >&2 "  WORKSPACE=path                Path to the Arvados source tree to build from"
+    echo >&2
 }
 upload=false
+REPO=dev
 
 # NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros).
-TEMP=`getopt -o hut: \
-    --long help,upload,no-cache,tags: \
+TEMP=`getopt -o hut:r: \
+    --long help,upload,no-cache,tags,repo: \
     -n "$0" -- "$@"`
 
 if [ $? != 0 ] ; then echo "Use -h for help"; exit 1 ; fi
@@ -50,6 +53,19 @@ do
                   ;;
             esac
             ;;
+        -r | --repo)
+            case "$2" in
+                "")
+                  echo "ERROR: --repo needs a parameter";
+                  usage;
+                  exit 1
+                  ;;
+                *)
+                  REPO="$2";
+                  shift 2
+                  ;;
+            esac
+            ;;
         --)
             shift
             break
@@ -69,6 +85,16 @@ exit_cleanly() {
     exit $EXITCODE
 }
 
+# Sanity check
+if ! [[ -n "$WORKSPACE" ]]; then
+    usage;
+    echo >&2 "Error: WORKSPACE environment variable not set"
+    echo >&2
+    exit 1
+fi
+
+echo $WORKSPACE
+
 COLUMNS=80
 . $WORKSPACE/build/run-library.sh
 
@@ -88,16 +114,6 @@ docker_push () {
     checkexit $ECODE "docker push $*"
 }
 
-# Sanity check
-if ! [[ -n "$WORKSPACE" ]]; then
-    echo >&2
-    echo >&2 "Error: WORKSPACE environment variable not set"
-    echo >&2
-    exit 1
-fi
-
-echo $WORKSPACE
-
 # find the docker binary
 DOCKER=`which docker.io`
 
@@ -153,6 +169,7 @@ cd docker/jobs
 docker build $NOCACHE \
        --build-arg python_sdk_version=${python_sdk_version} \
        --build-arg cwl_runner_version=${cwl_runner_version} \
+       --build-arg repo_version=${REPO} \
        -t arvados/jobs:$cwl_runner_version_orig .
 
 ECODE=$?
@@ -175,6 +192,9 @@ if docker --version |grep " 1\.[0-9]\." ; then
     # -f flag removed in Docker 1.12
     FORCE=-f
 fi
+
+#docker export arvados/jobs:$cwl_runner_version_orig | docker import - arvados/jobs:$cwl_runner_version_orig
+
 if ! [[ -z "$version_tag" ]]; then
     docker tag $FORCE arvados/jobs:$cwl_runner_version_orig arvados/jobs:"$version_tag"
 else
diff --git a/docker/jobs/1078ECD7.key b/docker/jobs/1078ECD7.key
new file mode 100644 (file)
index 0000000..edc62f4
--- /dev/null
@@ -0,0 +1,30 @@
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mQENBEzhgeoBCAChhoK1dqpWzNyDWqRGEvdFdkJaA9D2HRwKPfBfjAoePX6ZyrpA
+ItlUsvt/8s/DRiTiPEFQR4S7VqocmU6whJc3gDEGyOM6b1NF873lIfSVwUoE42QE
+a76dO8woOYgLUyxu2mKG+bJgGMumjBJt6ZOndYVjTYB/7sEeVxwmMVulfZe0s6zg
+ut0+SoTYg2R36qIqeIcWllYt97sEYnyy1qXMis4/3IZnuWkS/frsPR3aeUI4W+o2
+NDN1kj49+LMe7Fb5b7jZY08rZbAWXi1rU1hQx4jC9RvYqlT4HNld4Bn7os1IvOOA
+wNiR0oiVdiuDbBxcMvRPktxMrFVjowusRLq/ABEBAAG0PUN1cm92ZXJzZSwgSW5j
+IEF1dG9tYXRpYyBTaWduaW5nIEtleSA8c3lzYWRtaW5AY3Vyb3ZlcnNlLmNvbT6J
+ATgEEwECACIFAlNgYIECGwMGCwkIBwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJEFcW
+WREQeOzXPkEH/jQJDIYI1dxWcYiA+hczmpaZvN2/pc/kwIW/6a03+6zqmSNkebOE
+TgoDILacSYc17hy20R1/rWyUstOMKcEgFDBlSehhHyl0f7q/w7d8Ais6MabzsPfx
+IceJpsjUg87+BR7qWhgQ0sxmtIF2TKuTFLs+nkGsgSsiBOEF4NvHxuj3HD4y8F27
+HNqrkqwjLS8xJwwH5Gp2uMEVr1AXIH3iSRjJ8X124s8iEP97Q/3IazoYRf9/MCSm
+QEx8KzxwDX6t4bW6O4D01K+e9gdkTY70dcMgJoqm5IsX7yxjEubiOunphtlJnZ9d
+Oi1yBN5UM3pWKAdcfRj4rcfV9Simvpx9av+5AQ0ETOGB6gEIAMAA0HVMG0BbdnU7
+wWgl5eFdT0AUSrXK/WdcKqVEGGv+c68NETSHWZOJX7O46Eao4gY4cTYprVMBzxpY
+/BtQSYLpE0HLvBc1fcFd61Yz4H/9rGSNY0GcIQEbOjbJY5mr8qFsQ1K/mAf3aUL3
+b6ni4sHVicRiRr0Gl4Ihorlskpfu1SHs/C5tvTSVNF9p4vtl5892y1yILQeVpcBs
+NCR7MUpdS49xCpvnAWsDZX+ij6LTR3lzCm/ZLCg4gNuZkjgU9oqVfGkqysW7WZ8S
+OLvzAwUw7i1EIFX8q6QdudGoezxz8m8OgZM1v8AFpYEKlhEPf1W0MSfaRDwrj866
+8nCLruEAEQEAAYkBHwQYAQIACQUCTOGB6gIbDAAKCRBXFlkREHjs199EB/4+p0G1
+3PHxt6rLWSCGXobDOu4ZOA/qnv0D/JhOLroFds5TzQv6vnS8eAkhCTjHVA+b58cm
+kXpI0oYcD4ZP+KK1CHKq2rGfwou7HfAF+icnNqYkeBOkjjbCgkvBlcCInuAuU8JX
+DZMkfFk52+eBKwTjS/J/fQp0vDru8bHLp98WgdRHWfJQ3mc3gz4A5sR6zhrGPW6/
+ssnROS4dC2Ohp35GpgN1KjD3EmEw5RoSBYlyrARCaMsivgIKMxGUEyFZWhuJt3N1
+2MTddRwz28hbmYCi+MzHYDbRv+cSyUDmvXaWhfkNKBepClBA1rTWBcldit5vvlqr
+yPet6wIKrtLGhAqZ
+=CLkG
+-----END PGP PUBLIC KEY BLOCK-----
index c0fe145db1b292ebc55c536b9d0e7786a7b06daa..02a1c3829d432e284a770d11459aaa111bff57db 100644 (file)
@@ -2,29 +2,33 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-# Based on Debian Jessie
-FROM debian:jessie
-MAINTAINER Ward Vandewege <ward@curoverse.com>
+# Based on Debian Stretch
+FROM debian:stretch
+MAINTAINER Ward Vandewege <wvandewege@veritasgenetics.com>
 
 ENV DEBIAN_FRONTEND noninteractive
 
-ADD apt.arvados.org.list /etc/apt/sources.list.d/
-RUN apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7
-RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3
+RUN apt-get update -q
+RUN apt-get install -yq --no-install-recommends gnupg
+
+ARG repo_version
+RUN echo repo_version $repo_version
+ADD apt.arvados.org-$repo_version.list /etc/apt/sources.list.d/
+
+ADD 1078ECD7.key /tmp/
+RUN cat /tmp/1078ECD7.key | apt-key add -
 
 ARG python_sdk_version
 ARG cwl_runner_version
 RUN echo cwl_runner_version $cwl_runner_version python_sdk_version $python_sdk_version
 
 RUN apt-get update -q
-RUN apt-get install -yq --no-install-recommends \
-    git python-pip python-virtualenv \
-    python-dev libgnutls28-dev libcurl4-gnutls-dev nodejs \
+RUN apt-get install -yq --no-install-recommends nodejs \
     python-arvados-python-client=$python_sdk_version \
     python-arvados-cwl-runner=$cwl_runner_version
 
-# use the Python executable from the python-arvados-python-client package
-RUN rm -f /usr/bin/python && ln -s /usr/share/python2.7/dist/python-arvados-python-client/bin/python /usr/bin/python
+# use the Python executable from the python-arvados-cwl-runner package
+RUN rm -f /usr/bin/python && ln -s /usr/share/python2.7/dist/python-arvados-cwl-runner/bin/python /usr/bin/python
 
 # Install dependencies and set up system.
 RUN /usr/sbin/adduser --disabled-password \
diff --git a/docker/jobs/apt.arvados.org-dev.list b/docker/jobs/apt.arvados.org-dev.list
new file mode 100644 (file)
index 0000000..468000e
--- /dev/null
@@ -0,0 +1,2 @@
+# apt.arvados.org
+deb http://apt.arvados.org/ stretch-dev main
diff --git a/docker/jobs/apt.arvados.org-stable.list b/docker/jobs/apt.arvados.org-stable.list
new file mode 100644 (file)
index 0000000..afbc51e
--- /dev/null
@@ -0,0 +1,2 @@
+# apt.arvados.org
+deb http://apt.arvados.org/ stretch main
diff --git a/docker/jobs/apt.arvados.org-testing.list b/docker/jobs/apt.arvados.org-testing.list
new file mode 100644 (file)
index 0000000..c8ea91d
--- /dev/null
@@ -0,0 +1,2 @@
+# apt.arvados.org
+deb http://apt.arvados.org/ stretch-testing main
diff --git a/docker/jobs/apt.arvados.org.list b/docker/jobs/apt.arvados.org.list
deleted file mode 100644 (file)
index 11b98e2..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-# apt.arvados.org
-deb http://apt.arvados.org/ jessie-dev main
index d7f4585619417904a1125bc05d54d58499199179..6fb46b5f46f9d36c2500ee759cee1a3ec19c59f0 100644 (file)
@@ -46,7 +46,7 @@ func estimateDockerImageSize(collectionPDH string) int64 {
        // the size of the manifest.
        //
        // Use the following heuristic:
-       // - Start with the length of the mainfest (n)
+       // - Start with the length of the manifest (n)
        // - Subtract 80 characters for the filename and file segment
        // - Divide by 42 to get the number of block identifiers ('hash\+size\ ' is 32+1+8+1)
        // - Assume each block is full, multiply by 64 MiB
index 52fd4d21a115f29ee7fb388c11ebeb0564b40ae2..834ca195fdda02d859eafcd4aa0cea5c70c1c359 100644 (file)
@@ -289,10 +289,12 @@ def main(args, stdout, stderr, api_client=None, keep_client=None,
         if keep_client is None:
             keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
         executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4)
-    except Exception as e:
-        logger.error(e)
+    except Exception:
+        logger.exception("Error creating the Arvados CWL Executor")
         return 1
 
+    # Note that unless in debug mode, some stack traces related to user 
+    # workflow errors may be suppressed. See ArvadosJob.done().
     if arvargs.debug:
         logger.setLevel(logging.DEBUG)
         logging.getLogger('arvados').setLevel(logging.DEBUG)
index af7c02a8f30010bfe85e51a6928e63a5a617d37e..5fe29bc8f225a8f56da72dee262279a4e9357e74 100644 (file)
@@ -304,8 +304,8 @@ class ArvadosContainer(JobBase):
                 logger.info("%s reused container %s", self.arvrunner.label(self), response["container_uuid"])
             else:
                 logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
-        except Exception as e:
-            logger.error("%s got error %s" % (self.arvrunner.label(self), str(e)))
+        except Exception:
+            logger.exception("%s got an error", self.arvrunner.label(self))
             self.output_callback({}, "permanentFail")
 
     def done(self, record):
@@ -353,11 +353,13 @@ class ArvadosContainer(JobBase):
             if container["output"]:
                 outputs = done.done_outputs(self, container, "/tmp", self.outdir, "/keep")
         except WorkflowException as e:
+            # Only include a stack trace if in debug mode. 
+            # A stack trace may obfuscate more useful output about the workflow. 
             logger.error("%s unable to collect output from %s:\n%s",
                          self.arvrunner.label(self), container["output"], e, exc_info=(e if self.arvrunner.debug else False))
             processStatus = "permanentFail"
-        except Exception as e:
-            logger.exception("%s while getting output object: %s", self.arvrunner.label(self), e)
+        except Exception:
+            logger.exception("%s while getting output object:", self.arvrunner.label(self))
             processStatus = "permanentFail"
         finally:
             self.output_callback(outputs, processStatus)
@@ -523,8 +525,8 @@ class RunnerContainer(Runner):
             container = self.arvrunner.api.containers().get(
                 uuid=record["container_uuid"]
             ).execute(num_retries=self.arvrunner.num_retries)
-        except Exception as e:
-            logger.exception("%s while getting runner container: %s", self.arvrunner.label(self), e)
+        except Exception:
+            logger.exception("%s while getting runner container", self.arvrunner.label(self))
             self.arvrunner.output_callback({}, "permanentFail")
         else:
             super(RunnerContainer, self).done(container)
index 84006b47d2a8ba86fd97f88b63772a53e3d711f6..a8f56ad1d4f30db21c5a59f0fb6df9258c723c58 100644 (file)
@@ -63,6 +63,7 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid
                 arvados.commands.put.api_client = api_client
                 arvados.commands.keepdocker.main(args, stdout=sys.stderr, install_sig_handlers=False, api=api_client)
             except SystemExit as e:
+                # If e.code is None or zero, then keepdocker exited normally and we can continue
                 if e.code:
                     raise WorkflowException("keepdocker exited with code %s" % e.code)
 
index 69fe7e2a8f1b632675b16eeb3f6ad07ee76c00e8..ab2078e1571145aac5f334e076bfc00c7b951448 100644 (file)
@@ -199,7 +199,7 @@ class ArvadosJob(JobBase):
                                     e)
             else:
                 logger.info("%s %s is %s", self.arvrunner.label(self), response["uuid"], response["state"])
-        except Exception as e:
+        except Exception:
             logger.exception("%s error" % (self.arvrunner.label(self)))
             self.output_callback({}, "permanentFail")
 
@@ -224,8 +224,8 @@ class ArvadosJob(JobBase):
                             body={
                                 "components": components
                             }).execute(num_retries=self.arvrunner.num_retries)
-                except Exception as e:
-                    logger.info("Error adding to components: %s", e)
+                except Exception:
+                    logger.exception("Error adding to components")
 
     def done(self, record):
         try:
@@ -272,10 +272,12 @@ class ArvadosJob(JobBase):
                         outputs = done.done(self, record, dirs["tmpdir"],
                                             dirs["outdir"], dirs["keep"])
             except WorkflowException as e:
+                # Only include a stack trace if in debug mode. 
+                # This is most likely a user workflow error and a stack trace may obfuscate more useful output. 
                 logger.error("%s unable to collect output from %s:\n%s",
                              self.arvrunner.label(self), record["output"], e, exc_info=(e if self.arvrunner.debug else False))
                 processStatus = "permanentFail"
-            except Exception as e:
+            except Exception:
                 logger.exception("Got unknown exception while collecting output for job %s:", self.name)
                 processStatus = "permanentFail"
 
index 535cfd7582b985ad806d659f518f9da9ce0e6fbc..c1f2b54744083a22a52b659797a720da478304b1 100644 (file)
@@ -361,8 +361,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                     keys = keys[pageSize:]
                     try:
                         proc_states = table.list(filters=[["uuid", "in", page]]).execute(num_retries=self.num_retries)
-                    except Exception as e:
-                        logger.warning("Error checking states on API server: %s", e)
+                    except Exception:
+                        logger.exception("Error checking states on API server: %s")
                         remain_wait = self.poll_interval
                         continue
 
@@ -393,9 +393,9 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
         for i in self.intermediate_output_collections:
             try:
                 self.api.collections().delete(uuid=i).execute(num_retries=self.num_retries)
-            except:
+            except Exception:
                 logger.warning("Failed to delete intermediate output: %s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
-            if sys.exc_info()[0] is KeyboardInterrupt or sys.exc_info()[0] is SystemExit:
+            except (KeyboardInterrupt, SystemExit):
                 break
 
     def check_features(self, obj):
@@ -506,8 +506,9 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                                               body={
                                                   'is_trashed': True
                                               }).execute(num_retries=self.num_retries)
-            except Exception as e:
-                logger.info("Setting container output: %s", e)
+            except Exception:
+                logger.exception("Setting container output")
+                return
         elif self.work_api == "jobs" and "TASK_UUID" in os.environ:
             self.api.job_tasks().update(uuid=os.environ["TASK_UUID"],
                                    body={
@@ -731,8 +732,11 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
         except:
             if sys.exc_info()[0] is KeyboardInterrupt or sys.exc_info()[0] is SystemExit:
                 logger.error("Interrupted, workflow will be cancelled")
+            elif isinstance(sys.exc_info()[1], WorkflowException):
+                logger.error("Workflow execution failed:\n%s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
             else:
-                logger.error("Execution failed:\n%s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
+                logger.exception("Workflow execution failed")
+
             if self.pipeline:
                 self.api.pipeline_instances().update(uuid=self.pipeline["uuid"],
                                                      body={"state": "Failed"}).execute(num_retries=self.num_retries)
index fc7cc42d15c61f19021f006cb1eec18ab94178cd..3744b4a93afa40df10c17335310503500acd2432 100644 (file)
@@ -240,8 +240,8 @@ class CollectionFetcher(DefaultFetcher):
                     return True
         except arvados.errors.NotFoundError:
             return False
-        except:
-            logger.exception("Got unexpected exception checking if file exists:")
+        except Exception:
+            logger.exception("Got unexpected exception checking if file exists")
             return False
         return super(CollectionFetcher, self).check_exists(url)
 
index c0d165aa9eed4e5cbaeaf9d365a302957a3921b1..e515ac2ce5e99f4ec75011b8ac51bfe2fc1bbff8 100644 (file)
@@ -478,8 +478,8 @@ class Runner(Process):
                     fileobj["location"] = "keep:%s/%s" % (record["output"], path)
             adjustFileObjs(outputs, keepify)
             adjustDirObjs(outputs, keepify)
-        except Exception as e:
-            logger.exception("[%s] While getting final output object: %s", self.name, e)
+        except Exception:
+            logger.exception("[%s] While getting final output object", self.name)
             self.arvrunner.output_callback({}, "permanentFail")
         else:
             self.arvrunner.output_callback(outputs, processStatus)