Merge branch '14885-ciso-and-conda-packaging-pr'
authorEric Biagiotti <ebiagiotti@veritasgenetics.com>
Fri, 1 Mar 2019 15:02:58 +0000 (10:02 -0500)
committerEric Biagiotti <ebiagiotti@veritasgenetics.com>
Fri, 1 Mar 2019 15:02:58 +0000 (10:02 -0500)
refs #14885

Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti@veritasgenetics.com>

23 files changed:
.licenseignore
build/run-build-docker-jobs-image.sh
doc/install/install-manual-prerequisites.html.textile.liquid
docker/jobs/1078ECD7.key [new file with mode: 0644]
docker/jobs/Dockerfile
docker/jobs/apt.arvados.org-dev.list [new file with mode: 0644]
docker/jobs/apt.arvados.org-stable.list [new file with mode: 0644]
docker/jobs/apt.arvados.org-testing.list [new file with mode: 0644]
docker/jobs/apt.arvados.org.list [deleted file]
lib/cloud/azure/azure.go
lib/cloud/azure/azure_test.go
lib/dispatchcloud/node_size.go
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvdocker.py
sdk/cwl/arvados_cwl/arvjob.py
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/arvados_cwl/fsaccess.py
sdk/cwl/arvados_cwl/runner.py
sdk/python/arvados/_normalize_stream.py
sdk/python/arvados/collection.py
sdk/python/arvados/commands/put.py
sdk/python/tests/test_arv_put.py

index 06519a98e8bc45afcebdad584198a6b6bb47bf71..45028bf888ff6a40f910f29197aaac1a8d29516f 100644 (file)
@@ -15,7 +15,8 @@ build/package-test-dockerfiles/ubuntu1604/etc-apt-preferences.d-arvados
 doc/fonts/*
 doc/user/cwl/federated/*
 */docker_image
-docker/jobs/apt.arvados.org.list
+docker/jobs/apt.arvados.org*.list
+docker/jobs/1078ECD7.key
 */en.bootstrap.yml
 *font-awesome.css
 *.gif
index 7186a2209129a08c9c6fbd6a094ce6f0a9dac3c0..7d7e1fc8abf9171df1d905c22478eabf23236299 100755 (executable)
@@ -5,21 +5,24 @@
 
 function usage {
     echo >&2
-    echo >&2 "usage: $0 [options]"
+    echo >&2 "usage: WORKSPACE=/path/to/arvados $0 [options]"
     echo >&2
     echo >&2 "$0 options:"
     echo >&2 "  -t, --tags                    version tag for docker"
+    echo >&2 "  -r, --repo                    Arvados package repot to use: dev, testing, stable (default: dev)"
     echo >&2 "  -u, --upload                  Upload the images (docker push)"
     echo >&2 "  --no-cache                    Don't use build cache"
     echo >&2 "  -h, --help                    Display this help and exit"
     echo >&2
-    echo >&2 "  If no options are given, just builds the images."
+    echo >&2 "  WORKSPACE=path                Path to the Arvados source tree to build from"
+    echo >&2
 }
 upload=false
+REPO=dev
 
 # NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros).
-TEMP=`getopt -o hut: \
-    --long help,upload,no-cache,tags: \
+TEMP=`getopt -o hut:r: \
+    --long help,upload,no-cache,tags,repo: \
     -n "$0" -- "$@"`
 
 if [ $? != 0 ] ; then echo "Use -h for help"; exit 1 ; fi
@@ -50,6 +53,19 @@ do
                   ;;
             esac
             ;;
+        -r | --repo)
+            case "$2" in
+                "")
+                  echo "ERROR: --repo needs a parameter";
+                  usage;
+                  exit 1
+                  ;;
+                *)
+                  REPO="$2";
+                  shift 2
+                  ;;
+            esac
+            ;;
         --)
             shift
             break
@@ -69,6 +85,16 @@ exit_cleanly() {
     exit $EXITCODE
 }
 
+# Sanity check
+if ! [[ -n "$WORKSPACE" ]]; then
+    usage;
+    echo >&2 "Error: WORKSPACE environment variable not set"
+    echo >&2
+    exit 1
+fi
+
+echo $WORKSPACE
+
 COLUMNS=80
 . $WORKSPACE/build/run-library.sh
 
@@ -88,16 +114,6 @@ docker_push () {
     checkexit $ECODE "docker push $*"
 }
 
-# Sanity check
-if ! [[ -n "$WORKSPACE" ]]; then
-    echo >&2
-    echo >&2 "Error: WORKSPACE environment variable not set"
-    echo >&2
-    exit 1
-fi
-
-echo $WORKSPACE
-
 # find the docker binary
 DOCKER=`which docker.io`
 
@@ -153,6 +169,7 @@ cd docker/jobs
 docker build $NOCACHE \
        --build-arg python_sdk_version=${python_sdk_version} \
        --build-arg cwl_runner_version=${cwl_runner_version} \
+       --build-arg repo_version=${REPO} \
        -t arvados/jobs:$cwl_runner_version_orig .
 
 ECODE=$?
@@ -175,6 +192,9 @@ if docker --version |grep " 1\.[0-9]\." ; then
     # -f flag removed in Docker 1.12
     FORCE=-f
 fi
+
+#docker export arvados/jobs:$cwl_runner_version_orig | docker import - arvados/jobs:$cwl_runner_version_orig
+
 if ! [[ -z "$version_tag" ]]; then
     docker tag $FORCE arvados/jobs:$cwl_runner_version_orig arvados/jobs:"$version_tag"
 else
index e0cc4b8581e65a1a38292f1953418db394f92bee..62017163d2b48540290ff3df29733eebfa759735 100644 (file)
@@ -39,6 +39,7 @@ table(table table-bordered table-condensed).
 |Debian 9 ("stretch")|Supported|Latest|
 |Ubuntu 14.04 ("trusty")|Supported|Latest|
 |Ubuntu 16.04 ("xenial")|Supported|Latest|
+|Ubuntu 18.04 ("bionic")|Supported|Latest|
 |Ubuntu 12.04 ("precise")|EOL|8ed7b6dd5d4df93a3f37096afe6d6f81c2a7ef6e (2017-05-03)|
 |Debian 7 ("wheezy")|EOL|997479d1408139e96ecdb42a60b4f727f814f6c9 (2016-12-28)|
 |CentOS 6 |EOL|997479d1408139e96ecdb42a60b4f727f814f6c9 (2016-12-28)|
@@ -64,7 +65,7 @@ gpgkey=http://rpm.arvados.org/CentOS/RPM-GPG-KEY-curoverse
 
 h3. Debian and Ubuntu
 
-Packages are available for Debian 8 ("jessie"), Debian 9 ("stretch"), Ubuntu 14.04 ("trusty") and Ubuntu 16.04 ("xenial").
+Packages are available for Debian 8 ("jessie"), Debian 9 ("stretch"), Ubuntu 14.04 ("trusty"), Ubuntu 16.04 ("xenial") and Ubuntu 18.04 ("bionic").
 
 First, register the Curoverse signing key in apt's database:
 
@@ -78,6 +79,7 @@ table(table table-bordered table-condensed).
 |Debian 9 ("stretch")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ stretch main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
 |Ubuntu 14.04 ("trusty")[1]|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ trusty main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
 |Ubuntu 16.04 ("xenial")[1]|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ xenial main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
+|Ubuntu 18.04 ("bionic")[1]|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ bionic main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
 
 {% include 'notebox_begin' %}
 
diff --git a/docker/jobs/1078ECD7.key b/docker/jobs/1078ECD7.key
new file mode 100644 (file)
index 0000000..edc62f4
--- /dev/null
@@ -0,0 +1,30 @@
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mQENBEzhgeoBCAChhoK1dqpWzNyDWqRGEvdFdkJaA9D2HRwKPfBfjAoePX6ZyrpA
+ItlUsvt/8s/DRiTiPEFQR4S7VqocmU6whJc3gDEGyOM6b1NF873lIfSVwUoE42QE
+a76dO8woOYgLUyxu2mKG+bJgGMumjBJt6ZOndYVjTYB/7sEeVxwmMVulfZe0s6zg
+ut0+SoTYg2R36qIqeIcWllYt97sEYnyy1qXMis4/3IZnuWkS/frsPR3aeUI4W+o2
+NDN1kj49+LMe7Fb5b7jZY08rZbAWXi1rU1hQx4jC9RvYqlT4HNld4Bn7os1IvOOA
+wNiR0oiVdiuDbBxcMvRPktxMrFVjowusRLq/ABEBAAG0PUN1cm92ZXJzZSwgSW5j
+IEF1dG9tYXRpYyBTaWduaW5nIEtleSA8c3lzYWRtaW5AY3Vyb3ZlcnNlLmNvbT6J
+ATgEEwECACIFAlNgYIECGwMGCwkIBwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJEFcW
+WREQeOzXPkEH/jQJDIYI1dxWcYiA+hczmpaZvN2/pc/kwIW/6a03+6zqmSNkebOE
+TgoDILacSYc17hy20R1/rWyUstOMKcEgFDBlSehhHyl0f7q/w7d8Ais6MabzsPfx
+IceJpsjUg87+BR7qWhgQ0sxmtIF2TKuTFLs+nkGsgSsiBOEF4NvHxuj3HD4y8F27
+HNqrkqwjLS8xJwwH5Gp2uMEVr1AXIH3iSRjJ8X124s8iEP97Q/3IazoYRf9/MCSm
+QEx8KzxwDX6t4bW6O4D01K+e9gdkTY70dcMgJoqm5IsX7yxjEubiOunphtlJnZ9d
+Oi1yBN5UM3pWKAdcfRj4rcfV9Simvpx9av+5AQ0ETOGB6gEIAMAA0HVMG0BbdnU7
+wWgl5eFdT0AUSrXK/WdcKqVEGGv+c68NETSHWZOJX7O46Eao4gY4cTYprVMBzxpY
+/BtQSYLpE0HLvBc1fcFd61Yz4H/9rGSNY0GcIQEbOjbJY5mr8qFsQ1K/mAf3aUL3
+b6ni4sHVicRiRr0Gl4Ihorlskpfu1SHs/C5tvTSVNF9p4vtl5892y1yILQeVpcBs
+NCR7MUpdS49xCpvnAWsDZX+ij6LTR3lzCm/ZLCg4gNuZkjgU9oqVfGkqysW7WZ8S
+OLvzAwUw7i1EIFX8q6QdudGoezxz8m8OgZM1v8AFpYEKlhEPf1W0MSfaRDwrj866
+8nCLruEAEQEAAYkBHwQYAQIACQUCTOGB6gIbDAAKCRBXFlkREHjs199EB/4+p0G1
+3PHxt6rLWSCGXobDOu4ZOA/qnv0D/JhOLroFds5TzQv6vnS8eAkhCTjHVA+b58cm
+kXpI0oYcD4ZP+KK1CHKq2rGfwou7HfAF+icnNqYkeBOkjjbCgkvBlcCInuAuU8JX
+DZMkfFk52+eBKwTjS/J/fQp0vDru8bHLp98WgdRHWfJQ3mc3gz4A5sR6zhrGPW6/
+ssnROS4dC2Ohp35GpgN1KjD3EmEw5RoSBYlyrARCaMsivgIKMxGUEyFZWhuJt3N1
+2MTddRwz28hbmYCi+MzHYDbRv+cSyUDmvXaWhfkNKBepClBA1rTWBcldit5vvlqr
+yPet6wIKrtLGhAqZ
+=CLkG
+-----END PGP PUBLIC KEY BLOCK-----
index c0fe145db1b292ebc55c536b9d0e7786a7b06daa..02a1c3829d432e284a770d11459aaa111bff57db 100644 (file)
@@ -2,29 +2,33 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-# Based on Debian Jessie
-FROM debian:jessie
-MAINTAINER Ward Vandewege <ward@curoverse.com>
+# Based on Debian Stretch
+FROM debian:stretch
+MAINTAINER Ward Vandewege <wvandewege@veritasgenetics.com>
 
 ENV DEBIAN_FRONTEND noninteractive
 
-ADD apt.arvados.org.list /etc/apt/sources.list.d/
-RUN apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7
-RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3
+RUN apt-get update -q
+RUN apt-get install -yq --no-install-recommends gnupg
+
+ARG repo_version
+RUN echo repo_version $repo_version
+ADD apt.arvados.org-$repo_version.list /etc/apt/sources.list.d/
+
+ADD 1078ECD7.key /tmp/
+RUN cat /tmp/1078ECD7.key | apt-key add -
 
 ARG python_sdk_version
 ARG cwl_runner_version
 RUN echo cwl_runner_version $cwl_runner_version python_sdk_version $python_sdk_version
 
 RUN apt-get update -q
-RUN apt-get install -yq --no-install-recommends \
-    git python-pip python-virtualenv \
-    python-dev libgnutls28-dev libcurl4-gnutls-dev nodejs \
+RUN apt-get install -yq --no-install-recommends nodejs \
     python-arvados-python-client=$python_sdk_version \
     python-arvados-cwl-runner=$cwl_runner_version
 
-# use the Python executable from the python-arvados-python-client package
-RUN rm -f /usr/bin/python && ln -s /usr/share/python2.7/dist/python-arvados-python-client/bin/python /usr/bin/python
+# use the Python executable from the python-arvados-cwl-runner package
+RUN rm -f /usr/bin/python && ln -s /usr/share/python2.7/dist/python-arvados-cwl-runner/bin/python /usr/bin/python
 
 # Install dependencies and set up system.
 RUN /usr/sbin/adduser --disabled-password \
diff --git a/docker/jobs/apt.arvados.org-dev.list b/docker/jobs/apt.arvados.org-dev.list
new file mode 100644 (file)
index 0000000..468000e
--- /dev/null
@@ -0,0 +1,2 @@
+# apt.arvados.org
+deb http://apt.arvados.org/ stretch-dev main
diff --git a/docker/jobs/apt.arvados.org-stable.list b/docker/jobs/apt.arvados.org-stable.list
new file mode 100644 (file)
index 0000000..afbc51e
--- /dev/null
@@ -0,0 +1,2 @@
+# apt.arvados.org
+deb http://apt.arvados.org/ stretch main
diff --git a/docker/jobs/apt.arvados.org-testing.list b/docker/jobs/apt.arvados.org-testing.list
new file mode 100644 (file)
index 0000000..c8ea91d
--- /dev/null
@@ -0,0 +1,2 @@
+# apt.arvados.org
+deb http://apt.arvados.org/ stretch-testing main
diff --git a/docker/jobs/apt.arvados.org.list b/docker/jobs/apt.arvados.org.list
deleted file mode 100644 (file)
index 11b98e2..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-# apt.arvados.org
-deb http://apt.arvados.org/ jessie-dev main
index 8ae8a44811529f598cb8ecee0044919b76bf467e..b88962714e709765f1c93e0a6a88dbcf860aabaa 100644 (file)
@@ -52,6 +52,11 @@ type azureInstanceSetConfig struct {
 
 const tagKeyInstanceSecret = "InstanceSecret"
 
+type containerWrapper interface {
+       GetBlobReference(name string) *storage.Blob
+       ListBlobs(params storage.ListBlobsParameters) (storage.BlobListResponse, error)
+}
+
 type virtualMachinesClientWrapper interface {
        createOrUpdate(ctx context.Context,
                resourceGroupName string,
@@ -192,20 +197,20 @@ func wrapAzureError(err error) error {
 }
 
 type azureInstanceSet struct {
-       azconfig          azureInstanceSetConfig
-       vmClient          virtualMachinesClientWrapper
-       netClient         interfacesClientWrapper
-       storageAcctClient storageacct.AccountsClient
-       azureEnv          azure.Environment
-       interfaces        map[string]network.Interface
-       dispatcherID      string
-       namePrefix        string
-       ctx               context.Context
-       stopFunc          context.CancelFunc
-       stopWg            sync.WaitGroup
-       deleteNIC         chan string
-       deleteBlob        chan storage.Blob
-       logger            logrus.FieldLogger
+       azconfig     azureInstanceSetConfig
+       vmClient     virtualMachinesClientWrapper
+       netClient    interfacesClientWrapper
+       blobcont     containerWrapper
+       azureEnv     azure.Environment
+       interfaces   map[string]network.Interface
+       dispatcherID string
+       namePrefix   string
+       ctx          context.Context
+       stopFunc     context.CancelFunc
+       stopWg       sync.WaitGroup
+       deleteNIC    chan string
+       deleteBlob   chan storage.Blob
+       logger       logrus.FieldLogger
 }
 
 func newAzureInstanceSet(config json.RawMessage, dispatcherID cloud.InstanceSetID, logger logrus.FieldLogger) (prv cloud.InstanceSet, err error) {
@@ -251,7 +256,22 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str
 
        az.vmClient = &virtualMachinesClientImpl{vmClient}
        az.netClient = &interfacesClientImpl{netClient}
-       az.storageAcctClient = storageAcctClient
+
+       result, err := storageAcctClient.ListKeys(az.ctx, az.azconfig.ResourceGroup, az.azconfig.StorageAccount)
+       if err != nil {
+               az.logger.WithError(err).Warn("Couldn't get account keys")
+               return err
+       }
+
+       key1 := *(*result.Keys)[0].Value
+       client, err := storage.NewBasicClientOnSovereignCloud(az.azconfig.StorageAccount, key1, az.azureEnv)
+       if err != nil {
+               az.logger.WithError(err).Warn("Couldn't make client")
+               return err
+       }
+
+       blobsvc := client.GetBlobService()
+       az.blobcont = blobsvc.GetContainerReference(az.azconfig.BlobContainer)
 
        az.dispatcherID = dispatcherID
        az.namePrefix = fmt.Sprintf("compute-%s-", az.dispatcherID)
@@ -363,11 +383,12 @@ func (az *azureInstanceSet) Create(
                return nil, wrapAzureError(err)
        }
 
-       instanceVhd := fmt.Sprintf("https://%s.blob.%s/%s/%s-os.vhd",
+       blobname := fmt.Sprintf("%s-os.vhd", name)
+       instanceVhd := fmt.Sprintf("https://%s.blob.%s/%s/%s",
                az.azconfig.StorageAccount,
                az.azureEnv.StorageEndpointSuffix,
                az.azconfig.BlobContainer,
-               name)
+               blobname)
 
        customData := base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\n" + initCommand + "\n"))
 
@@ -422,6 +443,16 @@ func (az *azureInstanceSet) Create(
 
        vm, err := az.vmClient.createOrUpdate(az.ctx, az.azconfig.ResourceGroup, name, vmParameters)
        if err != nil {
+               _, delerr := az.blobcont.GetBlobReference(blobname).DeleteIfExists(nil)
+               if delerr != nil {
+                       az.logger.WithError(delerr).Warnf("Error cleaning up vhd blob after failed create")
+               }
+
+               _, delerr = az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, *nic.Name)
+               if delerr != nil {
+                       az.logger.WithError(delerr).Warnf("Error cleaning up NIC after failed create")
+               }
+
                return nil, wrapAzureError(err)
        }
 
@@ -509,27 +540,12 @@ func (az *azureInstanceSet) manageNics() (map[string]network.Interface, error) {
 // leased to a VM) and haven't been modified for
 // DeleteDanglingResourcesAfter seconds.
 func (az *azureInstanceSet) manageBlobs() {
-       result, err := az.storageAcctClient.ListKeys(az.ctx, az.azconfig.ResourceGroup, az.azconfig.StorageAccount)
-       if err != nil {
-               az.logger.WithError(err).Warn("Couldn't get account keys")
-               return
-       }
-
-       key1 := *(*result.Keys)[0].Value
-       client, err := storage.NewBasicClientOnSovereignCloud(az.azconfig.StorageAccount, key1, az.azureEnv)
-       if err != nil {
-               az.logger.WithError(err).Warn("Couldn't make client")
-               return
-       }
-
-       blobsvc := client.GetBlobService()
-       blobcont := blobsvc.GetContainerReference(az.azconfig.BlobContainer)
 
        page := storage.ListBlobsParameters{Prefix: az.namePrefix}
        timestamp := time.Now()
 
        for {
-               response, err := blobcont.ListBlobs(page)
+               response, err := az.blobcont.ListBlobs(page)
                if err != nil {
                        az.logger.WithError(err).Warn("Error listing blobs")
                        return
@@ -628,7 +644,13 @@ func (ai *azureInstance) Destroy() error {
 }
 
 func (ai *azureInstance) Address() string {
-       return *(*ai.nic.IPConfigurations)[0].PrivateIPAddress
+       if ai.nic.IPConfigurations != nil &&
+               len(*ai.nic.IPConfigurations) > 0 &&
+               (*ai.nic.IPConfigurations)[0].PrivateIPAddress != nil {
+
+               return *(*ai.nic.IPConfigurations)[0].PrivateIPAddress
+       }
+       return ""
 }
 
 func (ai *azureInstance) RemoteUser() string {
index 61649c39800d7ad8e31252313285e0e85fe7b16c..72832578dfa68ea022c75a6a6a69ef35018be15c 100644 (file)
@@ -105,6 +105,16 @@ func (*InterfacesClientStub) listComplete(ctx context.Context, resourceGroupName
        return network.InterfaceListResultIterator{}, nil
 }
 
+type BlobContainerStub struct{}
+
+func (*BlobContainerStub) GetBlobReference(name string) *storage.Blob {
+       return nil
+}
+
+func (*BlobContainerStub) ListBlobs(params storage.ListBlobsParameters) (storage.BlobListResponse, error) {
+       return storage.BlobListResponse{}, nil
+}
+
 type testConfig struct {
        ImageIDForTestSuite string
        DriverParameters    json.RawMessage
@@ -148,6 +158,7 @@ func GetInstanceSet() (cloud.InstanceSet, cloud.ImageID, arvados.Cluster, error)
        ap.ctx, ap.stopFunc = context.WithCancel(context.Background())
        ap.vmClient = &VirtualMachinesClientStub{}
        ap.netClient = &InterfacesClientStub{}
+       ap.blobcont = &BlobContainerStub{}
        return &ap, cloud.ImageID("blob"), cluster, nil
 }
 
index d7f4585619417904a1125bc05d54d58499199179..6fb46b5f46f9d36c2500ee759cee1a3ec19c59f0 100644 (file)
@@ -46,7 +46,7 @@ func estimateDockerImageSize(collectionPDH string) int64 {
        // the size of the manifest.
        //
        // Use the following heuristic:
-       // - Start with the length of the mainfest (n)
+       // - Start with the length of the manifest (n)
        // - Subtract 80 characters for the filename and file segment
        // - Divide by 42 to get the number of block identifiers ('hash\+size\ ' is 32+1+8+1)
        // - Assume each block is full, multiply by 64 MiB
index 52fd4d21a115f29ee7fb388c11ebeb0564b40ae2..834ca195fdda02d859eafcd4aa0cea5c70c1c359 100644 (file)
@@ -289,10 +289,12 @@ def main(args, stdout, stderr, api_client=None, keep_client=None,
         if keep_client is None:
             keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
         executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4)
-    except Exception as e:
-        logger.error(e)
+    except Exception:
+        logger.exception("Error creating the Arvados CWL Executor")
         return 1
 
+    # Note that unless in debug mode, some stack traces related to user 
+    # workflow errors may be suppressed. See ArvadosJob.done().
     if arvargs.debug:
         logger.setLevel(logging.DEBUG)
         logging.getLogger('arvados').setLevel(logging.DEBUG)
index 1261e94a5f5458fb2cce47cfeaf68b1e4a608705..9f93f0a6a6895780b2aca6e5ed235aca64d6aac9 100644 (file)
@@ -304,8 +304,8 @@ class ArvadosContainer(JobBase):
                 logger.info("%s reused container %s", self.arvrunner.label(self), response["container_uuid"])
             else:
                 logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
-        except Exception as e:
-            logger.error("%s got error %s" % (self.arvrunner.label(self), str(e)))
+        except Exception:
+            logger.exception("%s got an error", self.arvrunner.label(self))
             self.output_callback({}, "permanentFail")
 
     def done(self, record):
@@ -353,11 +353,13 @@ class ArvadosContainer(JobBase):
             if container["output"]:
                 outputs = done.done_outputs(self, container, "/tmp", self.outdir, "/keep")
         except WorkflowException as e:
+            # Only include a stack trace if in debug mode. 
+            # A stack trace may obfuscate more useful output about the workflow. 
             logger.error("%s unable to collect output from %s:\n%s",
                          self.arvrunner.label(self), container["output"], e, exc_info=(e if self.arvrunner.debug else False))
             processStatus = "permanentFail"
-        except Exception as e:
-            logger.exception("%s while getting output object: %s", self.arvrunner.label(self), e)
+        except Exception:
+            logger.exception("%s while getting output object:", self.arvrunner.label(self))
             processStatus = "permanentFail"
         finally:
             self.output_callback(outputs, processStatus)
@@ -523,8 +525,8 @@ class RunnerContainer(Runner):
             container = self.arvrunner.api.containers().get(
                 uuid=record["container_uuid"]
             ).execute(num_retries=self.arvrunner.num_retries)
-        except Exception as e:
-            logger.exception("%s while getting runner container: %s", self.arvrunner.label(self), e)
+        except Exception:
+            logger.exception("%s while getting runner container", self.arvrunner.label(self))
             self.arvrunner.output_callback({}, "permanentFail")
         else:
             super(RunnerContainer, self).done(container)
index 84006b47d2a8ba86fd97f88b63772a53e3d711f6..a8f56ad1d4f30db21c5a59f0fb6df9258c723c58 100644 (file)
@@ -63,6 +63,7 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid
                 arvados.commands.put.api_client = api_client
                 arvados.commands.keepdocker.main(args, stdout=sys.stderr, install_sig_handlers=False, api=api_client)
             except SystemExit as e:
+                # If e.code is None or zero, then keepdocker exited normally and we can continue
                 if e.code:
                     raise WorkflowException("keepdocker exited with code %s" % e.code)
 
index 69fe7e2a8f1b632675b16eeb3f6ad07ee76c00e8..ab2078e1571145aac5f334e076bfc00c7b951448 100644 (file)
@@ -199,7 +199,7 @@ class ArvadosJob(JobBase):
                                     e)
             else:
                 logger.info("%s %s is %s", self.arvrunner.label(self), response["uuid"], response["state"])
-        except Exception as e:
+        except Exception:
             logger.exception("%s error" % (self.arvrunner.label(self)))
             self.output_callback({}, "permanentFail")
 
@@ -224,8 +224,8 @@ class ArvadosJob(JobBase):
                             body={
                                 "components": components
                             }).execute(num_retries=self.arvrunner.num_retries)
-                except Exception as e:
-                    logger.info("Error adding to components: %s", e)
+                except Exception:
+                    logger.exception("Error adding to components")
 
     def done(self, record):
         try:
@@ -272,10 +272,12 @@ class ArvadosJob(JobBase):
                         outputs = done.done(self, record, dirs["tmpdir"],
                                             dirs["outdir"], dirs["keep"])
             except WorkflowException as e:
+                # Only include a stack trace if in debug mode. 
+                # This is most likely a user workflow error and a stack trace may obfuscate more useful output. 
                 logger.error("%s unable to collect output from %s:\n%s",
                              self.arvrunner.label(self), record["output"], e, exc_info=(e if self.arvrunner.debug else False))
                 processStatus = "permanentFail"
-            except Exception as e:
+            except Exception:
                 logger.exception("Got unknown exception while collecting output for job %s:", self.name)
                 processStatus = "permanentFail"
 
index 535cfd7582b985ad806d659f518f9da9ce0e6fbc..c1f2b54744083a22a52b659797a720da478304b1 100644 (file)
@@ -361,8 +361,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                     keys = keys[pageSize:]
                     try:
                         proc_states = table.list(filters=[["uuid", "in", page]]).execute(num_retries=self.num_retries)
-                    except Exception as e:
-                        logger.warning("Error checking states on API server: %s", e)
+                    except Exception:
+                        logger.exception("Error checking states on API server: %s")
                         remain_wait = self.poll_interval
                         continue
 
@@ -393,9 +393,9 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
         for i in self.intermediate_output_collections:
             try:
                 self.api.collections().delete(uuid=i).execute(num_retries=self.num_retries)
-            except:
+            except Exception:
                 logger.warning("Failed to delete intermediate output: %s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
-            if sys.exc_info()[0] is KeyboardInterrupt or sys.exc_info()[0] is SystemExit:
+            except (KeyboardInterrupt, SystemExit):
                 break
 
     def check_features(self, obj):
@@ -506,8 +506,9 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                                               body={
                                                   'is_trashed': True
                                               }).execute(num_retries=self.num_retries)
-            except Exception as e:
-                logger.info("Setting container output: %s", e)
+            except Exception:
+                logger.exception("Setting container output")
+                return
         elif self.work_api == "jobs" and "TASK_UUID" in os.environ:
             self.api.job_tasks().update(uuid=os.environ["TASK_UUID"],
                                    body={
@@ -731,8 +732,11 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
         except:
             if sys.exc_info()[0] is KeyboardInterrupt or sys.exc_info()[0] is SystemExit:
                 logger.error("Interrupted, workflow will be cancelled")
+            elif isinstance(sys.exc_info()[1], WorkflowException):
+                logger.error("Workflow execution failed:\n%s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
             else:
-                logger.error("Execution failed:\n%s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
+                logger.exception("Workflow execution failed")
+
             if self.pipeline:
                 self.api.pipeline_instances().update(uuid=self.pipeline["uuid"],
                                                      body={"state": "Failed"}).execute(num_retries=self.num_retries)
index fc7cc42d15c61f19021f006cb1eec18ab94178cd..3744b4a93afa40df10c17335310503500acd2432 100644 (file)
@@ -240,8 +240,8 @@ class CollectionFetcher(DefaultFetcher):
                     return True
         except arvados.errors.NotFoundError:
             return False
-        except:
-            logger.exception("Got unexpected exception checking if file exists:")
+        except Exception:
+            logger.exception("Got unexpected exception checking if file exists")
             return False
         return super(CollectionFetcher, self).check_exists(url)
 
index c0d165aa9eed4e5cbaeaf9d365a302957a3921b1..e515ac2ce5e99f4ec75011b8ac51bfe2fc1bbff8 100644 (file)
@@ -478,8 +478,8 @@ class Runner(Process):
                     fileobj["location"] = "keep:%s/%s" % (record["output"], path)
             adjustFileObjs(outputs, keepify)
             adjustDirObjs(outputs, keepify)
-        except Exception as e:
-            logger.exception("[%s] While getting final output object: %s", self.name, e)
+        except Exception:
+            logger.exception("[%s] While getting final output object", self.name)
             self.arvrunner.output_callback({}, "permanentFail")
         else:
             self.arvrunner.output_callback(outputs, processStatus)
index b579d41ed2839b73fa72c6486101abe97e46a0cc..485c757e7fce34dda579185608f39bfe4911bd94 100644 (file)
@@ -55,13 +55,13 @@ def normalize_stream(stream_name, stream):
                 if streamoffset == current_span[1]:
                     current_span[1] += segment.segment_size
                 else:
-                    stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
+                    stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
                     current_span = [streamoffset, streamoffset + segment.segment_size]
 
         if current_span is not None:
-            stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
+            stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
 
         if not stream[streamfile]:
-            stream_tokens.append("0:0:{0}".format(fout))
+            stream_tokens.append(u"0:0:{0}".format(fout))
 
     return stream_tokens
index 5b1f1a2a677fa15dad5801635ff0e6f639985584..cf1a36f9fdfbbfdf739fe75027d00eaa782df4f2 100644 (file)
@@ -283,7 +283,7 @@ class CollectionWriter(CollectionBase):
             streampath, filename = split(streampath)
         if self._last_open and not self._last_open.closed:
             raise errors.AssertionError(
-                "can't open '{}' when '{}' is still open".format(
+                u"can't open '{}' when '{}' is still open".format(
                     filename, self._last_open.name))
         if streampath != self.current_stream_name():
             self.start_new_stream(streampath)
@@ -461,22 +461,22 @@ class ResumableCollectionWriter(CollectionWriter):
                 writer._queued_file.seek(pos)
             except IOError as error:
                 raise errors.StaleWriterStateError(
-                    "failed to reopen active file {}: {}".format(path, error))
+                    u"failed to reopen active file {}: {}".format(path, error))
         return writer
 
     def check_dependencies(self):
         for path, orig_stat in listitems(self._dependencies):
             if not S_ISREG(orig_stat[ST_MODE]):
-                raise errors.StaleWriterStateError("{} not file".format(path))
+                raise errors.StaleWriterStateError(u"{} not file".format(path))
             try:
                 now_stat = tuple(os.stat(path))
             except OSError as error:
                 raise errors.StaleWriterStateError(
-                    "failed to stat {}: {}".format(path, error))
+                    u"failed to stat {}: {}".format(path, error))
             if ((not S_ISREG(now_stat[ST_MODE])) or
                 (orig_stat[ST_MTIME] != now_stat[ST_MTIME]) or
                 (orig_stat[ST_SIZE] != now_stat[ST_SIZE])):
-                raise errors.StaleWriterStateError("{} changed".format(path))
+                raise errors.StaleWriterStateError(u"{} changed".format(path))
 
     def dump_state(self, copy_func=lambda x: x):
         state = {attr: copy_func(getattr(self, attr))
@@ -492,7 +492,7 @@ class ResumableCollectionWriter(CollectionWriter):
         try:
             src_path = os.path.realpath(source)
         except Exception:
-            raise errors.AssertionError("{} not a file path".format(source))
+            raise errors.AssertionError(u"{} not a file path".format(source))
         try:
             path_stat = os.stat(src_path)
         except OSError as stat_error:
@@ -505,10 +505,10 @@ class ResumableCollectionWriter(CollectionWriter):
             self._dependencies[source] = tuple(fd_stat)
         elif path_stat is None:
             raise errors.AssertionError(
-                "could not stat {}: {}".format(source, stat_error))
+                u"could not stat {}: {}".format(source, stat_error))
         elif path_stat.st_ino != fd_stat.st_ino:
             raise errors.AssertionError(
-                "{} changed between open and stat calls".format(source))
+                u"{} changed between open and stat calls".format(source))
         else:
             self._dependencies[src_path] = tuple(fd_stat)
 
index 61258632bdd94f7acc684eaab50c442046f29f2e..54fa356d3a4537364ab770175addc742ade4826d 100644 (file)
@@ -360,7 +360,7 @@ class ResumeCache(object):
         try:
             fcntl.flock(fileobj, fcntl.LOCK_EX | fcntl.LOCK_NB)
         except IOError:
-            raise ResumeCacheConflict("{} locked".format(fileobj.name))
+            raise ResumeCacheConflict(u"{} locked".format(fileobj.name))
 
     def load(self):
         self.cache_file.seek(0)
@@ -502,7 +502,7 @@ class ArvPutUploadJob(object):
                     raise ArvPutUploadIsPending()
                 self._write_stdin(self.filename or 'stdin')
             elif not os.path.exists(path):
-                 raise PathDoesNotExistError("file or directory '{}' does not exist.".format(path))
+                 raise PathDoesNotExistError(u"file or directory '{}' does not exist.".format(path))
             elif os.path.isdir(path):
                 # Use absolute paths on cache index so CWD doesn't interfere
                 # with the caching logic.
@@ -742,7 +742,7 @@ class ArvPutUploadJob(object):
             elif file_in_local_collection.permission_expired():
                 # Permission token expired, re-upload file. This will change whenever
                 # we have a API for refreshing tokens.
-                self.logger.warning("Uploaded file '{}' access token expired, will re-upload it from scratch".format(filename))
+                self.logger.warning(u"Uploaded file '{}' access token expired, will re-upload it from scratch".format(filename))
                 should_upload = True
                 self._local_collection.remove(filename)
             elif cached_file_data['size'] == file_in_local_collection.size():
@@ -757,7 +757,7 @@ class ArvPutUploadJob(object):
                 # Inconsistent cache, re-upload the file
                 should_upload = True
                 self._local_collection.remove(filename)
-                self.logger.warning("Uploaded version of file '{}' is bigger than local version, will re-upload it from scratch.".format(source))
+                self.logger.warning(u"Uploaded version of file '{}' is bigger than local version, will re-upload it from scratch.".format(source))
         # Local file differs from cached data, re-upload it.
         else:
             if file_in_local_collection:
@@ -834,11 +834,11 @@ class ArvPutUploadJob(object):
         if self.use_cache:
             cache_filepath = self._get_cache_filepath()
             if self.resume and os.path.exists(cache_filepath):
-                self.logger.info("Resuming upload from cache file {}".format(cache_filepath))
+                self.logger.info(u"Resuming upload from cache file {}".format(cache_filepath))
                 self._cache_file = open(cache_filepath, 'a+')
             else:
                 # --no-resume means start with a empty cache file.
-                self.logger.info("Creating new cache file at {}".format(cache_filepath))
+                self.logger.info(u"Creating new cache file at {}".format(cache_filepath))
                 self._cache_file = open(cache_filepath, 'w+')
             self._cache_filename = self._cache_file.name
             self._lock_file(self._cache_file)
@@ -924,7 +924,7 @@ class ArvPutUploadJob(object):
         try:
             fcntl.flock(fileobj, fcntl.LOCK_EX | fcntl.LOCK_NB)
         except IOError:
-            raise ResumeCacheConflict("{} locked".format(fileobj.name))
+            raise ResumeCacheConflict(u"{} locked".format(fileobj.name))
 
     def _save_state(self):
         """
@@ -1234,9 +1234,9 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr,
     else:
         try:
             if args.update_collection:
-                logger.info("Collection updated: '{}'".format(writer.collection_name()))
+                logger.info(u"Collection updated: '{}'".format(writer.collection_name()))
             else:
-                logger.info("Collection saved as '{}'".format(writer.collection_name()))
+                logger.info(u"Collection saved as '{}'".format(writer.collection_name()))
             if args.portable_data_hash:
                 output = writer.portable_data_hash()
             else:
index a41184d10fb4fe7daadeb0892cf60ce36f47e8df..01a52a5e6681ec07daaf16eb0c0c18a9b7ba2ada 100644 (file)
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
@@ -1284,6 +1286,16 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
                          r'^\./%s.*:file2.txt' % os.path.basename(tmpdir))
         self.assertRegex(c['manifest_text'], r'^.*:file3.txt')
 
+    def test_unicode_on_filename(self):
+        tmpdir = self.make_tmpdir()
+        fname = u"iā¤arvados.txt"
+        with open(os.path.join(tmpdir, fname), 'w') as f:
+            f.write("This is a unicode named file")
+        col = self.run_and_find_collection("", ['--no-progress', tmpdir])
+        self.assertNotEqual(None, col['uuid'])
+        c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+        self.assertTrue(fname in c['manifest_text'], u"{} does not include {}".format(c['manifest_text'], fname))
+
     def test_silent_mode_no_errors(self):
         self.authorize_with('active')
         tmpdir = self.make_tmpdir()