Merge branch '14885-ciso-and-conda-packaging-pr'
authorEric Biagiotti <ebiagiotti@veritasgenetics.com>
Wed, 6 Mar 2019 19:43:29 +0000 (14:43 -0500)
committerEric Biagiotti <ebiagiotti@veritasgenetics.com>
Wed, 6 Mar 2019 19:43:29 +0000 (14:43 -0500)
refs #14885

Arvados-DCO-1.1-Signed-off-by: Eric Biagiotti <ebiagiotti@veritasgenetics.com>

28 files changed:
.licenseignore
build/run-build-docker-jobs-image.sh
doc/admin/upgrading.html.textile.liquid
doc/install/install-manual-prerequisites.html.textile.liquid
docker/jobs/1078ECD7.key [new file with mode: 0644]
docker/jobs/Dockerfile
docker/jobs/apt.arvados.org-dev.list [new file with mode: 0644]
docker/jobs/apt.arvados.org-stable.list [new file with mode: 0644]
docker/jobs/apt.arvados.org-testing.list [new file with mode: 0644]
docker/jobs/apt.arvados.org.list [deleted file]
lib/cloud/azure/azure.go
lib/cloud/azure/azure_test.go
lib/dispatchcloud/node_size.go
lib/dispatchcloud/worker/pool.go
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvdocker.py
sdk/cwl/arvados_cwl/arvjob.py
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/arvados_cwl/fsaccess.py
sdk/cwl/arvados_cwl/runner.py
sdk/cwl/tests/test_container.py
sdk/python/arvados/_normalize_stream.py
sdk/python/arvados/collection.py
sdk/python/arvados/commands/put.py
sdk/python/tests/test_arv_put.py
sdk/ruby/lib/arvados/collection.rb
sdk/ruby/test/test_collection.rb

index 06519a98e8bc45afcebdad584198a6b6bb47bf71..45028bf888ff6a40f910f29197aaac1a8d29516f 100644 (file)
@@ -15,7 +15,8 @@ build/package-test-dockerfiles/ubuntu1604/etc-apt-preferences.d-arvados
 doc/fonts/*
 doc/user/cwl/federated/*
 */docker_image
-docker/jobs/apt.arvados.org.list
+docker/jobs/apt.arvados.org*.list
+docker/jobs/1078ECD7.key
 */en.bootstrap.yml
 *font-awesome.css
 *.gif
index 7186a2209129a08c9c6fbd6a094ce6f0a9dac3c0..7d7e1fc8abf9171df1d905c22478eabf23236299 100755 (executable)
@@ -5,21 +5,24 @@
 
 function usage {
     echo >&2
-    echo >&2 "usage: $0 [options]"
+    echo >&2 "usage: WORKSPACE=/path/to/arvados $0 [options]"
     echo >&2
     echo >&2 "$0 options:"
     echo >&2 "  -t, --tags                    version tag for docker"
+    echo >&2 "  -r, --repo                    Arvados package repot to use: dev, testing, stable (default: dev)"
     echo >&2 "  -u, --upload                  Upload the images (docker push)"
     echo >&2 "  --no-cache                    Don't use build cache"
     echo >&2 "  -h, --help                    Display this help and exit"
     echo >&2
-    echo >&2 "  If no options are given, just builds the images."
+    echo >&2 "  WORKSPACE=path                Path to the Arvados source tree to build from"
+    echo >&2
 }
 upload=false
+REPO=dev
 
 # NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros).
-TEMP=`getopt -o hut: \
-    --long help,upload,no-cache,tags: \
+TEMP=`getopt -o hut:r: \
+    --long help,upload,no-cache,tags,repo: \
     -n "$0" -- "$@"`
 
 if [ $? != 0 ] ; then echo "Use -h for help"; exit 1 ; fi
@@ -50,6 +53,19 @@ do
                   ;;
             esac
             ;;
+        -r | --repo)
+            case "$2" in
+                "")
+                  echo "ERROR: --repo needs a parameter";
+                  usage;
+                  exit 1
+                  ;;
+                *)
+                  REPO="$2";
+                  shift 2
+                  ;;
+            esac
+            ;;
         --)
             shift
             break
@@ -69,6 +85,16 @@ exit_cleanly() {
     exit $EXITCODE
 }
 
+# Sanity check
+if ! [[ -n "$WORKSPACE" ]]; then
+    usage;
+    echo >&2 "Error: WORKSPACE environment variable not set"
+    echo >&2
+    exit 1
+fi
+
+echo $WORKSPACE
+
 COLUMNS=80
 . $WORKSPACE/build/run-library.sh
 
@@ -88,16 +114,6 @@ docker_push () {
     checkexit $ECODE "docker push $*"
 }
 
-# Sanity check
-if ! [[ -n "$WORKSPACE" ]]; then
-    echo >&2
-    echo >&2 "Error: WORKSPACE environment variable not set"
-    echo >&2
-    exit 1
-fi
-
-echo $WORKSPACE
-
 # find the docker binary
 DOCKER=`which docker.io`
 
@@ -153,6 +169,7 @@ cd docker/jobs
 docker build $NOCACHE \
        --build-arg python_sdk_version=${python_sdk_version} \
        --build-arg cwl_runner_version=${cwl_runner_version} \
+       --build-arg repo_version=${REPO} \
        -t arvados/jobs:$cwl_runner_version_orig .
 
 ECODE=$?
@@ -175,6 +192,9 @@ if docker --version |grep " 1\.[0-9]\." ; then
     # -f flag removed in Docker 1.12
     FORCE=-f
 fi
+
+#docker export arvados/jobs:$cwl_runner_version_orig | docker import - arvados/jobs:$cwl_runner_version_orig
+
 if ! [[ -z "$version_tag" ]]; then
     docker tag $FORCE arvados/jobs:$cwl_runner_version_orig arvados/jobs:"$version_tag"
 else
index eda25d5cc6678524ea1cd44b0500201f49e56e65..6e2e6cba6dfeb1873d8a58049e1cdb65ac801bdd 100644 (file)
@@ -32,6 +32,63 @@ TODO: extract this information based on git commit messages and generate changel
 
 h3. current master branch
 
+h4. Stricter collection manifest validation on the API server
+
+As a consequence of "#14482":https://dev.arvados.org/issues/14482, the Ruby SDK does a more rigorous collection manifest validation. Collections created after 2015-05 are unlikely to be invalid, however you may check for invalid manifests using the script below.
+
+You could set up a new rvm gemset and install the specific arvados gem for testing, like so:
+
+<notextile>
+<pre><code>~$ <span class="userinput">rvm gemset create rubysdk-test</span>
+~$ <span class="userinput">rvm gemset use rubysdk-test</span>
+~$ <span class="userinput">gem install arvados -v 1.3.1.20190301212059</span>
+</code></pre>
+</notextile>
+
+Next, you can run the following script using admin credentials, it will scan the whole collection database and report any collection that didn't pass the check:
+
+{% codeblock as ruby %}
+require 'arvados'
+require 'arvados/keep'
+
+api = Arvados.new
+offset = 0
+batch_size = 100
+invalid = []
+
+while true
+    begin
+        req = api.collection.index(
+            :select => [:uuid, :created_at, :manifest_text],
+            :include_trash => true, :include_old_versions => true,
+            :limit => batch_size, :offset => offset)
+    rescue
+        invalid.each {|c| puts "#{c[:uuid]} (Created at #{c[:created_at]}): #{c[:error]}" }
+        raise
+    end
+
+    req[:items].each do |col|
+        begin
+            Keep::Manifest.validate! col[:manifest_text]
+        rescue Exception => e
+            puts "Collection #{col[:uuid]} manifest not valid"
+            invalid << {uuid: col[:uuid], error: e, created_at: col[:created_at]}
+        end
+    end
+    puts "Checked #{offset} / #{req[:items_available]} - Invalid: #{invalid.size}"
+    offset += req[:limit]
+    break if offset > req[:items_available]
+end
+
+if invalid.empty?
+    puts "No invalid collection manifests found"
+else
+    invalid.each {|c| puts "#{c[:uuid]} (Created at #{c[:created_at]}): #{c[:error]}" }
+end
+{% endcodeblock %}
+
+The script will return a final report enumerating any invalid collection by UUID, with its creation date and error message so you can take the proper correction measures, if needed.
+
 h4. Python packaging change
 
 As part of story "#9945":https://dev.arvados.org/issues/9945, the distribution packaging (deb/rpm) of our Python packages has changed. These packages now include a built-in virtualenv to reduce dependencies on system packages. We have also stopped packaging and publishing backports for all the Python dependencies of our packages, as they are no longer needed.
index e0cc4b8581e65a1a38292f1953418db394f92bee..62017163d2b48540290ff3df29733eebfa759735 100644 (file)
@@ -39,6 +39,7 @@ table(table table-bordered table-condensed).
 |Debian 9 ("stretch")|Supported|Latest|
 |Ubuntu 14.04 ("trusty")|Supported|Latest|
 |Ubuntu 16.04 ("xenial")|Supported|Latest|
+|Ubuntu 18.04 ("bionic")|Supported|Latest|
 |Ubuntu 12.04 ("precise")|EOL|8ed7b6dd5d4df93a3f37096afe6d6f81c2a7ef6e (2017-05-03)|
 |Debian 7 ("wheezy")|EOL|997479d1408139e96ecdb42a60b4f727f814f6c9 (2016-12-28)|
 |CentOS 6 |EOL|997479d1408139e96ecdb42a60b4f727f814f6c9 (2016-12-28)|
@@ -64,7 +65,7 @@ gpgkey=http://rpm.arvados.org/CentOS/RPM-GPG-KEY-curoverse
 
 h3. Debian and Ubuntu
 
-Packages are available for Debian 8 ("jessie"), Debian 9 ("stretch"), Ubuntu 14.04 ("trusty") and Ubuntu 16.04 ("xenial").
+Packages are available for Debian 8 ("jessie"), Debian 9 ("stretch"), Ubuntu 14.04 ("trusty"), Ubuntu 16.04 ("xenial") and Ubuntu 18.04 ("bionic").
 
 First, register the Curoverse signing key in apt's database:
 
@@ -78,6 +79,7 @@ table(table table-bordered table-condensed).
 |Debian 9 ("stretch")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ stretch main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
 |Ubuntu 14.04 ("trusty")[1]|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ trusty main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
 |Ubuntu 16.04 ("xenial")[1]|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ xenial main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
+|Ubuntu 18.04 ("bionic")[1]|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ bionic main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
 
 {% include 'notebox_begin' %}
 
diff --git a/docker/jobs/1078ECD7.key b/docker/jobs/1078ECD7.key
new file mode 100644 (file)
index 0000000..edc62f4
--- /dev/null
@@ -0,0 +1,30 @@
+-----BEGIN PGP PUBLIC KEY BLOCK-----
+
+mQENBEzhgeoBCAChhoK1dqpWzNyDWqRGEvdFdkJaA9D2HRwKPfBfjAoePX6ZyrpA
+ItlUsvt/8s/DRiTiPEFQR4S7VqocmU6whJc3gDEGyOM6b1NF873lIfSVwUoE42QE
+a76dO8woOYgLUyxu2mKG+bJgGMumjBJt6ZOndYVjTYB/7sEeVxwmMVulfZe0s6zg
+ut0+SoTYg2R36qIqeIcWllYt97sEYnyy1qXMis4/3IZnuWkS/frsPR3aeUI4W+o2
+NDN1kj49+LMe7Fb5b7jZY08rZbAWXi1rU1hQx4jC9RvYqlT4HNld4Bn7os1IvOOA
+wNiR0oiVdiuDbBxcMvRPktxMrFVjowusRLq/ABEBAAG0PUN1cm92ZXJzZSwgSW5j
+IEF1dG9tYXRpYyBTaWduaW5nIEtleSA8c3lzYWRtaW5AY3Vyb3ZlcnNlLmNvbT6J
+ATgEEwECACIFAlNgYIECGwMGCwkIBwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJEFcW
+WREQeOzXPkEH/jQJDIYI1dxWcYiA+hczmpaZvN2/pc/kwIW/6a03+6zqmSNkebOE
+TgoDILacSYc17hy20R1/rWyUstOMKcEgFDBlSehhHyl0f7q/w7d8Ais6MabzsPfx
+IceJpsjUg87+BR7qWhgQ0sxmtIF2TKuTFLs+nkGsgSsiBOEF4NvHxuj3HD4y8F27
+HNqrkqwjLS8xJwwH5Gp2uMEVr1AXIH3iSRjJ8X124s8iEP97Q/3IazoYRf9/MCSm
+QEx8KzxwDX6t4bW6O4D01K+e9gdkTY70dcMgJoqm5IsX7yxjEubiOunphtlJnZ9d
+Oi1yBN5UM3pWKAdcfRj4rcfV9Simvpx9av+5AQ0ETOGB6gEIAMAA0HVMG0BbdnU7
+wWgl5eFdT0AUSrXK/WdcKqVEGGv+c68NETSHWZOJX7O46Eao4gY4cTYprVMBzxpY
+/BtQSYLpE0HLvBc1fcFd61Yz4H/9rGSNY0GcIQEbOjbJY5mr8qFsQ1K/mAf3aUL3
+b6ni4sHVicRiRr0Gl4Ihorlskpfu1SHs/C5tvTSVNF9p4vtl5892y1yILQeVpcBs
+NCR7MUpdS49xCpvnAWsDZX+ij6LTR3lzCm/ZLCg4gNuZkjgU9oqVfGkqysW7WZ8S
+OLvzAwUw7i1EIFX8q6QdudGoezxz8m8OgZM1v8AFpYEKlhEPf1W0MSfaRDwrj866
+8nCLruEAEQEAAYkBHwQYAQIACQUCTOGB6gIbDAAKCRBXFlkREHjs199EB/4+p0G1
+3PHxt6rLWSCGXobDOu4ZOA/qnv0D/JhOLroFds5TzQv6vnS8eAkhCTjHVA+b58cm
+kXpI0oYcD4ZP+KK1CHKq2rGfwou7HfAF+icnNqYkeBOkjjbCgkvBlcCInuAuU8JX
+DZMkfFk52+eBKwTjS/J/fQp0vDru8bHLp98WgdRHWfJQ3mc3gz4A5sR6zhrGPW6/
+ssnROS4dC2Ohp35GpgN1KjD3EmEw5RoSBYlyrARCaMsivgIKMxGUEyFZWhuJt3N1
+2MTddRwz28hbmYCi+MzHYDbRv+cSyUDmvXaWhfkNKBepClBA1rTWBcldit5vvlqr
+yPet6wIKrtLGhAqZ
+=CLkG
+-----END PGP PUBLIC KEY BLOCK-----
index c0fe145db1b292ebc55c536b9d0e7786a7b06daa..02a1c3829d432e284a770d11459aaa111bff57db 100644 (file)
@@ -2,29 +2,33 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-# Based on Debian Jessie
-FROM debian:jessie
-MAINTAINER Ward Vandewege <ward@curoverse.com>
+# Based on Debian Stretch
+FROM debian:stretch
+MAINTAINER Ward Vandewege <wvandewege@veritasgenetics.com>
 
 ENV DEBIAN_FRONTEND noninteractive
 
-ADD apt.arvados.org.list /etc/apt/sources.list.d/
-RUN apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7
-RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3
+RUN apt-get update -q
+RUN apt-get install -yq --no-install-recommends gnupg
+
+ARG repo_version
+RUN echo repo_version $repo_version
+ADD apt.arvados.org-$repo_version.list /etc/apt/sources.list.d/
+
+ADD 1078ECD7.key /tmp/
+RUN cat /tmp/1078ECD7.key | apt-key add -
 
 ARG python_sdk_version
 ARG cwl_runner_version
 RUN echo cwl_runner_version $cwl_runner_version python_sdk_version $python_sdk_version
 
 RUN apt-get update -q
-RUN apt-get install -yq --no-install-recommends \
-    git python-pip python-virtualenv \
-    python-dev libgnutls28-dev libcurl4-gnutls-dev nodejs \
+RUN apt-get install -yq --no-install-recommends nodejs \
     python-arvados-python-client=$python_sdk_version \
     python-arvados-cwl-runner=$cwl_runner_version
 
-# use the Python executable from the python-arvados-python-client package
-RUN rm -f /usr/bin/python && ln -s /usr/share/python2.7/dist/python-arvados-python-client/bin/python /usr/bin/python
+# use the Python executable from the python-arvados-cwl-runner package
+RUN rm -f /usr/bin/python && ln -s /usr/share/python2.7/dist/python-arvados-cwl-runner/bin/python /usr/bin/python
 
 # Install dependencies and set up system.
 RUN /usr/sbin/adduser --disabled-password \
diff --git a/docker/jobs/apt.arvados.org-dev.list b/docker/jobs/apt.arvados.org-dev.list
new file mode 100644 (file)
index 0000000..468000e
--- /dev/null
@@ -0,0 +1,2 @@
+# apt.arvados.org
+deb http://apt.arvados.org/ stretch-dev main
diff --git a/docker/jobs/apt.arvados.org-stable.list b/docker/jobs/apt.arvados.org-stable.list
new file mode 100644 (file)
index 0000000..afbc51e
--- /dev/null
@@ -0,0 +1,2 @@
+# apt.arvados.org
+deb http://apt.arvados.org/ stretch main
diff --git a/docker/jobs/apt.arvados.org-testing.list b/docker/jobs/apt.arvados.org-testing.list
new file mode 100644 (file)
index 0000000..c8ea91d
--- /dev/null
@@ -0,0 +1,2 @@
+# apt.arvados.org
+deb http://apt.arvados.org/ stretch-testing main
diff --git a/docker/jobs/apt.arvados.org.list b/docker/jobs/apt.arvados.org.list
deleted file mode 100644 (file)
index 11b98e2..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-# apt.arvados.org
-deb http://apt.arvados.org/ jessie-dev main
index 8ae8a44811529f598cb8ecee0044919b76bf467e..d19e4bef2372ff87dfb13e26db1ffddb9726bdca 100644 (file)
@@ -52,6 +52,11 @@ type azureInstanceSetConfig struct {
 
 const tagKeyInstanceSecret = "InstanceSecret"
 
+type containerWrapper interface {
+       GetBlobReference(name string) *storage.Blob
+       ListBlobs(params storage.ListBlobsParameters) (storage.BlobListResponse, error)
+}
+
 type virtualMachinesClientWrapper interface {
        createOrUpdate(ctx context.Context,
                resourceGroupName string,
@@ -192,20 +197,20 @@ func wrapAzureError(err error) error {
 }
 
 type azureInstanceSet struct {
-       azconfig          azureInstanceSetConfig
-       vmClient          virtualMachinesClientWrapper
-       netClient         interfacesClientWrapper
-       storageAcctClient storageacct.AccountsClient
-       azureEnv          azure.Environment
-       interfaces        map[string]network.Interface
-       dispatcherID      string
-       namePrefix        string
-       ctx               context.Context
-       stopFunc          context.CancelFunc
-       stopWg            sync.WaitGroup
-       deleteNIC         chan string
-       deleteBlob        chan storage.Blob
-       logger            logrus.FieldLogger
+       azconfig     azureInstanceSetConfig
+       vmClient     virtualMachinesClientWrapper
+       netClient    interfacesClientWrapper
+       blobcont     containerWrapper
+       azureEnv     azure.Environment
+       interfaces   map[string]network.Interface
+       dispatcherID string
+       namePrefix   string
+       ctx          context.Context
+       stopFunc     context.CancelFunc
+       stopWg       sync.WaitGroup
+       deleteNIC    chan string
+       deleteBlob   chan storage.Blob
+       logger       logrus.FieldLogger
 }
 
 func newAzureInstanceSet(config json.RawMessage, dispatcherID cloud.InstanceSetID, logger logrus.FieldLogger) (prv cloud.InstanceSet, err error) {
@@ -215,12 +220,14 @@ func newAzureInstanceSet(config json.RawMessage, dispatcherID cloud.InstanceSetI
                return nil, err
        }
 
-       ap := azureInstanceSet{logger: logger}
-       err = ap.setup(azcfg, string(dispatcherID))
+       az := azureInstanceSet{logger: logger}
+       az.ctx, az.stopFunc = context.WithCancel(context.Background())
+       err = az.setup(azcfg, string(dispatcherID))
        if err != nil {
+               az.stopFunc()
                return nil, err
        }
-       return &ap, nil
+       return &az, nil
 }
 
 func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID string) (err error) {
@@ -251,12 +258,26 @@ func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID str
 
        az.vmClient = &virtualMachinesClientImpl{vmClient}
        az.netClient = &interfacesClientImpl{netClient}
-       az.storageAcctClient = storageAcctClient
+
+       result, err := storageAcctClient.ListKeys(az.ctx, az.azconfig.ResourceGroup, az.azconfig.StorageAccount)
+       if err != nil {
+               az.logger.WithError(err).Warn("Couldn't get account keys")
+               return err
+       }
+
+       key1 := *(*result.Keys)[0].Value
+       client, err := storage.NewBasicClientOnSovereignCloud(az.azconfig.StorageAccount, key1, az.azureEnv)
+       if err != nil {
+               az.logger.WithError(err).Warn("Couldn't make client")
+               return err
+       }
+
+       blobsvc := client.GetBlobService()
+       az.blobcont = blobsvc.GetContainerReference(az.azconfig.BlobContainer)
 
        az.dispatcherID = dispatcherID
        az.namePrefix = fmt.Sprintf("compute-%s-", az.dispatcherID)
 
-       az.ctx, az.stopFunc = context.WithCancel(context.Background())
        go func() {
                az.stopWg.Add(1)
                defer az.stopWg.Done()
@@ -363,11 +384,12 @@ func (az *azureInstanceSet) Create(
                return nil, wrapAzureError(err)
        }
 
-       instanceVhd := fmt.Sprintf("https://%s.blob.%s/%s/%s-os.vhd",
+       blobname := fmt.Sprintf("%s-os.vhd", name)
+       instanceVhd := fmt.Sprintf("https://%s.blob.%s/%s/%s",
                az.azconfig.StorageAccount,
                az.azureEnv.StorageEndpointSuffix,
                az.azconfig.BlobContainer,
-               name)
+               blobname)
 
        customData := base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\n" + initCommand + "\n"))
 
@@ -422,6 +444,16 @@ func (az *azureInstanceSet) Create(
 
        vm, err := az.vmClient.createOrUpdate(az.ctx, az.azconfig.ResourceGroup, name, vmParameters)
        if err != nil {
+               _, delerr := az.blobcont.GetBlobReference(blobname).DeleteIfExists(nil)
+               if delerr != nil {
+                       az.logger.WithError(delerr).Warnf("Error cleaning up vhd blob after failed create")
+               }
+
+               _, delerr = az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, *nic.Name)
+               if delerr != nil {
+                       az.logger.WithError(delerr).Warnf("Error cleaning up NIC after failed create")
+               }
+
                return nil, wrapAzureError(err)
        }
 
@@ -509,27 +541,12 @@ func (az *azureInstanceSet) manageNics() (map[string]network.Interface, error) {
 // leased to a VM) and haven't been modified for
 // DeleteDanglingResourcesAfter seconds.
 func (az *azureInstanceSet) manageBlobs() {
-       result, err := az.storageAcctClient.ListKeys(az.ctx, az.azconfig.ResourceGroup, az.azconfig.StorageAccount)
-       if err != nil {
-               az.logger.WithError(err).Warn("Couldn't get account keys")
-               return
-       }
-
-       key1 := *(*result.Keys)[0].Value
-       client, err := storage.NewBasicClientOnSovereignCloud(az.azconfig.StorageAccount, key1, az.azureEnv)
-       if err != nil {
-               az.logger.WithError(err).Warn("Couldn't make client")
-               return
-       }
-
-       blobsvc := client.GetBlobService()
-       blobcont := blobsvc.GetContainerReference(az.azconfig.BlobContainer)
 
        page := storage.ListBlobsParameters{Prefix: az.namePrefix}
        timestamp := time.Now()
 
        for {
-               response, err := blobcont.ListBlobs(page)
+               response, err := az.blobcont.ListBlobs(page)
                if err != nil {
                        az.logger.WithError(err).Warn("Error listing blobs")
                        return
@@ -628,7 +645,13 @@ func (ai *azureInstance) Destroy() error {
 }
 
 func (ai *azureInstance) Address() string {
-       return *(*ai.nic.IPConfigurations)[0].PrivateIPAddress
+       if ai.nic.IPConfigurations != nil &&
+               len(*ai.nic.IPConfigurations) > 0 &&
+               (*ai.nic.IPConfigurations)[0].PrivateIPAddress != nil {
+
+               return *(*ai.nic.IPConfigurations)[0].PrivateIPAddress
+       }
+       return ""
 }
 
 func (ai *azureInstance) RemoteUser() string {
index 61649c39800d7ad8e31252313285e0e85fe7b16c..72832578dfa68ea022c75a6a6a69ef35018be15c 100644 (file)
@@ -105,6 +105,16 @@ func (*InterfacesClientStub) listComplete(ctx context.Context, resourceGroupName
        return network.InterfaceListResultIterator{}, nil
 }
 
+type BlobContainerStub struct{}
+
+func (*BlobContainerStub) GetBlobReference(name string) *storage.Blob {
+       return nil
+}
+
+func (*BlobContainerStub) ListBlobs(params storage.ListBlobsParameters) (storage.BlobListResponse, error) {
+       return storage.BlobListResponse{}, nil
+}
+
 type testConfig struct {
        ImageIDForTestSuite string
        DriverParameters    json.RawMessage
@@ -148,6 +158,7 @@ func GetInstanceSet() (cloud.InstanceSet, cloud.ImageID, arvados.Cluster, error)
        ap.ctx, ap.stopFunc = context.WithCancel(context.Background())
        ap.vmClient = &VirtualMachinesClientStub{}
        ap.netClient = &InterfacesClientStub{}
+       ap.blobcont = &BlobContainerStub{}
        return &ap, cloud.ImageID("blob"), cluster, nil
 }
 
index d7f4585619417904a1125bc05d54d58499199179..6fb46b5f46f9d36c2500ee759cee1a3ec19c59f0 100644 (file)
@@ -46,7 +46,7 @@ func estimateDockerImageSize(collectionPDH string) int64 {
        // the size of the manifest.
        //
        // Use the following heuristic:
-       // - Start with the length of the mainfest (n)
+       // - Start with the length of the manifest (n)
        // - Subtract 80 characters for the filename and file segment
        // - Divide by 42 to get the number of block identifiers ('hash\+size\ ' is 32+1+8+1)
        // - Assume each block is full, multiply by 64 MiB
index 14f6a3efced3815f11b19b6e08612ead4326e4f6..fe1c6ecc0304f64345135f6016d29a8b1512fea1 100644 (file)
@@ -565,6 +565,7 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
 func (wp *Pool) runMetrics() {
        ch := wp.Subscribe()
        defer wp.Unsubscribe(ch)
+       wp.updateMetrics()
        for range ch {
                wp.updateMetrics()
        }
index 52fd4d21a115f29ee7fb388c11ebeb0564b40ae2..834ca195fdda02d859eafcd4aa0cea5c70c1c359 100644 (file)
@@ -289,10 +289,12 @@ def main(args, stdout, stderr, api_client=None, keep_client=None,
         if keep_client is None:
             keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
         executor = ArvCwlExecutor(api_client, arvargs, keep_client=keep_client, num_retries=4)
-    except Exception as e:
-        logger.error(e)
+    except Exception:
+        logger.exception("Error creating the Arvados CWL Executor")
         return 1
 
+    # Note that unless in debug mode, some stack traces related to user 
+    # workflow errors may be suppressed. See ArvadosJob.done().
     if arvargs.debug:
         logger.setLevel(logging.DEBUG)
         logging.getLogger('arvados').setLevel(logging.DEBUG)
index 461f5b66a563403fdad056073cebe56802818cb5..03b4e07c76f5849a97ae85b9bd179e897ec8fc33 100644 (file)
@@ -304,8 +304,8 @@ class ArvadosContainer(JobBase):
                 logger.info("%s reused container %s", self.arvrunner.label(self), response["container_uuid"])
             else:
                 logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
-        except Exception as e:
-            logger.error("%s got error %s" % (self.arvrunner.label(self), str(e)))
+        except Exception:
+            logger.exception("%s got an error", self.arvrunner.label(self))
             self.output_callback({}, "permanentFail")
 
     def done(self, record):
@@ -353,11 +353,13 @@ class ArvadosContainer(JobBase):
             if container["output"]:
                 outputs = done.done_outputs(self, container, "/tmp", self.outdir, "/keep")
         except WorkflowException as e:
+            # Only include a stack trace if in debug mode. 
+            # A stack trace may obfuscate more useful output about the workflow. 
             logger.error("%s unable to collect output from %s:\n%s",
                          self.arvrunner.label(self), container["output"], e, exc_info=(e if self.arvrunner.debug else False))
             processStatus = "permanentFail"
-        except Exception as e:
-            logger.exception("%s while getting output object: %s", self.arvrunner.label(self), e)
+        except Exception:
+            logger.exception("%s while getting output object:", self.arvrunner.label(self))
             processStatus = "permanentFail"
         finally:
             self.output_callback(outputs, processStatus)
@@ -523,8 +525,8 @@ class RunnerContainer(Runner):
             container = self.arvrunner.api.containers().get(
                 uuid=record["container_uuid"]
             ).execute(num_retries=self.arvrunner.num_retries)
-        except Exception as e:
-            logger.exception("%s while getting runner container: %s", self.arvrunner.label(self), e)
+        except Exception:
+            logger.exception("%s while getting runner container", self.arvrunner.label(self))
             self.arvrunner.output_callback({}, "permanentFail")
         else:
             super(RunnerContainer, self).done(container)
index 84006b47d2a8ba86fd97f88b63772a53e3d711f6..a8f56ad1d4f30db21c5a59f0fb6df9258c723c58 100644 (file)
@@ -63,6 +63,7 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid
                 arvados.commands.put.api_client = api_client
                 arvados.commands.keepdocker.main(args, stdout=sys.stderr, install_sig_handlers=False, api=api_client)
             except SystemExit as e:
+                # If e.code is None or zero, then keepdocker exited normally and we can continue
                 if e.code:
                     raise WorkflowException("keepdocker exited with code %s" % e.code)
 
index 69fe7e2a8f1b632675b16eeb3f6ad07ee76c00e8..ab2078e1571145aac5f334e076bfc00c7b951448 100644 (file)
@@ -199,7 +199,7 @@ class ArvadosJob(JobBase):
                                     e)
             else:
                 logger.info("%s %s is %s", self.arvrunner.label(self), response["uuid"], response["state"])
-        except Exception as e:
+        except Exception:
             logger.exception("%s error" % (self.arvrunner.label(self)))
             self.output_callback({}, "permanentFail")
 
@@ -224,8 +224,8 @@ class ArvadosJob(JobBase):
                             body={
                                 "components": components
                             }).execute(num_retries=self.arvrunner.num_retries)
-                except Exception as e:
-                    logger.info("Error adding to components: %s", e)
+                except Exception:
+                    logger.exception("Error adding to components")
 
     def done(self, record):
         try:
@@ -272,10 +272,12 @@ class ArvadosJob(JobBase):
                         outputs = done.done(self, record, dirs["tmpdir"],
                                             dirs["outdir"], dirs["keep"])
             except WorkflowException as e:
+                # Only include a stack trace if in debug mode. 
+                # This is most likely a user workflow error and a stack trace may obfuscate more useful output. 
                 logger.error("%s unable to collect output from %s:\n%s",
                              self.arvrunner.label(self), record["output"], e, exc_info=(e if self.arvrunner.debug else False))
                 processStatus = "permanentFail"
-            except Exception as e:
+            except Exception:
                 logger.exception("Got unknown exception while collecting output for job %s:", self.name)
                 processStatus = "permanentFail"
 
index 535cfd7582b985ad806d659f518f9da9ce0e6fbc..319e8a887114b88b55865ca673dbafb3e0b9a7dc 100644 (file)
@@ -59,6 +59,7 @@ class RuntimeStatusLoggingHandler(logging.Handler):
     def __init__(self, runtime_status_update_func):
         super(RuntimeStatusLoggingHandler, self).__init__()
         self.runtime_status_update = runtime_status_update_func
+        self.updatingRuntimeStatus = False
 
     def emit(self, record):
         kind = None
@@ -66,22 +67,27 @@ class RuntimeStatusLoggingHandler(logging.Handler):
             kind = 'error'
         elif record.levelno >= logging.WARNING:
             kind = 'warning'
-        if kind is not None:
-            log_msg = record.getMessage()
-            if '\n' in log_msg:
-                # If the logged message is multi-line, use its first line as status
-                # and the rest as detail.
-                status, detail = log_msg.split('\n', 1)
-                self.runtime_status_update(
-                    kind,
-                    "%s: %s" % (record.name, status),
-                    detail
-                )
-            else:
-                self.runtime_status_update(
-                    kind,
-                    "%s: %s" % (record.name, record.getMessage())
-                )
+        if kind is not None and self.updatingRuntimeStatus is not True:
+            self.updatingRuntimeStatus = True
+            try:
+                log_msg = record.getMessage()
+                if '\n' in log_msg:
+                    # If the logged message is multi-line, use its first line as status
+                    # and the rest as detail.
+                    status, detail = log_msg.split('\n', 1)
+                    self.runtime_status_update(
+                        kind,
+                        "%s: %s" % (record.name, status),
+                        detail
+                    )
+                else:
+                    self.runtime_status_update(
+                        kind,
+                        "%s: %s" % (record.name, record.getMessage())
+                    )
+            finally:
+                self.updatingRuntimeStatus = False
+            
 
 class ArvCwlExecutor(object):
     """Execute a CWL tool or workflow, submit work (using either jobs or
@@ -361,8 +367,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                     keys = keys[pageSize:]
                     try:
                         proc_states = table.list(filters=[["uuid", "in", page]]).execute(num_retries=self.num_retries)
-                    except Exception as e:
-                        logger.warning("Error checking states on API server: %s", e)
+                    except Exception:
+                        logger.exception("Error checking states on API server: %s")
                         remain_wait = self.poll_interval
                         continue
 
@@ -393,9 +399,9 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
         for i in self.intermediate_output_collections:
             try:
                 self.api.collections().delete(uuid=i).execute(num_retries=self.num_retries)
-            except:
+            except Exception:
                 logger.warning("Failed to delete intermediate output: %s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
-            if sys.exc_info()[0] is KeyboardInterrupt or sys.exc_info()[0] is SystemExit:
+            except (KeyboardInterrupt, SystemExit):
                 break
 
     def check_features(self, obj):
@@ -506,8 +512,9 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                                               body={
                                                   'is_trashed': True
                                               }).execute(num_retries=self.num_retries)
-            except Exception as e:
-                logger.info("Setting container output: %s", e)
+            except Exception:
+                logger.exception("Setting container output")
+                return
         elif self.work_api == "jobs" and "TASK_UUID" in os.environ:
             self.api.job_tasks().update(uuid=os.environ["TASK_UUID"],
                                    body={
@@ -731,8 +738,11 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
         except:
             if sys.exc_info()[0] is KeyboardInterrupt or sys.exc_info()[0] is SystemExit:
                 logger.error("Interrupted, workflow will be cancelled")
+            elif isinstance(sys.exc_info()[1], WorkflowException):
+                logger.error("Workflow execution failed:\n%s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
             else:
-                logger.error("Execution failed:\n%s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
+                logger.exception("Workflow execution failed")
+
             if self.pipeline:
                 self.api.pipeline_instances().update(uuid=self.pipeline["uuid"],
                                                      body={"state": "Failed"}).execute(num_retries=self.num_retries)
index fc7cc42d15c61f19021f006cb1eec18ab94178cd..3744b4a93afa40df10c17335310503500acd2432 100644 (file)
@@ -240,8 +240,8 @@ class CollectionFetcher(DefaultFetcher):
                     return True
         except arvados.errors.NotFoundError:
             return False
-        except:
-            logger.exception("Got unexpected exception checking if file exists:")
+        except Exception:
+            logger.exception("Got unexpected exception checking if file exists")
             return False
         return super(CollectionFetcher, self).check_exists(url)
 
index c0d165aa9eed4e5cbaeaf9d365a302957a3921b1..e515ac2ce5e99f4ec75011b8ac51bfe2fc1bbff8 100644 (file)
@@ -478,8 +478,8 @@ class Runner(Process):
                     fileobj["location"] = "keep:%s/%s" % (record["output"], path)
             adjustFileObjs(outputs, keepify)
             adjustDirObjs(outputs, keepify)
-        except Exception as e:
-            logger.exception("[%s] While getting final output object: %s", self.name, e)
+        except Exception:
+            logger.exception("[%s] While getting final output object", self.name)
             self.arvrunner.output_callback({}, "permanentFail")
         else:
             self.arvrunner.output_callback(outputs, processStatus)
index de21fc0b92d1c7437978be0a5018c3ea51fd76c3..1a57da3927a352e614f5a65ebb46887864ece07b 100644 (file)
@@ -80,6 +80,23 @@ class TestContainer(unittest.TestCase):
 
         return loadingContext, runtimeContext
 
+    # Helper function to set up the ArvCwlExecutor to use the containers api 
+    # and test that the RuntimeStatusLoggingHandler is set up correctly
+    def setup_and_test_container_executor_and_logging(self, gcc_mock) :
+        api = mock.MagicMock()
+        api._rootDesc = copy.deepcopy(get_rootDesc())
+        del api._rootDesc.get('resources')['jobs']['methods']['create']
+
+        # Make sure ArvCwlExecutor thinks it's running inside a container so it
+        # adds the logging handler that will call runtime_status_update() mock
+        self.assertFalse(gcc_mock.called)
+        runner = arvados_cwl.ArvCwlExecutor(api)
+        self.assertEqual(runner.work_api, 'containers')
+        root_logger = logging.getLogger('')
+        handlerClasses = [h.__class__ for h in root_logger.handlers]
+        self.assertTrue(arvados_cwl.RuntimeStatusLoggingHandler in handlerClasses)
+        return runner
+        
     # The test passes no builder.resources
     # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
     @mock.patch("arvados.commands.keepdocker.list_images_in_arv")
@@ -500,28 +517,30 @@ class TestContainer(unittest.TestCase):
         arvjob.output_callback.assert_called_with({"out": "stuff"}, "success")
         runner.add_intermediate_output.assert_called_with("zzzzz-4zz18-zzzzzzzzzzzzzz2")
 
+    # Test to make sure we dont call runtime_status_update if we already did
+    # some where higher up in the call stack
     @mock.patch("arvados_cwl.util.get_current_container")
-    @mock.patch("arvados.collection.CollectionReader")
-    @mock.patch("arvados.collection.Collection")
-    def test_child_failure(self, col, reader, gcc_mock):
-        api = mock.MagicMock()
-        api._rootDesc = copy.deepcopy(get_rootDesc())
-        del api._rootDesc.get('resources')['jobs']['methods']['create']
+    def test_recursive_runtime_status_update(self, gcc_mock):
+        self.setup_and_test_container_executor_and_logging(gcc_mock)
+        root_logger = logging.getLogger('')
 
-        # Set up runner with mocked runtime_status_update()
-        self.assertFalse(gcc_mock.called)
-        runtime_status_update = mock.MagicMock()
-        arvados_cwl.ArvCwlExecutor.runtime_status_update = runtime_status_update
-        runner = arvados_cwl.ArvCwlExecutor(api)
-        self.assertEqual(runner.work_api, 'containers')
+        # get_current_container is invoked when we call runtime_status_update
+        # so try and log again!
+        gcc_mock.side_effect = lambda *args: root_logger.error("Second Error")
+        try: 
+            root_logger.error("First Error")
+        except RuntimeError: 
+            self.fail("RuntimeStatusLoggingHandler should not be called recursively")
 
-        # Make sure ArvCwlExecutor thinks it's running inside a container so it
-        # adds the logging handler that will call runtime_status_update() mock
+    @mock.patch("arvados_cwl.ArvCwlExecutor.runtime_status_update")
+    @mock.patch("arvados_cwl.util.get_current_container")
+    @mock.patch("arvados.collection.CollectionReader")
+    @mock.patch("arvados.collection.Collection")
+    def test_child_failure(self, col, reader, gcc_mock, rts_mock):
+        runner = self.setup_and_test_container_executor_and_logging(gcc_mock)
+        
         gcc_mock.return_value = {"uuid" : "zzzzz-dz642-zzzzzzzzzzzzzzz"}
         self.assertTrue(gcc_mock.called)
-        root_logger = logging.getLogger('')
-        handlerClasses = [h.__class__ for h in root_logger.handlers]
-        self.assertTrue(arvados_cwl.RuntimeStatusLoggingHandler in handlerClasses)
 
         runner.num_retries = 0
         runner.ignore_docker_for_reuse = False
@@ -565,7 +584,7 @@ class TestContainer(unittest.TestCase):
             "modified_at": "2017-05-26T12:01:22Z"
         })
 
-        runtime_status_update.assert_called_with(
+        rts_mock.assert_called_with(
             'error',
             'arvados.cwl-runner: [container testjob] (zzzzz-xvhdp-zzzzzzzzzzzzzzz) error log:',
             '  ** log is empty **'
index b579d41ed2839b73fa72c6486101abe97e46a0cc..485c757e7fce34dda579185608f39bfe4911bd94 100644 (file)
@@ -55,13 +55,13 @@ def normalize_stream(stream_name, stream):
                 if streamoffset == current_span[1]:
                     current_span[1] += segment.segment_size
                 else:
-                    stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
+                    stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
                     current_span = [streamoffset, streamoffset + segment.segment_size]
 
         if current_span is not None:
-            stream_tokens.append("{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
+            stream_tokens.append(u"{0}:{1}:{2}".format(current_span[0], current_span[1] - current_span[0], fout))
 
         if not stream[streamfile]:
-            stream_tokens.append("0:0:{0}".format(fout))
+            stream_tokens.append(u"0:0:{0}".format(fout))
 
     return stream_tokens
index 5b1f1a2a677fa15dad5801635ff0e6f639985584..cf1a36f9fdfbbfdf739fe75027d00eaa782df4f2 100644 (file)
@@ -283,7 +283,7 @@ class CollectionWriter(CollectionBase):
             streampath, filename = split(streampath)
         if self._last_open and not self._last_open.closed:
             raise errors.AssertionError(
-                "can't open '{}' when '{}' is still open".format(
+                u"can't open '{}' when '{}' is still open".format(
                     filename, self._last_open.name))
         if streampath != self.current_stream_name():
             self.start_new_stream(streampath)
@@ -461,22 +461,22 @@ class ResumableCollectionWriter(CollectionWriter):
                 writer._queued_file.seek(pos)
             except IOError as error:
                 raise errors.StaleWriterStateError(
-                    "failed to reopen active file {}: {}".format(path, error))
+                    u"failed to reopen active file {}: {}".format(path, error))
         return writer
 
     def check_dependencies(self):
         for path, orig_stat in listitems(self._dependencies):
             if not S_ISREG(orig_stat[ST_MODE]):
-                raise errors.StaleWriterStateError("{} not file".format(path))
+                raise errors.StaleWriterStateError(u"{} not file".format(path))
             try:
                 now_stat = tuple(os.stat(path))
             except OSError as error:
                 raise errors.StaleWriterStateError(
-                    "failed to stat {}: {}".format(path, error))
+                    u"failed to stat {}: {}".format(path, error))
             if ((not S_ISREG(now_stat[ST_MODE])) or
                 (orig_stat[ST_MTIME] != now_stat[ST_MTIME]) or
                 (orig_stat[ST_SIZE] != now_stat[ST_SIZE])):
-                raise errors.StaleWriterStateError("{} changed".format(path))
+                raise errors.StaleWriterStateError(u"{} changed".format(path))
 
     def dump_state(self, copy_func=lambda x: x):
         state = {attr: copy_func(getattr(self, attr))
@@ -492,7 +492,7 @@ class ResumableCollectionWriter(CollectionWriter):
         try:
             src_path = os.path.realpath(source)
         except Exception:
-            raise errors.AssertionError("{} not a file path".format(source))
+            raise errors.AssertionError(u"{} not a file path".format(source))
         try:
             path_stat = os.stat(src_path)
         except OSError as stat_error:
@@ -505,10 +505,10 @@ class ResumableCollectionWriter(CollectionWriter):
             self._dependencies[source] = tuple(fd_stat)
         elif path_stat is None:
             raise errors.AssertionError(
-                "could not stat {}: {}".format(source, stat_error))
+                u"could not stat {}: {}".format(source, stat_error))
         elif path_stat.st_ino != fd_stat.st_ino:
             raise errors.AssertionError(
-                "{} changed between open and stat calls".format(source))
+                u"{} changed between open and stat calls".format(source))
         else:
             self._dependencies[src_path] = tuple(fd_stat)
 
index 61258632bdd94f7acc684eaab50c442046f29f2e..54fa356d3a4537364ab770175addc742ade4826d 100644 (file)
@@ -360,7 +360,7 @@ class ResumeCache(object):
         try:
             fcntl.flock(fileobj, fcntl.LOCK_EX | fcntl.LOCK_NB)
         except IOError:
-            raise ResumeCacheConflict("{} locked".format(fileobj.name))
+            raise ResumeCacheConflict(u"{} locked".format(fileobj.name))
 
     def load(self):
         self.cache_file.seek(0)
@@ -502,7 +502,7 @@ class ArvPutUploadJob(object):
                     raise ArvPutUploadIsPending()
                 self._write_stdin(self.filename or 'stdin')
             elif not os.path.exists(path):
-                 raise PathDoesNotExistError("file or directory '{}' does not exist.".format(path))
+                 raise PathDoesNotExistError(u"file or directory '{}' does not exist.".format(path))
             elif os.path.isdir(path):
                 # Use absolute paths on cache index so CWD doesn't interfere
                 # with the caching logic.
@@ -742,7 +742,7 @@ class ArvPutUploadJob(object):
             elif file_in_local_collection.permission_expired():
                 # Permission token expired, re-upload file. This will change whenever
                 # we have a API for refreshing tokens.
-                self.logger.warning("Uploaded file '{}' access token expired, will re-upload it from scratch".format(filename))
+                self.logger.warning(u"Uploaded file '{}' access token expired, will re-upload it from scratch".format(filename))
                 should_upload = True
                 self._local_collection.remove(filename)
             elif cached_file_data['size'] == file_in_local_collection.size():
@@ -757,7 +757,7 @@ class ArvPutUploadJob(object):
                 # Inconsistent cache, re-upload the file
                 should_upload = True
                 self._local_collection.remove(filename)
-                self.logger.warning("Uploaded version of file '{}' is bigger than local version, will re-upload it from scratch.".format(source))
+                self.logger.warning(u"Uploaded version of file '{}' is bigger than local version, will re-upload it from scratch.".format(source))
         # Local file differs from cached data, re-upload it.
         else:
             if file_in_local_collection:
@@ -834,11 +834,11 @@ class ArvPutUploadJob(object):
         if self.use_cache:
             cache_filepath = self._get_cache_filepath()
             if self.resume and os.path.exists(cache_filepath):
-                self.logger.info("Resuming upload from cache file {}".format(cache_filepath))
+                self.logger.info(u"Resuming upload from cache file {}".format(cache_filepath))
                 self._cache_file = open(cache_filepath, 'a+')
             else:
                 # --no-resume means start with a empty cache file.
-                self.logger.info("Creating new cache file at {}".format(cache_filepath))
+                self.logger.info(u"Creating new cache file at {}".format(cache_filepath))
                 self._cache_file = open(cache_filepath, 'w+')
             self._cache_filename = self._cache_file.name
             self._lock_file(self._cache_file)
@@ -924,7 +924,7 @@ class ArvPutUploadJob(object):
         try:
             fcntl.flock(fileobj, fcntl.LOCK_EX | fcntl.LOCK_NB)
         except IOError:
-            raise ResumeCacheConflict("{} locked".format(fileobj.name))
+            raise ResumeCacheConflict(u"{} locked".format(fileobj.name))
 
     def _save_state(self):
         """
@@ -1234,9 +1234,9 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr,
     else:
         try:
             if args.update_collection:
-                logger.info("Collection updated: '{}'".format(writer.collection_name()))
+                logger.info(u"Collection updated: '{}'".format(writer.collection_name()))
             else:
-                logger.info("Collection saved as '{}'".format(writer.collection_name()))
+                logger.info(u"Collection saved as '{}'".format(writer.collection_name()))
             if args.portable_data_hash:
                 output = writer.portable_data_hash()
             else:
index a41184d10fb4fe7daadeb0892cf60ce36f47e8df..01a52a5e6681ec07daaf16eb0c0c18a9b7ba2ada 100644 (file)
@@ -1,3 +1,5 @@
+# -*- coding: utf-8 -*-
+
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
 # SPDX-License-Identifier: Apache-2.0
@@ -1284,6 +1286,16 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
                          r'^\./%s.*:file2.txt' % os.path.basename(tmpdir))
         self.assertRegex(c['manifest_text'], r'^.*:file3.txt')
 
+    def test_unicode_on_filename(self):
+        tmpdir = self.make_tmpdir()
+        fname = u"iā¤arvados.txt"
+        with open(os.path.join(tmpdir, fname), 'w') as f:
+            f.write("This is a unicode named file")
+        col = self.run_and_find_collection("", ['--no-progress', tmpdir])
+        self.assertNotEqual(None, col['uuid'])
+        c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+        self.assertTrue(fname in c['manifest_text'], u"{} does not include {}".format(c['manifest_text'], fname))
+
     def test_silent_mode_no_errors(self):
         self.authorize_with('active')
         tmpdir = self.make_tmpdir()
index 6cc3072b77e175c5ddc91cd930fccb6ed213b975..f236ce83a30a47e51c8e8499abb7b5aecfe29c56 100644 (file)
@@ -369,7 +369,11 @@ module Arv
       end
 
       def add_copy(src_item, key)
-        self[key] = src_item.copy_named("#{path}/#{key}")
+        if key == "."
+          self[key] = src_item.copy_named("#{path}")
+        else
+          self[key] = src_item.copy_named("#{path}/#{key}")
+        end
       end
 
       def merge(src_item, key)
@@ -457,6 +461,10 @@ module Arv
         items["."] = CollectionStream.new(".")
       end
 
+      def add_copy(src_item, key)
+        items["."].add_copy(src_item, key)
+      end
+
       def raise_root_write_error(key)
         raise ArgumentError.new("can't write to %p at collection root" % key)
       end
index f34e58a6b5a48dcb18b18a89f4affa481ecbe3db..288fd263fa8bdbe69cff943446dd2c0f3db9bc04 100644 (file)
@@ -385,6 +385,16 @@ class CollectionTest < Minitest::Test
                  dst_coll.manifest_text)
   end
 
+  def test_copy_root_into_empty_collection
+    block = random_block(8)
+    src_coll = Arv::Collection.new(". #{block} 0:8:f1\n")
+    dst_coll = Arv::Collection.new()
+    dst_coll.cp_r("./", ".", src_coll)
+    assert_equal(". %s 0:8:f1\n" %
+                 [block],
+                 dst_coll.manifest_text)
+  end
+
   def test_copy_empty_source_path_raises_ArgumentError(src="", dst="./s1")
     coll = Arv::Collection.new(SIMPLEST_MANIFEST)
     assert_raises(ArgumentError) do