Merge branch '8931-event-thread-catch-exceptions' closes #8931
author Peter Amstutz <peter.amstutz@curoverse.com>
Tue, 26 Apr 2016 17:00:25 +0000 (13:00 -0400)
committer Peter Amstutz <peter.amstutz@curoverse.com>
Tue, 26 Apr 2016 17:00:25 +0000 (13:00 -0400)
70 files changed:
build/package-build-dockerfiles/centos6/Dockerfile
build/package-build-dockerfiles/debian7/Dockerfile
build/package-build-dockerfiles/debian8/Dockerfile
build/package-build-dockerfiles/ubuntu1204/Dockerfile
build/package-build-dockerfiles/ubuntu1404/Dockerfile
build/run-build-packages-one-target.sh
build/run-build-packages.sh
build/run-tests.sh
crunch_scripts/cwl-runner
doc/install/arvbox.html.textile.liquid
doc/install/install-compute-node.html.textile.liquid
doc/install/install-keepstore.html.textile.liquid
doc/install/install-shell-server.html.textile.liquid
sdk/cli/bin/arv-run-pipeline-instance
sdk/cli/bin/crunch-job
sdk/cwl/setup.py
sdk/go/arvadostest/fixtures.go
sdk/go/keepclient/collectionreader.go
sdk/go/keepclient/collectionreader_test.go
sdk/go/keepclient/perms.go
sdk/go/keepclient/perms_test.go
sdk/go/manifest/manifest.go
sdk/go/streamer/transfer.go
sdk/python/arvados/commands/keepdocker.py
sdk/ruby/arvados.gemspec
sdk/ruby/lib/arvados.rb
sdk/ruby/lib/arvados/collection.rb
sdk/ruby/lib/arvados/keep.rb
sdk/ruby/test/test_keep_manifest.rb
services/api/app/models/blob.rb
services/api/config/application.default.yml
services/api/test/unit/blob_test.rb
services/crunch-run/crunchrun_test.go
services/datamanager/loggerutil/loggerutil.go
services/datamanager/summary/pull_list.go
services/fuse/tests/test_mount.py
services/keep-web/handler.go
services/keepproxy/keepproxy.go
services/keepstore/keepstore.go
services/keepstore/perms.go
services/keepstore/perms_test.go
services/keepstore/trash_worker_test.go
services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
services/nodemanager/arvnodeman/computenode/dispatch/transitions.py [new file with mode: 0644]
services/nodemanager/arvnodeman/computenode/driver/__init__.py
services/nodemanager/arvnodeman/daemon.py
services/nodemanager/tests/test_computenode_dispatch.py
services/nodemanager/tests/test_computenode_dispatch_slurm.py
services/nodemanager/tests/test_computenode_driver_azure.py
services/nodemanager/tests/test_computenode_driver_gce.py
services/nodemanager/tests/test_daemon.py
services/nodemanager/tests/testutil.py
tools/arvbox/bin/arvbox
tools/arvbox/lib/arvbox/docker/Dockerfile.base
tools/arvbox/lib/arvbox/docker/api-setup.sh [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/common.sh
tools/arvbox/lib/arvbox/docker/createusers.sh
tools/arvbox/lib/arvbox/docker/service/api/run-service
tools/arvbox/lib/arvbox/docker/service/websockets/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/websockets/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/websockets/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/websockets/run-service [new file with mode: 0755]
tools/crunchstat-summary/crunchstat_summary/reader.py
tools/crunchstat-summary/crunchstat_summary/summarizer.py
tools/keep-block-check/.gitignore [new file with mode: 0644]
tools/keep-block-check/keep-block-check.go [new file with mode: 0644]
tools/keep-block-check/keep-block-check_test.go [new file with mode: 0644]
tools/keep-rsync/keep-rsync.go
tools/keep-rsync/keep-rsync_test.go

index cfd94c85d3f6943b60458ac6e452c85861919b24..679f68e64e81ca079eb63aaf1c620ae93c6f4c8e 100644 (file)
@@ -2,7 +2,7 @@ FROM centos:6
 MAINTAINER Brett Smith <brett@curoverse.com>
 
 # Install build dependencies provided in base distribution
-RUN yum -q -y install make automake gcc gcc-c++ libyaml-devel patch readline-devel zlib-devel libffi-devel openssl-devel bzip2 libtool bison sqlite-devel rpm-build git perl-ExtUtils-MakeMaker libattr-devel nss-devel libcurl-devel which tar scl-utils centos-release-SCL postgresql-devel
+RUN yum -q -y install make automake gcc gcc-c++ libyaml-devel patch readline-devel zlib-devel libffi-devel openssl-devel bzip2 libtool bison sqlite-devel rpm-build git perl-ExtUtils-MakeMaker libattr-devel nss-devel libcurl-devel which tar unzip scl-utils centos-release-SCL postgresql-devel
 
 # Install golang binary
 ADD generated/golang-amd64.tar.gz /usr/local/
@@ -20,6 +20,10 @@ RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
 RUN touch /var/lib/rpm/* && yum -q -y install python27 python33
 RUN scl enable python33 "easy_install-3.3 pip" && scl enable python27 "easy_install-2.7 pip"
 
+# fpm requires ffi which now wants xz-libs-5 which isn't packaged for centos6
+# but the library from xz-libs-4.999 appears to be good enough.
+RUN ln -s /usr/lib64/liblzma.so.0 /usr/lib64/liblzma.so.5
+
 RUN cd /tmp && \
     curl -OL 'http://pkgs.repoforge.org/rpmforge-release/rpmforge-release-0.5.3-1.el6.rf.x86_64.rpm' && \
     rpm -ivh rpmforge-release-0.5.3-1.el6.rf.x86_64.rpm && \
index 0d0459032a0303131cd90b1bff0399c609074886..a62a9fea605401d97254f09fb9d78a63a400f913 100644 (file)
@@ -2,7 +2,7 @@ FROM debian:wheezy
 MAINTAINER Ward Vandewege <ward@curoverse.com>
 
 # Install dependencies and set up system.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools libcurl4-gnutls-dev curl git procps libattr1-dev libfuse-dev libpq-dev python-pip
+RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools libcurl4-gnutls-dev curl git procps libattr1-dev libfuse-dev libpq-dev python-pip unzip
 
 # Install RVM
 RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
index fcd390fa279591b537196fab46b9ddbc002b8418..f1f23f4fdff01d0760c75acaa49e753f03d628f4 100644 (file)
@@ -2,7 +2,7 @@ FROM debian:jessie
 MAINTAINER Ward Vandewege <ward@curoverse.com>
 
 # Install dependencies and set up system.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools libcurl4-gnutls-dev curl git procps libattr1-dev libfuse-dev libgnutls28-dev libpq-dev python-pip
+RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools libcurl4-gnutls-dev curl git procps libattr1-dev libfuse-dev libgnutls28-dev libpq-dev python-pip unzip
 
 # Install RVM
 RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
index 158053c570ad8d48bfe3ccad15db3b3e883003ae..8f3997a131da0548412824b647689ebc3f061a2c 100644 (file)
@@ -2,7 +2,7 @@ FROM ubuntu:precise
 MAINTAINER Ward Vandewege <ward@curoverse.com>
 
 # Install dependencies and set up system.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools libcurl4-gnutls-dev curl git libattr1-dev libfuse-dev libpq-dev python-pip build-essential
+RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools libcurl4-gnutls-dev curl git libattr1-dev libfuse-dev libpq-dev python-pip build-essential unzip
 
 # Install RVM
 RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
index 0b8ee7ade697cf42c2cd68211a3cef53739b995a..edd12077173dde24a871224d55b60c09b3d0ace1 100644 (file)
@@ -2,7 +2,7 @@ FROM ubuntu:trusty
 MAINTAINER Brett Smith <brett@curoverse.com>
 
 # Install dependencies and set up system.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools libcurl4-gnutls-dev curl git libattr1-dev libfuse-dev libpq-dev python-pip
+RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools libcurl4-gnutls-dev curl git libattr1-dev libfuse-dev libpq-dev python-pip unzip
 
 # Install RVM
 RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
index e5e7f0bfa4308bf5fb9c49b3bc24a6b7164812a8..7034e3fafe043357dc876f782d4137e6ab7eb4c1 100755 (executable)
@@ -130,6 +130,7 @@ if test -z "$packages" ; then
         crunchstat
         keepproxy
         keep-rsync
+        keep-block-check
         keepstore
         keep-web
         libarvados-perl"
index 3178ea2ad1ec2a9ccb2c80f51ead93ca1defbfda..01c63d51d929a66e929f98486959ccf9c7ada1d5 100755 (executable)
@@ -89,7 +89,7 @@ case "$TARGET" in
         PYTHON2_PKG_PREFIX=python
         PYTHON3_PACKAGE=python$PYTHON3_VERSION
         PYTHON3_PKG_PREFIX=python3
-        PYTHON_BACKPORTS=(python-gflags google-api-python-client==1.4.2 \
+        PYTHON_BACKPORTS=(python-gflags==2.0 google-api-python-client==1.4.2 \
             oauth2client==1.5.2 pyasn1==0.1.7 pyasn1-modules==0.0.5 \
             rsa uritemplate httplib2 ws4py pykka six pyexecjs jsonschema \
             ciso8601 pycrypto backports.ssl_match_hostname llfuse==0.41.1 \
@@ -103,7 +103,7 @@ case "$TARGET" in
         PYTHON2_PKG_PREFIX=python
         PYTHON3_PACKAGE=python$PYTHON3_VERSION
         PYTHON3_PKG_PREFIX=python3
-        PYTHON_BACKPORTS=(python-gflags google-api-python-client==1.4.2 \
+        PYTHON_BACKPORTS=(python-gflags==2.0 google-api-python-client==1.4.2 \
             oauth2client==1.5.2 pyasn1==0.1.7 pyasn1-modules==0.0.5 \
             rsa uritemplate httplib2 ws4py pykka six pyexecjs jsonschema \
             ciso8601 pycrypto backports.ssl_match_hostname llfuse==0.41.1 \
@@ -117,7 +117,7 @@ case "$TARGET" in
         PYTHON2_PKG_PREFIX=python
         PYTHON3_PACKAGE=python$PYTHON3_VERSION
         PYTHON3_PKG_PREFIX=python3
-        PYTHON_BACKPORTS=(python-gflags google-api-python-client==1.4.2 \
+        PYTHON_BACKPORTS=(python-gflags==2.0 google-api-python-client==1.4.2 \
             oauth2client==1.5.2 pyasn1==0.1.7 pyasn1-modules==0.0.5 \
             rsa uritemplate httplib2 ws4py pykka six pyexecjs jsonschema \
             ciso8601 pycrypto backports.ssl_match_hostname llfuse==0.41.1 \
@@ -143,7 +143,7 @@ case "$TARGET" in
         PYTHON2_PKG_PREFIX=$PYTHON2_PACKAGE
         PYTHON3_PACKAGE=$(rpm -qf "$(which python$PYTHON3_VERSION)" --queryformat '%{NAME}\n')
         PYTHON3_PKG_PREFIX=$PYTHON3_PACKAGE
-        PYTHON_BACKPORTS=(python-gflags google-api-python-client==1.4.2 \
+        PYTHON_BACKPORTS=(python-gflags==2.0 google-api-python-client==1.4.2 \
             oauth2client==1.5.2 pyasn1==0.1.7 pyasn1-modules==0.0.5 \
             rsa uritemplate httplib2 ws4py pykka six pyexecjs jsonschema \
             ciso8601 pycrypto backports.ssl_match_hostname 'pycurl<7.21.5' \
@@ -357,7 +357,7 @@ elif [[ $TARGET =~ centos6 ]]; then
             rpm2cpio ${LIBFUSE_DIR}/fuse-2.9.2-6.el7.src.rpm | cpio -i
             perl -pi -e 's/Conflicts:\s*filesystem.*//g' fuse.spec
         )
-        # build rpms from source 
+        # build rpms from source
         rpmbuild -bb /root/rpmbuild/SOURCES/fuse.spec
         rm -f fuse-2.9.2-6.el7.src.rpm
         # move built RPMs to LIBFUSE_DIR
@@ -392,6 +392,8 @@ package_go_binary services/crunchstat crunchstat \
     "Gather cpu/memory/network statistics of running Crunch jobs"
 package_go_binary tools/keep-rsync keep-rsync \
     "Copy all data from one set of Keep servers to another"
+package_go_binary tools/keep-block-check keep-block-check \
+    "Verify that all data from one set of Keep servers to another was copied"
 package_go_binary sdk/go/crunchrunner crunchrunner \
     "Crunchrunner executes a command inside a container and uploads the output"
 
@@ -429,7 +431,7 @@ fpm_build $WORKSPACE/sdk/cwl "${PYTHON2_PKG_PREFIX}-arvados-cwl-runner" 'Curover
 fpm --maintainer='Ward Vandewege <ward@curoverse.com>' -s python -t $FORMAT --exclude=*/dist-packages/tests/* --exclude=*/site-packages/tests/* --deb-ignore-iteration-in-dependencies -n "${PYTHON2_PKG_PREFIX}-schema-salad" --iteration 1 --python-bin python2.7 --python-easyinstall "$EASY_INSTALL2" --python-package-name-prefix "$PYTHON2_PKG_PREFIX" --depends "$PYTHON2_PACKAGE" -v 1.7.20160316203940 schema_salad
 
 # And for cwltool we have the same problem as for schema_salad. Ward, 2016-03-17
-fpm --maintainer='Ward Vandewege <ward@curoverse.com>' -s python -t $FORMAT --exclude=*/dist-packages/tests/* --exclude=*/site-packages/tests/* --deb-ignore-iteration-in-dependencies -n "${PYTHON2_PKG_PREFIX}-cwltool" --iteration 1 --python-bin python2.7 --python-easyinstall "$EASY_INSTALL2" --python-package-name-prefix "$PYTHON2_PKG_PREFIX" --depends "$PYTHON2_PACKAGE" -v 1.0.20160325200114 cwltool
+fpm --maintainer='Ward Vandewege <ward@curoverse.com>' -s python -t $FORMAT --exclude=*/dist-packages/tests/* --exclude=*/site-packages/tests/* --deb-ignore-iteration-in-dependencies -n "${PYTHON2_PKG_PREFIX}-cwltool" --iteration 1 --python-bin python2.7 --python-easyinstall "$EASY_INSTALL2" --python-package-name-prefix "$PYTHON2_PKG_PREFIX" --depends "$PYTHON2_PACKAGE" -v 1.0.20160421140153 cwltool
 
 # FPM eats the trailing .0 in the python-rdflib-jsonld package when built with 'rdflib-jsonld>=0.3.0'. Force the version. Ward, 2016-03-25
 fpm --maintainer='Ward Vandewege <ward@curoverse.com>' -s python -t $FORMAT --exclude=*/dist-packages/tests/* --exclude=*/site-packages/tests/* --deb-ignore-iteration-in-dependencies --verbose --log info -n "${PYTHON2_PKG_PREFIX}-rdflib-jsonld" --iteration 1 --python-bin python2.7 --python-easyinstall "$EASY_INSTALL2" --python-package-name-prefix "$PYTHON2_PKG_PREFIX" --depends "$PYTHON2_PACKAGE" -v 0.3.0 rdflib-jsonld
@@ -498,7 +500,10 @@ for deppkg in "${PYTHON_BACKPORTS[@]}"; do
                 set -e
                 cd "$pyfpm_workdir"
                 pip install "${PIP_DOWNLOAD_SWITCHES[@]}" --download . "$deppkg"
-                tar -xf "$deppkg"-*.tar*
+                # Sometimes pip gives us a tarball, sometimes a zip file...
+                DOWNLOADED=`ls $deppkg-*`
+                [[ "$DOWNLOADED" =~ ".tar" ]] && tar -xf $DOWNLOADED
+                [[ "$DOWNLOADED" =~ ".zip" ]] && unzip $DOWNLOADED
                 cd "$deppkg"-*/
                 "python$PYTHON2_VERSION" setup.py $DASHQ_UNLESS_DEBUG egg_info build
                 chmod -R go+rX .
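
The same tarball-or-zip dispatch, sketched in Python (the package name here is hypothetical):

```python
import glob
import tarfile
import zipfile

deppkg = 'python-gflags'  # hypothetical package name

# pip may hand back either a tarball or a zip file, so dispatch on the
# downloaded filename, as the shell snippet above does.
[downloaded] = glob.glob(deppkg + '-*')
if '.tar' in downloaded:
    tarfile.open(downloaded).extractall()
elif downloaded.endswith('.zip'):
    zipfile.ZipFile(downloaded).extractall()
```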
index de64b8cc5bac81dbe09094e949504a12b5c394ce..884eda3da13a8f8c25b13bfdad1d19d6ed1bbed9 100755 (executable)
@@ -87,6 +87,7 @@ sdk/go/crunchrunner
 sdk/cwl
 tools/crunchstat-summary
 tools/keep-rsync
+tools/keep-block-check
 
 EOF
 
@@ -714,6 +715,7 @@ gostuff=(
     services/crunch-dispatch-slurm
     services/crunch-run
     tools/keep-rsync
+    tools/keep-block-check
     )
 for g in "${gostuff[@]}"
 do
index d628f1c12d2adc80707523e84a8be6b835cf426d..8a4de24d356c6bfe8ed9547bc057da9a3edea5ec 100755 (executable)
@@ -47,6 +47,7 @@ try:
     args.submit = False
     args.debug = True
     args.quiet = False
+    args.ignore_docker_for_reuse = False
     outputObj = runner.arvExecutor(t, job_order_object, "", args, cwl_runner_job={"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]})
 
     files = {}
index 3ddc7c825819e2799d75ea8e9b94eb7f2bb6a41d..9c8cd49f7e8228f7a7a2ff84c9a6bfdadb984caf 100644 (file)
@@ -26,9 +26,8 @@ h2. Usage
 $ arvbox
 Arvados-in-a-box                      http://arvados.org
 
-arvbox (build|start|run|open|shell|ip|stop|rebuild|reset|destroy|log|svrestart)
-
-build <config>      build arvbox Docker image
+build   <config>      build arvbox Docker image
+rebuild <config>      build arvbox Docker image, no layer cache
 start|run <config>  start arvbox container
 open       open arvbox workbench in a web browser
 shell      enter arvbox shell
@@ -37,7 +36,7 @@ host       print arvbox published host
 status     print some information about current arvbox
 stop       stop arvbox container
 restart <config>  stop, then run again
-rebuild  <config>  stop, build arvbox Docker image, run
+reboot  <config>  stop, build arvbox Docker image, run
 reset      delete arvbox arvados data (be careful!)
 destroy    delete all arvbox code and data (be careful!)
 log <service> tail log of specified service
@@ -122,14 +121,14 @@ h2. Making Arvbox accessible from other hosts
 In "dev" and "localdemo" mode, Arvbox can only be accessed on the same host it is running.  To publish Arvbox service ports to the host's service ports and advertise the host's IP address for services, use @publicdev@ or @publicdemo@:
 
 <pre>
-$ arvbox rebuild publicdemo
+$ arvbox start publicdemo
 </pre>
 
This attempts to auto-detect the correct IP address to use by taking the IP address of the default route device.  If the auto-detection is wrong, if you want to publish a hostname instead of a raw address, or if you need to access it through a different device (such as a router or firewall), set @ARVBOX_PUBLISH_IP@ to the desired hostname or IP address.
 
 <pre>
 $ export ARVBOX_PUBLISH_IP=example.com
-$ arvbox rebuild publicdemo
+$ arvbox start publicdemo
 </pre>
 
Note: this expects to bind the host's port 80 (http) for workbench, so you cannot have a conflicting web server already running on the host.  It does not attempt to bind the host's port 22 (ssh); as a result, the arvbox ssh port is not published.
index 9a64ac76d79532d643a895a7df5d2f9971dfd2fc..a2e3e01ecb117aa0170e1349914dfe9b3a07d7ca 100644 (file)
@@ -13,14 +13,14 @@ First, "add the appropriate package repository for your distribution":{{ site.ba
 On Debian-based systems:
 
 <notextile>
-<pre><code>~$ <span class="userinput">sudo apt-get install perl python-virtualenv fuse python-arvados-python-client python-arvados-fuse crunchstat arvados-docker-cleaner iptables ca-certificates</span>
+<pre><code>~$ <span class="userinput">sudo apt-get install perl python-virtualenv fuse python-arvados-python-client python-arvados-fuse crunchrunner crunchstat arvados-docker-cleaner iptables ca-certificates</span>
 </code></pre>
 </notextile>
 
 On Red Hat-based systems:
 
 <notextile>
-<pre><code>~$ <span class="userinput">sudo yum install perl python27-python-virtualenv fuse python27-python-arvados-python-client python27-python-arvados-fuse crunchstat arvados-docker-cleaner iptables ca-certificates</span>
+<pre><code>~$ <span class="userinput">sudo yum install perl python27-python-virtualenv fuse python27-python-arvados-python-client python27-python-arvados-fuse crunchrunner crunchstat arvados-docker-cleaner iptables ca-certificates</span>
 </code></pre>
 </notextile>
 
index 13dfaf6725d40e089759ec62058fcd63ff83c33b..b211ce60bf49cbd88221a60c0dd7eb4e3164f5ad 100644 (file)
@@ -41,7 +41,7 @@ Usage of ./keepstore:
   -azure-storage-account-name="": Azure storage account name used for subsequent --azure-storage-container-volume arguments.
   -azure-storage-container-volume=[]: Use the given container as a storage volume. Can be given multiple times.
   -azure-storage-replication=3: Replication level to report to clients when data is stored in an Azure container.
-  -blob-signature-ttl=1209600: Lifetime of blob permission signatures. See services/api/config/application.default.yml.
+  -blob-signature-ttl=1209600: Lifetime of blob permission signatures. Modifying the ttl will invalidate all existing signatures. See services/api/config/application.default.yml.
   -blob-signing-key-file="": File containing the secret key for generating and verifying blob permission signatures.
   -data-manager-token-file="": File with the API token used by the Data Manager. All DELETE requests or GET /index requests must carry this token.
   -enforce-permissions=false: Enforce permission signatures on requests.
index dd5995ffdde442c85f665cb5feae14fd9b0fe879..3d3f4b474d4fa53d00633e63ff2bab97c1066bbd 100644 (file)
@@ -47,14 +47,14 @@ h2. Install the Python SDK and utilities
 On Debian-based systems:
 
 <notextile>
-<pre><code>~$ <span class="userinput">sudo apt-get install python-arvados-python-client python-arvados-fuse</span>
+<pre><code>~$ <span class="userinput">sudo apt-get install python-arvados-python-client python-arvados-fuse crunchrunner</span>
 </code></pre>
 </notextile>
 
 On Red Hat-based systems:
 
 <notextile>
-<pre><code>~$ <span class="userinput">sudo yum install python27-python-arvados-python-client python27-python-arvados-fuse</span>
+<pre><code>~$ <span class="userinput">sudo yum install python27-python-arvados-python-client python27-python-arvados-fuse crunchrunner</span>
 </code></pre>
 </notextile>
 
index 70e2f42ede56ab9949f7ee9105c730b83a545966..6dc82c5a20b841b1aeb1400ecdaf7dd6c21d4ed5 100755 (executable)
@@ -17,7 +17,7 @@ begin
   require 'trollop'
   require 'google/api_client'
 rescue LoadError => l
-  puts $:
+  $stderr.puts $:
   abort <<-EOS
 #{$0}: fatal: #{l.message}
 Some runtime dependencies may be missing.
@@ -132,7 +132,7 @@ if $options[:instance]
     abort "#{$0}: syntax error: --instance cannot be combined with --template or --submit."
   end
 elsif not $options[:template]
-  puts "error: you must supply a --template or --instance."
+  $stderr.puts "error: you must supply a --template or --instance."
   p.educate
   abort
 end
index b4cb21405f829f0f431700b0fe1ade8786e1ee95..149d20b1d887369027a313d360dd013bdfbc0933 100755 (executable)
@@ -862,12 +862,10 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
         .q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo) }
         ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
         ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP "
-        # $VOLUME_CRUNCHRUNNER and $VOLUME_CERTS will be passed unquoted as
-        # arguments to `docker run`.  They must contain their own quoting.
-        .q{&& VOLUME_CRUNCHRUNNER="" VOLUME_CERTS="" }
-        .q{&& if which crunchrunner >/dev/null ; then VOLUME_CRUNCHRUNNER=--volume=$(which crunchrunner):/usr/local/bin/crunchrunner ; fi }
-        .q{&& if test -f /etc/ssl/certs/ca-certificates.crt ; then VOLUME_CERTS=--volume=/etc/ssl/certs/ca-certificates.crt:/etc/arvados/ca-certificates.crt ; }
-        .q{elif test -f /etc/pki/tls/certs/ca-bundle.crt ; then VOLUME_CERTS=--volume=/etc/pki/tls/certs/ca-bundle.crt:/etc/arvados/ca-certificates.crt ; fi };
+        .q{&& declare -a VOLUMES=() }
+        .q{&& if which crunchrunner >/dev/null ; then VOLUMES+=("--volume=$(which crunchrunner):/usr/local/bin/crunchrunner") ; fi }
+        .q{&& if test -f /etc/ssl/certs/ca-certificates.crt ; then VOLUMES+=("--volume=/etc/ssl/certs/ca-certificates.crt:/etc/arvados/ca-certificates.crt") ; }
+        .q{elif test -f /etc/pki/tls/certs/ca-bundle.crt ; then VOLUMES+=("--volume=/etc/pki/tls/certs/ca-bundle.crt:/etc/arvados/ca-certificates.crt") ; fi };
 
     $command .= "&& exec arv-mount --read-write --mount-by-pdh=by_pdh --mount-tmp=tmp --crunchstat-interval=10 --allow-other $arv_file_cache \Q$keep_mnt\E --exec ";
     $ENV{TASK_KEEPMOUNT} = "$keep_mnt/by_pdh";
@@ -934,7 +932,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
 
       # Bind mount the crunchrunner binary and host TLS certificates file into
       # the container.
-      $command .= "\$VOLUME_CRUNCHRUNNER \$VOLUME_CERTS ";
+      $command .= '"${VOLUMES[@]}" ';
 
       while (my ($env_key, $env_val) = each %ENV)
       {
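
The reason for switching to a bash array, sketched in Python (paths are illustrative): each volume flag stays a single argument no matter what characters it contains, whereas the old unquoted $VOLUME_* strings were re-split on whitespace and had to carry their own quoting.

```python
# One list element per docker argument, analogous to VOLUMES=(...) above.
volumes = [
    '--volume=/usr/bin/crunchrunner:/usr/local/bin/crunchrunner',
    '--volume=/etc/ssl/certs/ca-certificates.crt:/etc/arvados/ca-certificates.crt',
]
cmd = ['docker', 'run'] + volumes + ['arvados/jobs']
# Passing cmd as a list keeps each --volume intact; ' '.join(cmd) followed by
# shell word-splitting (the old approach) would break paths containing spaces.
print(cmd)
```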
index c061309f70ddb548ca1f543d4ffee54ba3447504..149c0bae860388dc37e9c6881318ae65807ab17c 100644 (file)
@@ -30,7 +30,7 @@ setup(name='arvados-cwl-runner',
           'bin/arvados-cwl-runner'
       ],
       install_requires=[
-          'cwltool>=1.0.20160325200114',
+          'cwltool==1.0.20160421140153',
           'arvados-python-client>=0.1.20160322001610'
       ],
       test_suite='tests',
index 47b75b384577e50a244355b8ef3dd35ba92c20ab..bebef79074ebb1758f3f5298502f235a8a02f1e1 100644 (file)
@@ -33,3 +33,5 @@ var (
        }
        MD5CollisionMD5 = "cee9a457e790cf20d4bdaa6d69f01e41"
 )
+
+const BlobSigningKey = "zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc"
index d2c171d96111af3e3c6922f73511427d0d41ae2a..bed60f499562a36c4585018932860fe35df34701 100644 (file)
@@ -209,6 +209,10 @@ GET:
                }
                var buf = make([]byte, fs.Offset+fs.Len)
                _, err = io.ReadFull(rdr, buf)
+               errClosing := rdr.Close()
+               if err == nil {
+                       err = errClosing
+               }
                if err != nil {
                        r.err = err
                        close(r.errNotNil)
index 58a047c55a053c14d8324266363fc7ad7fae33fa..2cc23738855dfeab3cd8ab2ef33cb27055a35fa1 100644 (file)
@@ -220,4 +220,5 @@ func (s *CollectionReaderUnit) TestCollectionReaderDataError(c *check.C) {
                c.Check(err, check.NotNil)
                c.Check(err, check.Not(check.Equals), io.EOF)
        }
+       c.Check(rdr.Close(), check.NotNil)
 }
index 12105c6cfca72f19ec424890fe6f8fc1c275822e..d650f0d7ad1bffc14b301a9bb2f4859f65133a4d 100644 (file)
@@ -29,13 +29,15 @@ var (
 
 // makePermSignature generates a SHA-1 HMAC digest for the given blob,
 // token, expiry, and site secret.
-func makePermSignature(blobHash, apiToken, expiry string, permissionSecret []byte) string {
+func makePermSignature(blobHash, apiToken, expiry, blobSignatureTTL string, permissionSecret []byte) string {
        hmac := hmac.New(sha1.New, permissionSecret)
        hmac.Write([]byte(blobHash))
        hmac.Write([]byte("@"))
        hmac.Write([]byte(apiToken))
        hmac.Write([]byte("@"))
        hmac.Write([]byte(expiry))
+       hmac.Write([]byte("@"))
+       hmac.Write([]byte(blobSignatureTTL))
        digest := hmac.Sum(nil)
        return fmt.Sprintf("%x", digest)
 }
@@ -46,15 +48,16 @@ func makePermSignature(blobHash, apiToken, expiry string, permissionSecret []byt
 //
 // This function is intended to be used by system components and admin
 // utilities: userland programs do not know the permissionSecret.
-func SignLocator(blobLocator, apiToken string, expiry time.Time, permissionSecret []byte) string {
+func SignLocator(blobLocator, apiToken string, expiry time.Time, blobSignatureTTL time.Duration, permissionSecret []byte) string {
        if len(permissionSecret) == 0 || apiToken == "" {
                return blobLocator
        }
        // Strip off all hints: only the hash is used to sign.
        blobHash := strings.Split(blobLocator, "+")[0]
        timestampHex := fmt.Sprintf("%08x", expiry.Unix())
+       blobSignatureTTLHex := strconv.FormatInt(int64(blobSignatureTTL.Seconds()), 16)
        return blobLocator +
-               "+A" + makePermSignature(blobHash, apiToken, timestampHex, permissionSecret) +
+               "+A" + makePermSignature(blobHash, apiToken, timestampHex, blobSignatureTTLHex, permissionSecret) +
                "@" + timestampHex
 }
 
@@ -70,7 +73,7 @@ var signedLocatorRe = regexp.MustCompile(`^([[:xdigit:]]{32}).*\+A([[:xdigit:]]{
 //
 // This function is intended to be used by system components and admin
 // utilities: userland programs do not know the permissionSecret.
-func VerifySignature(signedLocator, apiToken string, permissionSecret []byte) error {
+func VerifySignature(signedLocator, apiToken string, blobSignatureTTL time.Duration, permissionSecret []byte) error {
        matches := signedLocatorRe.FindStringSubmatch(signedLocator)
        if matches == nil {
                return ErrSignatureMissing
@@ -83,7 +86,8 @@ func VerifySignature(signedLocator, apiToken string, permissionSecret []byte) er
        } else if expiryTime.Before(time.Now()) {
                return ErrSignatureExpired
        }
-       if signatureHex != makePermSignature(blobHash, apiToken, expiryHex, permissionSecret) {
+       blobSignatureTTLHex := strconv.FormatInt(int64(blobSignatureTTL.Seconds()), 16)
+       if signatureHex != makePermSignature(blobHash, apiToken, expiryHex, blobSignatureTTLHex, permissionSecret) {
                return ErrSignatureInvalid
        }
        return nil
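
For reference, a minimal Python sketch of the updated scheme, mirroring the Go code above (the token, key, and locator values here are illustrative): the hex-encoded TTL now joins the HMAC input, so any change to the TTL invalidates every outstanding signature.

```python
import hashlib
import hmac

def sign_locator(blob_locator, api_token, expiry_unix, ttl_seconds, secret):
    # Only the hash part of the locator is signed; hints are stripped.
    blob_hash = blob_locator.split('+')[0]
    expiry_hex = '%08x' % expiry_unix
    ttl_hex = '%x' % ttl_seconds  # new in this commit: TTL joins the HMAC input
    message = '@'.join([blob_hash, api_token, expiry_hex, ttl_hex])
    signature = hmac.new(secret, message.encode(), hashlib.sha1).hexdigest()
    return '%s+A%s@%s' % (blob_locator, signature, expiry_hex)

# Illustrative values only:
print(sign_locator('acbd18db4cc2f85cedef654fccc4a4d8+3', 'token',
                   0x7fffffff, 1209600, b'secret'))
```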
index 138079528747962ba87defe5975768283dc4084c..242b15c0a6e4d33f6b1977a22fb0555737773884 100644 (file)
@@ -16,83 +16,84 @@ const (
                "gokee3eamvjy8qq1fvy238838enjmy5wzy2md7yvsitp5vztft6j4q866efym7e6" +
                "vu5wm9fpnwjyxfldw3vbo01mgjs75rgo7qioh8z8ij7jpyp8508okhgbbex3ceei" +
                "786u5rw2a9gx743dj3fgq2irk"
-       knownSignature     = "257f3f5f5f0a4e4626a18fc74bd42ec34dcb228a"
+       knownSignature     = "89118b78732c33104a4d6231e8b5a5fa1e4301e3"
        knownTimestamp     = "7fffffff"
        knownSigHint       = "+A" + knownSignature + "@" + knownTimestamp
        knownSignedLocator = knownLocator + knownSigHint
+       blobSignatureTTL   = 1209600 * time.Second
 )
 
 func TestSignLocator(t *testing.T) {
        if ts, err := parseHexTimestamp(knownTimestamp); err != nil {
                t.Errorf("bad knownTimestamp %s", knownTimestamp)
        } else {
-               if knownSignedLocator != SignLocator(knownLocator, knownToken, ts, []byte(knownKey)) {
+               if knownSignedLocator != SignLocator(knownLocator, knownToken, ts, blobSignatureTTL, []byte(knownKey)) {
                        t.Fail()
                }
        }
 }
 
 func TestVerifySignature(t *testing.T) {
-       if VerifySignature(knownSignedLocator, knownToken, []byte(knownKey)) != nil {
+       if VerifySignature(knownSignedLocator, knownToken, blobSignatureTTL, []byte(knownKey)) != nil {
                t.Fail()
        }
 }
 
 func TestVerifySignatureExtraHints(t *testing.T) {
-       if VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint, knownToken, []byte(knownKey)) != nil {
+       if VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint, knownToken, blobSignatureTTL, []byte(knownKey)) != nil {
                t.Fatal("Verify cannot handle hint before permission signature")
        }
 
-       if VerifySignature(knownLocator+knownSigHint+"+Zfoo", knownToken, []byte(knownKey)) != nil {
+       if VerifySignature(knownLocator+knownSigHint+"+Zfoo", knownToken, blobSignatureTTL, []byte(knownKey)) != nil {
                t.Fatal("Verify cannot handle hint after permission signature")
        }
 
-       if VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint+"+Zfoo", knownToken, []byte(knownKey)) != nil {
+       if VerifySignature(knownLocator+"+K@xyzzy"+knownSigHint+"+Zfoo", knownToken, blobSignatureTTL, []byte(knownKey)) != nil {
                t.Fatal("Verify cannot handle hints around permission signature")
        }
 }
 
 // The size hint on the locator string should not affect signature validation.
 func TestVerifySignatureWrongSize(t *testing.T) {
-       if VerifySignature(knownHash+"+999999"+knownSigHint, knownToken, []byte(knownKey)) != nil {
+       if VerifySignature(knownHash+"+999999"+knownSigHint, knownToken, blobSignatureTTL, []byte(knownKey)) != nil {
                t.Fatal("Verify cannot handle incorrect size hint")
        }
 
-       if VerifySignature(knownHash+knownSigHint, knownToken, []byte(knownKey)) != nil {
+       if VerifySignature(knownHash+knownSigHint, knownToken, blobSignatureTTL, []byte(knownKey)) != nil {
                t.Fatal("Verify cannot handle missing size hint")
        }
 }
 
 func TestVerifySignatureBadSig(t *testing.T) {
        badLocator := knownLocator + "+Aaaaaaaaaaaaaaaa@" + knownTimestamp
-       if VerifySignature(badLocator, knownToken, []byte(knownKey)) != ErrSignatureMissing {
+       if VerifySignature(badLocator, knownToken, blobSignatureTTL, []byte(knownKey)) != ErrSignatureMissing {
                t.Fail()
        }
 }
 
 func TestVerifySignatureBadTimestamp(t *testing.T) {
        badLocator := knownLocator + "+A" + knownSignature + "@OOOOOOOl"
-       if VerifySignature(badLocator, knownToken, []byte(knownKey)) != ErrSignatureMissing {
+       if VerifySignature(badLocator, knownToken, blobSignatureTTL, []byte(knownKey)) != ErrSignatureMissing {
                t.Fail()
        }
 }
 
 func TestVerifySignatureBadSecret(t *testing.T) {
-       if VerifySignature(knownSignedLocator, knownToken, []byte("00000000000000000000")) != ErrSignatureInvalid {
+       if VerifySignature(knownSignedLocator, knownToken, blobSignatureTTL, []byte("00000000000000000000")) != ErrSignatureInvalid {
                t.Fail()
        }
 }
 
 func TestVerifySignatureBadToken(t *testing.T) {
-       if VerifySignature(knownSignedLocator, "00000000", []byte(knownKey)) != ErrSignatureInvalid {
+       if VerifySignature(knownSignedLocator, "00000000", blobSignatureTTL, []byte(knownKey)) != ErrSignatureInvalid {
                t.Fail()
        }
 }
 
 func TestVerifySignatureExpired(t *testing.T) {
        yesterday := time.Now().AddDate(0, 0, -1)
-       expiredLocator := SignLocator(knownHash, knownToken, yesterday, []byte(knownKey))
-       if VerifySignature(expiredLocator, knownToken, []byte(knownKey)) != ErrSignatureExpired {
+       expiredLocator := SignLocator(knownHash, knownToken, yesterday, blobSignatureTTL, []byte(knownKey))
+       if VerifySignature(expiredLocator, knownToken, blobSignatureTTL, []byte(knownKey)) != ErrSignatureExpired {
                t.Fail()
        }
 }
index cf0ae85c5782a8848d234de10d3fce81b6e661cd..22b1c974e634cd8229b645421ecb09480807c000 100644 (file)
@@ -265,7 +265,7 @@ func (m *Manifest) FileSegmentIterByName(filepath string) <-chan *FileSegment {
        return ch
 }
 
-// Blocks may appear mulitple times within the same manifest if they
+// Blocks may appear multiple times within the same manifest if they
 // are used by multiple files. In that case this Iterator will output
 // the same block multiple times.
 //
index 3f5f9344c521f3021e2f6fd35f025e4b9c7fb95e..499b4d966810223d7e1747caff4a1ce39113b470 100644 (file)
@@ -16,13 +16,13 @@ channel back to the transfer() function.
 Meanwhile, the transfer() function selects() on two channels, the "requests"
 channel and the "slices" channel.
 
-When a message is recieved on the "slices" channel, this means the a new
+When a message is received on the "slices" channel, this means a new
 section of the buffer has data, or an error is signaled.  Since the data has
 been read directly into the source_buffer, it is able to simply increase the
 size of the body slice to encompass the newly filled in section.  Then any
 pending reads are serviced with handleReadRequest (described below).
 
-When a message is recieved on the "requests" channel, it means a StreamReader
+When a message is received on the "requests" channel, it means a StreamReader
 wants access to a slice of the buffer.  This is passed to handleReadRequest().
 
 The handleReadRequest() function takes a sliceRequest consisting of a buffer
index f66554115306796b94a0bdaef6aa0a194e5fabe7..72c5e8d35989b541157953dfb9f3ad86ce437ea0 100644 (file)
@@ -393,7 +393,7 @@ def main(arguments=None, stdout=sys.stdout):
         put_args += ['--name', collection_name]
 
     coll_uuid = arv_put.main(
-        put_args + ['--filename', outfile_name, image_file.name]).strip()
+        put_args + ['--filename', outfile_name, image_file.name], stdout=stdout).strip()
 
     # Read the image metadata and make Arvados links from it.
     image_file.seek(0)
index 3d090f4b5cc69aa23eec3dc057041c200024ed9a..355ed5d2d814b9542a6fca773a214de0e9af1292 100644 (file)
@@ -18,13 +18,15 @@ Gem::Specification.new do |s|
   s.files       = ["lib/arvados.rb", "lib/arvados/google_api_client.rb",
                    "lib/arvados/collection.rb", "lib/arvados/keep.rb",
                    "README", "LICENSE-2.0.txt"]
-  s.required_ruby_version = '>= 2.1.0'
+  s.required_ruby_version = '>= 1.8.7'
   # activesupport <4.2.6 only because https://dev.arvados.org/issues/8222
-  s.add_dependency('activesupport', '>= 3.2.13', '< 4.2.6')
+  s.add_dependency('activesupport', '>= 3', '< 4.2.6')
   s.add_dependency('andand', '~> 1.3', '>= 1.3.3')
-  s.add_dependency('google-api-client', '~> 0.6.3', '>= 0.6.3')
+  s.add_dependency('google-api-client', '>= 0.7', '< 0.9')
+  # work around undeclared dependency on i18n in some activesupport 3.x.x:
+  s.add_dependency('i18n', '~> 0')
   s.add_dependency('json', '~> 1.7', '>= 1.7.7')
-  s.add_runtime_dependency('jwt', '>= 0.1.5', '< 1.0.0')
+  s.add_runtime_dependency('jwt', '<2', '>= 0.1.5')
   s.homepage    =
     'https://arvados.org'
 end
index 753c518b3191ebbfefbd4407ca67c2f9b83daa45..7a3f4b4226210646cbdd098d5594ff354f42778e 100644 (file)
@@ -209,7 +209,7 @@ class Arvados
                 :parameters => parameters,
                 :body_object => body,
                 :headers => {
-                  authorization: 'OAuth2 '+arvados.config['ARVADOS_API_TOKEN']
+                  :authorization => 'OAuth2 '+arvados.config['ARVADOS_API_TOKEN']
                 })
       resp = JSON.parse result.body, :symbolize_names => true
       if resp[:errors]
@@ -217,7 +217,7 @@ class Arvados
       elsif resp[:uuid] and resp[:etag]
         self.new(resp)
       elsif resp[:items].is_a? Array
-        resp.merge(items: resp[:items].collect do |i|
+        resp.merge(:items => resp[:items].collect do |i|
                      self.new(i)
                    end)
       else
index 07b751908f7da26b93fd5321fe8a5c192872a8d6..474241dc41832e657b53d4d55b5746e479e79075 100644 (file)
@@ -44,7 +44,7 @@ module Arv
     end
 
     def cp_r(source, target, source_collection=nil)
-      opts = {descend_target: !source.end_with?("/")}
+      opts = {:descend_target => !source.end_with?("/")}
       copy(:merge, source.chomp("/"), target, source_collection, opts)
     end
 
@@ -70,7 +70,7 @@ module Arv
     end
 
     def rm_r(source)
-      remove(source, recursive: true)
+      remove(source, :recursive => true)
     end
 
     protected
@@ -155,7 +155,7 @@ module Arv
       modified
     end
 
-    LocatorSegment = Struct.new(:locators, :start_pos, :length)
+    Struct.new("LocatorSegment", :locators, :start_pos, :length)
 
     class LocatorRange < Range
       attr_reader :locator
@@ -187,9 +187,9 @@ module Arv
           end_index = search_for_byte(start_pos + length - 1, start_index)
         end
         seg_ranges = @ranges[start_index..end_index]
-        LocatorSegment.new(seg_ranges.map(&:locator),
-                           start_pos - seg_ranges.first.begin,
-                           length)
+        Struct::LocatorSegment.new(seg_ranges.map(&:locator),
+                                   start_pos - seg_ranges.first.begin,
+                                   length)
       end
 
       private
index 3c6b26b765f59c4938465aaa7dcc589187fa7722..489eeeeebb7e11a3ea3dda78375b33809fdf97a6 100644 (file)
@@ -47,19 +47,19 @@ module Keep
         raise ArgumentError.new "locator is nil or empty"
       end
 
-      m = LOCATOR_REGEXP.match(tok.strip)
+      m = LOCATOR_REGEXP.match(tok)
       unless m
         raise ArgumentError.new "not a valid locator #{tok}"
       end
 
-      tokhash, _, toksize, _, trailer = m[1..5]
+      tokhash, _, toksize, _, _, trailer = m[1..6]
       tokhints = []
       if trailer
         trailer.split('+').each do |hint|
-          if hint =~ /^[[:upper:]][[:alnum:]@_-]+$/
+          if hint =~ /^[[:upper:]][[:alnum:]@_-]*$/
             tokhints.push(hint)
           else
-            raise ArgumentError.new "unknown hint #{hint}"
+            raise ArgumentError.new "invalid hint #{hint}"
           end
         end
       end
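
A small Python sketch of what the loosened hint pattern changes ('*' instead of '+' after the leading capital): single-letter hints such as Z are now accepted, while lowercase hints are still rejected.

```python
import re

old_hint = re.compile(r'^[A-Z][A-Za-z0-9@_-]+$')  # required 2+ characters
new_hint = re.compile(r'^[A-Z][A-Za-z0-9@_-]*$')  # accepts a bare "Z"

for hint in ['Z', 'Ad41d8cd98f00b204e9800998ecf8427e00000000@ffffffff', 'foo']:
    print(hint, bool(old_hint.match(hint)), bool(new_hint.match(hint)))
# Z    -> old: False, new: True   (newly valid)
# A...@ffffffff -> True, True     (signature hints unchanged)
# foo  -> False, False            (lowercase still rejected)
```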
index 5ed9cfc2b186b6e2122d41a879603054962bd0d3..fa1dc3f2e83da930e84e2f468f1b083f91cd965d 100644 (file)
@@ -266,6 +266,8 @@ class ManifestTest < Minitest::Test
    [true, 'd41d8cd98f00b204e9800998ecf8427e+0', '+0','0',nil],
    [true, 'd41d8cd98f00b204e9800998ecf8427e+0+Fizz+Buzz','+0','0','+Fizz+Buzz'],
    [true, 'd41d8cd98f00b204e9800998ecf8427e+Fizz+Buzz', nil,nil,'+Fizz+Buzz'],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000+Foo', '+0','0','+Ad41d8cd98f00b204e9800998ecf8427e00000000+Foo'],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e+Ad41d8cd98f00b204e9800998ecf8427e00000000+Foo', nil,nil,'+Ad41d8cd98f00b204e9800998ecf8427e00000000+Foo'],
    [true, 'd41d8cd98f00b204e9800998ecf8427e+0+Z', '+0','0','+Z'],
    [true, 'd41d8cd98f00b204e9800998ecf8427e+Z', nil,nil,'+Z'],
   ].each do |ok, locator, match2, match3, match4|
@@ -278,6 +280,18 @@ class ManifestTest < Minitest::Test
         assert_equal match4, match[4]
       end
     end
+    define_method "test_parse_method_on_#{locator.inspect}" do
+      loc = Keep::Locator.parse locator
+      if !ok
+        assert_nil loc
+      else
+        refute_nil loc
+        assert loc.is_a?(Keep::Locator)
+        #assert loc.hash
+        #assert loc.size
+        #assert loc.hints.is_a?(Array)
+      end
+    end
   end
 
   [
@@ -301,6 +315,7 @@ class ManifestTest < Minitest::Test
     [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\040\n"],
     [true, ". 00000000000000000000000000000000+0 0:0:0\n"],
     [true, ". 00000000000000000000000000000000+0 0:0:d41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000@ffffffff\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000@ffffffff 0:0:empty.txt\n"],
     [false, '. d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt',
       "Invalid manifest: does not end with newline"],
     [false, "abc d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
index 34600d7a25a8c716bd9d1fd6ec49cea052dc0c58..41d5b27093c3ab55c296f7a592b9defb7e25d6dc 100644 (file)
@@ -49,11 +49,12 @@ class Blob
     end
     timestamp_hex = timestamp.to_s(16)
     # => "53163cb4"
+    blob_signature_ttl = Rails.configuration.blob_signature_ttl.to_s(16)
 
     # Generate a signature.
     signature =
       generate_signature((opts[:key] or Rails.configuration.blob_signing_key),
-                         blob_hash, opts[:api_token], timestamp_hex)
+                         blob_hash, opts[:api_token], timestamp_hex, blob_signature_ttl)
 
     blob_locator + '+A' + signature + '@' + timestamp_hex
   end
@@ -96,10 +97,11 @@ class Blob
     if timestamp.to_i(16) < (opts[:now] or db_current_time.to_i)
       raise Blob::InvalidSignatureError.new 'Signature expiry time has passed.'
     end
+    blob_signature_ttl = Rails.configuration.blob_signature_ttl.to_s(16)
 
     my_signature =
       generate_signature((opts[:key] or Rails.configuration.blob_signing_key),
-                         blob_hash, opts[:api_token], timestamp)
+                         blob_hash, opts[:api_token], timestamp, blob_signature_ttl)
 
     if my_signature != given_signature
       raise Blob::InvalidSignatureError.new 'Signature is invalid.'
@@ -108,10 +110,11 @@ class Blob
     true
   end
 
-  def self.generate_signature key, blob_hash, api_token, timestamp
+  def self.generate_signature key, blob_hash, api_token, timestamp, blob_signature_ttl
     OpenSSL::HMAC.hexdigest('sha1', key,
                             [blob_hash,
                              api_token,
-                             timestamp].join('@'))
+                             timestamp,
+                             blob_signature_ttl].join('@'))
   end
 end
index 66916836c23fb9538281400ad84b1f1f672247e2..c709633d0bfbddd73e890e8ec1ba52c2e6d07b9d 100644 (file)
@@ -27,6 +27,11 @@ common:
   # generate permission signatures for Keep locators. It must be
   # identical to the permission key given to Keep. IMPORTANT: This is
   # a site secret. It should be at least 50 characters.
+  #
+  # Modifying blob_signing_key will invalidate all existing
+  # signatures, which can cause programs to fail (e.g., arv-put,
+  # arv-get, and Crunch jobs).  To avoid errors, rotate keys only when
+  # no such processes are running.
   blob_signing_key: ~
 
   # These settings are provided by your OAuth2 provider (e.g.,
@@ -155,12 +160,12 @@ common:
   # still has permission) the client can retrieve the collection again
   # to get fresh signatures.
   #
-  # Datamanager considers an unreferenced block older than this to be
-  # eligible for garbage collection. Therefore, it should never be
-  # smaller than the corresponding value used by any local keepstore
-  # service (see keepstore -blob-signature-ttl flag). This rule
-  # prevents datamanager from trying to garbage-collect recently
-  # written blocks while clients are still holding valid signatures.
+  # This must be exactly equal to the -blob-signature-ttl flag used by
+  # keepstore servers.  Otherwise, reading data blocks and saving
+  # collections will fail with HTTP 403 permission errors.
+  #
+  # Modifying blob_signature_ttl invalidates existing signatures; see
+  # blob_signing_key note above.
   #
   # The default is 2 weeks.
   blob_signature_ttl: 1209600
index 0794a751e097bb7b42317529177085e5572a3810..938c57bd6ea0d45f844ea9c90b5ea550e1e3b35c 100644 (file)
@@ -17,7 +17,7 @@ class BlobTest < ActiveSupport::TestCase
     'vu5wm9fpnwjyxfldw3vbo01mgjs75rgo7qioh8z8ij7jpyp8508okhgbbex3ceei' +
     '786u5rw2a9gx743dj3fgq2irk'
   @@known_signed_locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3' +
-    '+A257f3f5f5f0a4e4626a18fc74bd42ec34dcb228a@7fffffff'
+    '+A89118b78732c33104a4d6231e8b5a5fa1e4301e3@7fffffff'
 
   test 'generate predictable invincible signature' do
     signed = Blob.sign_locator @@known_locator, {
@@ -118,4 +118,23 @@ class BlobTest < ActiveSupport::TestCase
       Blob.verify_signature!(@@blob_locator, api_token: @@api_token, key: @@key)
     end
   end
+
+  test 'signature changes when ttl changes' do
+    signed = Blob.sign_locator @@known_locator, {
+      api_token: @@known_token,
+      key: @@known_key,
+      expire: 0x7fffffff,
+    }
+
+    original_ttl = Rails.configuration.blob_signature_ttl
+    Rails.configuration.blob_signature_ttl = original_ttl*2
+    signed2 = Blob.sign_locator @@known_locator, {
+      api_token: @@known_token,
+      key: @@known_key,
+      expire: 0x7fffffff,
+    }
+    Rails.configuration.blob_signature_ttl = original_ttl
+
+    assert_not_equal signed, signed2
+  end
 end
index 659b3c0ede524a31af3ada93369fcc6cab808e2a..5ee879d9f99c1c59aa58bbf4f951fa28ed4c0d98 100644 (file)
@@ -452,7 +452,7 @@ func (s *TestSuite) TestUpdateContainerRecordCancelled(c *C) {
 }
 
 // Used by the TestFullRun*() test below to DRY up boilerplate setup to do full
-// dress rehersal of the Run() function, starting from a JSON container record.
+// dress rehearsal of the Run() function, starting from a JSON container record.
 func FullRunHelper(c *C, record string, fn func(t *TestDockerClient)) (api *ArvTestClient, cr *ContainerRunner) {
        rec := ContainerRecord{}
        err := json.NewDecoder(strings.NewReader(record)).Decode(&rec)
@@ -524,7 +524,7 @@ func (s *TestSuite) TestFullRunStderr(c *C) {
                t.finish <- dockerclient.WaitResult{ExitCode: 1}
        })
 
-       c.Check(api.Calls, Equals, 8)
+       c.Assert(api.Calls, Equals, 8)
        c.Check(api.Content[7]["container"].(arvadosclient.Dict)["log"], NotNil)
        c.Check(api.Content[7]["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
        c.Check(api.Content[7]["container"].(arvadosclient.Dict)["state"], Equals, "Complete")
index 8c655cd5ff68a981146493bfa21fd71693ba12c0..8111425d7af76a9fa55ca71a3b3620a1d0a095a1 100644 (file)
@@ -10,7 +10,7 @@ import (
        "time"
 )
 
-// Useful to call at the begining of execution to log info about the
+// Useful to call at the beginning of execution to log info about the
 // current run.
 func LogRunInfo(arvLogger *logger.Logger) {
        if arvLogger != nil {
index aac9aec9716a74c33f5c18a4ec23e2f9b0b8e212..d7fb3eb8f7cb3953d0f40e0e67626376f0d2941a 100644 (file)
@@ -71,7 +71,7 @@ func ComputePullServers(kc *keepclient.KeepClient,
        blockToDesiredReplication map[blockdigest.DigestWithSize]int,
        underReplicated BlockSet) (m map[Locator]PullServers) {
        m = map[Locator]PullServers{}
-       // We use CanonicalString to avoid filling memory with dupicate
+       // We use CanonicalString to avoid filling memory with duplicate
        // copies of the same string.
        var cs CanonicalString
 
index fa48849626a5d251c57ba1584b78855d1c73d0a6..e534e3273747372ce0f9ba19d7b08e9a21b3b7a8 100644 (file)
@@ -1142,7 +1142,6 @@ class TokenExpiryTest(MountTestBase):
 
     @mock.patch('arvados.keep.KeepClient.get')
     def runTest(self, mocked_get):
-        logging.getLogger('arvados.arvados_fuse').setLevel(logging.DEBUG)
         self.api._rootDesc = {"blobSignatureTtl": 2}
         mnt = self.make_mount(fuse.CollectionDirectory, collection_record='zzzzz-4zz18-op4e2lbej01tcvu')
         mocked_get.return_value = 'fake data'
index e1b23621af8f70aa214e43639140ca23ed2784c4..6f5f66ae0ef1bf57979f04189fe4d110818b1bd6 100644 (file)
@@ -320,6 +320,12 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                statusCode, statusText = http.StatusInternalServerError, err.Error()
                return
        }
+       if kc.Client != nil && kc.Client.Transport != nil {
+               // Workaround for https://dev.arvados.org/issues/9005
+               if t, ok := kc.Client.Transport.(*http.Transport); ok {
+                       defer t.CloseIdleConnections()
+               }
+       }
        rdr, err := kc.CollectionFileReader(collection, filename)
        if os.IsNotExist(err) {
                statusCode = http.StatusNotFound
index 7b5cd2befb8f69bd25fa62674d01590214aec5ad..4cd931037ef830dfd8a6b25022126c84c13d7036 100644 (file)
@@ -185,7 +185,7 @@ func CheckAuthorizationHeader(kc *keepclient.KeepClient, cache *ApiTokenCache, r
        }
 
        if cache.RecallToken(tok) {
-               // Valid in the cache, short circut
+               // Valid in the cache, short circuit
                return true, tok
        }
 
index 40e62c5c50146aa6a89f0bcf6566eb194b943b73..b17cc79e17d5bf540894801b648f5d60d4d5bdc9 100644 (file)
@@ -189,7 +189,7 @@ func main() {
                &permissionTTLSec,
                "blob-signature-ttl",
                int(time.Duration(2*7*24*time.Hour).Seconds()),
-               "Lifetime of blob permission signatures. "+
+               "Lifetime of blob permission signatures. Modifying the ttl will invalidate all existing signatures. "+
                        "See services/api/config/application.default.yml.")
        flag.BoolVar(
                &flagSerializeIO,
index 6168a321c27e464fff5d0555ed363b2636331c76..9cd97bd3b746b1d66c0eba3b002fe5c9b8d70083 100644 (file)
@@ -13,7 +13,7 @@ var PermissionSecret []byte
 // SignLocator takes a blobLocator, an apiToken and an expiry time, and
 // returns a signed locator string.
 func SignLocator(blobLocator, apiToken string, expiry time.Time) string {
-       return keepclient.SignLocator(blobLocator, apiToken, expiry, PermissionSecret)
+       return keepclient.SignLocator(blobLocator, apiToken, expiry, blobSignatureTTL, PermissionSecret)
 }
 
 // VerifySignature returns nil if the signature on the signedLocator
@@ -22,7 +22,7 @@ func SignLocator(blobLocator, apiToken string, expiry time.Time) string {
 // something the client could have figured out independently) or
 // PermissionError.
 func VerifySignature(signedLocator, apiToken string) error {
-       err := keepclient.VerifySignature(signedLocator, apiToken, PermissionSecret)
+       err := keepclient.VerifySignature(signedLocator, apiToken, blobSignatureTTL, PermissionSecret)
        if err == keepclient.ErrSignatureExpired {
                return ExpiredError
        } else if err != nil {
index f4443fc7be1b423c4f535cccae66f0de32e71648..43717b23720d8c71b32c126810f8e39dd41a0429 100644 (file)
@@ -17,7 +17,8 @@ const (
                "gokee3eamvjy8qq1fvy238838enjmy5wzy2md7yvsitp5vztft6j4q866efym7e6" +
                "vu5wm9fpnwjyxfldw3vbo01mgjs75rgo7qioh8z8ij7jpyp8508okhgbbex3ceei" +
                "786u5rw2a9gx743dj3fgq2irk"
-       knownSignature     = "257f3f5f5f0a4e4626a18fc74bd42ec34dcb228a"
+       knownSignatureTTL  = 1209600 * time.Second
+       knownSignature     = "89118b78732c33104a4d6231e8b5a5fa1e4301e3"
        knownTimestamp     = "7fffffff"
        knownSigHint       = "+A" + knownSignature + "@" + knownTimestamp
        knownSignedLocator = knownLocator + knownSigHint
@@ -34,6 +35,8 @@ func TestSignLocator(t *testing.T) {
        }
        t0 := time.Unix(tsInt, 0)
 
+       blobSignatureTTL = knownSignatureTTL
+
        PermissionSecret = []byte(knownKey)
        if x := SignLocator(knownLocator, knownToken, t0); x != knownSignedLocator {
                t.Fatalf("Got %+q, expected %+q", x, knownSignedLocator)
@@ -50,6 +53,8 @@ func TestVerifyLocator(t *testing.T) {
                PermissionSecret = b
        }(PermissionSecret)
 
+       blobSignatureTTL = knownSignatureTTL
+
        PermissionSecret = []byte(knownKey)
        if err := VerifySignature(knownSignedLocator, knownToken); err != nil {
                t.Fatal(err)
index 1d3063a9de10651cf675062a439403372d39f49f..ac9406178c00ffaffc024f61ffc981479fc9d4e9 100644 (file)
@@ -163,7 +163,7 @@ func TestTrashWorkerIntegration_TwoDifferentLocatorsInVolume1(t *testing.T) {
 }
 
 /* Allow default Trash Life time to be used. Thus, the newly created block
-   will not be deleted becuase its Mtime is within the trash life time.
+   will not be deleted because its Mtime is within the trash life time.
 */
 func TestTrashWorkerIntegration_SameLocatorInTwoVolumesWithDefaultTrashLifeTime(t *testing.T) {
        neverDelete = false
index 552ed01b728d9e1e7bc11df7940eee2970ab01ff..412a5d7547313505e11da861172add90ef5292e1 100644 (file)
@@ -14,6 +14,7 @@ from .. import \
     arvados_node_missing, RetryMixin
 from ...clientactor import _notify_subscribers
 from ... import config
+from .transitions import transitions
 
 class ComputeNodeStateChangeBase(config.actor_class, RetryMixin):
     """Base class for actors that change a compute node's state.
@@ -208,17 +209,6 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
         self._logger.info("Shutdown cancelled: %s.", reason)
         self._finished(success_flag=False)
 
-    def _stop_if_window_closed(orig_func):
-        @functools.wraps(orig_func)
-        def stop_wrapper(self, *args, **kwargs):
-            if (self.cancellable and
-                  (self._monitor.shutdown_eligible().get() is not True)):
-                self._later.cancel_shutdown(self.WINDOW_CLOSED)
-                return None
-            else:
-                return orig_func(self, *args, **kwargs)
-        return stop_wrapper
-
     def _cancel_on_exception(orig_func):
         @functools.wraps(orig_func)
         def finish_wrapper(self, *args, **kwargs):
@@ -230,7 +220,6 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
         return finish_wrapper
 
     @_cancel_on_exception
-    @_stop_if_window_closed
     @RetryMixin._retry()
     def shutdown_node(self):
         self._logger.info("Starting shutdown")
@@ -254,9 +243,6 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
         self._clean_arvados_node(arvados_node, "Shut down by Node Manager")
         self._finished(success_flag=True)
 
-    # Make the decorator available to subclasses.
-    _stop_if_window_closed = staticmethod(_stop_if_window_closed)
-
 
 class ComputeNodeUpdateActor(config.actor_class):
     """Actor to dispatch one-off cloud management requests.
@@ -370,50 +356,63 @@ class ComputeNodeMonitorActor(config.actor_class):
         return result
 
     def shutdown_eligible(self):
-        """Return True if eligible for shutdown, or a string explaining why the node
-        is not eligible for shutdown."""
+        """Determine if node is candidate for shut down.
+
+        Returns a tuple of (boolean, string) where the first value is whether
+        the node is a candidate for shut down, and the second value is the
+        reason for the decision.
+        """
+
+        # Collect states and then consult state transition table whether we
+        # should shut down.  Possible states are:
+        # crunch_worker_state = ['unpaired', 'busy', 'idle', 'down']
+        # window = ["open", "closed"]
+        # boot_grace = ["boot wait", "boot exceeded"]
+        # idle_grace = ["not idle", "idle wait", "idle exceeded"]
 
-        if not self._shutdowns.window_open():
-            return "shutdown window is not open."
         if self.arvados_node is None:
-            # Node is unpaired.
-            # If it hasn't pinged Arvados after boot_fail seconds, shut it down
-            if timestamp_fresh(self.cloud_node_start_time, self.boot_fail_after):
-                return "node is still booting, will be considered a failed boot at %s" % time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(self.cloud_node_start_time + self.boot_fail_after))
-            else:
-                return True
-        missing = arvados_node_missing(self.arvados_node, self.node_stale_after)
-        if missing and self._cloud.broken(self.cloud_node):
-            # Node is paired, but Arvados says it is missing and the cloud says the node
-            # is in an error state, so shut it down.
-            return True
-        if missing is None and self._cloud.broken(self.cloud_node):
-            self._logger.info(
-                "Cloud node considered 'broken' but paired node %s last_ping_at is None, " +
-                "cannot check node_stale_after (node may be shut down and we just haven't gotten the message yet).",
-                self.arvados_node['uuid'])
-        if self.in_state('idle'):
-            return True
+            crunch_worker_state = 'unpaired'
+        elif not timestamp_fresh(arvados_node_mtime(self.arvados_node), self.node_stale_after):
+            return (False, "node state is stale")
+        elif self.arvados_node['crunch_worker_state']:
+            crunch_worker_state = self.arvados_node['crunch_worker_state']
+        else:
+            return (False, "node is paired but crunch_worker_state is '%s'" % self.arvados_node['crunch_worker_state'])
+
+        window = "open" if self._shutdowns.window_open() else "closed"
+
+        if timestamp_fresh(self.cloud_node_start_time, self.boot_fail_after):
+            boot_grace = "boot wait"
         else:
-            return "node is not idle."
+            boot_grace = "boot exceeded"
 
-    def resume_node(self):
-        pass
+        # Idle-time tracking on the API server side is not implemented yet.
+        idle_grace = 'idle exceeded'
+
+        node_state = (crunch_worker_state, window, boot_grace, idle_grace)
+        t = transitions[node_state]
+        if t is not None:
+            # Eligible: the table names an action for this state.
+            return (True, "node state is %s" % (node_state,))
+        else:
+            # Not eligible: report the state as the reason.
+            return (False, "node state is %s" % (node_state,))
 
     def consider_shutdown(self):
         try:
+            eligible, reason = self.shutdown_eligible()
             next_opening = self._shutdowns.next_opening()
-            eligible = self.shutdown_eligible()
-            if eligible is True:
-                self._debug("Suggesting shutdown.")
+            if eligible:
+                self._debug("Suggesting shutdown because %s", reason)
                 _notify_subscribers(self.actor_ref.proxy(), self.subscribers)
-            elif self._shutdowns.window_open():
-                self._debug("Cannot shut down because %s", eligible)
-            elif self.last_shutdown_opening != next_opening:
-                self._debug("Shutdown window closed.  Next at %s.",
-                            time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(next_opening)))
-                self._timer.schedule(next_opening, self._later.consider_shutdown)
-                self.last_shutdown_opening = next_opening
+            else:
+                self._debug("Not eligible for shut down because %s", reason)
+
+                if self.last_shutdown_opening != next_opening:
+                    self._debug("Shutdown window closed.  Next at %s.",
+                                time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(next_opening)))
+                    self._timer.schedule(next_opening, self._later.consider_shutdown)
+                    self.last_shutdown_opening = next_opening
         except Exception:
             self._logger.exception("Unexpected exception")
 
index 41919db07e12efe7a262c4635e9a8432febdec2f..cae87197f72f0241ef8763c637e6eecb885acc94 100644 (file)
@@ -6,9 +6,8 @@ import subprocess
 import time
 
 from . import \
-    ComputeNodeSetupActor, ComputeNodeUpdateActor
+    ComputeNodeSetupActor, ComputeNodeUpdateActor, ComputeNodeMonitorActor
 from . import ComputeNodeShutdownActor as ShutdownActorBase
-from . import ComputeNodeMonitorActor as MonitorActorBase
 from .. import RetryMixin
 
 class SlurmMixin(object):
@@ -52,43 +51,27 @@ class ComputeNodeShutdownActor(SlurmMixin, ShutdownActorBase):
         return super(ComputeNodeShutdownActor, self).cancel_shutdown(reason)
 
     @RetryMixin._retry((subprocess.CalledProcessError,))
-    @ShutdownActorBase._stop_if_window_closed
     def issue_slurm_drain(self):
-        self._set_node_state(self._nodename, 'DRAIN', 'Reason=Node Manager shutdown')
-        self._logger.info("Waiting for SLURM node %s to drain", self._nodename)
-        self._later.await_slurm_drain()
+        if self.cancel_reason is not None:
+            return
+        if self._nodename:
+            self._set_node_state(self._nodename, 'DRAIN', 'Reason=Node Manager shutdown')
+            self._logger.info("Waiting for SLURM node %s to drain", self._nodename)
+            self._later.await_slurm_drain()
+        else:
+            self._later.shutdown_node()
 
     @RetryMixin._retry((subprocess.CalledProcessError,))
-    @ShutdownActorBase._stop_if_window_closed
     def await_slurm_drain(self):
+        if self.cancel_reason is not None:
+            return
         output = self._get_slurm_state(self._nodename)
-        if output in self.SLURM_END_STATES:
-            self._later.shutdown_node()
-        else:
+        if output in ("drng\n", "alloc\n", "drng*\n", "alloc*\n"):
             self._timer.schedule(time.time() + 10,
                                  self._later.await_slurm_drain)
-
-
-class ComputeNodeMonitorActor(SlurmMixin, MonitorActorBase):
-
-    def shutdown_eligible(self):
-        if self.arvados_node is not None:
-            state = self._get_slurm_state(self.arvados_node['hostname'])
-            # Automatically eligible for shutdown if it's down or failed, but
-            # not drain to avoid a race condition with resume_node().
-            if state in self.SLURM_END_STATES:
-                if state in self.SLURM_DRAIN_STATES:
-                    return "node is draining"
-                else:
-                    return True
-        return super(ComputeNodeMonitorActor, self).shutdown_eligible()
-
-    def resume_node(self):
-        try:
-            if (self.arvados_node is not None and
-                self._get_slurm_state(self.arvados_node['hostname']) in self.SLURM_DRAIN_STATES):
-                # Resume from "drng" or "drain"
-                self._set_node_state(self.arvados_node['hostname'], 'RESUME')
-        except Exception as error:
-            self._logger.warn(
-                "Exception reenabling node: %s", error, exc_info=error)
+        elif output in ("idle\n"):
+            # Not in "drng" so cancel self.
+            self.cancel_shutdown("slurm state is %s" % output.strip())
+        else:
+            # any other state.
+            self._later.shutdown_node()
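
The new await_slurm_drain() polling logic is a three-way branch on the sinfo
output string. A minimal sketch of that dispatch, with hypothetical callables
standing in for the actor's timer and later-proxy methods:

    # Sketch of the await_slurm_drain() branch on sinfo output.
    # schedule_recheck/cancel_shutdown/shutdown_node are illustrative stand-ins.
    STILL_DRAINING = ("drng\n", "alloc\n", "drng*\n", "alloc*\n")

    def await_slurm_drain(output, schedule_recheck, cancel_shutdown, shutdown_node):
        if output in STILL_DRAINING:
            schedule_recheck()      # still draining or allocated: poll again
        elif output == "idle\n":
            # Node went back to idle instead of draining: abandon the shutdown.
            cancel_shutdown("slurm state is %s" % output.strip())
        else:
            shutdown_node()         # drained, down, or failed: proceed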
diff --git a/services/nodemanager/arvnodeman/computenode/dispatch/transitions.py b/services/nodemanager/arvnodeman/computenode/dispatch/transitions.py
new file mode 100644 (file)
index 0000000..2ff3c94
--- /dev/null
@@ -0,0 +1,52 @@
+transitions = {
+ ('busy', 'closed', 'boot exceeded', 'idle exceeded'): None,
+ ('busy', 'closed', 'boot exceeded', 'idle wait'): None,
+ ('busy', 'closed', 'boot exceeded', 'not idle'): None,
+ ('busy', 'closed', 'boot wait', 'idle exceeded'): None,
+ ('busy', 'closed', 'boot wait', 'idle wait'): None,
+ ('busy', 'closed', 'boot wait', 'not idle'): None,
+ ('busy', 'open', 'boot exceeded', 'idle exceeded'): None,
+ ('busy', 'open', 'boot exceeded', 'idle wait'): None,
+ ('busy', 'open', 'boot exceeded', 'not idle'): None,
+ ('busy', 'open', 'boot wait', 'idle exceeded'): None,
+ ('busy', 'open', 'boot wait', 'idle wait'): None,
+ ('busy', 'open', 'boot wait', 'not idle'): None,
+
+ ('down', 'closed', 'boot exceeded', 'idle exceeded'): "START_SHUTDOWN",
+ ('down', 'closed', 'boot exceeded', 'idle wait'): "START_SHUTDOWN",
+ ('down', 'closed', 'boot exceeded', 'not idle'): "START_SHUTDOWN",
+ ('down', 'closed', 'boot wait', 'idle exceeded'): None,
+ ('down', 'closed', 'boot wait', 'idle wait'): None,
+ ('down', 'closed', 'boot wait', 'not idle'): None,
+ ('down', 'open', 'boot exceeded', 'idle exceeded'): "START_SHUTDOWN",
+ ('down', 'open', 'boot exceeded', 'idle wait'): "START_SHUTDOWN",
+ ('down', 'open', 'boot exceeded', 'not idle'): "START_SHUTDOWN",
+ ('down', 'open', 'boot wait', 'idle exceeded'): "START_SHUTDOWN",
+ ('down', 'open', 'boot wait', 'idle wait'): "START_SHUTDOWN",
+ ('down', 'open', 'boot wait', 'not idle'): "START_SHUTDOWN",
+
+ ('idle', 'closed', 'boot exceeded', 'idle exceeded'): None,
+ ('idle', 'closed', 'boot exceeded', 'idle wait'): None,
+ ('idle', 'closed', 'boot exceeded', 'not idle'): None,
+ ('idle', 'closed', 'boot wait', 'idle exceeded'): None,
+ ('idle', 'closed', 'boot wait', 'idle wait'): None,
+ ('idle', 'closed', 'boot wait', 'not idle'): None,
+ ('idle', 'open', 'boot exceeded', 'idle exceeded'): "START_DRAIN",
+ ('idle', 'open', 'boot exceeded', 'idle wait'): None,
+ ('idle', 'open', 'boot exceeded', 'not idle'): None,
+ ('idle', 'open', 'boot wait', 'idle exceeded'): "START_DRAIN",
+ ('idle', 'open', 'boot wait', 'idle wait'): None,
+ ('idle', 'open', 'boot wait', 'not idle'): None,
+
+ ('unpaired', 'closed', 'boot exceeded', 'idle exceeded'): "START_SHUTDOWN",
+ ('unpaired', 'closed', 'boot exceeded', 'idle wait'): "START_SHUTDOWN",
+ ('unpaired', 'closed', 'boot exceeded', 'not idle'): "START_SHUTDOWN",
+ ('unpaired', 'closed', 'boot wait', 'idle exceeded'): None,
+ ('unpaired', 'closed', 'boot wait', 'idle wait'): None,
+ ('unpaired', 'closed', 'boot wait', 'not idle'): None,
+ ('unpaired', 'open', 'boot exceeded', 'idle exceeded'): "START_SHUTDOWN",
+ ('unpaired', 'open', 'boot exceeded', 'idle wait'): "START_SHUTDOWN",
+ ('unpaired', 'open', 'boot exceeded', 'not idle'): "START_SHUTDOWN",
+ ('unpaired', 'open', 'boot wait', 'idle exceeded'): None,
+ ('unpaired', 'open', 'boot wait', 'idle wait'): None,
+ ('unpaired', 'open', 'boot wait', 'not idle'): None}
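
Since shutdown_eligible() indexes this table directly (t =
transitions[node_state]), a missing key would raise KeyError, so the table
must be exhaustive over the 4 x 2 x 2 x 3 = 48 possible state tuples. A quick
sanity check, assuming the module is importable as shown:

    # Verify the transitions table covers the whole state space.
    import itertools
    from arvnodeman.computenode.dispatch.transitions import transitions

    states = itertools.product(
        ['unpaired', 'busy', 'idle', 'down'],       # crunch_worker_state
        ['open', 'closed'],                         # window
        ['boot wait', 'boot exceeded'],             # boot_grace
        ['not idle', 'idle wait', 'idle exceeded']) # idle_grace
    missing = [s for s in states if s not in transitions]
    assert not missing, "uncovered states: %r" % missing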
index 95b6fa8e0ce962d67b43b26f8106469d9d692faf..1bf2493060d2b84c784f54b1dd11eab9662421e5 100644 (file)
@@ -86,13 +86,13 @@ class BaseComputeNodeDriver(RetryMixin):
 
         Arguments:
         * term: The value that identifies a matching item.
-        * list_method: A string that names the method to call on this
-          instance's libcloud driver for a list of objects.
+        * list_method: A string that names the method to call for a
+          list of objects.
         * key: A function that accepts a cloud object and returns the
           value that is compared against `term` for each item.  Returns
           the object's 'id' attribute by default.
         """
-        items = getattr(self.real, list_method)(**kwargs)
+        items = getattr(self, list_method)(**kwargs)
         results = [item for item in items if key(item) == term]
         count = len(results)
         if count != 1:
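
The change above makes search_for() call the named list method on the driver
subclass itself (which may post-process results) instead of reaching straight
into the libcloud driver. A stripped-down sketch of the single-match search
pattern this method implements (the names here are illustrative, not the
driver's API):

    # Illustrative single-match search: list, filter by key, demand one hit.
    def search_for(obj, term, list_method, key=lambda item: item.id, **kwargs):
        items = getattr(obj, list_method)(**kwargs)
        results = [item for item in items if key(item) == term]
        if len(results) != 1:
            raise ValueError("%d results for %r from %s" %
                             (len(results), term, list_method))
        return results[0]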
index a02da230d8a8b8765debcc18f6971bb1769aacb2..11204409c03cd1894ac4128f1bfc84e3a4767652 100644 (file)
@@ -151,9 +151,6 @@ class NodeManagerDaemonActor(actor_class):
     def _update_poll_time(self, poll_key):
         self.last_polls[poll_key] = time.time()
 
-    def _resume_node(self, node_record):
-        node_record.actor.resume_node()
-
     def _pair_nodes(self, node_record, arvados_node):
         self._logger.info("Cloud node %s is now paired with Arvados node %s",
                           node_record.cloud_node.name, arvados_node['uuid'])
@@ -221,15 +218,6 @@ class NodeManagerDaemonActor(actor_class):
                 if cloud_rec.actor.offer_arvados_pair(arv_node).get():
                     self._pair_nodes(cloud_rec, arv_node)
                     break
-        for rec in self.cloud_nodes.nodes.itervalues():
-            # crunch-dispatch turns all slurm states that are not either "idle"
-            # or "alloc" into "down", but in case that behavior changes, assume
-            # any state that is not "idle" or "alloc" could be a state we want
-            # to try to resume from.
-            if (rec.arvados_node is not None and
-                rec.arvados_node["info"].get("slurm_state") not in ("idle", "alloc") and
-                rec.cloud_node.id not in self.shutdowns):
-                self._resume_node(rec)
 
     def _nodes_booting(self, size):
         s = sum(1
@@ -314,17 +302,17 @@ class NodeManagerDaemonActor(actor_class):
         booting_count = self._nodes_booting(size) + self._nodes_unpaired(size)
         shutdown_count = self._size_shutdowns(size)
         busy_count = self._nodes_busy(size)
-        up_count = self._nodes_up(size) - (shutdown_count + busy_count + self._nodes_missing(size))
+        idle_count = self._nodes_up(size) - (busy_count + self._nodes_missing(size))
 
         self._logger.info("%s: wishlist %i, up %i (booting %i, idle %i, busy %i), shutting down %i", size.name,
                           self._size_wishlist(size),
-                          up_count + busy_count,
+                          idle_count + busy_count,
                           booting_count,
-                          up_count - booting_count,
+                          idle_count - booting_count,
                           busy_count,
                           shutdown_count)
 
-        wanted = self._size_wishlist(size) - up_count
+        wanted = self._size_wishlist(size) - idle_count
         if wanted > 0 and self.max_total_price and ((total_price + (size.price*wanted)) > self.max_total_price):
             can_boot = int((self.max_total_price - total_price) / size.price)
             if can_boot == 0:
@@ -335,7 +323,7 @@ class NodeManagerDaemonActor(actor_class):
             return wanted
 
     def _nodes_excess(self, size):
-        up_count = self._nodes_up(size) - self._size_shutdowns(size)
+        up_count = (self._nodes_booting(size) + self._nodes_booted(size)) - self._size_shutdowns(size)
         if size.id == self.min_cloud_size.id:
             up_count -= self.min_nodes
         return up_count - self._nodes_busy(size) - self._size_wishlist(size)
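
With the rename, the wishlist comparison is against idle capacity rather than
all up nodes, and the max_total_price cap trims how many of the wanted nodes
may actually boot. A worked sketch of that arithmetic with made-up numbers:

    # Worked example of the price cap in _nodes_wanted (numbers invented).
    size_price      = 0.50   # hourly price of this node size
    total_price     = 4.00   # current spend across booted/booting nodes
    max_total_price = 5.00   # configured cluster-wide cap
    wanted          = 4      # wishlist minus idle_count

    if wanted > 0 and max_total_price and (total_price + size_price * wanted) > max_total_price:
        wanted = int((max_total_price - total_price) / size_price)
    print(wanted)  # 2: only two more nodes fit under the cap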
index 14cd85e414ec271f2d8d1a3a9c283ac3c724fe7c..8def8535cf488e7deb028c6f01d0f9dc45ba3ffa 100644 (file)
@@ -205,7 +205,9 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
         cloud_node = testutil.cloud_node_mock(61)
         arv_node = testutil.arvados_node_mock(61)
         self.make_mocks(cloud_node, arv_node, shutdown_open=False)
+        self.cloud_client.destroy_node.return_value = False
         self.make_actor(cancellable=True)
+        self.shutdown_actor.cancel_shutdown("test")
         self.check_success_flag(False, 2)
         self.assertFalse(self.arvados_client.nodes().update.called)
 
@@ -219,14 +221,6 @@ class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         self.check_success_flag(True)
         self.assertTrue(self.cloud_client.destroy_node.called)
 
-    def test_shutdown_cancelled_when_window_closes(self):
-        self.make_mocks(shutdown_open=False)
-        self.make_actor()
-        self.check_success_flag(False, 2)
-        self.assertFalse(self.cloud_client.destroy_node.called)
-        self.assertEqual(self.ACTOR_CLASS.WINDOW_CLOSED,
-                         self.shutdown_actor.cancel_reason.get(self.TIMEOUT))
-
     def test_shutdown_retries_when_cloud_fails(self):
         self.make_mocks()
         self.cloud_client.destroy_node.return_value = False
@@ -356,28 +350,29 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
     def test_no_shutdown_booting(self):
         self.make_actor()
         self.shutdowns._set_state(True, 600)
-        self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT).startswith("node is still booting"))
+        self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT),
+                          (False, "node state is ('unpaired', 'open', 'boot wait', 'idle exceeded')"))
 
     def test_shutdown_without_arvados_node(self):
         self.make_actor(start_time=0)
         self.shutdowns._set_state(True, 600)
-        self.assertIs(True, self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+        self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), (True, "node state is ('unpaired', 'open', 'boot exceeded', 'idle exceeded')"))
 
-    def test_no_shutdown_missing(self):
+    def test_shutdown_missing(self):
         arv_node = testutil.arvados_node_mock(10, job_uuid=None,
                                               crunch_worker_state="down",
                                               last_ping_at='1970-01-01T01:02:03.04050607Z')
         self.make_actor(10, arv_node)
         self.shutdowns._set_state(True, 600)
-        self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT).startswith("node is not idle."))
+        self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), (True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"))
 
-    def test_no_shutdown_running_broken(self):
+    def test_shutdown_running_broken(self):
         arv_node = testutil.arvados_node_mock(12, job_uuid=None,
                                               crunch_worker_state="down")
         self.make_actor(12, arv_node)
         self.shutdowns._set_state(True, 600)
         self.cloud_client.broken.return_value = True
-        self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT).startswith("node is not idle."))
+        self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), (True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"))
 
     def test_shutdown_missing_broken(self):
         arv_node = testutil.arvados_node_mock(11, job_uuid=None,
@@ -386,27 +381,31 @@ class ComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
         self.make_actor(11, arv_node)
         self.shutdowns._set_state(True, 600)
         self.cloud_client.broken.return_value = True
-        self.assertIs(True, self.node_actor.shutdown_eligible().get(self.TIMEOUT))
+        self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT), (True, "node state is ('down', 'open', 'boot wait', 'idle exceeded')"))
 
     def test_no_shutdown_when_window_closed(self):
         self.make_actor(3, testutil.arvados_node_mock(3, job_uuid=None))
-        self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT).startswith("shutdown window is not open."))
+        self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT),
+                          (False, "node state is ('idle', 'closed', 'boot wait', 'idle exceeded')"))
 
     def test_no_shutdown_when_node_running_job(self):
         self.make_actor(4, testutil.arvados_node_mock(4, job_uuid=True))
         self.shutdowns._set_state(True, 600)
-        self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT).startswith("node is not idle."))
+        self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT),
+                          (False, "node state is ('busy', 'open', 'boot wait', 'idle exceeded')"))
 
     def test_no_shutdown_when_node_state_unknown(self):
         self.make_actor(5, testutil.arvados_node_mock(
             5, crunch_worker_state=None))
         self.shutdowns._set_state(True, 600)
-        self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT).startswith("node is not idle."))
+        self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT),
+                          (False, "node is paired but crunch_worker_state is 'None'"))
 
     def test_no_shutdown_when_node_state_stale(self):
         self.make_actor(6, testutil.arvados_node_mock(6, age=90000))
         self.shutdowns._set_state(True, 600)
-        self.assertTrue(self.node_actor.shutdown_eligible().get(self.TIMEOUT).startswith("node is not idle."))
+        self.assertEquals(self.node_actor.shutdown_eligible().get(self.TIMEOUT),
+                          (False, "node state is stale"))
 
     def test_arvados_node_match(self):
         self.make_actor(2)
index 135b817d91b725d26f712a8f2b1f5a5bb1f93144..85a40ceeb25964e15e3696141b373d9ef87cee4d 100644 (file)
@@ -41,11 +41,11 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
             self.check_success_after_reset(proc_mock, end_state)
         return test
 
-    for wait_state in ['alloc\n', 'drng\n', 'idle*\n']:
+    for wait_state in ['alloc\n', 'drng\n']:
         locals()['test_wait_while_' + wait_state.strip()
                  ] = make_wait_state_test(start_state=wait_state)
 
-    for end_state in ['down\n', 'down*\n', 'drain\n', 'fail\n']:
+    for end_state in ['idle*\n', 'down\n', 'down*\n', 'drain\n', 'fail\n']:
         locals()['test_wait_until_' + end_state.strip()
                  ] = make_wait_state_test(end_state=end_state)
 
@@ -55,27 +55,30 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
 
     def test_slurm_bypassed_when_no_arvados_node(self, proc_mock):
         # Test that we correctly handle a node that failed to bootstrap.
-        proc_mock.return_value = 'idle\n'
+        proc_mock.return_value = 'down\n'
         self.make_actor(start_time=0)
         self.check_success_flag(True)
         self.assertFalse(proc_mock.called)
 
-    def test_node_undrained_when_shutdown_window_closes(self, proc_mock):
-        proc_mock.side_effect = iter(['drng\n', 'idle\n'])
-        self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
-        self.make_actor()
-        self.check_success_flag(False, 2)
-        self.check_slurm_got_args(proc_mock, 'NodeName=compute99', 'State=RESUME')
-
-    def test_alloc_node_undrained_when_shutdown_window_closes(self, proc_mock):
-        proc_mock.side_effect = iter(['alloc\n'])
-        self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
-        self.make_actor()
-        self.check_success_flag(False, 2)
-        self.check_slurm_got_args(proc_mock, 'sinfo', '--noheader', '-o', '%t', '-n', 'compute99')
+    def test_node_undrained_when_shutdown_cancelled(self, proc_mock):
+        try:
+            proc_mock.side_effect = iter(['', 'drng\n', 'drng\n', ''])
+            self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
+            self.timer = testutil.MockTimer(False)
+            self.make_actor()
+            self.busywait(lambda: proc_mock.call_args is not None)
+            self.shutdown_actor.cancel_shutdown("test").get(self.TIMEOUT)
+            self.check_success_flag(False, 2)
+            self.assertEqual(proc_mock.call_args_list,
+                             [mock.call(['scontrol', 'update', 'NodeName=compute99', 'State=DRAIN', 'Reason=Node Manager shutdown']),
+                              mock.call(['sinfo', '--noheader', '-o', '%t', '-n', 'compute99']),
+                              mock.call(['sinfo', '--noheader', '-o', '%t', '-n', 'compute99']),
+                              mock.call(['scontrol', 'update', 'NodeName=compute99', 'State=RESUME'])])
+        finally:
+            self.shutdown_actor.actor_ref.stop()
 
     def test_cancel_shutdown_retry(self, proc_mock):
-        proc_mock.side_effect = iter([OSError, 'drain\n', OSError, 'idle\n'])
+        proc_mock.side_effect = iter([OSError, 'drain\n', OSError, 'idle\n', 'idle\n'])
         self.make_mocks(arvados_node=testutil.arvados_node_mock(job_uuid=True))
         self.make_actor()
         self.check_success_flag(False, 2)
@@ -88,66 +91,3 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         proc_mock.return_value = 'drain\n'
         super(SLURMComputeNodeShutdownActorTestCase,
               self).test_arvados_node_cleaned_after_shutdown()
-
-class SLURMComputeNodeMonitorActorTestCase(testutil.ActorTestMixin,
-                                      unittest.TestCase):
-
-    def make_mocks(self, node_num):
-        self.shutdowns = testutil.MockShutdownTimer()
-        self.shutdowns._set_state(False, 300)
-        self.timer = mock.MagicMock(name='timer_mock')
-        self.updates = mock.MagicMock(name='update_mock')
-        self.cloud_mock = testutil.cloud_node_mock(node_num)
-        self.subscriber = mock.Mock(name='subscriber_mock')
-        self.cloud_client = mock.MagicMock(name='cloud_client')
-        self.cloud_client.broken.return_value = False
-
-    def make_actor(self, node_num=1, arv_node=None, start_time=None):
-        if not hasattr(self, 'cloud_mock'):
-            self.make_mocks(node_num)
-        if start_time is None:
-            start_time = time.time()
-        self.node_actor = slurm_dispatch.ComputeNodeMonitorActor.start(
-            self.cloud_mock, start_time, self.shutdowns,
-            testutil.cloud_node_fqdn, self.timer, self.updates, self.cloud_client,
-            arv_node, boot_fail_after=300).proxy()
-        self.node_actor.subscribe(self.subscriber).get(self.TIMEOUT)
-
-    @mock.patch("subprocess.check_output")
-    def test_resume_node(self, check_output):
-        arv_node = testutil.arvados_node_mock()
-        self.make_actor(arv_node=arv_node)
-        check_output.return_value = "drain\n"
-        self.node_actor.resume_node().get(self.TIMEOUT)
-        self.assertIn(mock.call(['sinfo', '--noheader', '-o', '%t', '-n', arv_node['hostname']]), check_output.call_args_list)
-        self.assertIn(mock.call(['scontrol', 'update', 'NodeName=' + arv_node['hostname'], 'State=RESUME']), check_output.call_args_list)
-
-    @mock.patch("subprocess.check_output")
-    def test_no_resume_idle_node(self, check_output):
-        arv_node = testutil.arvados_node_mock()
-        self.make_actor(arv_node=arv_node)
-        check_output.return_value = "idle\n"
-        self.node_actor.resume_node().get(self.TIMEOUT)
-        self.assertIn(mock.call(['sinfo', '--noheader', '-o', '%t', '-n', arv_node['hostname']]), check_output.call_args_list)
-        self.assertNotIn(mock.call(['scontrol', 'update', 'NodeName=' + arv_node['hostname'], 'State=RESUME']), check_output.call_args_list)
-
-    @mock.patch("subprocess.check_output")
-    def test_resume_node_exception(self, check_output):
-        arv_node = testutil.arvados_node_mock()
-        self.make_actor(arv_node=arv_node)
-        check_output.side_effect = Exception()
-        self.node_actor.resume_node().get(self.TIMEOUT)
-        self.assertIn(mock.call(['sinfo', '--noheader', '-o', '%t', '-n', arv_node['hostname']]), check_output.call_args_list)
-        self.assertNotIn(mock.call(['scontrol', 'update', 'NodeName=' + arv_node['hostname'], 'State=RESUME']), check_output.call_args_list)
-
-    @mock.patch("subprocess.check_output")
-    def test_shutdown_down_node(self, check_output):
-        check_output.return_value = "down\n"
-        self.make_actor(arv_node=testutil.arvados_node_mock())
-        self.assertIs(True, self.node_actor.shutdown_eligible().get(self.TIMEOUT))
-
-    @mock.patch("subprocess.check_output")
-    def test_no_shutdown_drain_node(self, check_output):
-        check_output.return_value = "drain\n"
-        self.make_actor(arv_node=testutil.arvados_node_mock())
-        self.assertEquals('node is draining', self.node_actor.shutdown_eligible().get(self.TIMEOUT))
index 8e701b971352f8630030f8e817c22b337eb4a26e..59fc503128aef69be02a6a45aabf86f80fde6540 100644 (file)
@@ -120,3 +120,17 @@ echo z1.test > /var/tmp/arv-node-data/meta-data/instance-type
         n = driver.list_nodes()
         self.assertEqual(nodelist, n)
         self.driver_mock().list_nodes.assert_called_with(ex_fetch_nic=False, ex_resource_group='TestResourceGroup')
+
+    def test_create_can_find_node_after_timeout(self):
+        super(AzureComputeNodeDriverTestCase,
+              self).test_create_can_find_node_after_timeout(
+                  create_kwargs={'tag_arvados-class': 'test'},
+                  node_extra={'tags': {'arvados-class': 'test'}})
+
+    def test_node_found_after_timeout_has_fixed_size(self):
+        size = testutil.MockSize(4)
+        node_props = {'hardwareProfile': {'vmSize': size.id}}
+        cloud_node = testutil.cloud_node_mock(
+            size=None, tags={'arvados-class': 'test'}, properties=node_props)
+        self.check_node_found_after_timeout_has_fixed_size(
+            size, cloud_node, {'tag_arvados-class': 'test'})
index e8b2fa36c582876359fa6e667f80e9a7cb1f3013..84e061d867ff42033fd526e92440695702a3dd8c 100644 (file)
@@ -231,6 +231,11 @@ class GCEComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
         self.assertIs(node, nodelist[0])
         self.assertIs(size, nodelist[0].size)
 
+    def test_node_found_after_timeout_has_fixed_size(self):
+        size = testutil.MockSize(4)
+        cloud_node = testutil.cloud_node_mock(size=size.id)
+        self.check_node_found_after_timeout_has_fixed_size(size, cloud_node)
+
     def test_list_empty_nodes(self):
         self.driver_mock().list_nodes.return_value = []
         self.assertEqual([], self.new_driver().list_nodes())
index 00e05a147ffaf58ea46d345b4b3f4fd84a56a333..d52cdae65172f95061798bddcfc4ac8b2ec12d5a 100644 (file)
@@ -233,6 +233,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         arv_node = testutil.arvados_node_mock(2, job_uuid=True)
         self.make_daemon([testutil.cloud_node_mock(2, size=size)], [arv_node],
                          [size], avail_sizes=[(size, {"cores":1})])
+        self.busywait(lambda: self.node_setup.start.called)
         self.stop_proxy(self.daemon)
         self.assertTrue(self.node_setup.start.called)
 
@@ -522,7 +523,7 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
     def test_nodes_shutting_down_replaced_below_max_nodes(self):
         size = testutil.MockSize(6)
         cloud_node = testutil.cloud_node_mock(6, size=size)
-        self.make_daemon([cloud_node], [testutil.arvados_node_mock(6)],
+        self.make_daemon([cloud_node], [testutil.arvados_node_mock(6, crunch_worker_state='down')],
                          avail_sizes=[(size, {"cores":1})])
         self.assertEqual(1, self.alive_monitor_count())
         monitor = self.monitor_list()[0].proxy()
@@ -602,13 +603,6 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.stop_proxy(self.daemon)
         self.assertEqual(1, self.last_shutdown.stop.call_count)
 
-    def busywait(self, f):
-        n = 0
-        while not f() and n < 10:
-            time.sleep(.1)
-            n += 1
-        self.assertTrue(f())
-
     def test_node_create_two_sizes(self):
         small = testutil.MockSize(1)
         big = testutil.MockSize(2)
@@ -718,25 +712,3 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         # test for that.
         self.assertEqual(2, sizecounts[small.id])
         self.assertEqual(1, sizecounts[big.id])
-
-    @mock.patch("arvnodeman.daemon.NodeManagerDaemonActor._resume_node")
-    def test_resume_drained_nodes(self, resume_node):
-        cloud_node = testutil.cloud_node_mock(1)
-        arv_node = testutil.arvados_node_mock(1, info={"ec2_instance_id": "1", "slurm_state": "down"})
-        self.make_daemon([cloud_node], [arv_node])
-        resume_node.assert_called_with(self.daemon.cloud_nodes.get(self.TIMEOUT).nodes.values()[0])
-        self.stop_proxy(self.daemon)
-
-    @mock.patch("arvnodeman.daemon.NodeManagerDaemonActor._resume_node")
-    def test_no_resume_shutdown_nodes(self, resume_node):
-        cloud_node = testutil.cloud_node_mock(1)
-        arv_node = testutil.arvados_node_mock(1, info={"ec2_instance_id": "1", "slurm_state": "down"})
-
-        self.make_daemon([cloud_node], [])
-
-        self.node_shutdown = mock.MagicMock(name='shutdown_mock')
-        self.daemon.shutdowns.get(self.TIMEOUT)[cloud_node.id] = self.node_shutdown
-
-        self.daemon.update_arvados_nodes([arv_node]).get(self.TIMEOUT)
-        self.stop_proxy(self.daemon)
-        resume_node.assert_not_called()
index b376ca792a01e9bd9a8aecf5ee782982168e6174..1b6aab3cafed16cfc0960d1a39a32d669fe53ffb 100644 (file)
@@ -119,6 +119,13 @@ class ActorTestMixin(object):
             if result is not unassigned:
                 return result
 
+    def busywait(self, f):
+        n = 0
+        while not f() and n < 10:
+            time.sleep(.1)
+            n += 1
+        self.assertTrue(f())
+
 
 class DriverTestMixin(object):
     def setUp(self):
@@ -143,10 +150,10 @@ class DriverTestMixin(object):
             self.assertTrue(self.driver_mock.called)
             self.assertIs(driver.real, driver_mock2)
 
-    def test_create_can_find_node_after_timeout(self):
-        driver = self.new_driver()
+    def test_create_can_find_node_after_timeout(self, create_kwargs={}, node_extra={}):
+        driver = self.new_driver(create_kwargs=create_kwargs)
         arv_node = arvados_node_mock()
-        cloud_node = cloud_node_mock()
+        cloud_node = cloud_node_mock(**node_extra)
         cloud_node.name = driver.create_cloud_name(arv_node)
         create_method = self.driver_mock().create_node
         create_method.side_effect = cloud_types.LibcloudError("fake timeout")
@@ -166,6 +173,20 @@ class DriverTestMixin(object):
             driver.create_node(MockSize(1), arv_node)
         self.assertIs(create_method.side_effect, exc_test.exception)
 
+    def check_node_found_after_timeout_has_fixed_size(self, size, cloud_node,
+                                                      create_kwargs={}):
+        # Driver test suites that need this check must call it explicitly;
+        # it is not picked up as a test on its own.
+        self.driver_mock().list_sizes.return_value = [size]
+        driver = self.new_driver(create_kwargs=create_kwargs)
+        arv_node = arvados_node_mock()
+        cloud_node.name = driver.create_cloud_name(arv_node)
+        create_method = self.driver_mock().create_node
+        create_method.side_effect = cloud_types.LibcloudError("fake timeout")
+        self.driver_mock().list_nodes.return_value = [cloud_node]
+        actual = driver.create_node(size, arv_node)
+        self.assertIs(size, actual.size)
+
 
 class RemotePollLoopActorTestMixin(ActorTestMixin):
     def build_monitor(self, *args, **kwargs):
index 928adf8f2856d0cabd65b0076f0db56096a0e188..991ccec674e1bd65350e3e59ecbf79ee8eebd8c1 100755 (executable)
@@ -101,8 +101,13 @@ wait_for_arvbox() {
 }
 
 run() {
+    if docker ps -a --filter "status=running" | grep -E "$ARVBOX_CONTAINER$" -q ; then
+        echo "Container $ARVBOX_CONTAINER is already running"
+        exit 0
+    fi
+
     if docker ps -a | grep -E "$ARVBOX_CONTAINER$" -q ; then
-        echo "Container $ARVBOX_CONTAINER is already running, use stop, restart or rebuild"
+        echo "Container $ARVBOX_CONTAINER already exists but is not running; use restart or rebuild"
         exit 1
     fi
 
@@ -125,7 +130,8 @@ run() {
               --publish=25100:25100
               --publish=25107:25107
               --publish=25108:25108
-              --publish=8001:8001"
+              --publish=8001:8001
+              --publish=8002:8002"
     else
         PUBLIC=""
     fi
@@ -247,11 +253,11 @@ build() {
         echo "Could not find Dockerfile (expected it at $ARVBOX_DOCKER/Dockerfile.base)"
         exit 1
     fi
-    docker build -t arvados/arvbox-base -f "$ARVBOX_DOCKER/Dockerfile.base" "$ARVBOX_DOCKER"
+    docker build $NO_CACHE -t arvados/arvbox-base -f "$ARVBOX_DOCKER/Dockerfile.base" "$ARVBOX_DOCKER"
     if test "$1" = localdemo -o "$1" = publicdemo ; then
-        docker build -t arvados/arvbox-demo -f "$ARVBOX_DOCKER/Dockerfile.demo" "$ARVBOX_DOCKER"
+        docker build $NO_CACHE -t arvados/arvbox-demo -f "$ARVBOX_DOCKER/Dockerfile.demo" "$ARVBOX_DOCKER"
     else
-        docker build -t arvados/arvbox-dev -f "$ARVBOX_DOCKER/Dockerfile.dev" "$ARVBOX_DOCKER"
+        docker build $NO_CACHE -t arvados/arvbox-dev -f "$ARVBOX_DOCKER/Dockerfile.dev" "$ARVBOX_DOCKER"
     fi
 }
 
@@ -277,6 +283,11 @@ case "$subcmd" in
         build $@
         ;;
 
+    rebuild)
+        check $@
+        NO_CACHE=--no-cache build $@
+        ;;
+
     start|run)
         check $@
         run $@
@@ -300,7 +311,7 @@ case "$subcmd" in
         run $@
         ;;
 
-    rebuild)
+    reboot)
         check $@
         stop
         build $@
@@ -413,9 +424,8 @@ case "$subcmd" in
     *)
         echo "Arvados-in-a-box                      http://arvados.org"
         echo
-        echo "$(basename $0) (build|start|run|open|shell|ip|stop|rebuild|reset|destroy|log|svrestart)"
-        echo
-        echo "build <config>      build arvbox Docker image"
+        echo "build   <config>      build arvbox Docker image"
+        echo "rebuild <config>      build arvbox Docker image, no layer cache"
         echo "start|run <config>  start $ARVBOX_CONTAINER container"
         echo "open       open arvbox workbench in a web browser"
         echo "shell      enter arvbox shell"
@@ -424,7 +434,7 @@ case "$subcmd" in
         echo "status     print some information about current arvbox"
         echo "stop       stop arvbox container"
         echo "restart <config>  stop, then run again"
-        echo "rebuild <config>  stop, build arvbox Docker image, run"
+        echo "reboot  <config>  stop, build arvbox Docker image, run"
         echo "reset      delete arvbox arvados data (be careful!)"
         echo "destroy    delete all arvbox code and data (be careful!)"
         echo "log <service> tail log of specified service"
index 280ac6854e1540f8ff82905a29d5572953731d50..4c9a4ef318bb7c854119be26a520e268e52c453e 100644 (file)
@@ -35,7 +35,7 @@ ADD fuse.conf /etc/
 ADD crunch-setup.sh gitolite.rc \
     keep-setup.sh common.sh createusers.sh \
     logger runsu.sh waitforpostgres.sh \
-    application_yml_override.py \
+    application_yml_override.py api-setup.sh \
     /usr/local/lib/arvbox/
 
 # Start the supervisor.
diff --git a/tools/arvbox/lib/arvbox/docker/api-setup.sh b/tools/arvbox/lib/arvbox/docker/api-setup.sh
new file mode 100755 (executable)
index 0000000..67c43b4
--- /dev/null
@@ -0,0 +1,86 @@
+#!/bin/bash
+
+exec 2>&1
+set -ex -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+
+cd /usr/src/arvados/services/api
+export RAILS_ENV=development
+
+set -u
+
+if ! test -s /var/lib/arvados/api_uuid_prefix ; then
+    ruby -e 'puts "#{rand(2**64).to_s(36)[0,5]}"' > /var/lib/arvados/api_uuid_prefix
+fi
+uuid_prefix=$(cat /var/lib/arvados/api_uuid_prefix)
+
+if ! test -s /var/lib/arvados/api_secret_token ; then
+    ruby -e 'puts rand(2**400).to_s(36)' > /var/lib/arvados/api_secret_token
+fi
+secret_token=$(cat /var/lib/arvados/api_secret_token)
+
+if ! test -s /var/lib/arvados/blob_signing_key ; then
+    ruby -e 'puts rand(2**400).to_s(36)' > /var/lib/arvados/blob_signing_key
+fi
+blob_signing_key=$(cat /var/lib/arvados/blob_signing_key)
+
+# The self-signed key will be created by the SSO server script.
+test -s /var/lib/arvados/self-signed.key
+
+sso_app_secret=$(cat /var/lib/arvados/sso_app_secret)
+
+if test -s /var/lib/arvados/vm-uuid ; then
+    vm_uuid=$(cat /var/lib/arvados/vm-uuid)
+else
+    vm_uuid=$uuid_prefix-2x53u-$(ruby -e 'puts rand(2**400).to_s(36)[0,15]')
+    echo $vm_uuid > /var/lib/arvados/vm-uuid
+fi
+
+cat >config/application.yml <<EOF
+development:
+  uuid_prefix: $uuid_prefix
+  secret_token: $secret_token
+  blob_signing_key: $blob_signing_key
+  sso_app_secret: $sso_app_secret
+  sso_app_id: arvados-server
+  sso_provider_url: "https://$localip:${services[sso]}"
+  sso_insecure: true
+  workbench_address: "http://$localip/"
+  websocket_address: "ws://$localip:${services[websockets]}/websocket"
+  git_repo_ssh_base: "git@$localip:"
+  git_repo_https_base: "http://$localip:${services[arv-git-httpd]}/"
+  new_users_are_active: true
+  auto_admin_first_user: true
+  auto_setup_new_users: true
+  auto_setup_new_users_with_vm_uuid: $vm_uuid
+  auto_setup_new_users_with_repository: true
+  default_collection_replication: 1
+EOF
+
+(cd config && /usr/local/lib/arvbox/application_yml_override.py)
+
+if ! test -f /var/lib/arvados/api_database_pw ; then
+    ruby -e 'puts rand(2**128).to_s(36)' > /var/lib/arvados/api_database_pw
+fi
+database_pw=$(cat /var/lib/arvados/api_database_pw)
+
+if ! (psql postgres -c "\du" | grep "^ arvados ") >/dev/null ; then
+    psql postgres -c "create user arvados with password '$database_pw'"
+    psql postgres -c "ALTER USER arvados CREATEDB;"
+fi
+
+sed "s/password:.*/password: $database_pw/" <config/database.yml.example >config/database.yml
+
+if ! test -f /var/lib/arvados/api_database_setup ; then
+   bundle exec rake db:setup
+   touch /var/lib/arvados/api_database_setup
+fi
+
+if ! test -s /var/lib/arvados/superuser_token ; then
+    bundle exec ./script/create_superuser_token.rb > /var/lib/arvados/superuser_token
+fi
+
+rm -rf tmp
+
+bundle exec rake db:migrate
index 0e50a06ef59b4de5c3e18516bd63b3ed8af27f32..3733fa2ecb187b7a1daa5ce7851546faf5856603 100644 (file)
@@ -21,6 +21,7 @@ services=(
   [keepstore1]=25108
   [ssh]=22
   [doc]=8001
+  [websockets]=8002
 )
 
 if test "$(id arvbox -u 2>/dev/null)" = 0 ; then
index b77c9c27071021d40af771e61a12e0907a51bc98..9ef37921ec18f01b85f9b77bd91fc69f24b48415 100755 (executable)
@@ -7,7 +7,8 @@ if ! grep "^arvbox:" /etc/passwd >/dev/null 2>/dev/null ; then
     HOSTGID=$(ls -nd /usr/src/arvados | sed 's/ */ /' | cut -d' ' -f5)
     FUSEGID=$(ls -nd /dev/fuse | sed 's/ */ /' | cut -d' ' -f5)
 
-    mkdir -p /var/lib/arvados/git /var/lib/gems /var/lib/passenger
+    mkdir -p /var/lib/arvados/git /var/lib/gems \
+          /var/lib/passenger /var/lib/gopath /var/lib/pip
 
     groupadd --gid $HOSTGID --non-unique arvbox
     groupadd --gid $FUSEGID --non-unique fuse
@@ -22,7 +23,8 @@ if ! grep "^arvbox:" /etc/passwd >/dev/null 2>/dev/null ; then
 
     chown arvbox:arvbox -R /usr/local /var/lib/arvados /var/lib/gems \
           /var/lib/passenger /var/lib/postgresql \
-          /var/lib/nginx /var/log/nginx /etc/ssl/private
+          /var/lib/nginx /var/log/nginx /etc/ssl/private \
+          /var/lib/gopath /var/lib/pip
 
     mkdir -p /var/lib/gems/ruby/2.1.0
     chown arvbox:arvbox -R /var/lib/gems/ruby/2.1.0
index 058939c477723d703960b19ccb4331641b1d56eb..a36205c9678d4e67063bbec141072df1a737cea9 100755 (executable)
@@ -15,88 +15,14 @@ if test "$1" = "--only-deps" ; then
     exit
 fi
 
-set -u
-
-if ! test -s /var/lib/arvados/api_uuid_prefix ; then
-    ruby -e 'puts "#{rand(2**64).to_s(36)[0,5]}"' > /var/lib/arvados/api_uuid_prefix
-fi
-uuid_prefix=$(cat /var/lib/arvados/api_uuid_prefix)
-
-if ! test -s /var/lib/arvados/api_secret_token ; then
-    ruby -e 'puts rand(2**400).to_s(36)' > /var/lib/arvados/api_secret_token
-fi
-secret_token=$(cat /var/lib/arvados/api_secret_token)
-
-if ! test -s /var/lib/arvados/blob_signing_key ; then
-    ruby -e 'puts rand(2**400).to_s(36)' > /var/lib/arvados/blob_signing_key
-fi
-blob_signing_key=$(cat /var/lib/arvados/blob_signing_key)
-
-# self signed key will be created by SSO server script.
-test -s /var/lib/arvados/self-signed.key
-
-sso_app_secret=$(cat /var/lib/arvados/sso_app_secret)
-
-if test -s /var/lib/arvados/vm-uuid ; then
-    vm_uuid=$(cat /var/lib/arvados/vm-uuid)
-else
-    vm_uuid=$uuid_prefix-2x53u-$(ruby -e 'puts rand(2**400).to_s(36)[0,15]')
-    echo $vm_uuid > /var/lib/arvados/vm-uuid
-fi
-
-cat >config/application.yml <<EOF
-development:
-  uuid_prefix: $uuid_prefix
-  secret_token: $secret_token
-  blob_signing_key: $blob_signing_key
-  sso_app_secret: $sso_app_secret
-  sso_app_id: arvados-server
-  sso_provider_url: "https://$localip:${services[sso]}"
-  sso_insecure: true
-  workbench_address: "http://$localip/"
-  git_repo_ssh_base: "git@$localip:"
-  git_repo_https_base: "http://$localip:${services[arv-git-httpd]}/"
-  new_users_are_active: true
-  auto_admin_first_user: true
-  auto_setup_new_users: true
-  auto_setup_new_users_with_vm_uuid: $vm_uuid
-  auto_setup_new_users_with_repository: true
-  default_collection_replication: 1
-EOF
-
-(cd config && /usr/local/lib/arvbox/application_yml_override.py)
-
-if ! test -f /var/lib/arvados/api_database_pw ; then
-    ruby -e 'puts rand(2**128).to_s(36)' > /var/lib/arvados/api_database_pw
-fi
-database_pw=$(cat /var/lib/arvados/api_database_pw)
-
-if ! (psql postgres -c "\du" | grep "^ arvados ") >/dev/null ; then
-    psql postgres -c "create user arvados with password '$database_pw'"
-    psql postgres -c "ALTER USER arvados CREATEDB;"
-fi
-
-sed "s/password:.*/password: $database_pw/" <config/database.yml.example >config/database.yml
-
-if ! test -f /var/lib/arvados/api_database_setup ; then
-   bundle exec rake db:setup
-   touch /var/lib/arvados/api_database_setup
-fi
-
-if ! test -s /var/lib/arvados/superuser_token ; then
-    bundle exec ./script/create_superuser_token.rb > /var/lib/arvados/superuser_token
-fi
-
-rm -rf tmp
-
-bundle exec rake db:migrate
+flock /var/lib/arvados/api.lock /usr/local/lib/arvbox/api-setup.sh
 
 set +u
 if test "$1" = "--only-setup" ; then
     exit
 fi
 
-ARVADOS_WEBSOCKETS=1 exec bundle exec passenger start --port=${services[api]} \
+exec bundle exec passenger start --port=${services[api]} \
                   --runtime-dir=/var/lib/passenger \
                   --ssl --ssl-certificate=/var/lib/arvados/self-signed.pem \
                   --ssl-certificate-key=/var/lib/arvados/self-signed.key
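
Both the api and websockets services now funnel the shared Rails setup
through flock(1) on the same lock file, so database creation and migration
run once at a time no matter which service starts first. A rough Python
equivalent of that serialization, illustrative only:

    # Illustrative Python analogue of `flock /var/lib/arvados/api.lock CMD`.
    import fcntl
    import subprocess

    def run_locked(lockfile, argv):
        with open(lockfile, 'w') as f:
            fcntl.flock(f, fcntl.LOCK_EX)   # blocks until the peer finishes
            try:
                subprocess.check_call(argv)
            finally:
                fcntl.flock(f, fcntl.LOCK_UN)

    run_locked('/var/lib/arvados/api.lock',
               ['/usr/local/lib/arvbox/api-setup.sh'])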
diff --git a/tools/arvbox/lib/arvbox/docker/service/websockets/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/websockets/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/websockets/log/run b/tools/arvbox/lib/arvbox/docker/service/websockets/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/websockets/run b/tools/arvbox/lib/arvbox/docker/service/websockets/run
new file mode 120000 (symlink)
index 0000000..a388c8b
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/websockets/run-service b/tools/arvbox/lib/arvbox/docker/service/websockets/run-service
new file mode 100755 (executable)
index 0000000..d0c0b5d
--- /dev/null
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+exec 2>&1
+set -ex -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+
+cd /usr/src/arvados/services/api
+export RAILS_ENV=development
+
+run_bundler --without=development
+
+if test "$1" = "--only-deps" ; then
+    exit
+fi
+
+flock /var/lib/arvados/api.lock /usr/local/lib/arvbox/api-setup.sh
+
+set +u
+if test "$1" = "--only-setup" ; then
+    exit
+fi
+
+export ARVADOS_WEBSOCKETS=ws-only
+
+# Serving SSL directly doesn't work; Puma fails with:
+# Rack app error: #<TypeError: no implicit conversion of Puma::MiniSSL::Socket into Integer>
+#exec bundle exec puma -b "ssl://0.0.0.0:${services[websockets]}?cert=/var/lib/arvados/self-signed.pem&key=/var/lib/arvados/self-signed.key"
+
+exec bundle exec puma -p${services[websockets]}
index 2b6ebce16dc55ff5ad63af14d0924c4cd245a545..f6b5b586ef8217775ed180ea5c51fd5ee501b821 100644 (file)
@@ -3,6 +3,7 @@ from __future__ import print_function
 import arvados
 import Queue
 import threading
+import _strptime
 
 from crunchstat_summary import logger
 
index 2ac12abcba23e381073589cf209915b88a9d8cef..a88e4d5c41f06f017863965d43a9b60818426130 100644 (file)
@@ -11,6 +11,7 @@ import math
 import re
 import sys
 import threading
+import _strptime
 
 from arvados.api import OrderedJsonModel
 from crunchstat_summary import logger
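
The bare `import _strptime` added to both files is the standard workaround
for a Python 2 thread-safety bug: the first call to time.strptime() lazily
imports _strptime, and when that first call happens concurrently in several
threads it can fail with AttributeError or ImportError. Importing the module
eagerly at load time, before any reader threads start, avoids the race. A
small illustration of the pattern being guarded:

    # Eagerly importing _strptime makes concurrent strptime calls safe on
    # Python 2; without it, the first parallel calls can race on the import.
    import threading
    import time
    import _strptime  # noqa: F401  (imported for its side effect)

    def parse():
        time.strptime("2016-04-26 17:00:25", "%Y-%m-%d %H:%M:%S")

    threads = [threading.Thread(target=parse) for _ in range(8)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()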
diff --git a/tools/keep-block-check/.gitignore b/tools/keep-block-check/.gitignore
new file mode 100644 (file)
index 0000000..97eb5da
--- /dev/null
@@ -0,0 +1 @@
+keep-block-check
diff --git a/tools/keep-block-check/keep-block-check.go b/tools/keep-block-check/keep-block-check.go
new file mode 100644 (file)
index 0000000..6cf11a7
--- /dev/null
@@ -0,0 +1,239 @@
+package main
+
+import (
+       "crypto/tls"
+       "errors"
+       "flag"
+       "fmt"
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "io/ioutil"
+       "log"
+       "net/http"
+       "os"
+       "regexp"
+       "strings"
+       "time"
+)
+
+func main() {
+       err := doMain(os.Args[1:])
+       if err != nil {
+               log.Fatalf("%v", err)
+       }
+}
+
+func doMain(args []string) error {
+       flags := flag.NewFlagSet("keep-block-check", flag.ExitOnError)
+
+       configFile := flags.String(
+               "config",
+               "",
+               "Configuration filename. May be either a pathname to a config file, or (for example) 'foo' as shorthand for $HOME/.config/arvados/foo.conf file. This file is expected to specify the values for ARVADOS_API_TOKEN, ARVADOS_API_HOST, ARVADOS_API_HOST_INSECURE, and ARVADOS_BLOB_SIGNING_KEY for the source.")
+
+       keepServicesJSON := flags.String(
+               "keep-services-json",
+               "",
+               "An optional list of available keepservices. "+
+                       "If not provided, this list is obtained from api server configured in config-file.")
+
+       locatorFile := flags.String(
+               "block-hash-file",
+               "",
+               "Filename containing the block hashes to be checked. This is required. "+
+                       "This file contains the block hashes one per line.")
+
+       prefix := flags.String(
+               "prefix",
+               "",
+               "Block hash prefix. When a prefix is specified, only hashes listed in the file with this prefix will be checked.")
+
+       blobSignatureTTLFlag := flags.Duration(
+               "blob-signature-ttl",
+               0,
+               "Lifetime of blob permission signatures on the keepservers. If not provided, this will be retrieved from the API server's discovery document.")
+
+       verbose := flags.Bool(
+               "v",
+               false,
+               "Log progress of each block verification")
+
+       // Parse the flag arguments; the caller has already stripped the command name
+       flags.Parse(args)
+
+       config, blobSigningKey, err := loadConfig(*configFile)
+       if err != nil {
+               return fmt.Errorf("Error loading configuration from file: %s", err.Error())
+       }
+
+       // get list of block locators to be checked
+       blockLocators, err := getBlockLocators(*locatorFile, *prefix)
+       if err != nil {
+               return fmt.Errorf("Error reading block hashes to be checked from file: %s", err.Error())
+       }
+
+       // setup keepclient
+       kc, blobSignatureTTL, err := setupKeepClient(config, *keepServicesJSON, *blobSignatureTTLFlag)
+       if err != nil {
+               return fmt.Errorf("Error configuring keepclient: %s", err.Error())
+       }
+
+       return performKeepBlockCheck(kc, blobSignatureTTL, blobSigningKey, blockLocators, *verbose)
+}
+
+type apiConfig struct {
+       APIToken        string
+       APIHost         string
+       APIHostInsecure bool
+       ExternalClient  bool
+}
+
+// Load config from given file
+func loadConfig(configFile string) (config apiConfig, blobSigningKey string, err error) {
+       if configFile == "" {
+               err = errors.New("Client config file not specified")
+               return
+       }
+
+       config, blobSigningKey, err = readConfigFromFile(configFile)
+       return
+}
+
+var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")
+
+// Read config from file
+func readConfigFromFile(filename string) (config apiConfig, blobSigningKey string, err error) {
+       if !strings.Contains(filename, "/") {
+               filename = os.Getenv("HOME") + "/.config/arvados/" + filename + ".conf"
+       }
+
+       content, err := ioutil.ReadFile(filename)
+
+       if err != nil {
+               return
+       }
+
+       lines := strings.Split(string(content), "\n")
+       for _, line := range lines {
+               if line == "" {
+                       continue
+               }
+
+               kv := strings.SplitN(line, "=", 2)
+               if len(kv) == 2 {
+                       key := strings.TrimSpace(kv[0])
+                       value := strings.TrimSpace(kv[1])
+
+                       switch key {
+                       case "ARVADOS_API_TOKEN":
+                               config.APIToken = value
+                       case "ARVADOS_API_HOST":
+                               config.APIHost = value
+                       case "ARVADOS_API_HOST_INSECURE":
+                               config.APIHostInsecure = matchTrue.MatchString(value)
+                       case "ARVADOS_EXTERNAL_CLIENT":
+                               config.ExternalClient = matchTrue.MatchString(value)
+                       case "ARVADOS_BLOB_SIGNING_KEY":
+                               blobSigningKey = value
+                       }
+               }
+       }
+
+       return
+}
+
+// setup keepclient using the config provided
+func setupKeepClient(config apiConfig, keepServicesJSON string, blobSignatureTTL time.Duration) (kc *keepclient.KeepClient, ttl time.Duration, err error) {
+       arv := arvadosclient.ArvadosClient{
+               ApiToken:    config.APIToken,
+               ApiServer:   config.APIHost,
+               ApiInsecure: config.APIHostInsecure,
+               Client: &http.Client{Transport: &http.Transport{
+                       TLSClientConfig: &tls.Config{InsecureSkipVerify: config.APIHostInsecure}}},
+               External: config.ExternalClient,
+       }
+
+       // If keepServicesJSON is not provided, discover keep services via the API server; otherwise load the given list
+       if keepServicesJSON == "" {
+               kc, err = keepclient.MakeKeepClient(&arv)
+               if err != nil {
+                       return
+               }
+       } else {
+               kc = keepclient.New(&arv)
+               err = kc.LoadKeepServicesFromJSON(keepServicesJSON)
+               if err != nil {
+                       return
+               }
+       }
+
+       // If blobSignatureTTL is not provided, fetch it from the API server's discovery document
+       ttl = blobSignatureTTL
+       if blobSignatureTTL == 0 {
+               value, err := arv.Discovery("blobSignatureTtl")
+               if err == nil {
+                       ttl = time.Duration(int(value.(float64))) * time.Second
+               } else {
+                       return nil, 0, err
+               }
+       }
+
+       return
+}
+
+// Get list of unique block locators from the given file
+func getBlockLocators(locatorFile, prefix string) (locators []string, err error) {
+       if locatorFile == "" {
+               err = errors.New("block-hash-file not specified")
+               return
+       }
+
+       content, err := ioutil.ReadFile(locatorFile)
+       if err != nil {
+               return
+       }
+
+       locatorMap := make(map[string]bool)
+       for _, line := range strings.Split(string(content), "\n") {
+               line = strings.TrimSpace(line)
+               if line == "" || !strings.HasPrefix(line, prefix) || locatorMap[line] {
+                       continue
+               }
+               locators = append(locators, line)
+               locatorMap[line] = true
+       }
+
+       return
+}
+
+// Check each block by fetching its headers from Keep, logging any errors.
+func performKeepBlockCheck(kc *keepclient.KeepClient, blobSignatureTTL time.Duration, blobSigningKey string, blockLocators []string, verbose bool) error {
+       totalBlocks := len(blockLocators)
+       notFoundBlocks := 0
+       current := 0
+       for _, locator := range blockLocators {
+               current++
+               if verbose {
+                       log.Printf("Verifying block %d of %d: %v", current, totalBlocks, locator)
+               }
+               getLocator := locator
+               if blobSigningKey != "" {
+                       expiresAt := time.Now().AddDate(0, 0, 1)
+                       getLocator = keepclient.SignLocator(locator, kc.Arvados.ApiToken, expiresAt, blobSignatureTTL, []byte(blobSigningKey))
+               }
+
+               _, _, err := kc.Ask(getLocator)
+               if err != nil {
+                       notFoundBlocks++
+                       log.Printf("Error verifying block %v: %v", locator, err)
+               }
+       }
+
+       log.Printf("Verify block totals: %d attempts, %d successes, %d errors", totalBlocks, totalBlocks-notFoundBlocks, notFoundBlocks)
+
+       if notFoundBlocks > 0 {
+               return fmt.Errorf("Block verification failed for %d out of %d blocks with matching prefix.", notFoundBlocks, totalBlocks)
+       }
+
+       return nil
+}
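
The config files keep-block-check reads are plain NAME=VALUE lines, one per
line, with blanks and malformed lines ignored; the test fixture below writes
the same layout. For reference, a minimal Python sketch of the parsing done
by readConfigFromFile() (illustrative, not part of the tool):

    # Minimal NAME=VALUE config reader mirroring readConfigFromFile().
    import os

    def read_config(filename):
        if '/' not in filename:
            filename = os.path.join(os.environ['HOME'],
                                    '.config/arvados', filename + '.conf')
        config = {}
        with open(filename) as f:
            for line in f:
                if '=' not in line:
                    continue              # skip blanks and malformed lines
                key, value = line.split('=', 1)
                config[key.strip()] = value.strip()
        return config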
diff --git a/tools/keep-block-check/keep-block-check_test.go b/tools/keep-block-check/keep-block-check_test.go
new file mode 100644 (file)
index 0000000..e49fe68
--- /dev/null
@@ -0,0 +1,352 @@
+package main
+
+import (
+       "bytes"
+       "fmt"
+       "io"
+       "io/ioutil"
+       "log"
+       "os"
+       "regexp"
+       "strings"
+       "testing"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+
+       . "gopkg.in/check.v1"
+)
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       TestingT(t)
+}
+
+// Gocheck boilerplate
+var _ = Suite(&ServerRequiredSuite{})
+var _ = Suite(&DoMainTestSuite{})
+
+type ServerRequiredSuite struct{}
+type DoMainTestSuite struct{}
+
+var kc *keepclient.KeepClient
+var logBuffer bytes.Buffer
+
+var TestHash = "aaaa09c290d0fb1ca068ffaddf22cbd0"
+var TestHash2 = "aaaac516f788aec4f30932ffb6395c39"
+
+var blobSignatureTTL = time.Duration(2*7*24) * time.Hour
+
+func (s *ServerRequiredSuite) SetUpSuite(c *C) {
+       arvadostest.StartAPI()
+}
+
+func (s *ServerRequiredSuite) TearDownSuite(c *C) {
+       arvadostest.StopAPI()
+       arvadostest.ResetEnv()
+}
+
+func (s *ServerRequiredSuite) SetUpTest(c *C) {
+       logOutput := io.MultiWriter(&logBuffer)
+       log.SetOutput(logOutput)
+}
+
+func (s *ServerRequiredSuite) TearDownTest(c *C) {
+       arvadostest.StopKeep(2)
+       log.SetOutput(os.Stdout)
+       log.Printf("%v", logBuffer.String())
+}
+
+func (s *DoMainTestSuite) SetUpSuite(c *C) {
+}
+
+func (s *DoMainTestSuite) SetUpTest(c *C) {
+       logOutput := io.MultiWriter(&logBuffer)
+       log.SetOutput(logOutput)
+}
+
+func (s *DoMainTestSuite) TearDownTest(c *C) {
+       log.SetOutput(os.Stdout)
+       log.Printf("%v", logBuffer.String())
+}
+
+func setupKeepBlockCheck(c *C, enforcePermissions bool, keepServicesJSON string) {
+       setupKeepBlockCheckWithTTL(c, enforcePermissions, keepServicesJSON, blobSignatureTTL)
+}
+
+func setupKeepBlockCheckWithTTL(c *C, enforcePermissions bool, keepServicesJSON string, ttl time.Duration) {
+       var config apiConfig
+       config.APIHost = os.Getenv("ARVADOS_API_HOST")
+       config.APIToken = arvadostest.DataManagerToken
+       config.APIHostInsecure = matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
+
+       // Start Keep servers
+       arvadostest.StartKeep(2, enforcePermissions)
+
+       // setup keepclient
+       var err error
+       kc, ttl, err = setupKeepClient(config, keepServicesJSON, ttl)
+       c.Assert(ttl, Equals, blobSignatureTTL)
+       c.Check(err, IsNil)
+}
+
+// Setup test data
+func setupTestData(c *C) []string {
+       allLocators := []string{}
+
+       // Put a few blocks
+       for i := 0; i < 5; i++ {
+               hash, _, err := kc.PutB([]byte(fmt.Sprintf("keep-block-check-test-data-%d", i)))
+               c.Check(err, IsNil)
+               allLocators = append(allLocators, strings.Split(hash, "+A")[0])
+       }
+
+       return allLocators
+}
+
+func setupConfigFile(c *C, fileName string) string {
+       // Setup a config file
+       file, err := ioutil.TempFile(os.TempDir(), fileName)
+       c.Check(err, IsNil)
+
+       // Add config to the file. While at it, throw in some extra white space
+       fileContent := "ARVADOS_API_HOST=" + os.Getenv("ARVADOS_API_HOST") + "\n"
+       fileContent += "ARVADOS_API_TOKEN=" + arvadostest.DataManagerToken + "\n"
+       fileContent += "\n"
+       fileContent += "ARVADOS_API_HOST_INSECURE=" + os.Getenv("ARVADOS_API_HOST_INSECURE") + "\n"
+       fileContent += " ARVADOS_EXTERNAL_CLIENT = false \n"
+       fileContent += " NotANameValuePairAndShouldGetIgnored \n"
+       fileContent += "ARVADOS_BLOB_SIGNING_KEY=abcdefg\n"
+
+       _, err = file.Write([]byte(fileContent))
+       c.Check(err, IsNil)
+
+       return file.Name()
+}
+
+func setupBlockHashFile(c *C, name string, blocks []string) string {
+       // Setup a block hash file
+       file, err := ioutil.TempFile(os.TempDir(), name)
+       c.Check(err, IsNil)
+
+       // Add the hashes to the file. While at it, throw in some extra white space
+       fileContent := ""
+       for _, hash := range blocks {
+               fileContent += fmt.Sprintf(" %s \n", hash)
+       }
+       fileContent += "\n"
+       _, err = file.Write([]byte(fileContent))
+       c.Check(err, IsNil)
+
+       return file.Name()
+}
+
+func checkErrorLog(c *C, blocks []string, prefix, suffix string) {
+       for _, hash := range blocks {
+               expected := prefix + `.*` + hash + `.*` + suffix
+               match, _ := regexp.MatchString(expected, logBuffer.String())
+               c.Assert(match, Equals, true)
+       }
+}
+
+func checkNoErrorsLogged(c *C, prefix, suffix string) {
+       expected := prefix + `.*` + suffix
+       match, _ := regexp.MatchString(expected, logBuffer.String())
+       c.Assert(match, Equals, false)
+}
+
+func (s *ServerRequiredSuite) TestBlockCheck(c *C) {
+       setupKeepBlockCheck(c, false, "")
+       allLocators := setupTestData(c)
+       err := performKeepBlockCheck(kc, blobSignatureTTL, "", allLocators, true)
+       c.Check(err, IsNil)
+       checkNoErrorsLogged(c, "Error verifying block", "Block not found")
+}
+
+func (s *ServerRequiredSuite) TestBlockCheckWithBlobSigning(c *C) {
+       setupKeepBlockCheck(c, true, "")
+       allLocators := setupTestData(c)
+       err := performKeepBlockCheck(kc, blobSignatureTTL, arvadostest.BlobSigningKey, allLocators, true)
+       c.Check(err, IsNil)
+       checkNoErrorsLogged(c, "Error verifying block", "Block not found")
+}
+
+func (s *ServerRequiredSuite) TestBlockCheckWithBlobSigningAndTTLFromDiscovery(c *C) {
+       setupKeepBlockCheckWithTTL(c, true, "", 0)
+       allLocators := setupTestData(c)
+       err := performKeepBlockCheck(kc, blobSignatureTTL, arvadostest.BlobSigningKey, allLocators, true)
+       c.Check(err, IsNil)
+       checkNoErrorsLogged(c, "Error verifying block", "Block not found")
+}
+
+func (s *ServerRequiredSuite) TestBlockCheck_NoSuchBlock(c *C) {
+       setupKeepBlockCheck(c, false, "")
+       allLocators := setupTestData(c)
+       allLocators = append(allLocators, TestHash)
+       allLocators = append(allLocators, TestHash2)
+       err := performKeepBlockCheck(kc, blobSignatureTTL, "", allLocators, true)
+       c.Check(err, NotNil)
+       c.Assert(err.Error(), Equals, "Block verification failed for 2 out of 7 blocks with matching prefix.")
+       checkErrorLog(c, []string{TestHash, TestHash2}, "Error verifying block", "Block not found")
+}
+
+func (s *ServerRequiredSuite) TestBlockCheck_NoSuchBlock_WithMatchingPrefix(c *C) {
+       setupKeepBlockCheck(c, false, "")
+       allLocators := setupTestData(c)
+       allLocators = append(allLocators, TestHash)
+       allLocators = append(allLocators, TestHash2)
+       locatorFile := setupBlockHashFile(c, "block-hash", allLocators)
+       defer os.Remove(locatorFile)
+       locators, err := getBlockLocators(locatorFile, "aaa")
+       c.Check(err, IsNil)
+       err = performKeepBlockCheck(kc, blobSignatureTTL, "", locators, true)
+       c.Check(err, NotNil)
+       // Of the 7 blocks in allLocators, only 2 match the prefix, and hence only those are checked
+       c.Assert(err.Error(), Equals, "Block verification failed for 2 out of 2 blocks with matching prefix.")
+       checkErrorLog(c, []string{TestHash, TestHash2}, "Error verifying block", "Block not found")
+}
+
+func (s *ServerRequiredSuite) TestBlockCheck_NoSuchBlock_WithPrefixMismatch(c *C) {
+       setupKeepBlockCheck(c, false, "")
+       allLocators := setupTestData(c)
+       allLocators = append(allLocators, TestHash)
+       allLocators = append(allLocators, TestHash2)
+       locatorFile := setupBlockHashFile(c, "block-hash", allLocators)
+       defer os.Remove(locatorFile)
+       locators, err := getBlockLocators(locatorFile, "999")
+       c.Check(err, IsNil)
+       err = performKeepBlockCheck(kc, blobSignatureTTL, "", locators, true)
+       c.Check(err, IsNil) // there were no matching locators in the file, and hence nothing was checked
+}
+
+func (s *ServerRequiredSuite) TestBlockCheck_BadSignature(c *C) {
+       setupKeepBlockCheck(c, true, "")
+       setupTestData(c)
+       err := performKeepBlockCheck(kc, blobSignatureTTL, "badblobsigningkey", []string{TestHash, TestHash2}, false)
+       c.Assert(err.Error(), Equals, "Block verification failed for 2 out of 2 blocks with matching prefix.")
+       checkErrorLog(c, []string{TestHash, TestHash2}, "Error verifying block", "HTTP 403")
+       // verbose logging not requested
+       c.Assert(strings.Contains(logBuffer.String(), "Verifying block 1 of 2"), Equals, false)
+}
+
+var testKeepServicesJSON = `{
+  "kind":"arvados#keepServiceList",
+  "etag":"",
+  "self_link":"",
+  "offset":null, "limit":null,
+  "items":[
+    {"href":"/keep_services/zzzzz-bi6l4-123456789012340",
+     "kind":"arvados#keepService",
+     "uuid":"zzzzz-bi6l4-123456789012340",
+     "service_host":"keep0.zzzzz.arvadosapi.com",
+     "service_port":25107,
+     "service_ssl_flag":false,
+     "service_type":"disk",
+     "read_only":false },
+    {"href":"/keep_services/zzzzz-bi6l4-123456789012341",
+     "kind":"arvados#keepService",
+     "uuid":"zzzzz-bi6l4-123456789012341",
+     "service_host":"keep0.zzzzz.arvadosapi.com",
+     "service_port":25108,
+     "service_ssl_flag":false,
+     "service_type":"disk",
+     "read_only":false }
+    ],
+  "items_available":2 }`
+
+// Setup block-check using keepServicesJSON with fake keepservers.
+// Expect error during performKeepBlockCheck due to unreachable keepservers.
+func (s *ServerRequiredSuite) TestErrorDuringKeepBlockCheck_FakeKeepservers(c *C) {
+       setupKeepBlockCheck(c, false, testKeepServicesJSON)
+       err := performKeepBlockCheck(kc, blobSignatureTTL, "", []string{TestHash, TestHash2}, true)
+       c.Assert(err.Error(), Equals, "Block verification failed for 2 out of 2 blocks with matching prefix.")
+       checkErrorLog(c, []string{TestHash, TestHash2}, "Error verifying block", "")
+}
+
+// Test keep-block-check initialization with keepServicesJSON
+func (s *ServerRequiredSuite) TestKeepBlockCheck_InitializeWithKeepServicesJSON(c *C) {
+       setupKeepBlockCheck(c, false, testKeepServicesJSON)
+       found := 0
+       for k := range kc.LocalRoots() {
+               if k == "zzzzz-bi6l4-123456789012340" || k == "zzzzz-bi6l4-123456789012341" {
+                       found++
+               }
+       }
+       c.Check(found, Equals, 2)
+}
+
+// Test loadConfig func
+func (s *ServerRequiredSuite) TestLoadConfig(c *C) {
+       // Setup config file
+       configFile := setupConfigFile(c, "config")
+       defer os.Remove(configFile)
+
+       // load configuration from the file
+       config, blobSigningKey, err := loadConfig(configFile)
+       c.Check(err, IsNil)
+
+       c.Assert(config.APIHost, Equals, os.Getenv("ARVADOS_API_HOST"))
+       c.Assert(config.APIToken, Equals, arvadostest.DataManagerToken)
+       c.Assert(config.APIHostInsecure, Equals, matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE")))
+       c.Assert(config.ExternalClient, Equals, false)
+       c.Assert(blobSigningKey, Equals, "abcdefg")
+}
+
+func (s *DoMainTestSuite) Test_doMain_WithNoConfig(c *C) {
+       args := []string{"-prefix", "a"}
+       err := doMain(args)
+       c.Check(err, NotNil)
+       c.Assert(strings.Contains(err.Error(), "config file not specified"), Equals, true)
+}
+
+func (s *DoMainTestSuite) Test_doMain_WithNoSuchConfigFile(c *C) {
+       args := []string{"-config", "no-such-file"}
+       err := doMain(args)
+       c.Check(err, NotNil)
+       c.Assert(strings.Contains(err.Error(), "no such file or directory"), Equals, true)
+}
+
+func (s *DoMainTestSuite) Test_doMain_WithNoBlockHashFile(c *C) {
+       config := setupConfigFile(c, "config")
+       defer os.Remove(config)
+
+       // Start keepservers.
+       arvadostest.StartKeep(2, false)
+       defer arvadostest.StopKeep(2)
+
+       args := []string{"-config", config}
+       err := doMain(args)
+       c.Assert(strings.Contains(err.Error(), "block-hash-file not specified"), Equals, true)
+}
+
+func (s *DoMainTestSuite) Test_doMain_WithNoSuchBlockHashFile(c *C) {
+       config := setupConfigFile(c, "config")
+       defer os.Remove(config)
+
+       arvadostest.StartKeep(2, false)
+       defer arvadostest.StopKeep(2)
+
+       args := []string{"-config", config, "-block-hash-file", "no-such-file"}
+       err := doMain(args)
+       c.Assert(strings.Contains(err.Error(), "no such file or directory"), Equals, true)
+}
+
+func (s *DoMainTestSuite) Test_doMain(c *C) {
+       // Start keepservers.
+       arvadostest.StartKeep(2, false)
+       defer arvadostest.StopKeep(2)
+
+       config := setupConfigFile(c, "config")
+       defer os.Remove(config)
+
+       locatorFile := setupBlockHashFile(c, "block-hash", []string{TestHash, TestHash2})
+       defer os.Remove(locatorFile)
+
+       args := []string{"-config", config, "-block-hash-file", locatorFile, "-v"}
+       err := doMain(args)
+       c.Check(err, NotNil)
+       c.Assert(err.Error(), Equals, "Block verification failed for 2 out of 2 blocks with matching prefix.")
+       checkErrorLog(c, []string{TestHash, TestHash2}, "Error verifying block", "Block not found")
+       c.Assert(strings.Contains(logBuffer.String(), "Verifying block 1 of 2"), Equals, true)
+}
index 820772eb5b6a040d15683549b1e617c5d9e718e0..c6e7665caa2a312c327b8a603159a7da07941450 100644 (file)
@@ -60,6 +60,11 @@ func doMain() error {
                "",
                "Index prefix")
 
+       srcBlobSignatureTTLFlag := flags.Duration(
+               "src-blob-signature-ttl",
+               0,
+               "Lifetime of blob permission signatures on source keepservers. If not provided, this will be retrieved from the API server's discovery document.")
+
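
Since this is a flags.Duration, the value uses Go duration syntax; for
example, a two-week source TTL could be pinned on the command line with
-src-blob-signature-ttl 336h rather than fetched from the discovery
document.
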
        // Parse args; omit the first arg which is the command name
        flags.Parse(os.Args[1:])
 
@@ -74,18 +79,18 @@ func doMain() error {
        }
 
        // setup src and dst keepclients
-       kcSrc, err := setupKeepClient(srcConfig, *srcKeepServicesJSON, false, 0)
+       kcSrc, srcBlobSignatureTTL, err := setupKeepClient(srcConfig, *srcKeepServicesJSON, false, 0, *srcBlobSignatureTTLFlag)
        if err != nil {
                return fmt.Errorf("Error configuring src keepclient: %s", err.Error())
        }
 
-       kcDst, err := setupKeepClient(dstConfig, *dstKeepServicesJSON, true, *replications)
+       kcDst, _, err := setupKeepClient(dstConfig, *dstKeepServicesJSON, true, *replications, 0)
        if err != nil {
                return fmt.Errorf("Error configuring dst keepclient: %s", err.Error())
        }
 
        // Copy blocks not found in dst from src
-       err = performKeepRsync(kcSrc, kcDst, srcBlobSigningKey, *prefix)
+       err = performKeepRsync(kcSrc, kcDst, srcBlobSignatureTTL, srcBlobSigningKey, *prefix)
        if err != nil {
                return fmt.Errorf("Error while syncing data: %s", err.Error())
        }
@@ -155,7 +160,7 @@ func readConfigFromFile(filename string) (config apiConfig, blobSigningKey strin
 }
 
 // setup keepclient using the config provided
-func setupKeepClient(config apiConfig, keepServicesJSON string, isDst bool, replications int) (kc *keepclient.KeepClient, err error) {
+func setupKeepClient(config apiConfig, keepServicesJSON string, isDst bool, replications int, srcBlobSignatureTTL time.Duration) (kc *keepclient.KeepClient, blobSignatureTTL time.Duration, err error) {
        arv := arvadosclient.ArvadosClient{
                ApiToken:    config.APIToken,
                ApiServer:   config.APIHost,
@@ -169,13 +174,13 @@ func setupKeepClient(config apiConfig, keepServicesJSON string, isDst bool, repl
        if keepServicesJSON == "" {
                kc, err = keepclient.MakeKeepClient(&arv)
                if err != nil {
-                       return nil, err
+                       return nil, 0, err
                }
        } else {
                kc = keepclient.New(&arv)
                err = kc.LoadKeepServicesFromJSON(keepServicesJSON)
                if err != nil {
-                       return kc, err
+                       return kc, 0, err
                }
        }
 
@@ -186,19 +191,30 @@ func setupKeepClient(config apiConfig, keepServicesJSON string, isDst bool, repl
                        if err == nil {
                                replications = int(value.(float64))
                        } else {
-                               return nil, err
+                               return nil, 0, err
                        }
                }
 
                kc.Want_replicas = replications
        }
 
-       return kc, nil
+       // If srcBlobSignatureTTL is not provided, get it from the API server's discovery document
+       blobSignatureTTL = srcBlobSignatureTTL
+       if !isDst && srcBlobSignatureTTL == 0 {
+               value, err := arv.Discovery("blobSignatureTtl")
+               if err == nil {
+                       blobSignatureTTL = time.Duration(int(value.(float64))) * time.Second
+               } else {
+                       return nil, 0, err
+               }
+       }
+
+       return kc, blobSignatureTTL, nil
 }
 
 // Get unique block locators from src and dst
 // Copy any blocks missing in dst
-func performKeepRsync(kcSrc, kcDst *keepclient.KeepClient, blobSigningKey, prefix string) error {
+func performKeepRsync(kcSrc, kcDst *keepclient.KeepClient, srcBlobSignatureTTL time.Duration, blobSigningKey, prefix string) error {
        // Get unique locators from src
        srcIndex, err := getUniqueLocators(kcSrc, prefix)
        if err != nil {
@@ -218,7 +234,7 @@ func performKeepRsync(kcSrc, kcDst *keepclient.KeepClient, blobSigningKey, prefi
        log.Printf("Before keep-rsync, there are %d blocks in src and %d blocks in dst. Start copying %d blocks from src not found in dst.",
                len(srcIndex), len(dstIndex), len(toBeCopied))
 
-       err = copyBlocksToDst(toBeCopied, kcSrc, kcDst, blobSigningKey)
+       err = copyBlocksToDst(toBeCopied, kcSrc, kcDst, srcBlobSignatureTTL, blobSigningKey)
 
        return err
 }
@@ -254,7 +270,7 @@ func getMissingLocators(srcLocators, dstLocators map[string]bool) []string {
 }
 
 // Copy blocks from src to dst; only those that are missing in dst are copied
-func copyBlocksToDst(toBeCopied []string, kcSrc, kcDst *keepclient.KeepClient, blobSigningKey string) error {
+func copyBlocksToDst(toBeCopied []string, kcSrc, kcDst *keepclient.KeepClient, srcBlobSignatureTTL time.Duration, blobSigningKey string) error {
        total := len(toBeCopied)
 
        startedAt := time.Now()
@@ -271,7 +287,7 @@ func copyBlocksToDst(toBeCopied []string, kcSrc, kcDst *keepclient.KeepClient, b
                getLocator := locator
                expiresAt := time.Now().AddDate(0, 0, 1)
                if blobSigningKey != "" {
-                       getLocator = keepclient.SignLocator(getLocator, kcSrc.Arvados.ApiToken, expiresAt, []byte(blobSigningKey))
+                       getLocator = keepclient.SignLocator(getLocator, kcSrc.Arvados.ApiToken, expiresAt, srcBlobSignatureTTL, []byte(blobSigningKey))
                }
 
                reader, len, _, err := kcSrc.Get(getLocator)
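
The essential change in this hunk is the TTL parameter now threaded into
keepclient.SignLocator. A minimal sketch of the updated call, with
placeholder token and signing-key values:

    expiresAt := time.Now().AddDate(0, 0, 1) // signature good for one more day
    signed := keepclient.SignLocator(locator, token, expiresAt,
            2*7*24*time.Hour, // srcBlobSignatureTTL, e.g. two weeks
            []byte(signingKey))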
index 94281fa8bcbb89f5432614adf715376ad666beab..09609eb7498bb8dc28d95bc41892f4cca9ec8563 100644 (file)
@@ -49,6 +49,7 @@ func (s *DoMainTestSuite) SetUpSuite(c *C) {
 
 var kcSrc, kcDst *keepclient.KeepClient
 var srcKeepServicesJSON, dstKeepServicesJSON, blobSigningKey string
+var blobSignatureTTL = time.Duration(2*7*24) * time.Hour
 
 func (s *ServerRequiredSuite) SetUpTest(c *C) {
        // reset all variables between tests
@@ -91,7 +92,7 @@ func setupRsync(c *C, enforcePermissions bool, replications int) {
        dstConfig.APIHostInsecure = matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
 
        if enforcePermissions {
-               blobSigningKey = "zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc"
+               blobSigningKey = arvadostest.BlobSigningKey
        }
 
        // Start Keep servers
@@ -99,10 +100,10 @@ func setupRsync(c *C, enforcePermissions bool, replications int) {
 
        // setup keepclients
        var err error
-       kcSrc, err = setupKeepClient(srcConfig, srcKeepServicesJSON, false, 0)
+       kcSrc, _, err = setupKeepClient(srcConfig, srcKeepServicesJSON, false, 0, blobSignatureTTL)
        c.Check(err, IsNil)
 
-       kcDst, err = setupKeepClient(dstConfig, dstKeepServicesJSON, true, replications)
+       kcDst, _, err = setupKeepClient(dstConfig, dstKeepServicesJSON, true, replications, 0)
        c.Check(err, IsNil)
 
        for uuid := range kcSrc.LocalRoots() {
@@ -174,7 +175,7 @@ func testNoCrosstalk(c *C, testData string, kc1, kc2 *keepclient.KeepClient) {
        c.Assert(err, Equals, nil)
 
        locator = strings.Split(locator, "+")[0]
-       _, _, _, err = kc2.Get(keepclient.SignLocator(locator, kc2.Arvados.ApiToken, time.Now().AddDate(0, 0, 1), []byte(blobSigningKey)))
+       _, _, _, err = kc2.Get(keepclient.SignLocator(locator, kc2.Arvados.ApiToken, time.Now().AddDate(0, 0, 1), blobSignatureTTL, []byte(blobSigningKey)))
        c.Assert(err, NotNil)
        c.Check(err.Error(), Equals, "Block not found")
 }
@@ -256,7 +257,7 @@ func testKeepRsync(c *C, enforcePermissions bool, prefix string) {
        // setupTestData
        setupTestData(c, prefix)
 
-       err := performKeepRsync(kcSrc, kcDst, blobSigningKey, prefix)
+       err := performKeepRsync(kcSrc, kcDst, blobSignatureTTL, blobSigningKey, prefix)
        c.Check(err, IsNil)
 
        // Now GetIndex from dst and verify that all 5 from src and the 2 extra blocks are found
@@ -327,7 +328,7 @@ func (s *ServerRequiredSuite) TestErrorDuringRsync_FakeSrcKeepservers(c *C) {
 
        setupRsync(c, false, 1)
 
-       err := performKeepRsync(kcSrc, kcDst, "", "")
+       err := performKeepRsync(kcSrc, kcDst, blobSignatureTTL, "", "")
        log.Printf("Err = %v", err)
        c.Check(strings.Contains(err.Error(), "no such host"), Equals, true)
 }
@@ -339,7 +340,7 @@ func (s *ServerRequiredSuite) TestErrorDuringRsync_FakeDstKeepservers(c *C) {
 
        setupRsync(c, false, 1)
 
-       err := performKeepRsync(kcSrc, kcDst, "", "")
+       err := performKeepRsync(kcSrc, kcDst, blobSignatureTTL, "", "")
        log.Printf("Err = %v", err)
        c.Check(strings.Contains(err.Error(), "no such host"), Equals, true)
 }
@@ -354,7 +355,7 @@ func (s *ServerRequiredSuite) TestErrorDuringRsync_ErrorGettingBlockFromSrc(c *C
        // Change blob signing key to a fake key, so that Get from src fails
        blobSigningKey = "thisisfakeblobsigningkey"
 
-       err := performKeepRsync(kcSrc, kcDst, blobSigningKey, "")
+       err := performKeepRsync(kcSrc, kcDst, blobSignatureTTL, blobSigningKey, "")
        c.Check(strings.Contains(err.Error(), "HTTP 403 \"Forbidden\""), Equals, true)
 }
 
@@ -368,7 +369,7 @@ func (s *ServerRequiredSuite) TestErrorDuringRsync_ErrorPuttingBlockInDst(c *C)
        // Increase Want_replicas on dst to result in insufficient replicas error during Put
        kcDst.Want_replicas = 2
 
-       err := performKeepRsync(kcSrc, kcDst, blobSigningKey, "")
+       err := performKeepRsync(kcSrc, kcDst, blobSignatureTTL, blobSigningKey, "")
        c.Check(strings.Contains(err.Error(), "Could not write sufficient replicas"), Equals, true)
 }
 
@@ -416,6 +417,18 @@ func (s *ServerNotRequiredSuite) TestLoadConfig_ErrorLoadingSrcConfig(c *C) {
        c.Assert(strings.Contains(err.Error(), "no such file or directory"), Equals, true)
 }
 
+func (s *ServerNotRequiredSuite) TestSetupKeepClient_NoBlobSignatureTTL(c *C) {
+       var srcConfig apiConfig
+       srcConfig.APIHost = os.Getenv("ARVADOS_API_HOST")
+       srcConfig.APIToken = arvadostest.DataManagerToken
+       srcConfig.APIHostInsecure = matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
+       arvadostest.StartKeep(2, false)
+
+       _, ttl, err := setupKeepClient(srcConfig, srcKeepServicesJSON, false, 0, 0)
+       c.Check(err, IsNil)
+       c.Assert(ttl, Equals, blobSignatureTTL)
+}
+
 func setupConfigFile(c *C, name string) *os.File {
        // Setup a config file
        file, err := ioutil.TempFile(os.TempDir(), name)