Merge branch '14383-java-sdk-double-slash'. Fixes #14383.
author Tom Morris <tfmorris@VTG-11015-TMORRIS.local>
Wed, 27 Feb 2019 15:58:28 +0000 (10:58 -0500)
committer Tom Morris <tfmorris@VTG-11015-TMORRIS.local>
Wed, 27 Feb 2019 15:58:28 +0000 (10:58 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Morris <tfmorris@veritasgenetics.com>

167 files changed:
.gitignore
apps/workbench/.gitignore
apps/workbench/app/assets/javascripts/components/edit_tags.js
apps/workbench/app/controllers/work_units_controller.rb
apps/workbench/app/views/layouts/body.html.erb
apps/workbench/config/application.default.yml
apps/workbench/config/initializers/validate_wb2_url_config.rb [new file with mode: 0644]
apps/workbench/lib/config_validators.rb [new file with mode: 0644]
apps/workbench/package-build.version [deleted file]
apps/workbench/test/integration/application_layout_test.rb
apps/workbench/test/the.patch [deleted file]
backports/deb-libfuse-dev/fpm-info.sh [deleted file]
backports/python-apache-libcloud/fpm-info.sh [deleted file]
backports/python-ciso8601/fpm-info.sh [deleted file]
backports/python-llfuse/fpm-info.sh [deleted file]
backports/python-pycurl/fpm-info.sh [deleted file]
build/build-dev-docker-jobs-image.sh
build/build.list [deleted file]
build/check-copyright-notices
build/package-build-dockerfiles/Makefile
build/package-build-dockerfiles/centos7/Dockerfile
build/package-build-dockerfiles/debian8/Dockerfile
build/package-build-dockerfiles/debian9/Dockerfile
build/package-build-dockerfiles/ubuntu1404/Dockerfile
build/package-build-dockerfiles/ubuntu1604/Dockerfile
build/package-build-dockerfiles/ubuntu1804/Dockerfile
build/package-test-dockerfiles/Makefile
build/package-test-dockerfiles/centos7/Dockerfile
build/package-test-dockerfiles/debian8/Dockerfile
build/package-test-dockerfiles/debian9/Dockerfile
build/package-test-dockerfiles/ubuntu1404/Dockerfile
build/package-test-dockerfiles/ubuntu1604/Dockerfile
build/package-test-dockerfiles/ubuntu1804/Dockerfile
build/package-testing/deb-common-test-packages.sh
build/package-testing/rpm-common-test-packages.sh
build/package-testing/test-package-arvados-node-manager.sh
build/package-testing/test-package-python27-python-arvados-cwl-runner.sh
build/package-testing/test-package-python27-python-arvados-fuse.sh
build/package-testing/test-package-python27-python-arvados-python-client.sh
build/run-build-docker-jobs-image.sh
build/run-build-packages-one-target.sh
build/run-build-packages-python-and-ruby.sh
build/run-build-packages.sh
build/run-library.sh
build/run-tests.sh
cmd/arvados-server/arvados-dispatch-cloud.service [moved from cmd/arvados-server/crunch-dispatch-cloud.service with 89% similarity]
doc/_includes/_install_compute_docker.liquid
doc/admin/upgrading.html.textile.liquid
doc/sdk/python/sdk-python.html.textile.liquid
doc/user/topics/arv-docker.html.textile.liquid
docker/jobs/Dockerfile
lib/cloud/azure/azure.go [moved from lib/cloud/azure.go with 64% similarity]
lib/cloud/azure/azure_test.go [moved from lib/cloud/azure_test.go with 62% similarity]
lib/cloud/gocheck_test.go [deleted file]
lib/cloud/interfaces.go
lib/controller/cmd.go
lib/controller/federation_test.go
lib/controller/handler.go
lib/controller/handler_test.go
lib/controller/server_test.go
lib/dispatchcloud/cmd.go
lib/dispatchcloud/container/queue.go
lib/dispatchcloud/container/queue_test.go [new file with mode: 0644]
lib/dispatchcloud/dispatcher.go
lib/dispatchcloud/dispatcher_test.go
lib/dispatchcloud/driver.go
lib/dispatchcloud/instance_set_proxy.go [deleted file]
lib/dispatchcloud/readme_states.txt [new file with mode: 0644]
lib/dispatchcloud/scheduler/fix_stale_locks.go
lib/dispatchcloud/scheduler/interfaces.go
lib/dispatchcloud/scheduler/run_queue.go
lib/dispatchcloud/scheduler/run_queue_test.go
lib/dispatchcloud/scheduler/scheduler.go
lib/dispatchcloud/scheduler/sync.go
lib/dispatchcloud/ssh_executor/executor.go
lib/dispatchcloud/ssh_executor/executor_test.go
lib/dispatchcloud/test/lame_instance_set.go [deleted file]
lib/dispatchcloud/test/ssh_service.go
lib/dispatchcloud/test/stub_driver.go
lib/dispatchcloud/worker/pool.go
lib/dispatchcloud/worker/pool_test.go
lib/dispatchcloud/worker/throttle.go [new file with mode: 0644]
lib/dispatchcloud/worker/throttle_test.go [new file with mode: 0644]
lib/dispatchcloud/worker/verify.go [new file with mode: 0644]
lib/dispatchcloud/worker/worker.go
lib/dispatchcloud/worker/worker_test.go [new file with mode: 0644]
lib/service/cmd.go
sdk/cli/arvados-cli.gemspec
sdk/cli/bin/crunch-job
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvjob.py
sdk/cwl/arvados_cwl/arvtool.py
sdk/cwl/arvados_cwl/arvworkflow.py
sdk/cwl/arvados_cwl/crunch_script.py
sdk/cwl/arvados_cwl/done.py
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/arvados_cwl/fsaccess.py
sdk/cwl/arvados_cwl/http.py
sdk/cwl/arvados_cwl/pathmapper.py
sdk/cwl/arvados_cwl/perf.py
sdk/cwl/arvados_cwl/runner.py
sdk/cwl/arvados_cwl/task_queue.py
sdk/cwl/arvados_cwl/util.py
sdk/cwl/fpm-info.sh [moved from backports/python-pycrypto/fpm-info.sh with 51% similarity]
sdk/cwl/gittaggers.py
sdk/cwl/setup.py
sdk/cwl/test_with_arvbox.sh
sdk/cwl/tests/12213-keepref-expr.cwl
sdk/cwl/tests/arvados-tests.yml
sdk/cwl/tests/federation/framework/check_exist.py
sdk/cwl/tests/federation/framework/prepare.py
sdk/cwl/tests/hw.py
sdk/cwl/tests/matcher.py
sdk/cwl/tests/test_container.py
sdk/cwl/tests/test_http.py
sdk/cwl/tests/test_job.py
sdk/cwl/tests/test_make_output.py
sdk/cwl/tests/test_submit.py
sdk/cwl/tests/test_urljoin.py
sdk/cwl/tests/test_util.py
sdk/cwl/tests/wf/check_mem.py
sdk/dev-jobs.dockerfile
sdk/go/arvados/client.go
sdk/go/arvados/config.go
sdk/go/arvados/container.go
sdk/go/arvados/duration.go
sdk/go/arvadostest/fixtures.go
sdk/go/ctxlog/log.go
sdk/pam/fpm-info.sh
sdk/pam/lib/libpam_arvados.py
sdk/python/arvados/commands/keepdocker.py
sdk/python/fpm-info.sh [moved from backports/deb-fuse/fpm-info.sh with 68% similarity]
sdk/python/setup.py
services/api/.gitignore
services/api/app/models/api_client_authorization.rb
services/api/app/models/collection.rb
services/api/app/models/container.rb
services/api/config/application.default.yml
services/api/db/migrate/20190214214814_add_container_lock_count.rb [new file with mode: 0644]
services/api/db/structure.sql
services/api/test/functional/arvados/v1/collections_controller_test.rb
services/api/test/unit/container_test.rb
services/crunch-run/background.go
services/crunch-run/crunchrun.go
services/dockercleaner/README.rst [new file with mode: 0644]
services/dockercleaner/arvados-docker-cleaner.service
services/dockercleaner/bin/arvados-docker-cleaner [new file with mode: 0755]
services/fuse/fpm-info.sh
services/keepstore/azure_blob_volume.go
services/keepstore/handler_test.go
services/keepstore/handlers.go
services/keepstore/keepstore.go
services/keepstore/keepstore_test.go
services/keepstore/volume_test.go
services/login-sync/arvados-login-sync.gemspec
services/nodemanager/fpm-info.sh [new file with mode: 0644]
services/ws/session_v0.go
tools/arvbox/bin/arvbox
tools/arvbox/lib/arvbox/docker/Dockerfile.base
tools/arvbox/lib/arvbox/docker/common.sh
tools/arvbox/lib/arvbox/docker/service/certificate/run
tools/arvbox/lib/arvbox/docker/service/workbench/run-service
tools/crunchstat-summary/README.rst [new file with mode: 0644]
tools/crunchstat-summary/fpm-info.sh [new file with mode: 0644]
tools/crunchstat-summary/setup.py
vendor/vendor.json

diff --git a/.gitignore b/.gitignore
index 517166c3fbd2043bff880db2b11b97238e4eb5f2..db3020ae82d13231817872355b04dd09849f87a0 100644 (file)
@@ -29,4 +29,5 @@ services/api/config/arvados-clients.yml
 .DS_Store
 .vscode
 .Rproj.user
-_version.py
\ No newline at end of file
+_version.py
+*.bak
diff --git a/apps/workbench/.gitignore b/apps/workbench/.gitignore
index 156fc86a5eadee7b9cef56c004db808c8a3d8d03..25c7c3ef24ea04acc225e1d1295d424d3959a0cc 100644 (file)
@@ -44,3 +44,6 @@
 # npm-rails
 /node_modules
 /npm-debug.log
+
+# Generated when building distribution packages
+/package-build.version
diff --git a/apps/workbench/app/assets/javascripts/components/edit_tags.js b/apps/workbench/app/assets/javascripts/components/edit_tags.js
index ac4d2df7b235f57851c80dae768d1da7fda3182f..1fddb2651ef96a2cbec2e5dff1da030a0f33c3eb 100644 (file)
@@ -4,7 +4,7 @@
 
 window.SimpleInput = {
     view: function(vnode) {
-        return m("input.form-control", {
+        return m('input.form-control', {
             style: {
                 width: '100%',
             },
@@ -22,7 +22,7 @@ window.SimpleInput = {
 
 window.SelectOrAutocomplete = {
     view: function(vnode) {
-        return m("input.form-control", {
+        return m('input.form-control', {
             style: {
                 width: '100%'
             },
@@ -87,9 +87,9 @@ window.TagEditorRow = {
                     valueOpts = vnode.attrs.vocabulary().tags[vnode.attrs.name()].values
             }
         }
-        return m("tr", [
+        return m('tr', [
             // Erase tag
-            m("td", [
+            m('td', [
                 vnode.attrs.editMode &&
                 m('div.text-center', m('a.btn.btn-default.btn-sm', {
                     style: {
@@ -99,13 +99,13 @@ window.TagEditorRow = {
                 }, m('i.fa.fa-fw.fa-trash-o')))
             ]),
             // Tag key
-            m("td", [
+            m('td', [
                 vnode.attrs.editMode ?
-                m("div", {key: 'key'}, [
+                m('div', {key: 'key'}, [
                     m(inputComponent, {
                         options: nameOpts,
                         value: vnode.attrs.name,
-                        // Allow any tag name unless "strict" is set to true.
+                        // Allow any tag name unless 'strict' is set to true.
                         create: !vnode.attrs.vocabulary().strict,
                         placeholder: 'key',
                     })
@@ -113,9 +113,9 @@ window.TagEditorRow = {
                 : vnode.attrs.name
             ]),
             // Tag value
-            m("td", [
+            m('td', [
                 vnode.attrs.editMode ?
-                m("div", {key: 'value'}, [
+                m('div', {key: 'value'}, [
                     m(inputComponent, {
                         options: valueOpts,
                         value: vnode.attrs.value,
@@ -137,20 +137,20 @@ window.TagEditorRow = {
 
 window.TagEditorTable = {
     view: function(vnode) {
-        return m("table.table.table-condensed.table-justforlayout", [
-            m("colgroup", [
-                m("col", {width:"5%"}),
-                m("col", {width:"25%"}),
-                m("col", {width:"70%"}),
+        return m('table.table.table-condensed.table-justforlayout', [
+            m('colgroup', [
+                m('col', {width:'5%'}),
+                m('col', {width:'25%'}),
+                m('col', {width:'70%'}),
             ]),
-            m("thead", [
-                m("tr", [
-                    m("th"),
-                    m("th", "Key"),
-                    m("th", "Value"),
+            m('thead', [
+                m('tr', [
+                    m('th'),
+                    m('th', 'Key'),
+                    m('th', 'Value'),
                 ])
             ]),
-            m("tbody", [
+            m('tbody', [
                 vnode.attrs.tags.length > 0
                 ? vnode.attrs.tags.map(function(tag, idx) {
                     return m(TagEditorRow, {
@@ -165,7 +165,7 @@ window.TagEditorTable = {
                         vocabulary: vnode.attrs.vocabulary
                     })
                 })
-                : m("tr", m("td[colspan=3]", m("center", "Loading tags...")))
+                : m('tr', m('td[colspan=3]', m('center', 'Loading tags...')))
             ]),
         ])
     }
@@ -185,18 +185,18 @@ window.TagEditorApp = {
     oninit: function(vnode) {
         vnode.state.sessionDB = new SessionDB()
         // Get vocabulary
-        vnode.state.vocabulary = m.stream({"strict":false, "tags":{}})
+        vnode.state.vocabulary = m.stream({'strict':false, 'tags':{}})
         var vocabularyTimestamp = parseInt(Date.now() / 300000) // Bust cache every 5 minutes
         m.request('/vocabulary.json?v=' + vocabularyTimestamp).then(vnode.state.vocabulary)
         vnode.state.editMode = vnode.attrs.targetEditable
         vnode.state.tags = []
         vnode.state.dirty = m.stream(false)
         vnode.state.dirty.map(m.redraw)
-        vnode.state.objPath = '/arvados/v1/'+vnode.attrs.targetController+'/'+vnode.attrs.targetUuid
+        vnode.state.objPath = 'arvados/v1/' + vnode.attrs.targetController + '/' + vnode.attrs.targetUuid
         // Get tags
         vnode.state.sessionDB.request(
             vnode.state.sessionDB.loadLocal(),
-            '/arvados/v1/'+vnode.attrs.targetController,
+            'arvados/v1/' + vnode.attrs.targetController,
             {
                 data: {
                     filters: JSON.stringify([['uuid', '=', vnode.attrs.targetUuid]]),
@@ -228,8 +228,8 @@ window.TagEditorApp = {
     view: function(vnode) {
         return [
             vnode.state.editMode &&
-            m("div.pull-left", [
-                m("a.btn.btn-primary.btn-sm"+(vnode.state.dirty() ? '' : '.disabled'), {
+            m('div.pull-left', [
+                m('a.btn.btn-primary.btn-sm' + (vnode.state.dirty() ? '' : '.disabled'), {
                     style: {
                         margin: '10px 0px'
                     },
@@ -244,7 +244,7 @@ window.TagEditorApp = {
                         vnode.state.sessionDB.request(
                             vnode.state.sessionDB.loadLocal(),
                             vnode.state.objPath, {
-                                method: "PUT",
+                                method: 'PUT',
                                 data: {properties: JSON.stringify(tags)}
                             }
                         ).then(function(v) {
diff --git a/apps/workbench/app/controllers/work_units_controller.rb b/apps/workbench/app/controllers/work_units_controller.rb
index 767762c81e3cd3d899bda0b3bce873cc97c390b9..d3ded867c198f5c265fafb7b49a89d50e1515fc9 100644 (file)
@@ -126,7 +126,7 @@ class WorkUnitsController < ApplicationController
                           "--local",
                           "--api=containers",
                           "--project-uuid=#{params['work_unit']['owner_uuid']}",
-                          "--collection-keep-cache=#{keep_cache}",
+                          "--collection-cache-size=#{keep_cache}",
                           "/var/lib/cwl/workflow.json#main",
                           "/var/lib/cwl/cwl.input.json"]
 
diff --git a/apps/workbench/app/views/layouts/body.html.erb b/apps/workbench/app/views/layouts/body.html.erb
index b017b4a29ae2bbd35877301f5a6f021555eb6f11..b2cd097f3174c03fa49c7d1f280d7c809ecf280c 100644 (file)
@@ -82,6 +82,21 @@ SPDX-License-Identifier: AGPL-3.0 %>
                      </form>
                     </li>
                   <% end %>
+                <% if Rails.configuration.workbench2_url %>
+                <li role="menuitem">
+                  <%
+                    wb2_url = Rails.configuration.workbench2_url
+                    wb2_url += '/' if wb2_url[-1] != '/'
+                    wb2_url += 'token'
+                  %>
+                  <form action="<%= wb2_url %>" method="GET">
+                    <input type="hidden" name="api_token" value="<%= Thread.current[:arvados_api_token] %>">
+                    <button role="menuitem" type="submit">
+                      <i class="fa fa-lg fa-share-square fa-fw"></i> Go to Workbench 2
+                    </button>
+                  </form>
+                </li>
+                <% end %>
                 <li role="menuitem">
                   <%= link_to virtual_machines_user_path(current_user), role: 'menu-item' do %>
                     <i class="fa fa-lg fa-terminal fa-fw"></i> Virtual machines
diff --git a/apps/workbench/config/application.default.yml b/apps/workbench/config/application.default.yml
index 4e0a35a5550360252cae77e49e22ac1d7dec370f..ccc7e4bbddaaf8c6396fe33c863b96f1bbf54235 100644 (file)
@@ -326,3 +326,11 @@ common:
   # the jobs api is disabled and there are no local git repositories.
   #
   repositories: true
+
+  #
+  # Add an item to the user menu pointing to workbench2_url, if not false.
+  #
+  # Example:
+  # workbench2_url: https://workbench2.qr1hi.arvadosapi.com
+  #
+  workbench2_url: false
diff --git a/apps/workbench/config/initializers/validate_wb2_url_config.rb b/apps/workbench/config/initializers/validate_wb2_url_config.rb
new file mode 100644 (file)
index 0000000..f909648
--- /dev/null
@@ -0,0 +1,7 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+include ConfigValidators
+
+ConfigValidators::validate_wb2_url_config()
\ No newline at end of file
diff --git a/apps/workbench/lib/config_validators.rb b/apps/workbench/lib/config_validators.rb
new file mode 100644 (file)
index 0000000..ec76916
--- /dev/null
@@ -0,0 +1,28 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+require 'uri'
+
+module ConfigValidators
+    def validate_wb2_url_config
+        if Rails.configuration.workbench2_url
+            begin
+                if !URI.parse(Rails.configuration.workbench2_url).is_a?(URI::HTTP)
+                    Rails.logger.warn("workbench2_url config is not an HTTP URL: #{Rails.configuration.workbench2_url}")
+                    Rails.configuration.workbench2_url = false
+                elsif /.*[\/]{2,}$/.match(Rails.configuration.workbench2_url)
+                    Rails.logger.warn("workbench2_url config shouldn't have multiple trailing slashes: #{Rails.configuration.workbench2_url}")
+                    Rails.configuration.workbench2_url = false
+                else
+                    return true
+                end
+            rescue URI::InvalidURIError
+                Rails.logger.warn("workbench2_url config invalid URL: #{Rails.configuration.workbench2_url}")
+                Rails.configuration.workbench2_url = false
+            end
+        end
+        return false
+    end
+end
+
diff --git a/apps/workbench/package-build.version b/apps/workbench/package-build.version
deleted file mode 100644 (file)
index 41eb2c7..0000000
+++ /dev/null
@@ -1 +0,0 @@
-1.2.1.20181126194329
diff --git a/apps/workbench/test/integration/application_layout_test.rb b/apps/workbench/test/integration/application_layout_test.rb
index 74a42877b1301f52b65e557b75bbac60165439f6..b3f704cdd98feb3be31326b2cbaf6451c5aa7925 100644 (file)
@@ -140,6 +140,30 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
     end
   end
 
+  [
+    [false, false],
+    ['http://wb2.example.org//', false],
+    ['ftp://wb2.example.org', false],
+    ['wb2.example.org', false],
+    ['http://wb2.example.org', true],
+    ['https://wb2.example.org', true],
+    ['http://wb2.example.org/', true],
+    ['https://wb2.example.org/', true],
+  ].each do |wb2_url_config, wb2_menu_appear|
+    test "workbench2_url=#{wb2_url_config} should#{wb2_menu_appear ? '' : ' not'} show WB2 menu" do
+      Rails.configuration.workbench2_url = wb2_url_config
+      assert_equal wb2_menu_appear, ConfigValidators::validate_wb2_url_config()
+
+      visit page_with_token('active')
+      within('.navbar-fixed-top') do
+        page.find("#notifications-menu").click
+        within('.dropdown-menu') do
+          assert_equal wb2_menu_appear, page.has_text?('Go to Workbench 2')
+        end
+      end
+    end
+  end
+
   [
     ['active', true],
     ['active_with_prefs_profile_no_getting_started_shown', false],
diff --git a/apps/workbench/test/the.patch b/apps/workbench/test/the.patch
deleted file mode 100644 (file)
index 5a55679..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-+    echo -n 'geckodriver: '
-+    which geckodriver || fatal "No geckodriver. Unable to find Mozilla geckodriver. Please download the server from https://github.com/mozilla/geckodriver/releases and place it somewhere on your PATH. More info at https://developer.mozilla.org/en-US/docs/Mozilla/QA/Marionette/WebDriver."
-
diff --git a/backports/deb-libfuse-dev/fpm-info.sh b/backports/deb-libfuse-dev/fpm-info.sh
deleted file mode 100644 (file)
index 46088c0..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-case "$TARGET" in
-    ubuntu1204)
-        fpm_depends+=('libfuse2 = 2.9.2-5')
-        ;;
-esac
diff --git a/backports/python-apache-libcloud/fpm-info.sh b/backports/python-apache-libcloud/fpm-info.sh
deleted file mode 100644 (file)
index c866168..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/bash
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-case $TARGET in
-     centos7)
-         # fpm incorrectly transforms the dependency name in this case.
-         fpm_depends+=(python-backports-ssl_match_hostname)
-         fpm_args+=(--python-disable-dependency backports.ssl-match-hostname)
-     ;;
-esac
diff --git a/backports/python-ciso8601/fpm-info.sh b/backports/python-ciso8601/fpm-info.sh
deleted file mode 100644 (file)
index 7e24f5d..0000000
+++ /dev/null
@@ -1,12 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-case "$TARGET" in
-    centos*)
-        fpm_depends+=(glibc)
-        ;;
-    debian* | ubuntu*)
-        fpm_depends+=(libc6)
-        ;;
-esac
diff --git a/backports/python-llfuse/fpm-info.sh b/backports/python-llfuse/fpm-info.sh
deleted file mode 100644 (file)
index ef0b446..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-case "$TARGET" in
-    centos*)
-        build_depends+=('fuse-devel')
-        fpm_depends+=(glibc fuse-libs)
-        ;;
-    ubuntu1204)
-        build_depends+=(libfuse2 libfuse-dev)
-        fpm_depends+=(libc6 python-contextlib2 'libfuse2 = 2.9.2-5' 'fuse = 2.9.2-5')
-        ;;
-    debian* | ubuntu*)
-        build_depends+=('libfuse-dev')
-        fpm_depends+=(libc6 'libfuse2 > 2.9.0' 'fuse > 2.9.0')
-        ;;
-esac
diff --git a/backports/python-pycurl/fpm-info.sh b/backports/python-pycurl/fpm-info.sh
deleted file mode 100644 (file)
index 8ec9c79..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: Apache-2.0
-
-case "$TARGET" in
-    debian8)
-            fpm_depends+=(
-                libc6
-                libcomerr2
-                libcurl3-gnutls
-                libffi6
-                libgcrypt20
-                libgmp10
-                libgnutls-deb0-28
-                libgpg-error0
-                libgssapi-krb5-2
-                libhogweed2
-                libidn11
-                libk5crypto3
-                libkeyutils1
-                libkrb5-3
-                libkrb5support0
-                libldap-2.4-2
-                libnettle4
-                libp11-kit0
-                librtmp1
-                libsasl2-2
-                libssh2-1
-                libtasn1-6
-                zlib1g
-            ) ;;
-    ubuntu1204)
-            fpm_depends+=(
-                libasn1-8-heimdal
-                libc6
-                libcomerr2
-                libcurl3-gnutls
-                libgcrypt11
-                libgnutls26
-                libgpg-error0
-                libgssapi-krb5-2
-                libgssapi3-heimdal
-                libhcrypto4-heimdal
-                libheimbase1-heimdal
-                libheimntlm0-heimdal
-                libhx509-5-heimdal
-                libidn11
-                libk5crypto3
-                libkeyutils1
-                libkrb5-26-heimdal
-                libkrb5-3
-                libkrb5support0
-                libldap-2.4-2
-                libp11-kit0
-                libroken18-heimdal
-                librtmp0
-                libsasl2-2
-                libsqlite3-0
-                libtasn1-3
-                libwind0-heimdal
-                zlib1g
-            ) ;;
-    ubuntu1404)
-            fpm_depends+=(
-                libasn1-8-heimdal
-                libc6
-                libcomerr2
-                libcurl3-gnutls
-                libffi6
-                libgcrypt11
-                libgnutls26
-                libgpg-error0
-                libgssapi-krb5-2
-                libgssapi3-heimdal
-                libhcrypto4-heimdal
-                libheimbase1-heimdal
-                libheimntlm0-heimdal
-                libhx509-5-heimdal
-                libidn11
-                libk5crypto3
-                libkeyutils1
-                libkrb5-26-heimdal
-                libkrb5-3
-                libkrb5support0
-                libldap-2.4-2
-                libp11-kit0
-                libroken18-heimdal
-                librtmp0
-                libsasl2-2
-                libsqlite3-0
-                libtasn1-6
-                libwind0-heimdal
-                zlib1g
-            ) ;;
-esac
diff --git a/build/build-dev-docker-jobs-image.sh b/build/build-dev-docker-jobs-image.sh
index 9393c1accec46a4e6e3bb6cb35df3cd4bef54bf6..2e4c4573823e4d67efc84ad22b70c4ec915aff95 100755 (executable)
@@ -16,6 +16,7 @@ Syntax:
 WORKSPACE=path         Path to the Arvados source tree to build packages from
 CWLTOOL=path           (optional) Path to cwltool git repository.
 SALAD=path             (optional) Path to schema_salad git repository.
+PYCMD=pythonexec       (optional) Specify the python executable to use in the docker image. Defaults to "python".
 
 EOF
 
@@ -35,6 +36,11 @@ fi
 
 cd "$WORKSPACE"
 
+py=python
+if [[ -n "$PYCMD" ]] ; then
+    py="$PYCMD" ;
+fi
+
 (cd sdk/python && python setup.py sdist)
 sdk=$(cd sdk/python/dist && ls -t arvados-python-client-*.tar.gz | head -n1)
 
@@ -69,6 +75,6 @@ if [[ $python_sdk_ts -gt $cwl_runner_ts ]]; then
     cwl_runner_version=$(cd sdk/python && nohash_version_from_git 1.0)
 fi
 
-docker build --build-arg sdk=$sdk --build-arg runner=$runner --build-arg salad=$salad --build-arg cwltool=$cwltool -f "$WORKSPACE/sdk/dev-jobs.dockerfile" -t arvados/jobs:$cwl_runner_version "$WORKSPACE/sdk"
+docker build --build-arg sdk=$sdk --build-arg runner=$runner --build-arg salad=$salad --build-arg cwltool=$cwltool --build-arg pythoncmd=$py -f "$WORKSPACE/sdk/dev-jobs.dockerfile" -t arvados/jobs:$cwl_runner_version "$WORKSPACE/sdk"
 echo arv-keepdocker arvados/jobs $cwl_runner_version
 arv-keepdocker arvados/jobs $cwl_runner_version
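
Example invocation (illustrative sketch; the workspace path is a placeholder): the new PYCMD setting selects the Python interpreter used inside the dev image, defaulting to "python" as documented in the usage text above.

    WORKSPACE=~/arvados PYCMD=python3 ./build/build-dev-docker-jobs-image.sh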
diff --git a/build/build.list b/build/build.list
deleted file mode 100644 (file)
index 502460b..0000000
+++ /dev/null
@@ -1,57 +0,0 @@
-# Copyright (C) The Arvados Authors. All rights reserved.
-#
-# SPDX-License-Identifier: AGPL-3.0
-
-#distribution(s)|name|version|iteration|type|architecture|extra fpm arguments
-debian8,debian9,centos7|python-gflags|2.0|2|python|all
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|google-api-python-client|1.6.2|2|python|all
-debian8,debian9,ubuntu1404,centos7|oauth2client|1.5.2|2|python|all
-debian8,debian9,ubuntu1404,centos7|pyasn1|0.1.7|2|python|all
-debian8,debian9,ubuntu1404,centos7|pyasn1-modules|0.0.5|2|python|all
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|rsa|3.4.2|2|python|all
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|uritemplate|3.0.0|2|python|all
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|httplib2|0.9.2|3|python|all
-debian8,debian9,centos7,ubuntu1404,ubuntu1604|ws4py|0.4.2|2|python|all
-debian8,debian9,centos7|pykka|1.2.1|2|python|all
-debian8,debian9,ubuntu1404,centos7|six|1.10.0|2|python|all
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|ciso8601|1.0.6|3|python|amd64
-debian8,debian9,centos7|pycrypto|2.6.1|3|python|amd64
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804|backports.ssl_match_hostname|3.5.0.1|2|python|all
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|llfuse|1.2|3|python|amd64
-debian8,debian9,ubuntu1404,centos7|pycurl|7.19.5.3|3|python|amd64
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|pyyaml|3.12|2|python|amd64
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|rdflib|4.2.2|2|python|all
-debian8,debian9,ubuntu1404,centos7|shellescape|3.4.1|2|python|all
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|mistune|0.8.1|2|python|all
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|typing|3.6.4|2|python|all
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|avro|1.8.1|2|python|all
-debian8,debian9,ubuntu1404,centos7|ruamel.ordereddict|0.4.9|2|python|amd64
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|cachecontrol|0.11.7|2|python|all
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|pathlib2|2.3.2|2|python|all
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|scandir|1.7|2|python|all
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|docker-py|1.7.2|2|python3|all
-debian8,debian9,centos7|six|1.10.0|2|python3|all
-debian8,debian9,ubuntu1404,centos7|requests|2.12.4|2|python3|all
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|websocket-client|0.37.0|2|python3|all
-debian8,ubuntu1404,centos7|requests|2.6.1|2|python|all
-centos7|contextlib2|0.5.4|2|python|all
-centos7|isodate|0.5.4|2|python|all
-centos7|python-daemon|2.1.2|1|python|all
-centos7|pbr|0.11.1|2|python|all
-centos7|pyparsing|2.1.10|2|python|all
-centos7|keepalive|0.5|2|python|all
-centos7|networkx|1.11|0|python|all
-centos7|psutil|5.0.1|0|python|all
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|lockfile|0.12.2|2|python|all|--epoch 1
-debian8,debian9,ubuntu1404,ubuntu1604,ubuntu1804,centos7|subprocess32|3.5.1|2|python|all
-all|ruamel.yaml|0.15.77|1|python|amd64|--python-setup-py-arguments --single-version-externally-managed --depends 'python-ruamel.ordereddict >= 0.4.9'
-all|cwltest|1.0.20180518074130|4|python|all|--depends 'python-futures >= 3.0.5' --depends 'python-subprocess32 >= 3.5.0'
-all|junit-xml|1.8|3|python|all
-all|rdflib-jsonld|0.4.0|2|python|all
-all|futures|3.0.5|2|python|all
-all|future|0.16.0|2|python|all
-all|future|0.16.0|2|python3|all
-all|mypy-extensions|0.3.0|1|python|all
-all|prov|1.5.1|0|python|all
-all|bagit|1.6.4|0|python|all
-all|typing-extensions|3.6.5|0|python|all
diff --git a/build/check-copyright-notices b/build/check-copyright-notices
index 2a40b50ec1f5b94c2523e293871d04005d962973..ba08f34bcd46ebffd64adb7d387714f4b97d189b 100755 (executable)
@@ -180,7 +180,7 @@ ${cc}${cc:+ }SPDX-License-Identifier: CC-BY-SA-3.0${ce}"
         Makefile | build/* | lib/* | tools/* | apps/* | services/* | sdk/cli/bin/crunch-job)
             want=${wantGPL}
             ;;
-        crunch_scripts/* | backports/* | docker/* | sdk/*)
+        crunch_scripts/* | docker/* | sdk/*)
             want=${wantApache}
             ;;
         doc/*)
diff --git a/build/package-build-dockerfiles/Makefile b/build/package-build-dockerfiles/Makefile
index 6591319029f131dd81a32665c2bc35fe5ef9a9d8..523205021304cb1462c4da66d966072293972e4a 100644 (file)
@@ -30,9 +30,10 @@ ubuntu1804/generated: common-generated-all
 
 GOTARBALL=go1.10.1.linux-amd64.tar.gz
 NODETARBALL=node-v6.11.2-linux-x64.tar.xz
-RVMKEY=rvm.asc
+RVMKEY1=mpapis.asc
+RVMKEY2=pkuczynski.asc
 
-common-generated-all: common-generated/$(GOTARBALL) common-generated/$(NODETARBALL) common-generated/$(RVMKEY)
+common-generated-all: common-generated/$(GOTARBALL) common-generated/$(NODETARBALL) common-generated/$(RVMKEY1) common-generated/$(RVMKEY2)
 
 common-generated/$(GOTARBALL): common-generated
        wget -cqO common-generated/$(GOTARBALL) http://storage.googleapis.com/golang/$(GOTARBALL)
@@ -40,8 +41,11 @@ common-generated/$(GOTARBALL): common-generated
 common-generated/$(NODETARBALL): common-generated
        wget -cqO common-generated/$(NODETARBALL) https://nodejs.org/dist/v6.11.2/$(NODETARBALL)
 
-common-generated/$(RVMKEY): common-generated
-       wget -cqO common-generated/$(RVMKEY) https://rvm.io/pkuczynski.asc
+common-generated/$(RVMKEY1): common-generated
+       wget -cqO common-generated/$(RVMKEY1) https://rvm.io/mpapis.asc
+
+common-generated/$(RVMKEY2): common-generated
+       wget -cqO common-generated/$(RVMKEY2) https://rvm.io/pkuczynski.asc
 
 common-generated:
        mkdir common-generated
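
Illustrative usage (directory taken from the file list above): the aggregate target now fetches both RVM signing keys before any image build.

    make -C build/package-build-dockerfiles common-generated-all   # downloads mpapis.asc and pkuczynski.asc (plus the Go and Node tarballs)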
diff --git a/build/package-build-dockerfiles/centos7/Dockerfile b/build/package-build-dockerfiles/centos7/Dockerfile
index ba616eef1aeefcf36c67edfaa6cc62dc6226f008..ad6f4e1e8f5051c8bb90e449eb89873ca13ea107 100644 (file)
@@ -6,11 +6,13 @@ FROM centos:7
 MAINTAINER Ward Vandewege <ward@curoverse.com>
 
 # Install dependencies.
-RUN yum -q -y install make automake gcc gcc-c++ libyaml-devel patch readline-devel zlib-devel libffi-devel openssl-devel bzip2 libtool bison sqlite-devel rpm-build git perl-ExtUtils-MakeMaker libattr-devel nss-devel libcurl-devel which tar unzip scl-utils centos-release-scl postgresql-devel python-devel python-setuptools fuse-devel xz-libs git
+RUN yum -q -y install make automake gcc gcc-c++ libyaml-devel patch readline-devel zlib-devel libffi-devel openssl-devel bzip2 libtool bison sqlite-devel rpm-build git perl-ExtUtils-MakeMaker libattr-devel nss-devel libcurl-devel which tar unzip scl-utils centos-release-scl postgresql-devel python-devel python-setuptools fuse-devel xz-libs git python-virtualenv wget
 
 # Install RVM
-ADD generated/rvm.asc /tmp/
-RUN gpg --import /tmp/rvm.asc && \
+ADD generated/mpapis.asc /tmp/
+ADD generated/pkuczynski.asc /tmp/
+RUN gpg --import --no-tty /tmp/mpapis.asc && \
+    gpg --import --no-tty /tmp/pkuczynski.asc && \
     curl -L https://get.rvm.io | bash -s stable && \
     /usr/local/rvm/bin/rvm install 2.3 && \
     /usr/local/rvm/bin/rvm alias create default ruby-2.3 && \
@@ -27,13 +29,17 @@ RUN ln -s /usr/local/node-v6.11.2-linux-x64/bin/* /usr/local/bin/
 
 # Need to "touch" RPM database to workaround bug in interaction between
 # overlayfs and yum (https://bugzilla.redhat.com/show_bug.cgi?id=1213602)
-RUN touch /var/lib/rpm/* && yum -q -y install python33
-RUN scl enable python33 "easy_install-3.3 pip" && easy_install-2.7 pip
+RUN touch /var/lib/rpm/* && yum -q -y install rh-python35
+RUN scl enable rh-python35 "easy_install-3.5 pip" && easy_install-2.7 pip
 
-# Old versions of setuptools cannot build a schema-salad package.
-RUN pip install --upgrade setuptools
+# Add epel, we need it for the python-pam dependency
+RUN wget http://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
+RUN rpm -ivh epel-release-latest-7.noarch.rpm
 
 RUN git clone --depth 1 git://git.curoverse.com/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle && rm -rf /tmp/arvados
 
+# The version of setuptools that comes with CentOS is way too old
+RUN pip install --upgrade setuptools
+
 ENV WORKSPACE /arvados
-CMD ["scl", "enable", "python33", "/usr/local/rvm/bin/rvm-exec default bash /jenkins/run-build-packages.sh --target centos7"]
+CMD ["scl", "enable", "rh-python35", "/usr/local/rvm/bin/rvm-exec default bash /jenkins/run-build-packages.sh --target centos7"]
diff --git a/build/package-build-dockerfiles/debian8/Dockerfile b/build/package-build-dockerfiles/debian8/Dockerfile
index 1244e3f55d4ac244742f9a3bfb803dde9b7aca2d..3f591cdfa14aceab1ff1b4be1c650192d396131f 100644 (file)
@@ -8,11 +8,16 @@ MAINTAINER Ward Vandewege <ward@curoverse.com>
 ENV DEBIAN_FRONTEND noninteractive
 
 # Install dependencies.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools libcurl4-gnutls-dev curl git procps libattr1-dev libfuse-dev libgnutls28-dev libpq-dev python-pip unzip
+RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools python3-pip libcurl4-gnutls-dev curl git procps libattr1-dev libfuse-dev libgnutls28-dev libpq-dev python-pip unzip python3-venv python3-dev
+
+# Install virtualenv
+RUN /usr/bin/pip install virtualenv
 
 # Install RVM
-ADD generated/rvm.asc /tmp/
-RUN gpg --import /tmp/rvm.asc && \
+ADD generated/mpapis.asc /tmp/
+ADD generated/pkuczynski.asc /tmp/
+RUN gpg --import --no-tty /tmp/mpapis.asc && \
+    gpg --import --no-tty /tmp/pkuczynski.asc && \
     curl -L https://get.rvm.io | bash -s stable && \
     /usr/local/rvm/bin/rvm install 2.3 && \
     /usr/local/rvm/bin/rvm alias create default ruby-2.3 && \
@@ -27,9 +32,6 @@ RUN ln -s /usr/local/go/bin/go /usr/local/bin/
 ADD generated/node-v6.11.2-linux-x64.tar.xz /usr/local/
 RUN ln -s /usr/local/node-v6.11.2-linux-x64/bin/* /usr/local/bin/
 
-# Old versions of setuptools cannot build a schema-salad package.
-RUN pip install --upgrade setuptools
-
 RUN git clone --depth 1 git://git.curoverse.com/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle && rm -rf /tmp/arvados
 
 ENV WORKSPACE /arvados
diff --git a/build/package-build-dockerfiles/debian9/Dockerfile b/build/package-build-dockerfiles/debian9/Dockerfile
index 42094d53bb8a4d982afce90b5a65bd28dd3fd763..6f7f3faafe8c4e18f580be1f8e341a978cf38c84 100644 (file)
@@ -9,11 +9,16 @@ MAINTAINER Nico Cesar <nico@curoverse.com>
 ENV DEBIAN_FRONTEND noninteractive
 
 # Install dependencies.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools libcurl4-gnutls-dev curl git procps libattr1-dev libfuse-dev libgnutls28-dev libpq-dev python-pip unzip
+RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools python3-pip libcurl4-gnutls-dev curl git procps libattr1-dev libfuse-dev libgnutls28-dev libpq-dev python-pip unzip python3-venv python3-dev
+
+# Install virtualenv
+RUN /usr/bin/pip install virtualenv
 
 # Install RVM
-ADD generated/rvm.asc /tmp/
-RUN gpg --no-tty --import /tmp/rvm.asc && \
+ADD generated/mpapis.asc /tmp/
+ADD generated/pkuczynski.asc /tmp/
+RUN gpg --import --no-tty /tmp/mpapis.asc && \
+    gpg --import --no-tty /tmp/pkuczynski.asc && \
     curl -L https://get.rvm.io | bash -s stable && \
     /usr/local/rvm/bin/rvm install 2.3 && \
     /usr/local/rvm/bin/rvm alias create default ruby-2.3 && \
@@ -28,9 +33,6 @@ RUN ln -s /usr/local/go/bin/go /usr/local/bin/
 ADD generated/node-v6.11.2-linux-x64.tar.xz /usr/local/
 RUN ln -s /usr/local/node-v6.11.2-linux-x64/bin/* /usr/local/bin/
 
-# Old versions of setuptools cannot build a schema-salad package.
-RUN pip install --upgrade setuptools
-
 RUN git clone --depth 1 git://git.curoverse.com/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle && rm -rf /tmp/arvados
 
 ENV WORKSPACE /arvados
diff --git a/build/package-build-dockerfiles/ubuntu1404/Dockerfile b/build/package-build-dockerfiles/ubuntu1404/Dockerfile
index a6bf65bc11a84c12b32c3231c33257ff28be9c65..4c01c9e8180fefb268ff80e9d930f339144eb879 100644 (file)
@@ -8,11 +8,16 @@ MAINTAINER Ward Vandewege <ward@curoverse.com>
 ENV DEBIAN_FRONTEND noninteractive
 
 # Install dependencies.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools libcurl4-gnutls-dev curl git libattr1-dev libfuse-dev libpq-dev python-pip unzip 
+RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools python3-pip libcurl4-gnutls-dev curl git libattr1-dev libfuse-dev libpq-dev python-pip unzip python3.4-venv python3.4-dev
+
+# Install virtualenv
+RUN /usr/bin/pip install virtualenv
 
 # Install RVM
-ADD generated/rvm.asc /tmp/
-RUN gpg --import /tmp/rvm.asc && \
+ADD generated/mpapis.asc /tmp/
+ADD generated/pkuczynski.asc /tmp/
+RUN gpg --import --no-tty /tmp/mpapis.asc && \
+    gpg --import --no-tty /tmp/pkuczynski.asc && \
     curl -L https://get.rvm.io | bash -s stable && \
     /usr/local/rvm/bin/rvm install 2.3 && \
     /usr/local/rvm/bin/rvm alias create default ruby-2.3 && \
@@ -27,9 +32,6 @@ RUN ln -s /usr/local/go/bin/go /usr/local/bin/
 ADD generated/node-v6.11.2-linux-x64.tar.xz /usr/local/
 RUN ln -s /usr/local/node-v6.11.2-linux-x64/bin/* /usr/local/bin/
 
-# Old versions of setuptools cannot build a schema-salad package.
-RUN pip install --upgrade setuptools
-
 RUN git clone --depth 1 git://git.curoverse.com/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle && rm -rf /tmp/arvados
 
 ENV WORKSPACE /arvados
diff --git a/build/package-build-dockerfiles/ubuntu1604/Dockerfile b/build/package-build-dockerfiles/ubuntu1604/Dockerfile
index 17bf89f3b64a253a0421846974801f2ebc6ffbf0..a83fc77132315651dfd432877c7fecb5b4090f4e 100644 (file)
@@ -8,11 +8,16 @@ MAINTAINER Ward Vandewege <ward@curoverse.com>
 ENV DEBIAN_FRONTEND noninteractive
 
 # Install dependencies.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools libcurl4-gnutls-dev libgnutls-dev curl git libattr1-dev libfuse-dev libpq-dev python-pip unzip tzdata
+RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools python3-pip libcurl4-gnutls-dev libgnutls-dev curl git libattr1-dev libfuse-dev libpq-dev python-pip unzip tzdata python3-venv python3-dev
+
+# Install virtualenv
+RUN /usr/bin/pip install virtualenv
 
 # Install RVM
-ADD generated/rvm.asc /tmp/
-RUN gpg --import /tmp/rvm.asc && \
+ADD generated/mpapis.asc /tmp/
+ADD generated/pkuczynski.asc /tmp/
+RUN gpg --import --no-tty /tmp/mpapis.asc && \
+    gpg --import --no-tty /tmp/pkuczynski.asc && \
     curl -L https://get.rvm.io | bash -s stable && \
     /usr/local/rvm/bin/rvm install 2.3 && \
     /usr/local/rvm/bin/rvm alias create default ruby-2.3 && \
@@ -27,9 +32,6 @@ RUN ln -s /usr/local/go/bin/go /usr/local/bin/
 ADD generated/node-v6.11.2-linux-x64.tar.xz /usr/local/
 RUN ln -s /usr/local/node-v6.11.2-linux-x64/bin/* /usr/local/bin/
 
-# Old versions of setuptools cannot build a schema-salad package.
-RUN pip install --upgrade setuptools
-
 RUN git clone --depth 1 git://git.curoverse.com/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle && rm -rf /tmp/arvados
 
 ENV WORKSPACE /arvados
diff --git a/build/package-build-dockerfiles/ubuntu1804/Dockerfile b/build/package-build-dockerfiles/ubuntu1804/Dockerfile
index c66de46908daef3de6700c9b790917fde6c08e85..d0a099911ce91ac9c0e7892d75072310c91e710e 100644 (file)
@@ -8,11 +8,16 @@ MAINTAINER Ward Vandewege <ward@curoverse.com>
 ENV DEBIAN_FRONTEND noninteractive
 
 # Install dependencies.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-pip libcurl4-gnutls-dev libgnutls28-dev curl git libattr1-dev libfuse-dev libpq-dev python-pip unzip tzdata
+RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-pip libcurl4-gnutls-dev libgnutls28-dev curl git libattr1-dev libfuse-dev libpq-dev python-pip unzip tzdata python3-venv python3-dev
+
+# Install virtualenv
+RUN /usr/bin/pip install virtualenv
 
 # Install RVM
-ADD generated/rvm.asc /tmp/
-RUN gpg --import /tmp/rvm.asc && \
+ADD generated/mpapis.asc /tmp/
+ADD generated/pkuczynski.asc /tmp/
+RUN gpg --import --no-tty /tmp/mpapis.asc && \
+    gpg --import --no-tty /tmp/pkuczynski.asc && \
     curl -L https://get.rvm.io | bash -s stable && \
     /usr/local/rvm/bin/rvm install 2.3 && \
     /usr/local/rvm/bin/rvm alias create default ruby-2.3 && \
@@ -27,9 +32,6 @@ RUN ln -s /usr/local/go/bin/go /usr/local/bin/
 ADD generated/node-v6.11.2-linux-x64.tar.xz /usr/local/
 RUN ln -s /usr/local/node-v6.11.2-linux-x64/bin/* /usr/local/bin/
 
-# Old versions of setuptools cannot build a schema-salad package.
-RUN pip install --upgrade setuptools
-
 RUN git clone --depth 1 git://git.curoverse.com/arvados.git /tmp/arvados && cd /tmp/arvados/services/api && /usr/local/rvm/bin/rvm-exec default bundle && cd /tmp/arvados/apps/workbench && /usr/local/rvm/bin/rvm-exec default bundle && rm -rf /tmp/arvados
 
 ENV WORKSPACE /arvados
diff --git a/build/package-test-dockerfiles/Makefile b/build/package-test-dockerfiles/Makefile
index 7b52511396fbe1b61ef1a87ec8b27893c43eb5f8..c6d5a15fbab5915f0854b300a72d522aa90b5b29 100644 (file)
@@ -28,12 +28,16 @@ ubuntu1804/generated: common-generated-all
        test -d ubuntu1804/generated || mkdir ubuntu1804/generated
        cp -rlt ubuntu1804/generated common-generated/*
 
-RVMKEY=rvm.asc
+RVMKEY1=mpapis.asc
+RVMKEY2=pkuczynski.asc
 
-common-generated-all: common-generated/$(RVMKEY)
+common-generated-all: common-generated/$(RVMKEY1) common-generated/$(RVMKEY2)
 
-common-generated/$(RVMKEY): common-generated
-       wget -cqO common-generated/$(RVMKEY) https://rvm.io/pkuczynski.asc
+common-generated/$(RVMKEY1): common-generated
+       wget -cqO common-generated/$(RVMKEY1) https://rvm.io/mpapis.asc
+
+common-generated/$(RVMKEY2): common-generated
+       wget -cqO common-generated/$(RVMKEY2) https://rvm.io/pkuczynski.asc
 
 common-generated:
        mkdir common-generated
diff --git a/build/package-test-dockerfiles/centos7/Dockerfile b/build/package-test-dockerfiles/centos7/Dockerfile
index 36be0ba32b0ac0cb11ee30a416e6d8c380a96989..0bfe80b70d87178619d0e9a8049cfa23f61a0acd 100644 (file)
@@ -6,16 +6,20 @@ FROM centos:7
 MAINTAINER Ward Vandewege <wvandewege@veritasgenetics.com>
 
 # Install dependencies.
-RUN yum -q -y install scl-utils centos-release-scl which tar
+RUN yum -q -y install scl-utils centos-release-scl which tar wget
 
 # Install RVM
-ADD generated/rvm.asc /tmp/
+ADD generated/mpapis.asc /tmp/
+ADD generated/pkuczynski.asc /tmp/
 RUN touch /var/lib/rpm/* && \
-    gpg --import /tmp/rvm.asc && \
+    gpg --import --no-tty /tmp/mpapis.asc && \
+    gpg --import --no-tty /tmp/pkuczynski.asc && \
     curl -L https://get.rvm.io | bash -s stable && \
     /usr/local/rvm/bin/rvm install 2.3 && \
-    /usr/local/rvm/bin/rvm alias create default ruby-2.3 && \
-    /usr/local/rvm/bin/rvm-exec default gem install bundle && \
-    /usr/local/rvm/bin/rvm-exec default gem install cure-fpm --version 1.6.0b
+    /usr/local/rvm/bin/rvm alias create default ruby-2.3
+
+# Add epel, we need it for the python-pam dependency
+RUN wget http://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
+RUN rpm -ivh epel-release-latest-7.noarch.rpm
 
 COPY localrepo.repo /etc/yum.repos.d/localrepo.repo
diff --git a/build/package-test-dockerfiles/debian8/Dockerfile b/build/package-test-dockerfiles/debian8/Dockerfile
index fdefadea5080e4cacbd2ecbba04b7c5db1fd9b90..2168f725a1a5a3d9cf7fcbfafb50b28733870f79 100644 (file)
@@ -12,8 +12,10 @@ RUN apt-get update && \
     apt-get -y install --no-install-recommends curl ca-certificates
 
 # Install RVM
-ADD generated/rvm.asc /tmp/
-RUN gpg --import /tmp/rvm.asc && \
+ADD generated/mpapis.asc /tmp/
+ADD generated/pkuczynski.asc /tmp/
+RUN gpg --import --no-tty /tmp/mpapis.asc && \
+    gpg --import --no-tty /tmp/pkuczynski.asc && \
     curl -L https://get.rvm.io | bash -s stable && \
     /usr/local/rvm/bin/rvm install 2.3 && \
     /usr/local/rvm/bin/rvm alias create default ruby-2.3
diff --git a/build/package-test-dockerfiles/debian9/Dockerfile b/build/package-test-dockerfiles/debian9/Dockerfile
index c682ccc7cb05f84fbe68d56c60a4846c3448fe69..9c46ef601313939d38549fbd44fa32bee44bfbfb 100644 (file)
@@ -12,8 +12,10 @@ RUN apt-get update && \
     apt-get -y install --no-install-recommends curl ca-certificates gpg procps
 
 # Install RVM
-ADD generated/rvm.asc /tmp/
-RUN gpg --no-tty --import /tmp/rvm.asc && \
+ADD generated/mpapis.asc /tmp/
+ADD generated/pkuczynski.asc /tmp/
+RUN gpg --import --no-tty /tmp/mpapis.asc && \
+    gpg --import --no-tty /tmp/pkuczynski.asc && \
     curl -L https://get.rvm.io | bash -s stable && \
     /usr/local/rvm/bin/rvm install 2.3 && \
     /usr/local/rvm/bin/rvm alias create default ruby-2.3
diff --git a/build/package-test-dockerfiles/ubuntu1404/Dockerfile b/build/package-test-dockerfiles/ubuntu1404/Dockerfile
index 5f5b1d88191b0ddf3019594094a505b0fac13ba5..c05dbee0f3b88bc4bb12964ce72d04efc5a66869 100644 (file)
@@ -12,8 +12,10 @@ RUN apt-get update && \
     apt-get -y install --no-install-recommends curl ca-certificates python2.7-dev python3 python-setuptools python3-setuptools libcurl4-gnutls-dev curl git libattr1-dev libfuse-dev libpq-dev python-pip unzip binutils build-essential ca-certificates
 
 # Install RVM
-ADD generated/rvm.asc /tmp/
-RUN gpg --import /tmp/rvm.asc && \
+ADD generated/mpapis.asc /tmp/
+ADD generated/pkuczynski.asc /tmp/
+RUN gpg --import --no-tty /tmp/mpapis.asc && \
+    gpg --import --no-tty /tmp/pkuczynski.asc && \
     curl -L https://get.rvm.io | bash -s stable && \
     /usr/local/rvm/bin/rvm install 2.3 && \
     /usr/local/rvm/bin/rvm alias create default ruby-2.3
diff --git a/build/package-test-dockerfiles/ubuntu1604/Dockerfile b/build/package-test-dockerfiles/ubuntu1604/Dockerfile
index 1f65c7a474c3226976f1516850b1059094a493c0..615ab1c00e9a48ba598f88c323b0fbc69e084d46 100644 (file)
@@ -12,8 +12,10 @@ RUN apt-get update && \
     apt-get -y install --no-install-recommends curl ca-certificates
 
 # Install RVM
-ADD generated/rvm.asc /tmp/
-RUN gpg --import /tmp/rvm.asc && \
+ADD generated/mpapis.asc /tmp/
+ADD generated/pkuczynski.asc /tmp/
+RUN gpg --import --no-tty /tmp/mpapis.asc && \
+    gpg --import --no-tty /tmp/pkuczynski.asc && \
     curl -L https://get.rvm.io | bash -s stable && \
     /usr/local/rvm/bin/rvm install 2.3 && \
     /usr/local/rvm/bin/rvm alias create default ruby-2.3
diff --git a/build/package-test-dockerfiles/ubuntu1804/Dockerfile b/build/package-test-dockerfiles/ubuntu1804/Dockerfile
index 9d326c72946bb645900637ebc852b87c2d253743..d530d22a292545873331b65e8d27201e1d5ab747 100644 (file)
@@ -12,8 +12,10 @@ RUN apt-get update && \
     apt-get -y install --no-install-recommends curl ca-certificates gnupg2
 
 # Install RVM
-ADD generated/rvm.asc /tmp/
-RUN gpg --import /tmp/rvm.asc && \
+ADD generated/mpapis.asc /tmp/
+ADD generated/pkuczynski.asc /tmp/
+RUN gpg --import --no-tty /tmp/mpapis.asc && \
+    gpg --import --no-tty /tmp/pkuczynski.asc && \
     curl -L https://get.rvm.io | bash -s stable && \
     /usr/local/rvm/bin/rvm install 2.3 && \
     /usr/local/rvm/bin/rvm alias create default ruby-2.3
diff --git a/build/package-testing/deb-common-test-packages.sh b/build/package-testing/deb-common-test-packages.sh
index b5325224ee170bbc6170babfa8c2b95665d2057a..77017ba9702cb870ed83cb982fe3b135a779e6e6 100755 (executable)
@@ -2,9 +2,19 @@
 # Copyright (C) The Arvados Authors. All rights reserved.
 #
 # SPDX-License-Identifier: AGPL-3.0
-
 set -eu
 
+# Set up
+DEBUG=${ARVADOS_DEBUG:-0}
+STDOUT_IF_DEBUG=/dev/null
+STDERR_IF_DEBUG=/dev/null
+DASHQQ_UNLESS_DEBUG=-qq
+if [[ "$DEBUG" != "0" ]]; then
+  STDOUT_IF_DEBUG=/dev/stdout
+  STDERR_IF_DEBUG=/dev/stderr
+  DASHQQ_UNLESS_DEBUG=
+fi
+
 # Multiple .deb based distros symlink to this script, so extract the target
 # from the invocation path.
 target=$(echo $0 | sed 's/.*test-packages-\([^.]*\)\.sh.*/\1/')
@@ -13,8 +23,9 @@ export ARV_PACKAGES_DIR="/arvados/packages/$target"
 
 dpkg-query --show > "$ARV_PACKAGES_DIR/$1.before"
 
-apt-get -qq update
-apt-get --assume-yes --allow-unauthenticated install "$1"
+apt-get $DASHQQ_UNLESS_DEBUG update
+
+apt-get $DASHQQ_UNLESS_DEBUG -y --allow-unauthenticated install "$1" >"$STDOUT_IF_DEBUG" 2>"$STDERR_IF_DEBUG"
 
 dpkg-query --show > "$ARV_PACKAGES_DIR/$1.after"
 
@@ -35,12 +46,14 @@ fi
 
 dpkg-deb -x $debpkg .
 
-while read so && [ -n "$so" ]; do
-    echo
-    echo "== Packages dependencies for $so =="
-    ldd "$so" | awk '($3 ~ /^\//){print $3}' | sort -u | xargs dpkg -S | cut -d: -f1 | sort -u
-done <<EOF
+if [[ "$DEBUG" != "0" ]]; then
+  while read so && [ -n "$so" ]; do
+      echo
+      echo "== Packages dependencies for $so =="
+      ldd "$so" | awk '($3 ~ /^\//){print $3}' | sort -u | xargs dpkg -S | cut -d: -f1 | sort -u
+  done <<EOF
 $(find -name '*.so')
 EOF
+fi
 
 exec /jenkins/package-testing/common-test-packages.sh "$1"
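
Illustrative usage (target and package name are examples; per the script, .deb-based distros invoke it via test-packages-<target>.sh symlinks): output stays quiet by default, and setting ARVADOS_DEBUG restores the verbose apt output and the per-.so dependency listing.

    ARVADOS_DEBUG=1 /jenkins/package-testing/test-packages-debian9.sh python-arvados-python-client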
diff --git a/build/package-testing/rpm-common-test-packages.sh b/build/package-testing/rpm-common-test-packages.sh
index 268611cb7f50eba879e84c2e10f766b8ea4b6ff5..12450dd4f954acf65a58fe637880697be5918861 100755 (executable)
@@ -5,6 +5,15 @@
 
 set -eu
 
+# Set up
+DEBUG=${ARVADOS_DEBUG:-0}
+STDOUT_IF_DEBUG=/dev/null
+STDERR_IF_DEBUG=/dev/null
+if [[ "$DEBUG" != "0" ]]; then
+  STDOUT_IF_DEBUG=/dev/stdout
+  STDERR_IF_DEBUG=/dev/stderr
+fi
+
 target=$(basename "$0" | grep -Eo '\bcentos[[:digit:]]+\b')
 
 yum -q clean all
@@ -14,7 +23,7 @@ export ARV_PACKAGES_DIR="/arvados/packages/$target"
 
 rpm -qa | sort > "$ARV_PACKAGES_DIR/$1.before"
 
-yum install --assumeyes $1
+yum install --assumeyes -e 0 $1
 
 rpm -qa | sort > "$ARV_PACKAGES_DIR/$1.after"
 
@@ -41,10 +50,12 @@ cd /tmp/opts
 
 rpm2cpio $(ls -t "$ARV_PACKAGES_DIR/$1"-*.rpm | head -n1) | cpio -idm 2>/dev/null
 
-find -name '*.so' | while read so; do
-    echo -e "\n== Packages dependencies for $so =="
-    ldd "$so" \
-        | awk '($3 ~ /^\//){print $3}' | sort -u | xargs rpm -qf | sort -u
-done
+if [[ "$DEBUG" != "0" ]]; then
+  find -name '*.so' | while read so; do
+      echo -e "\n== Packages dependencies for $so =="
+      ldd "$so" \
+          | awk '($3 ~ /^\//){print $3}' | sort -u | xargs rpm -qf | sort -u
+  done
+fi
 
 exec /jenkins/package-testing/common-test-packages.sh "$1"
diff --git a/build/package-testing/test-package-arvados-node-manager.sh b/build/package-testing/test-package-arvados-node-manager.sh
index c699fb45b54ae7ff7f89a8567aa4536147ef0bd8..9300f4cc6901af136fc0f93384ca5be8e45aa9d5 100755 (executable)
@@ -3,7 +3,11 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-exec python <<EOF
+set -e
+
+arvados-node-manager --version
+
+exec /usr/share/python2.7/dist/arvados-node-manager/bin/python2.7 <<EOF
 import libcloud.compute.types
 import libcloud.compute.providers
 libcloud.compute.providers.get_driver(libcloud.compute.types.Provider.AZURE_ARM)
diff --git a/build/package-testing/test-package-python27-python-arvados-cwl-runner.sh b/build/package-testing/test-package-python27-python-arvados-cwl-runner.sh
index e499238d89eb2572af6beb6f9d9a05bce1dd8b31..99327c016ad618dbf69971a0960e19def60469e9 100755 (executable)
@@ -6,8 +6,3 @@
 set -e
 
 arvados-cwl-runner --version
-
-exec python <<EOF
-import arvados_cwl
-print "arvados-cwl-runner version", arvados_cwl.__version__
-EOF
diff --git a/build/package-testing/test-package-python27-python-arvados-fuse.sh b/build/package-testing/test-package-python27-python-arvados-fuse.sh
index 152d1eb6979a520eb56fcb60f498752cd4847eb9..81929857b8eaa6791a3e47e196f578de6f17b9a0 100755 (executable)
@@ -3,7 +3,6 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-exec python <<EOF
-import arvados_fuse
-print "Successfully imported arvados_fuse"
-EOF
+set -e
+
+arv-mount --version
diff --git a/build/package-testing/test-package-python27-python-arvados-python-client.sh b/build/package-testing/test-package-python27-python-arvados-python-client.sh
index 2603e034aad3cd35b79c54ee3e53d4eedb569df6..2c92a3efb354f5f0914e74363852ed315091d475 100755 (executable)
@@ -3,7 +3,11 @@
 #
 # SPDX-License-Identifier: AGPL-3.0
 
-exec python2.7 <<EOF
+set -e
+
+arv-put --version
+
+/usr/share/python2.7/dist/python-arvados-python-client/bin/python2.7 << EOF
 import arvados
 print "Successfully imported arvados"
 EOF
diff --git a/build/run-build-docker-jobs-image.sh b/build/run-build-docker-jobs-image.sh
index 83bb5ae7165cb2cfd11879e85411253472887b26..7186a2209129a08c9c6fbd6a094ce6f0a9dac3c0 100755 (executable)
@@ -136,7 +136,7 @@ fi
 echo cwl_runner_version $cwl_runner_version python_sdk_version $python_sdk_version
 
 if [[ "${python_sdk_version}" != "${ARVADOS_BUILDING_VERSION}" ]]; then
-       python_sdk_version="${python_sdk_version}-2"
+       python_sdk_version="${python_sdk_version}-1"
 else
        python_sdk_version="${ARVADOS_BUILDING_VERSION}-${ARVADOS_BUILDING_ITERATION}"
 fi
@@ -144,7 +144,7 @@ fi
 cwl_runner_version_orig=$cwl_runner_version
 
 if [[ "${cwl_runner_version}" != "${ARVADOS_BUILDING_VERSION}" ]]; then
-       cwl_runner_version="${cwl_runner_version}-4"
+       cwl_runner_version="${cwl_runner_version}-1"
 else
        cwl_runner_version="${ARVADOS_BUILDING_VERSION}-${ARVADOS_BUILDING_ITERATION}"
 fi
diff --git a/build/run-build-packages-one-target.sh b/build/run-build-packages-one-target.sh
index 46379e7b9ad0c5b29d1cd85797ad4cdbeb7e2e99..9b21b58e2af75d8968c68f7280adb707c8c444ba 100755 (executable)
@@ -74,6 +74,7 @@ while [ $# -gt 0 ]; do
             ;;
         --only-test)
             test_packages=1
+            testing_one_package=1
             packages="$2"; shift
             ;;
         --force-test)
@@ -121,33 +122,33 @@ if [[ -n "$ARVADOS_BUILDING_VERSION" ]]; then
 fi
 
 if [[ -n "$test_packages" ]]; then
-    if [[ -n "$(find $WORKSPACE/packages/$TARGET -name '*.rpm')" ]] ; then
-       set +e
-       /usr/bin/which createrepo >/dev/null
-       if [[ "$?" != "0" ]]; then
-               echo >&2
-               echo >&2 "Error: please install createrepo. E.g. sudo apt-get install createrepo"
-               echo >&2
-               exit 1
-       fi
-       set -e
-        createrepo $WORKSPACE/packages/$TARGET
+  if [[ -n "$(find $WORKSPACE/packages/$TARGET -name '*.rpm')" ]] ; then
+    set +e
+    /usr/bin/which createrepo >/dev/null
+    if [[ "$?" != "0" ]]; then
+      echo >&2
+      echo >&2 "Error: please install createrepo. E.g. sudo apt-get install createrepo"
+      echo >&2
+      exit 1
     fi
+    set -e
+    createrepo $WORKSPACE/packages/$TARGET
+  fi
 
-    if [[ -n "$(find $WORKSPACE/packages/$TARGET -name '*.deb')" ]] ; then
-        (cd $WORKSPACE/packages/$TARGET
-          dpkg-scanpackages .  2> >(grep -v 'warning' 1>&2) | tee Packages | gzip -c > Packages.gz
-          apt-ftparchive -o APT::FTPArchive::Release::Origin=Arvados release . > Release
-        )
-    fi
+  if [[ -n "$(find $WORKSPACE/packages/$TARGET -name '*.deb')" ]] ; then
+    (cd $WORKSPACE/packages/$TARGET
+      dpkg-scanpackages .  2> >(grep -v 'warning' 1>&2) | tee Packages | gzip -c > Packages.gz
+      apt-ftparchive -o APT::FTPArchive::Release::Origin=Arvados release . > Release
+    )
+  fi
 
-    COMMAND="/jenkins/package-testing/test-packages-$TARGET.sh"
-    IMAGE="arvados/package-test:$TARGET"
+  COMMAND="/jenkins/package-testing/test-packages-$TARGET.sh"
+  IMAGE="arvados/package-test:$TARGET"
 else
-    IMAGE="arvados/build:$TARGET"
-    if [[ "$COMMAND" != "" ]]; then
-        COMMAND="/usr/local/rvm/bin/rvm-exec default bash /jenkins/$COMMAND --target $TARGET$DEBUG"
-    fi
+  IMAGE="arvados/build:$TARGET"
+  if [[ "$COMMAND" != "" ]]; then
+    COMMAND="/usr/local/rvm/bin/rvm-exec default bash /jenkins/$COMMAND --target $TARGET$DEBUG"
+  fi
 fi
 
 JENKINS_DIR=$(dirname "$(readlink -e "$0")")
@@ -218,6 +219,17 @@ if [[ -n "$test_packages" ]]; then
             continue
           fi
         fi
+        # If we're testing all packages, we should not error out on packages that don't exist.
+        # If we are testing one specific package only (i.e. --only-test was given), we should
+        # error out if that package does not exist.
+        if [[ -z "$testing_one_package" ]]; then
+          MATCH=`find ${WORKSPACE}/packages/ -regextype posix-extended -regex .*${TARGET}/$p.*\\(deb\\|rpm\\)`
+          if [[ "$MATCH" == "" ]]; then
+            # No new package has been built that needs testing
+            echo "Skipping $p test because no package file is available to test."
+            continue
+          fi
+        fi
         echo
         echo "START: $p test on $IMAGE" >&2
         # ulimit option can be removed when debian8 and ubuntu1404 are retired
@@ -237,7 +249,9 @@ if [[ -n "$test_packages" ]]; then
         fi
     done
 
-    touch ${WORKSPACE}/packages/.last_test_${TARGET}
+    if [[ "$FINAL_EXITCODE" == "0" ]]; then
+      touch ${WORKSPACE}/packages/.last_test_${TARGET}
+    fi
 else
     echo
     echo "START: build packages on $IMAGE" >&2
index 35f8104450339c80c4f9e2ff92f50cd200f2b1cc..4c5f39a373e66cdf160ac71aaa7edf7fc47cd2e3 100755 (executable)
@@ -118,9 +118,6 @@ if [[ "$DEBUG" != 0 ]]; then
     DASHQ_UNLESS_DEBUG=
 fi
 
-EASY_INSTALL2=$(find_easy_install -$PYTHON2_VERSION "")
-EASY_INSTALL3=$(find_easy_install -$PYTHON3_VERSION 3)
-
 RUN_BUILD_PACKAGES_PATH="`dirname \"$0\"`"
 RUN_BUILD_PACKAGES_PATH="`( cd \"$RUN_BUILD_PACKAGES_PATH\" && pwd )`"  # absolutized and normalized
 if [ -z "$RUN_BUILD_PACKAGES_PATH" ] ; then
index f316c563bd53e1ea6ddac44ca0928c6b299d8ffe..6264e93f0f1e9b3d2d6634de35a76a0a55fd2588 100755 (executable)
@@ -30,8 +30,18 @@ WORKSPACE=path         Path to the Arvados source tree to build packages from
 
 EOF
 
-EXITCODE=0
+# Start of user configuration
+
+# set to --no-cache-dir to disable pip caching
+CACHE_FLAG=
+
+MAINTAINER="Ward Vandewege <wvandewege@veritasgenetics.com>"
+VENDOR="Veritas Genetics, Inc."
+
+# End of user configuration
+
 DEBUG=${ARVADOS_DEBUG:-0}
+EXITCODE=0
 TARGET=debian8
 COMMAND=
 
@@ -117,7 +127,7 @@ case "$TARGET" in
         PYTHON2_INSTALL_LIB=lib/python$PYTHON2_VERSION/site-packages
         PYTHON3_PACKAGE=$(rpm -qf "$(which python$PYTHON3_VERSION)" --queryformat '%{NAME}\n')
         PYTHON3_PKG_PREFIX=$PYTHON3_PACKAGE
-        PYTHON3_PREFIX=/opt/rh/python33/root/usr
+        PYTHON3_PREFIX=/opt/rh/rh-python35/root/usr
         PYTHON3_INSTALL_LIB=lib/python$PYTHON3_VERSION/site-packages
         export PYCURL_SSL_LIBRARY=nss
         ;;
@@ -215,7 +225,7 @@ if [[ -z "$ONLY_BUILD" ]] || [[ "libarvados-perl" = "$ONLY_BUILD" ]] ; then
     perl Makefile.PL INSTALL_BASE=install >"$STDOUT_IF_DEBUG" && \
         make install INSTALLDIRS=perl >"$STDOUT_IF_DEBUG" && \
         fpm_build install/lib/=/usr/share libarvados-perl \
-        "Curoverse, Inc." dir "$(version_from_git)" install/man/=/usr/share/man \
+        dir "$(version_from_git)" install/man/=/usr/share/man \
         "$WORKSPACE/apache-2.0.txt=/usr/share/doc/libarvados-perl/apache-2.0.txt" && \
         mv --no-clobber libarvados-perl*.$FORMAT "$WORKSPACE/packages/$TARGET/"
   fi
@@ -238,21 +248,6 @@ handle_ruby_gem arvados-login-sync
 # Python packages
 debug_echo -e "\nPython packages\n"
 
-cd "$WORKSPACE/sdk/pam"
-handle_python_package
-
-cd "$WORKSPACE/sdk/python"
-handle_python_package
-
-cd "$WORKSPACE/sdk/cwl"
-handle_python_package
-
-cd "$WORKSPACE/services/fuse"
-handle_python_package
-
-cd "$WORKSPACE/services/nodemanager"
-handle_python_package
-
 # arvados-src
 (
     cd "$WORKSPACE"
@@ -277,10 +272,9 @@ handle_python_package
       cd "$SRC_BUILD_DIR"
       PKG_VERSION=$(version_from_git)
       cd $WORKSPACE/packages/$TARGET
-      fpm_build $SRC_BUILD_DIR/=/usr/local/arvados/src arvados-src 'Curoverse, Inc.' 'dir' "$PKG_VERSION" "--exclude=usr/local/arvados/src/.git" "--url=https://arvados.org" "--license=GNU Affero General Public License, version 3.0" "--description=The Arvados source code" "--architecture=all"
+      fpm_build $SRC_BUILD_DIR/=/usr/local/arvados/src arvados-src 'dir' "$PKG_VERSION" "--exclude=usr/local/arvados/src/.git" "--url=https://arvados.org" "--license=GNU Affero General Public License, version 3.0" "--description=The Arvados source code" "--architecture=all"
 
       rm -rf "$SRC_BUILD_DIR"
-
     fi
 )
 
@@ -294,9 +288,8 @@ package_go_binary cmd/arvados-server arvados-server \
     "Arvados server daemons"
 package_go_binary cmd/arvados-server arvados-controller \
     "Arvados cluster controller daemon"
-# No package until #14325
-#package_go_binary cmd/arvados-server crunch-dispatch-cloud \
-#    "Arvados cluster cloud dispatch"
+package_go_binary cmd/arvados-server arvados-dispatch-cloud \
+    "Arvados cluster cloud dispatch"
 package_go_binary sdk/go/crunchrunner crunchrunner \
     "Crunchrunner executes a command inside a container and uploads the output"
 package_go_binary services/arv-git-httpd arvados-git-httpd \
@@ -330,270 +323,27 @@ package_go_binary tools/keep-rsync keep-rsync \
 package_go_binary tools/keep-exercise keep-exercise \
     "Performance testing tool for Arvados Keep"
 
-
-# we need explicit debian_revision values in the dependencies for ruamel.yaml, because we have a package iteration
-# greater than zero. So we parse setup.py, get the ruamel.yaml dependencies, tell fpm not to automatically include
-# them in the package being built, and re-add them manually with an appropriate debian_revision value.
-# See #14552 for the reason for this (nasty) workaround. We use ${ruamel_depends[@]} in a few places further down
-# in this script.
-# Ward, 2018-11-28
-IFS=', ' read -r -a deps <<< `grep ruamel.yaml $WORKSPACE/sdk/python/setup.py |cut -f 3 -dl |sed -e "s/'//g"`
-declare -a ruamel_depends=()
-for i in ${deps[@]}; do
-  i=`echo "$i" | sed -e 's!\([0-9]\)! \1!'`
-  if [[ $i =~ .*\>.* ]]; then
-    ruamel_depends+=(--depends "python-ruamel.yaml $i-1")
-  elif [[ $i =~ .*\<.* ]]; then
-    ruamel_depends+=(--depends "python-ruamel.yaml $i-9")
-  else
-    echo "Encountered ruamel dependency that I can't parse. Aborting..."
-    exit 1
-  fi
-done
-
-
 # The Python SDK
-# Please resist the temptation to add --no-python-fix-name to the fpm call here
-# (which would remove the python- prefix from the package name), because this
-# package is a dependency of arvados-fuse, and fpm can not omit the python-
-# prefix from only one of the dependencies of a package...  Maybe I could
-# whip up a patch and send it upstream, but that will be for another day. Ward,
-# 2014-05-15
-cd $WORKSPACE/packages/$TARGET
-rm -rf "$WORKSPACE/sdk/python/build"
-arvados_python_client_version=${ARVADOS_BUILDING_VERSION:-$(awk '($1 == "Version:"){print $2}' $WORKSPACE/sdk/python/arvados_python_client.egg-info/PKG-INFO)}
-test_package_presence ${PYTHON2_PKG_PREFIX}-arvados-python-client "$arvados_python_client_version" python
-if [[ "$?" == "0" ]]; then
-
-  fpm_build $WORKSPACE/sdk/python "${PYTHON2_PKG_PREFIX}-arvados-python-client" 'Curoverse, Inc.' 'python' "$arvados_python_client_version" "--url=https://arvados.org" "--description=The Arvados Python SDK" --depends "${PYTHON2_PKG_PREFIX}-setuptools" --deb-recommends=git  --python-disable-dependency ruamel.yaml "${ruamel_depends[@]}"
-fi
-
-# cwl-runner
-cd $WORKSPACE/packages/$TARGET
-rm -rf "$WORKSPACE/sdk/cwl/build"
-arvados_cwl_runner_version=${ARVADOS_BUILDING_VERSION:-$(awk '($1 == "Version:"){print $2}' $WORKSPACE/sdk/cwl/arvados_cwl_runner.egg-info/PKG-INFO)}
-declare -a iterargs=()
-if [[ -z "$ARVADOS_BUILDING_VERSION" ]]; then
-    arvados_cwl_runner_iteration=4
-    iterargs+=(--iteration $arvados_cwl_runner_iteration)
-else
-    arvados_cwl_runner_iteration=
-fi
-test_package_presence ${PYTHON2_PKG_PREFIX}-arvados-cwl-runner "$arvados_cwl_runner_version" python "$arvados_cwl_runner_iteration"
-if [[ "$?" == "0" ]]; then
-  fpm_build $WORKSPACE/sdk/cwl "${PYTHON2_PKG_PREFIX}-arvados-cwl-runner" 'Curoverse, Inc.' 'python' "$arvados_cwl_runner_version" "--url=https://arvados.org" "--description=The Arvados CWL runner" --depends "${PYTHON2_PKG_PREFIX}-setuptools" --depends "${PYTHON2_PKG_PREFIX}-subprocess32 >= 3.5.0" --depends "${PYTHON2_PKG_PREFIX}-pathlib2" --depends "${PYTHON2_PKG_PREFIX}-scandir" --python-disable-dependency ruamel.yaml "${ruamel_depends[@]}" "${iterargs[@]}"
-fi
-
-# schema_salad. This is a python dependency of arvados-cwl-runner,
-# but we can't use the usual PYTHONPACKAGES way to build this package due to the
-# intricacies of how version numbers get generated in setup.py: we need a specific version,
-# e.g. 1.7.20160316203940. If we don't explicitly list that version with the -v
-# argument to fpm, and instead specify it as schema_salad==1.7.20160316203940, we get
-# a package with version 1.7. That's because our gittagger hack is not being
-# picked up by self.distribution.get_version(), which is called from
-# https://github.com/jordansissel/fpm/blob/master/lib/fpm/package/pyfpm/get_metadata.py
-# by means of this command:
-#
-# python2.7 setup.py --command-packages=pyfpm get_metadata --output=metadata.json
-#
-# So we build this thing separately.
-#
-# Ward, 2016-03-17
-saladversion=$(cat "$WORKSPACE/sdk/cwl/setup.py" | grep schema-salad== | sed "s/.*==\(.*\)'.*/\1/")
-test_package_presence python-schema-salad "$saladversion" python 2
-if [[ "$?" == "0" ]]; then
-  fpm_build schema_salad "" "" python $saladversion --depends "${PYTHON2_PKG_PREFIX}-lockfile >= 1:0.12.2-2" --depends "${PYTHON2_PKG_PREFIX}-avro = 1.8.1-2" --iteration 2
-fi
+fpm_build_virtualenv "arvados-python-client" "sdk/python"
+fpm_build_virtualenv "arvados-python-client" "sdk/python" "python3"
 
-# And for cwltool we have the same problem as for schema_salad. Ward, 2016-03-17
-cwltoolversion=$(cat "$WORKSPACE/sdk/cwl/setup.py" | grep cwltool== | sed "s/.*==\(.*\)'.*/\1/")
-test_package_presence python-cwltool "$cwltoolversion" python 3
-if [[ "$?" == "0" ]]; then
-  fpm_build cwltool "" "" python $cwltoolversion --iteration 3 --python-disable-dependency ruamel.yaml "${ruamel_depends[@]}"
-fi
+# Arvados cwl runner
+fpm_build_virtualenv "arvados-cwl-runner" "sdk/cwl"
 
 # The PAM module
-if [[ $TARGET =~ debian|ubuntu ]]; then
-    cd $WORKSPACE/packages/$TARGET
-    rm -rf "$WORKSPACE/sdk/pam/build"
-    libpam_arvados_version=$(awk '($1 == "Version:"){print $2}' $WORKSPACE/sdk/pam/arvados_pam.egg-info/PKG-INFO)
-    test_package_presence libpam-arvados "$libpam_arvados_version" python
-    if [[ "$?" == "0" ]]; then
-      fpm_build $WORKSPACE/sdk/pam libpam-arvados 'Curoverse, Inc.' 'python' "$libpam_arvados_version" "--url=https://arvados.org" "--description=PAM module for authenticating shell logins using Arvados API tokens" --depends libpam-python
-    fi
-fi
+fpm_build_virtualenv "libpam-arvados" "sdk/pam"
 
 # The FUSE driver
-# Please see comment about --no-python-fix-name above; we stay consistent and do
-# not omit the python- prefix first.
-cd $WORKSPACE/packages/$TARGET
-rm -rf "$WORKSPACE/services/fuse/build"
-arvados_fuse_version=${ARVADOS_BUILDING_VERSION:-$(awk '($1 == "Version:"){print $2}' $WORKSPACE/services/fuse/arvados_fuse.egg-info/PKG-INFO)}
-test_package_presence "${PYTHON2_PKG_PREFIX}-arvados-fuse" "$arvados_fuse_version" python
-if [[ "$?" == "0" ]]; then
-  fpm_build $WORKSPACE/services/fuse "${PYTHON2_PKG_PREFIX}-arvados-fuse" 'Curoverse, Inc.' 'python' "$arvados_fuse_version" "--url=https://arvados.org" "--description=The Keep FUSE driver" --depends "${PYTHON2_PKG_PREFIX}-setuptools"
-fi
+fpm_build_virtualenv "arvados-fuse" "services/fuse"
 
 # The node manager
-cd $WORKSPACE/packages/$TARGET
-rm -rf "$WORKSPACE/services/nodemanager/build"
-nodemanager_version=${ARVADOS_BUILDING_VERSION:-$(awk '($1 == "Version:"){print $2}' $WORKSPACE/services/nodemanager/arvados_node_manager.egg-info/PKG-INFO)}
-iteration="${ARVADOS_BUILDING_ITERATION:-1}"
-test_package_presence arvados-node-manager "$nodemanager_version" python "$iteration"
-if [[ "$?" == "0" ]]; then
-  fpm_build $WORKSPACE/services/nodemanager arvados-node-manager 'Curoverse, Inc.' 'python' "$nodemanager_version" "--url=https://arvados.org" "--description=The Arvados node manager" --depends "${PYTHON2_PKG_PREFIX}-setuptools" --iteration "$iteration"
-fi
+fpm_build_virtualenv "arvados-node-manager" "services/nodemanager"
 
 # The Docker image cleaner
-cd $WORKSPACE/packages/$TARGET
-rm -rf "$WORKSPACE/services/dockercleaner/build"
-dockercleaner_version=${ARVADOS_BUILDING_VERSION:-$(awk '($1 == "Version:"){print $2}' $WORKSPACE/services/dockercleaner/arvados_docker_cleaner.egg-info/PKG-INFO)}
-iteration="${ARVADOS_BUILDING_ITERATION:-4}"
-test_package_presence arvados-docker-cleaner "$dockercleaner_version" python "$iteration"
-if [[ "$?" == "0" ]]; then
-  fpm_build $WORKSPACE/services/dockercleaner arvados-docker-cleaner 'Curoverse, Inc.' 'python3' "$dockercleaner_version" "--url=https://arvados.org" "--description=The Arvados Docker image cleaner" --depends "${PYTHON3_PKG_PREFIX}-websocket-client = 0.37.0" --iteration "$iteration"
-fi
+fpm_build_virtualenv "arvados-docker-cleaner" "services/dockercleaner" "python3"
 
 # The Arvados crunchstat-summary tool
-cd $WORKSPACE/packages/$TARGET
-crunchstat_summary_version=${ARVADOS_BUILDING_VERSION:-$(awk '($1 == "Version:"){print $2}' $WORKSPACE/tools/crunchstat-summary/crunchstat_summary.egg-info/PKG-INFO)}
-iteration="${ARVADOS_BUILDING_ITERATION:-2}"
-test_package_presence "$PYTHON2_PKG_PREFIX"-crunchstat-summary "$crunchstat_summary_version" python "$iteration"
-if [[ "$?" == "0" ]]; then
-  rm -rf "$WORKSPACE/tools/crunchstat-summary/build"
-  fpm_build $WORKSPACE/tools/crunchstat-summary ${PYTHON2_PKG_PREFIX}-crunchstat-summary 'Curoverse, Inc.' 'python' "$crunchstat_summary_version" "--url=https://arvados.org" "--description=Crunchstat-summary reads Arvados Crunch log files and summarize resource usage" --iteration "$iteration"
-fi
-
-# Forked libcloud
-if test_package_presence "$PYTHON2_PKG_PREFIX"-apache-libcloud "$LIBCLOUD_PIN" python 2
-then
-  LIBCLOUD_DIR=$(mktemp -d)
-  (
-      cd $LIBCLOUD_DIR
-      git clone $DASHQ_UNLESS_DEBUG https://github.com/curoverse/libcloud.git .
-      git checkout $DASHQ_UNLESS_DEBUG apache-libcloud-$LIBCLOUD_PIN
-      # libcloud is absurdly noisy without -q, so force -q here
-      OLD_DASHQ_UNLESS_DEBUG=$DASHQ_UNLESS_DEBUG
-      DASHQ_UNLESS_DEBUG=-q
-      handle_python_package
-      DASHQ_UNLESS_DEBUG=$OLD_DASHQ_UNLESS_DEBUG
-  )
-
-  # libcloud >= 2.3.0 now requires python-requests 2.4.3 or higher, otherwise
-  # it throws
-  #   ImportError: No module named packages.urllib3.poolmanager
-  # when loaded. We only see this problem on ubuntu1404, because that is our
-  # only supported distribution that ships with a python-requests older than
-  # 2.4.3.
-  fpm_build $LIBCLOUD_DIR "$PYTHON2_PKG_PREFIX"-apache-libcloud "" python "" --iteration 2 --depends 'python-requests >= 2.4.3'
-  rm -rf $LIBCLOUD_DIR
-fi
-
-# Python 2 dependencies
-declare -a PIP_DOWNLOAD_SWITCHES=(--no-deps)
-# Add --no-use-wheel if this pip knows it.
-pip install --no-use-wheel >/dev/null 2>&1
-case "$?" in
-    0) PIP_DOWNLOAD_SWITCHES+=(--no-use-wheel) ;;
-    1) ;;
-    2) ;;
-    *) echo "WARNING: 'pip install --no-use-wheel' test returned unknown exit code $?" ;;
-esac
-
-while read -r line || [[ -n "$line" ]]; do
-#  echo "Text read from file: $line"
-  if [[ "$line" =~ ^# ]]; then
-    continue
-  fi
-  IFS='|'; arr=($line); unset IFS
-
-  dist=${arr[0]}
-
-  IFS=',';dists=($dist); unset IFS
-
-  MATCH=0
-  for d in "${dists[@]}"; do
-    if [[ "$d" == "$TARGET" ]] || [[ "$d" == "all" ]]; then
-      MATCH=1
-    fi
-  done
-
-  if [[ "$MATCH" != "1" ]]; then
-    continue
-  fi
-  name=${arr[1]}
-  version=${arr[2]}
-  iteration=${arr[3]}
-  pkgtype=${arr[4]}
-  arch=${arr[5]}
-  extra=${arr[6]}
-  declare -a 'extra_arr=('"$extra"')'
-
-  if [[ "$FORMAT" == "rpm" ]]; then
-    if [[ "$arch" == "all" ]]; then
-      arch="noarch"
-    fi
-    if [[ "$arch" == "amd64" ]]; then
-      arch="x86_64"
-    fi
-  fi
-
-  if [[ "$pkgtype" == "python" ]]; then
-    outname=$(echo "$name" | sed -e 's/^python-//' -e 's/_/-/g' -e "s/^/${PYTHON2_PKG_PREFIX}-/")
-  else
-    outname=$(echo "$name" | sed -e 's/^python-//' -e 's/_/-/g' -e "s/^/${PYTHON3_PKG_PREFIX}-/")
-  fi
-
-  if [[ -n "$ONLY_BUILD" ]] && [[ "$outname" != "$ONLY_BUILD" ]] ; then
-      continue
-  fi
-
-  case "$name" in
-      httplib2|google-api-python-client)
-          test_package_presence $outname $version $pkgtype $iteration $arch
-          if [[ "$?" == "0" ]]; then
-            # Work around 0640 permissions on some package files.
-            # See #7591 and #7991.
-            pyfpm_workdir=$(mktemp --tmpdir -d pyfpm-XXXXXX) && (
-                set -e
-                cd "$pyfpm_workdir"
-                PIP_VERSION=`python$PYTHON2_VERSION -c "import pip; print(pip.__version__)" |cut -f1 -d.`
-                if (( $PIP_VERSION < 8 )); then
-                  pip install "${PIP_DOWNLOAD_SWITCHES[@]}" --download . "$name==$version"
-                else
-                  pip download --no-deps --no-binary :all: "$name==$version"
-                fi
-                # Sometimes pip gives us a tarball, sometimes a zip file...
-                DOWNLOADED=`ls $name-*`
-                [[ "$DOWNLOADED" =~ ".tar" ]] && tar -xf $DOWNLOADED
-                [[ "$DOWNLOADED" =~ ".zip" ]] && unzip $DOWNLOADED
-                cd "$name"-*/
-                "python$PYTHON2_VERSION" setup.py $DASHQ_UNLESS_DEBUG egg_info build
-                chmod -R go+rX .
-                set +e
-                fpm_build . "$outname" "" "$pkgtype" "$version" --iteration "$iteration" "${extra_arr[@]}"
-                # The upload step uses the package timestamp to determine
-                # if it is new.  --no-clobber plays nice with that.
-                mv --no-clobber "$outname"*.$FORMAT "$WORKSPACE/packages/$TARGET"
-            )
-            if [ 0 != "$?" ]; then
-                echo "ERROR: $name build process failed"
-                EXITCODE=1
-            fi
-            if [ -n "$pyfpm_workdir" ]; then
-                rm -rf "$pyfpm_workdir"
-            fi
-          fi
-          ;;
-      *)
-          test_package_presence $outname $version $pkgtype $iteration $arch
-          if [[ "$?" == "0" ]]; then
-            fpm_build "$name" "$outname" "" "$pkgtype" "$version" --iteration "$iteration" "${extra_arr[@]}"
-          fi
-          ;;
-  esac
-
-done <`dirname "$(readlink -f "$0")"`"/build.list"
+fpm_build_virtualenv "crunchstat-summary" "tools/crunchstat-summary"
 
 # Build the API server package
 test_rails_package_presence arvados-api-server "$WORKSPACE/services/api"
index b595cc8a06ee1ff8563289e7f197c00bd0fa963e..40589fd565c258240fed5fe1057fad5ab38993b1 100755 (executable)
@@ -161,7 +161,7 @@ package_go_binary() {
     fi
     switches+=("$WORKSPACE/${license_file}=/usr/share/doc/$prog/${license_file}")
 
-    fpm_build "$GOPATH/bin/${basename}=/usr/bin/${prog}" "${prog}" 'Curoverse, Inc.' dir "${version}" "--url=https://arvados.org" "--license=GNU Affero General Public License, version 3.0" "--description=${description}" "${switches[@]}"
+    fpm_build "$GOPATH/bin/${basename}=/usr/bin/${prog}" "${prog}" dir "${version}" "--url=https://arvados.org" "--license=GNU Affero General Public License, version 3.0" "--description=${description}" "${switches[@]}"
 }
 
 default_iteration() {
@@ -232,11 +232,6 @@ test_package_presence() {
       rpm_architecture="x86_64"
       deb_architecture="amd64"
 
-      if [[ "$pkgtype" =~ ^(python|python3)$ ]]; then
-        rpm_architecture="noarch"
-        deb_architecture="all"
-      fi
-
       if [[ "$pkgtype" =~ ^(src)$ ]]; then
         rpm_architecture="noarch"
         deb_architecture="all"
@@ -279,9 +274,9 @@ test_package_presence() {
         echo ${repo_pkg_list} |grep -q ${complete_pkgname}
         if [ $? -eq 0 ] ; then
           echo "Package $complete_pkgname exists, not rebuilding!"
-          curl -o ./${complete_pkgname} http://apt.arvados.org/pool/${D}/main/${repo_subdir}/${complete_pkgname}
+          curl -s -o ./${complete_pkgname} http://apt.arvados.org/pool/${D}/main/${repo_subdir}/${complete_pkgname}
           return 1
-       elif test -f "$WORKSPACE/packages/$TARGET/processed/${complete_pkgname}" ; then
+        elif test -f "$WORKSPACE/packages/$TARGET/processed/${complete_pkgname}" ; then
           echo "Package $complete_pkgname exists, not rebuilding!"
           return 1
         else
@@ -292,11 +287,14 @@ test_package_presence() {
     else
       centos_repo="http://rpm.arvados.org/CentOS/7/dev/x86_64/"
 
-      repo_pkg_list=$(curl -o - ${centos_repo})
+      repo_pkg_list=$(curl -s -o - ${centos_repo})
       echo ${repo_pkg_list} |grep -q ${complete_pkgname}
       if [ $? -eq 0 ]; then
         echo "Package $complete_pkgname exists, not rebuilding!"
-        curl -o ./${complete_pkgname} ${centos_repo}${complete_pkgname}
+        curl -s -o ./${complete_pkgname} ${centos_repo}${complete_pkgname}
+        return 1
+      elif test -f "$WORKSPACE/packages/$TARGET/processed/${complete_pkgname}" ; then
+        echo "Package $complete_pkgname exists, not rebuilding!"
         return 1
       else
         echo "Package $complete_pkgname not found, building"
@@ -332,7 +330,7 @@ handle_rails_package() {
         return 1
     fi
     local railsdir="/var/www/${pkgname%-server}/current"
-    local -a pos_args=("$srcdir/=$railsdir" "$pkgname" "Curoverse, Inc." dir "$version")
+    local -a pos_args=("$srcdir/=$railsdir" "$pkgname" dir "$version")
     local license_arg="$license_path=$railsdir/$(basename "$license_path")"
     local -a switches=(--after-install "$scripts_dir/postinst"
                        --before-remove "$scripts_dir/prerm"
@@ -359,6 +357,291 @@ handle_rails_package() {
     rm -rf "$scripts_dir"
 }
 
+# Build python packages with a virtualenv built-in
+fpm_build_virtualenv () {
+  PKG=$1
+  shift
+  PKG_DIR=$1
+  shift
+  PACKAGE_TYPE=${1:-python}
+  shift
+
+  # Set up
+  STDOUT_IF_DEBUG=/dev/null
+  STDERR_IF_DEBUG=/dev/null
+  DASHQ_UNLESS_DEBUG=-q
+  if [[ "$DEBUG" != "0" ]]; then
+      STDOUT_IF_DEBUG=/dev/stdout
+      STDERR_IF_DEBUG=/dev/stderr
+      DASHQ_UNLESS_DEBUG=
+  fi
+  if [[ "$ARVADOS_BUILDING_ITERATION" == "" ]]; then
+    ARVADOS_BUILDING_ITERATION=1
+  fi
+
+  local python=""
+  case "$PACKAGE_TYPE" in
+    python3)
+        python=python3
+        if [[ "$FORMAT" != "rpm" ]]; then
+          pip=pip3
+        else
+          # In CentOS, we use a different mechanism to get the right version of pip
+          pip=pip
+        fi
+        PACKAGE_PREFIX=$PYTHON3_PKG_PREFIX
+        ;;
+    python)
+        # All Arvados Python2 packages depend on Python 2.7.
+        # Make sure we build with that for consistency.
+        python=python2.7
+        pip=pip
+        PACKAGE_PREFIX=$PYTHON2_PKG_PREFIX
+        ;;
+  esac
+
+  if [[ "$PKG" != "libpam-arvados" ]] &&
+     [[ "$PKG" != "arvados-node-manager" ]] &&
+     [[ "$PKG" != "arvados-docker-cleaner" ]]; then
+    PYTHON_PKG=$PACKAGE_PREFIX-$PKG
+  else
+    # Exception to our package naming convention
+    PYTHON_PKG=$PKG
+  fi
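+  # For example (a sketch, assuming the Debian-style prefixes used elsewhere in
+  # this script): PKG=arvados-python-client yields
+  # PYTHON_PKG=python-arvados-python-client for python2 builds and
+  # python3-arvados-python-client for python3 builds.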
+
+  if [[ -n "$ONLY_BUILD" ]] && [[ "$PYTHON_PKG" != "$ONLY_BUILD" ]] && [[ "$PKG" != "$ONLY_BUILD" ]]; then
+    return 0
+  fi
+
+  cd $WORKSPACE/$PKG_DIR
+
+  rm -rf dist/*
+
+  # Get the latest setuptools
+  if ! $pip install $DASHQ_UNLESS_DEBUG $CACHE_FLAG -U setuptools; then
+    echo "Error, unable to upgrade setuptools with"
+    echo "  $pip install $DASHQ_UNLESS_DEBUG $CACHE_FLAG -U setuptools"
+    exit 1
+  fi
+  if ! $python setup.py $DASHQ_UNLESS_DEBUG sdist; then
+    echo "Error, unable to run $python setup.py sdist for $PKG"
+    exit 1
+  fi
+
+  PACKAGE_PATH=`(cd dist; ls *tar.gz)`
+
+  # Determine the package version from the generated sdist archive
+  PYTHON_VERSION=${ARVADOS_BUILDING_VERSION:-$(awk '($1 == "Version:"){print $2}' *.egg-info/PKG-INFO)}
+
+  # See if we actually need to build this package; does it exist already?
+  # We can't do this earlier than here, because we need PYTHON_VERSION...
+  # This isn't so bad; the sdist call above is pretty quick compared to
+  # the invocation of virtualenv and fpm, below.
+  if ! test_package_presence "$PYTHON_PKG" $PYTHON_VERSION $PACKAGE_TYPE $ARVADOS_BUILDING_ITERATION; then
+    return 0
+  fi
+
+  echo "Building $FORMAT package for $PKG from $PKG_DIR"
+
+  # Package the sdist in a virtualenv
+  echo "Creating virtualenv..."
+
+  cd dist
+
+  rm -rf build
+  rm -f $PYTHON_PKG*deb
+
+  virtualenv_command="virtualenv --python `which $python` $DASHQ_UNLESS_DEBUG build/usr/share/$python/dist/$PYTHON_PKG"
+
+  if ! $virtualenv_command; then
+    echo "Error, unable to run"
+    echo "  $virtualenv_command"
+    exit 1
+  fi
+
+  if ! build/usr/share/$python/dist/$PYTHON_PKG/bin/$pip install $DASHQ_UNLESS_DEBUG $CACHE_FLAG -U pip; then
+    echo "Error, unable to upgrade pip with"
+    echo "  build/usr/share/$python/dist/$PYTHON_PKG/bin/$pip install $DASHQ_UNLESS_DEBUG $CACHE_FLAG -U pip"
+    exit 1
+  fi
+  if ! build/usr/share/$python/dist/$PYTHON_PKG/bin/$pip install $DASHQ_UNLESS_DEBUG $CACHE_FLAG -U setuptools; then
+    echo "Error, unable to upgrade setuptools with"
+    echo "  build/usr/share/$python/dist/$PYTHON_PKG/bin/$pip install $DASHQ_UNLESS_DEBUG $CACHE_FLAG -U setuptools"
+    exit 1
+  fi
+  if ! build/usr/share/$python/dist/$PYTHON_PKG/bin/$pip install $DASHQ_UNLESS_DEBUG $CACHE_FLAG -U wheel; then
+    echo "Error, unable to upgrade wheel with"
+    echo "  build/usr/share/$python/dist/$PYTHON_PKG/bin/$pip install $DASHQ_UNLESS_DEBUG $CACHE_FLAG -U wheel"
+    exit 1
+  fi
+
+  if [[ "$TARGET" != "centos7" ]] || [[ "$PYTHON_PKG" != "python-arvados-fuse" ]]; then
+    build/usr/share/$python/dist/$PYTHON_PKG/bin/$pip install $DASHQ_UNLESS_DEBUG $CACHE_FLAG $PACKAGE_PATH
+  else
+    # centos7 needs these special tweaks to install python-arvados-fuse
+    build/usr/share/$python/dist/$PYTHON_PKG/bin/$pip install $DASHQ_UNLESS_DEBUG $CACHE_FLAG docutils
+    PYCURL_SSL_LIBRARY=nss build/usr/share/$python/dist/$PYTHON_PKG/bin/$pip install $DASHQ_UNLESS_DEBUG $CACHE_FLAG $PACKAGE_PATH
+  fi
+
+  if [[ "$?" != "0" ]]; then
+    echo "Error, unable to run"
+    echo "  build/usr/share/$python/dist/$PYTHON_PKG/bin/$pip install $DASHQ_UNLESS_DEBUG $CACHE_FLAG $PACKAGE_PATH"
+    exit 1
+  fi
+
+  cd build/usr/share/$python/dist/$PYTHON_PKG/
+
+  # Replace the shebang lines in all python scripts, and handle the activate
+  # scripts too. This is a functional replacement of the 237-line
+  # virtualenv_tools.py script, which doesn't work in python3 without serious
+  # patching, minus the parts we don't need (modifying pyc files, etc).
+  for binfile in `ls bin/`; do
+    if ! file --mime bin/$binfile |grep -q binary; then
+      # Not a binary file
+      if [[ "$binfile" =~ ^activate(.csh|.fish|)$ ]]; then
+        # these 'activate' scripts need special treatment
+        sed -i "s/VIRTUAL_ENV=\".*\"/VIRTUAL_ENV=\"\/usr\/share\/$python\/dist\/$PYTHON_PKG\"/" bin/$binfile
+        sed -i "s/VIRTUAL_ENV \".*\"/VIRTUAL_ENV \"\/usr\/share\/$python\/dist\/$PYTHON_PKG\"/" bin/$binfile
+      else
+        if grep -q -E '^#!.*/bin/python\d?' bin/$binfile; then
+          # Replace shebang line
+          sed -i "1 s/^.*$/#!\/usr\/share\/$python\/dist\/$PYTHON_PKG\/bin\/python/" bin/$binfile
+        fi
+      fi
+    fi
+  done
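+  # For example (a sketch, assuming python=python2.7 and
+  # PYTHON_PKG=python-arvados-python-client), a shebang pointing at the build
+  # tree's virtualenv python is rewritten to the installed location:
+  #   #!/usr/share/python2.7/dist/python-arvados-python-client/bin/python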
+
+  cd - >$STDOUT_IF_DEBUG
+
+  find build -iname '*.pyc' -exec rm {} \;
+  find build -iname '*.pyo' -exec rm {} \;
+
+  # Finally, generate the package
+  echo "Creating package..."
+
+  declare -a COMMAND_ARR=("fpm" "-s" "dir" "-t" "$FORMAT")
+
+  if [[ "$MAINTAINER" != "" ]]; then
+    COMMAND_ARR+=('--maintainer' "$MAINTAINER")
+  fi
+
+  if [[ "$VENDOR" != "" ]]; then
+    COMMAND_ARR+=('--vendor' "$VENDOR")
+  fi
+
+  COMMAND_ARR+=('--url' 'https://arvados.org')
+
+  # Get description
+  DESCRIPTION=`grep '\sdescription' $WORKSPACE/$PKG_DIR/setup.py|cut -f2 -d=|sed -e "s/[',\\"]//g"`
+  COMMAND_ARR+=('--description' "$DESCRIPTION")
+
+  # Get license string
+  LICENSE_STRING=`grep license $WORKSPACE/$PKG_DIR/setup.py|cut -f2 -d=|sed -e "s/[',\\"]//g"`
+  COMMAND_ARR+=('--license' "$LICENSE_STRING")
+
+  # 12271 - As FPM-generated packages don't include scripts by default, the
+  # packages cleanup on upgrade depends on files being listed on the %files
+  # section in the generated SPEC files. To remove DIRECTORIES, they need to
+  # be listed in that section too, so we need to add this parameter to properly
+  # remove lingering dirs. But this only works for python2: if used on
+  # python33, it includes dirs like /opt/rh/python33 that belong to
+  # other packages.
+  if [[ "$FORMAT" == "rpm" ]] && [[ "$python" == "python2.7" ]]; then
+    COMMAND_ARR+=('--rpm-auto-add-directories')
+  fi
+
+  if [[ "$PKG" == "arvados-python-client" ]]; then
+    if [[ "$python" == "python2.7" ]]; then
+      COMMAND_ARR+=('--conflicts' "$PYTHON3_PKG_PREFIX-$PKG")
+    else
+      COMMAND_ARR+=('--conflicts' "$PYTHON2_PKG_PREFIX-$PKG")
+    fi
+  fi
+
+  if [[ "$DEBUG" != "0" ]]; then
+    COMMAND_ARR+=('--verbose' '--log' 'info')
+  fi
+
+  COMMAND_ARR+=('-v' "$PYTHON_VERSION")
+  COMMAND_ARR+=('--iteration' "$ARVADOS_BUILDING_ITERATION")
+  COMMAND_ARR+=('-n' "$PYTHON_PKG")
+  COMMAND_ARR+=('-C' "build")
+
+  if [[ -e "$WORKSPACE/$PKG_DIR/$PKG.service" ]]; then
+    COMMAND_ARR+=('--after-install' "${WORKSPACE}/build/go-python-package-scripts/postinst")
+    COMMAND_ARR+=('--before-remove' "${WORKSPACE}/build/go-python-package-scripts/prerm")
+  fi
+
+  if [[ "$python" == "python2.7" ]]; then
+    COMMAND_ARR+=('--depends' "$PYTHON2_PACKAGE")
+  else
+    COMMAND_ARR+=('--depends' "$PYTHON3_PACKAGE")
+  fi
+
+  # avoid the fpm warning about automatically labeling /etc files as config files
+  COMMAND_ARR+=('--deb-no-default-config-files')
+
+  # Append --depends X and other arguments specified by fpm-info.sh in
+  # the package source dir. These are added last so they can override
+  # the arguments added by this script.
+  declare -a fpm_args=()
+  declare -a fpm_depends=()
+
+  fpminfo="$WORKSPACE/$PKG_DIR/fpm-info.sh"
+  if [[ -e "$fpminfo" ]]; then
+    echo "Loading fpm overrides from $fpminfo"
+    if ! source "$fpminfo"; then
+      echo "Error, unable to source $WORKSPACE/$PKG_DIR/fpm-info.sh for $PKG"
+      exit 1
+    fi
+  fi
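+  # A minimal fpm-info.sh just appends to these arrays; for example
+  # (hypothetical values shown):
+  #   fpm_depends+=(libfuse2)
+  #   fpm_args+=(--conflicts=some-other-package)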
+
+  for i in "${fpm_depends[@]}"; do
+    COMMAND_ARR+=('--depends' "$i")
+  done
+
+  COMMAND_ARR+=("${fpm_args[@]}")
+
+  # Make sure to install all our package binaries in /usr/bin.
+  # We have to walk $WORKSPACE/$PKG_DIR/bin rather than
+  # $WORKSPACE/$PKG_DIR/dist/build/usr/share/$python/dist/$PYTHON_PKG/bin/ to get the list
+  # because the latter also includes all the python binaries for the virtualenv.
+  # We have to take the copies of our binaries from the latter directory, though,
+  # because those are the ones we rewrote the shebang line of, above.
+  if [[ -e "$WORKSPACE/$PKG_DIR/bin" ]]; then
+    for binary in `ls $WORKSPACE/$PKG_DIR/bin`; do
+      COMMAND_ARR+=("usr/share/$python/dist/$PYTHON_PKG/bin/$binary=/usr/bin/")
+    done
+  fi
+
+  # the libpam module should place this file in the historically correct place
+  # so as not to break backwards compatibility
+  if [[ -e "$WORKSPACE/$PKG_DIR/dist/build/usr/share/python2.7/dist/libpam-arvados/lib/security/libpam_arvados.py" ]]; then
+    COMMAND_ARR+=("usr/share/$python/dist/$PYTHON_PKG/data/lib/security/libpam_arvados.py=/usr/data/lib/security/")
+  fi
+
+  # the python-arvados-cwl-runner package comes with cwltool, expose that version
+  if [[ -e "$WORKSPACE/$PKG_DIR/dist/build/usr/share/python2.7/dist/python-arvados-cwl-runner/bin/cwltool" ]]; then
+    COMMAND_ARR+=("usr/share/python2.7/dist/python-arvados-cwl-runner/bin/cwltool=/usr/bin/")
+  fi
+
+  COMMAND_ARR+=(".")
+
+  FPM_RESULTS=$("${COMMAND_ARR[@]}")
+  FPM_EXIT_CODE=$?
+
+  # if something went wrong and debug is off, print out the fpm command that errored
+  if ! fpm_verify $FPM_EXIT_CODE $FPM_RESULTS && [[ "$STDOUT_IF_DEBUG" == "/dev/null" ]]; then
+    echo "fpm returned an error executing the command:"
+    echo
+    echo -e "\n${COMMAND_ARR[@]}\n"
+  else
+    echo `ls *$FORMAT`
+    mv $WORKSPACE/$PKG_DIR/dist/*$FORMAT $WORKSPACE/packages/$TARGET/
+  fi
+  echo
+}
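+
+# Example invocations (copied from run-build-packages.sh above):
+#   fpm_build_virtualenv "arvados-python-client" "sdk/python"
+#   fpm_build_virtualenv "arvados-docker-cleaner" "services/dockercleaner" "python3"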
+
 # Build packages for everything
 fpm_build () {
   # The package source.  Depending on the source type, this can be a
@@ -369,12 +652,8 @@ fpm_build () {
   # The name of the package to build.
   PACKAGE_NAME=$1
   shift
-  # Optional: the vendor of the package.  Should be "Curoverse, Inc." for
-  # packages of our own software.  Passed to fpm --vendor.
-  VENDOR=$1
-  shift
-  # The type of source package.  Passed to fpm -s.  Default "python".
-  PACKAGE_TYPE=${1:-python}
+  # The type of source package.  Passed to fpm -s.  Default "dir".
+  PACKAGE_TYPE=${1:-dir}
   shift
   # Optional: the package version number.  Passed to fpm -v.
   VERSION=$1
@@ -385,41 +664,8 @@ fpm_build () {
   fi
 
   local default_iteration_value="$(default_iteration "$PACKAGE" "$VERSION" "$PACKAGE_TYPE")"
-  local python=""
-
-  case "$PACKAGE_TYPE" in
-      python)
-          # All Arvados Python2 packages depend on Python 2.7.
-          # Make sure we build with that for consistency.
-          python=python2.7
-          set -- "$@" --python-bin python2.7 \
-              "${PYTHON_FPM_INSTALLER[@]}" \
-              --python-package-name-prefix "$PYTHON2_PKG_PREFIX" \
-              --prefix "$PYTHON2_PREFIX" \
-              --python-install-lib "$PYTHON2_INSTALL_LIB" \
-              --python-install-data . \
-              --exclude "${PYTHON2_INSTALL_LIB#/}/tests" \
-              --depends "$PYTHON2_PACKAGE"
-          ;;
-      python3)
-          # fpm does not actually support a python3 package type.  Instead
-          # we recognize it as a convenience shortcut to add several
-          # necessary arguments to fpm's command line later, after we're
-          # done handling positional arguments.
-          PACKAGE_TYPE=python
-          python=python3
-          set -- "$@" --python-bin python3 \
-              "${PYTHON3_FPM_INSTALLER[@]}" \
-              --python-package-name-prefix "$PYTHON3_PKG_PREFIX" \
-              --prefix "$PYTHON3_PREFIX" \
-              --python-install-lib "$PYTHON3_INSTALL_LIB" \
-              --python-install-data . \
-              --exclude "${PYTHON3_INSTALL_LIB#/}/tests" \
-              --depends "$PYTHON3_PACKAGE"
-          ;;
-  esac
 
-  declare -a COMMAND_ARR=("fpm" "--maintainer=Ward Vandewege <ward@curoverse.com>" "-s" "$PACKAGE_TYPE" "-t" "$FORMAT")
+  declare -a COMMAND_ARR=("fpm" "-s" "$PACKAGE_TYPE" "-t" "$FORMAT")
   if [ python = "$PACKAGE_TYPE" ] && [ deb = "$FORMAT" ]; then
       # Dependencies are built from setup.py.  Since setup.py will never
       # refer to Debian package iterations, it doesn't make sense to
@@ -430,7 +676,7 @@ fpm_build () {
   # 12271 - As FPM-generated packages don't include scripts by default, the
   # packages cleanup on upgrade depends on files being listed on the %files
   # section in the generated SPEC files. To remove DIRECTORIES, they need to
-  # be listed in that sectiontoo, so we need to add this parameter to properly
+  # be listed in that section too, so we need to add this parameter to properly
   # remove lingering dirs. But this only works for python2: if used on
   # python33, it includes dirs like /opt/rh/python33 that belong to
   # other packages.
@@ -438,7 +684,7 @@ fpm_build () {
     COMMAND_ARR+=('--rpm-auto-add-directories')
   fi
 
-  if [[ "${DEBUG:-0}" != "0" ]]; then
+  if [[ "$DEBUG" != "0" ]]; then
     COMMAND_ARR+=('--verbose' '--log' 'info')
   fi
 
@@ -446,6 +692,10 @@ fpm_build () {
     COMMAND_ARR+=('-n' "$PACKAGE_NAME")
   fi
 
+  if [[ "$MAINTAINER" != "" ]]; then
+    COMMAND_ARR+=('--maintainer' "$MAINTAINER")
+  fi
+
   if [[ "$VENDOR" != "" ]]; then
     COMMAND_ARR+=('--vendor' "$VENDOR")
   fi
@@ -459,14 +709,6 @@ fpm_build () {
       COMMAND_ARR+=(--iteration "$default_iteration_value")
   fi
 
-  if [[ python = "$PACKAGE_TYPE" ]] && [[ -e "${PACKAGE}/${PACKAGE_NAME}.service" ]]
-  then
-      COMMAND_ARR+=(
-          --after-install "${WORKSPACE}/build/go-python-package-scripts/postinst"
-          --before-remove "${WORKSPACE}/build/go-python-package-scripts/prerm"
-      )
-  fi
-
   # Append --depends X and other arguments specified by fpm-info.sh in
   # the package source dir. These are added last so they can override
   # the arguments added by this script.
@@ -476,12 +718,7 @@ fpm_build () {
   declare -a fpm_exclude=()
   declare -a fpm_dirs=(
       # source dir part of 'dir' package ("/source=/dest" => "/source"):
-      "${PACKAGE%%=/*}"
-      # backports ("llfuse>=1.0" => "backports/python-llfuse")
-      "${WORKSPACE}/backports/${PACKAGE_TYPE}-${PACKAGE%%[<=>]*}")
-  if [[ -n "$PACKAGE_NAME" ]]; then
-      fpm_dirs+=("${WORKSPACE}/backports/${PACKAGE_NAME}")
-  fi
+      "${PACKAGE%%=/*}")
   for pkgdir in "${fpm_dirs[@]}"; do
       fpminfo="$pkgdir/fpm-info.sh"
       if [[ -e "$fpminfo" ]]; then
index 749075d81576242a29db0d1b075414dc5f1d0270..caaca1f31e51677c3881dbf82ea9197ff53660c2 100755 (executable)
@@ -77,6 +77,7 @@ lib/cmd
 lib/controller
 lib/crunchstat
 lib/cloud
+lib/cloud/azure
 lib/dispatchcloud
 lib/dispatchcloud/container
 lib/dispatchcloud/scheduler
@@ -528,7 +529,7 @@ setup_virtualenv() {
 }
 
 export PERLINSTALLBASE
-export PERLLIB="$PERLINSTALLBASE/lib/perl5:${PERLLIB:+$PERLLIB}"
+export PERL5LIB="$PERLINSTALLBASE/lib/perl5${PERL5LIB:+:$PERL5LIB}"
 
 export R_LIBS
 
@@ -536,7 +537,6 @@ export GOPATH
 (
     set -e
     mkdir -p "$GOPATH/src/git.curoverse.com"
-    rmdir -v --parents --ignore-fail-on-non-empty "${temp}/GOPATH"
     if [[ ! -h "$GOPATH/src/git.curoverse.com/arvados.git" ]]; then
         for d in \
             "$GOPATH/src/git.curoverse.com/arvados.git/tmp/GOPATH" \
@@ -596,6 +596,7 @@ pythonstuff=(
     sdk/python
     sdk/python:py3
     sdk/cwl
+    sdk/cwl:py3
     services/dockercleaner:py3
     services/fuse
     services/nodemanager
@@ -931,6 +932,7 @@ gostuff=(
     lib/controller
     lib/crunchstat
     lib/cloud
+    lib/cloud/azure
     lib/dispatchcloud
     lib/dispatchcloud/container
     lib/dispatchcloud/scheduler
similarity index 89%
rename from cmd/arvados-server/crunch-dispatch-cloud.service
rename to cmd/arvados-server/arvados-dispatch-cloud.service
index f8d71c9753fab219bcb24b17dfcb8a9885f4738f..aa5cc3b4a5d033c3c163b09ad9a4ad411c3237b4 100644 (file)
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 [Unit]
-Description=Arvados cloud dispatch
+Description=arvados-dispatch-cloud
 Documentation=https://doc.arvados.org/
 After=network.target
 AssertPathExists=/etc/arvados/config.yml
@@ -17,7 +17,7 @@ StartLimitIntervalSec=0
 [Service]
 Type=notify
 EnvironmentFile=-/etc/arvados/environment
-ExecStart=/usr/bin/crunch-dispatch-cloud
+ExecStart=/usr/bin/arvados-dispatch-cloud
 Restart=always
 RestartSec=1
 
index 06db793314931b8200651ba24db3df37fd1730f8..ea3640e52a077ba0d5ce626740af691f701f4439 100644 (file)
@@ -73,7 +73,7 @@ h2. Download and tag the latest arvados/jobs docker image
 In order to start workflows from workbench, there needs to be a Docker image tagged @arvados/jobs:latest@. The following command downloads the latest arvados/jobs image from Docker Hub, loads it into Keep, and tags it as 'latest'.  In this example @$project_uuid@ should be the UUID of the "Arvados Standard Docker Images" project.
 
 <notextile>
-<pre><code>~$ <span class="userinput">arv-keepdocker --project-uuid $project_uuid --pull arvados/jobs latest</span>
+<pre><code>~$ <span class="userinput">arv-keepdocker --pull arvados/jobs latest --project-uuid $project_uuid</span>
 </code></pre></notextile>
 
 If the image needs to be downloaded from Docker Hub, the command can take a few minutes to complete, depending on available network bandwidth.
index 6a3e000ca0165649b0418dba566a43999be1be09..eda25d5cc6678524ea1cd44b0500201f49e56e65 100644 (file)
@@ -30,6 +30,44 @@ Note to developers: Add new items at the top. Include the date, issue number, co
 TODO: extract this information based on git commit messages and generate changelogs / release notes automatically.
 {% endcomment %}
 
+h3. current master branch
+
+h4. Python packaging change
+
+As part of story "#9945":https://dev.arvados.org/issues/9945, the distribution packaging (deb/rpm) of our Python packages has changed. These packages now include a built-in virtualenv to reduce dependencies on system packages. We have also stopped packaging and publishing backports for all the Python dependencies of our packages, as they are no longer needed.
+
+One practical consequence of this change is that using the Arvados Python SDK (aka "import arvados") requires a tweak if the SDK was installed from a distribution package: the virtualenv environment shipped in our packages must be loaded first. The "Install documentation for the Arvados Python SDK":/sdk/python/sdk-python.html reflects this change. This does not affect the use of the command line tools (e.g. @arv-get@).
+
+Python scripts that import the Arvados SDK from a distribution package will need to be tweaked to load the correct Python environment.
+
+This can be done by activating the virtualenv outside of the script:
+
+<notextile>
+<pre>~$ <code class="userinput">source /usr/share/python2.7/dist/python-arvados-python-client/bin/activate</code>
+(python-arvados-python-client) ~$ <code class="userinput">path-to-the-python-script</code>
+</pre>
+</notextile>
+
+Alternatively, update the shebang line at the start of the script to:
+
+<notextile>
+<pre>
+#!/usr/share/python2.7/dist/python-arvados-python-client/bin/python
+</pre>
+</notextile>
+
+h4. python-arvados-cwl-runner deb/rpm package now conflicts with python-cwltool deb/rpm package
+
+As part of story "#9945":https://dev.arvados.org/issues/9945, the distribution packaging (deb/rpm) of our Python packages has changed. The python-arvados-cwl-runner package now includes a version of cwltool. If present, the python-cwltool and cwltool distribution packages will need to be uninstalled before the python-arvados-cwl-runner deb or rpm package can be installed.
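+
+For example, on a Debian-based system the conflicting packages could be removed first (a sketch; exact package names may vary by distribution):
+
+<notextile>
+<pre>~$ <code class="userinput">sudo apt-get remove python-cwltool cwltool</code>
+</pre>
+</notextile>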
+
+h4. Centos7 Python 3 dependency upgraded to rh-python35
+
+As part of story "#9945":https://dev.arvados.org/issues/9945, the Python 3 dependency for Centos7 Arvados packages was upgraded from SCL python33 to rh-python35.
+
+h4. Centos7 package for libpam-arvados depends on the python-pam package, which is available from EPEL
+
+As part of story "#9945":https://dev.arvados.org/issues/9945, it was discovered that the Centos7 package for libpam-arvados was missing a dependency on the python-pam package, which is available from the EPEL repository. The dependency has been added to the libpam-arvados package. This means that going forward, the EPEL repository will need to be enabled to install libpam-arvados on Centos7.
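+
+For example, on a stock Centos7 system the EPEL repository can typically be enabled with:
+
+<notextile>
+<pre>~$ <code class="userinput">sudo yum install epel-release</code>
+</pre>
+</notextile>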
+
 h3. v1.3.0 (2018-12-05)
 
 This release includes several database migrations, which will be executed automatically as part of the API server upgrade. On large Arvados installations, these migrations will take a while. We've seen the upgrade take 30 minutes or more on installations with a lot of collections.
index 2b86261ccc06cdd9dacd268b6b4bc21cc06d74f0..be824399a4c94def5b64af218fa0b93d154d739b 100644 (file)
@@ -12,22 +12,29 @@ SPDX-License-Identifier: CC-BY-SA-3.0
 
 The Python SDK provides access from Python to the Arvados API and Keep.  It also includes a number of command line tools for using and administering Arvados and Keep, and some conveniences for use in Crunch scripts; see "Crunch utility libraries":crunch-utility-libraries.html for details.
 
-h3. Installation
+h2. Installation
 
 If you are logged in to an Arvados VM, the Python SDK should be installed.
 
-To use the Python SDK elsewhere, you can install from a distribution package, PyPI, or source.
+To use the Python SDK elsewhere, you can install from PyPI or a distribution package.
 
 {% include 'notebox_begin' %}
 The Python SDK requires Python 2.7.
 {% include 'notebox_end' %}
 
-h4. Option 1: Install from distribution packages
+h3. Option 1: Install with pip
 
-First, "add the appropriate package repository for your distribution":{{ site.baseurl }}/install/install-manual-prerequisites.html#repos.
+This installation method is recommended to make the SDK available for use in your own Python programs. It can coexist with the system-wide installation method from a distribution package (option 2, below).
+
+Run @pip-2.7 install arvados-python-client@ in an appropriate installation environment, such as a virtualenv.
+
+If your version of @pip@ is 1.4 or newer, the @pip install@ command might give an error: "Could not find a version that satisfies the requirement arvados-python-client". If this happens, try @pip-2.7 install --pre arvados-python-client@.
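+
+For example, a minimal sketch that installs the SDK into a fresh virtualenv (the @~/arvados-sdk@ path is arbitrary):
+
+<notextile>
+<pre>~$ <code class="userinput">virtualenv --python python2.7 ~/arvados-sdk</code>
+~$ <code class="userinput">~/arvados-sdk/bin/pip install arvados-python-client</code>
+</pre>
+</notextile>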
+
+h3. Option 2: Install from a distribution package
 
-{% assign rh_version = "6" %}
-{% include 'note_python_sc' %}
+This installation method is recommended to make the CLI tools available system-wide. It can coexist with the installation method described in option 1, above.
+
+First, "add the appropriate package repository for your distribution":{{ site.baseurl }}/install/install-manual-prerequisites.html#repos.
 
 On Red Hat-based systems:
 
@@ -43,31 +50,41 @@ On Debian-based systems:
 </code></pre>
 </notextile>
 
-h4. Option 2: Install with pip
+h3. Test installation
 
-Run @pip-2.7 install arvados-python-client@ in an appropriate installation environment, such as a virtualenv.
-
-If your version of @pip@ is 1.4 or newer, the @pip install@ command might give an error: "Could not find a version that satisfies the requirement arvados-python-client". If this happens, try @pip-2.7 install --pre arvados-python-client@.
-
-h4. Option 3: Install from source
+If the SDK is installed and your @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ environment variables are set up correctly (see "api-tokens":{{site.baseurl}}/user/reference/api-tokens.html for details), @import arvados@ should produce no errors.
 
-Install the @python-setuptools@ package from your distribution.  Then run the following:
+If you installed with pip (option 1, above):
 
 <notextile>
-<pre><code>~$ <span class="userinput">git clone https://github.com/curoverse/arvados.git</span>
-~$ <span class="userinput">cd arvados/sdk/python</span>
-~/arvados/sdk/python$ <span class="userinput">python2.7 setup.py install</span>
-</code></pre>
+<pre>~$ <code class="userinput">python</code>
+Python 2.7.4 (default, Sep 26 2013, 03:20:26)
+[GCC 4.7.3] on linux2
+Type "help", "copyright", "credits" or "license" for more information.
+>>> <code class="userinput">import arvados</code>
+>>> <code class="userinput">arvados.api('v1')</code>
+&lt;apiclient.discovery.Resource object at 0x233bb50&gt;
+</pre>
 </notextile>
 
-You may optionally run the final installation command in a virtualenv, or with the @--user@ option.
+If you installed from a distribution package (option 2, above), the package includes a virtualenv, which means the correct Python environment must be loaded before the Arvados SDK can be imported. This can be done by activating the virtualenv first:
 
-h4. Test installation
+<notextile>
+<pre>~$ <code class="userinput">source /usr/share/python2.7/dist/python-arvados-python-client/bin/activate</code>
+(python-arvados-python-client) ~$ <code class="userinput">python</code>
+Python 2.7.4 (default, Sep 26 2013, 03:20:26)
+[GCC 4.7.3] on linux2
+Type "help", "copyright", "credits" or "license" for more information.
+>>> <code class="userinput">import arvados</code>
+>>> <code class="userinput">arvados.api('v1')</code>
+&lt;apiclient.discovery.Resource object at 0x233bb50&gt;
+</pre>
+</notextile>
 
-If the SDK is installed and your @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ environment variables are set up correctly (see "api-tokens":{{site.baseurl}}/user/reference/api-tokens.html for details), @import arvados@ should produce no errors:
+Alternatively, use the Python executable from the virtualenv directly:
 
 <notextile>
-<pre>~$ <code class="userinput">python2.7</code>
+<pre>~$ <code class="userinput">/usr/share/python2.7/dist/python-arvados-python-client/bin/python</code>
 Python 2.7.4 (default, Sep 26 2013, 03:20:26)
 [GCC 4.7.3] on linux2
 Type "help", "copyright", "credits" or "license" for more information.
index c21fbd9ad2204c0eb056f473879c057ebbc814a7..f34c21a9d741042610e81891ea37848d12380506 100644 (file)
@@ -210,6 +210,6 @@ h2. Share Docker images
 Docker images are subject to normal Arvados permissions.  If wish to share your Docker image with others (or wish to share a pipeline template that uses your Docker image) you will need to use @arv-keepdocker@ with the @--project-uuid@ option to upload the image to a shared project.
 
 <notextile>
-<pre><code>$ <span class="userinput">arv-keepdocker --project-uuid qr1hi-j7d0g-xxxxxxxxxxxxxxx arvados/jobs-with-r</span>
+<pre><code>$ <span class="userinput">arv-keepdocker arvados/jobs-with-r --project-uuid qr1hi-j7d0g-xxxxxxxxxxxxxxx</span>
 </code></pre>
 </notextile>
index 12c68564bd5cf2a32beb8668ae95a996e98fd8c9..c0fe145db1b292ebc55c536b9d0e7786a7b06daa 100644 (file)
@@ -23,6 +23,9 @@ RUN apt-get install -yq --no-install-recommends \
     python-arvados-python-client=$python_sdk_version \
     python-arvados-cwl-runner=$cwl_runner_version
 
+# use the Python executable from the python-arvados-python-client package
+RUN rm -f /usr/bin/python && ln -s /usr/share/python2.7/dist/python-arvados-python-client/bin/python /usr/bin/python
+
 # Install dependencies and set up system.
 RUN /usr/sbin/adduser --disabled-password \
       --gecos 'Crunch execution user' crunch && \
similarity index 64%
rename from lib/cloud/azure.go
rename to lib/cloud/azure/azure.go
index a194b33180b231cfb74964b11253d5aa6f8d0667..8ae8a44811529f598cb8ecee0044919b76bf467e 100644 (file)
@@ -2,11 +2,12 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package cloud
+package azure
 
 import (
        "context"
        "encoding/base64"
+       "encoding/json"
        "fmt"
        "net/http"
        "regexp"
@@ -15,6 +16,7 @@ import (
        "sync"
        "time"
 
+       "git.curoverse.com/arvados.git/lib/cloud"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-06-01/compute"
        "github.com/Azure/azure-sdk-for-go/services/network/mgmt/2018-06-01/network"
@@ -25,129 +27,133 @@ import (
        "github.com/Azure/go-autorest/autorest/azure/auth"
        "github.com/Azure/go-autorest/autorest/to"
        "github.com/jmcvetta/randutil"
-       "github.com/mitchellh/mapstructure"
        "github.com/sirupsen/logrus"
        "golang.org/x/crypto/ssh"
 )
 
-type AzureInstanceSetConfig struct {
-       SubscriptionID               string  `mapstructure:"subscription_id"`
-       ClientID                     string  `mapstructure:"key"`
-       ClientSecret                 string  `mapstructure:"secret"`
-       TenantID                     string  `mapstructure:"tenant_id"`
-       CloudEnv                     string  `mapstructure:"cloud_environment"`
-       ResourceGroup                string  `mapstructure:"resource_group"`
-       Location                     string  `mapstructure:"region"`
-       Network                      string  `mapstructure:"network"`
-       Subnet                       string  `mapstructure:"subnet"`
-       StorageAccount               string  `mapstructure:"storage_account"`
-       BlobContainer                string  `mapstructure:"blob_container"`
-       Image                        string  `mapstructure:"image"`
-       DeleteDanglingResourcesAfter float64 `mapstructure:"delete_dangling_resources_after"`
-}
-
-type VirtualMachinesClientWrapper interface {
-       CreateOrUpdate(ctx context.Context,
+// Driver is the azure implementation of the cloud.Driver interface.
+var Driver = cloud.DriverFunc(newAzureInstanceSet)
+
+type azureInstanceSetConfig struct {
+       SubscriptionID               string
+       ClientID                     string
+       ClientSecret                 string
+       TenantID                     string
+       CloudEnvironment             string
+       ResourceGroup                string
+       Location                     string
+       Network                      string
+       Subnet                       string
+       StorageAccount               string
+       BlobContainer                string
+       DeleteDanglingResourcesAfter arvados.Duration
+       AdminUsername                string
+}
+
+const tagKeyInstanceSecret = "InstanceSecret"
+
+type virtualMachinesClientWrapper interface {
+       createOrUpdate(ctx context.Context,
                resourceGroupName string,
                VMName string,
                parameters compute.VirtualMachine) (result compute.VirtualMachine, err error)
-       Delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error)
-       ListComplete(ctx context.Context, resourceGroupName string) (result compute.VirtualMachineListResultIterator, err error)
+       delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error)
+       listComplete(ctx context.Context, resourceGroupName string) (result compute.VirtualMachineListResultIterator, err error)
 }
 
-type VirtualMachinesClientImpl struct {
+type virtualMachinesClientImpl struct {
        inner compute.VirtualMachinesClient
 }
 
-func (cl *VirtualMachinesClientImpl) CreateOrUpdate(ctx context.Context,
+func (cl *virtualMachinesClientImpl) createOrUpdate(ctx context.Context,
        resourceGroupName string,
        VMName string,
        parameters compute.VirtualMachine) (result compute.VirtualMachine, err error) {
 
        future, err := cl.inner.CreateOrUpdate(ctx, resourceGroupName, VMName, parameters)
        if err != nil {
-               return compute.VirtualMachine{}, WrapAzureError(err)
+               return compute.VirtualMachine{}, wrapAzureError(err)
        }
        future.WaitForCompletionRef(ctx, cl.inner.Client)
        r, err := future.Result(cl.inner)
-       return r, WrapAzureError(err)
+       return r, wrapAzureError(err)
 }
 
-func (cl *VirtualMachinesClientImpl) Delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
+func (cl *virtualMachinesClientImpl) delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
        future, err := cl.inner.Delete(ctx, resourceGroupName, VMName)
        if err != nil {
-               return nil, WrapAzureError(err)
+               return nil, wrapAzureError(err)
        }
        err = future.WaitForCompletionRef(ctx, cl.inner.Client)
-       return future.Response(), WrapAzureError(err)
+       return future.Response(), wrapAzureError(err)
 }
 
-func (cl *VirtualMachinesClientImpl) ListComplete(ctx context.Context, resourceGroupName string) (result compute.VirtualMachineListResultIterator, err error) {
+func (cl *virtualMachinesClientImpl) listComplete(ctx context.Context, resourceGroupName string) (result compute.VirtualMachineListResultIterator, err error) {
        r, err := cl.inner.ListComplete(ctx, resourceGroupName)
-       return r, WrapAzureError(err)
+       return r, wrapAzureError(err)
 }
 
-type InterfacesClientWrapper interface {
-       CreateOrUpdate(ctx context.Context,
+type interfacesClientWrapper interface {
+       createOrUpdate(ctx context.Context,
                resourceGroupName string,
                networkInterfaceName string,
                parameters network.Interface) (result network.Interface, err error)
-       Delete(ctx context.Context, resourceGroupName string, networkInterfaceName string) (result *http.Response, err error)
-       ListComplete(ctx context.Context, resourceGroupName string) (result network.InterfaceListResultIterator, err error)
+       delete(ctx context.Context, resourceGroupName string, networkInterfaceName string) (result *http.Response, err error)
+       listComplete(ctx context.Context, resourceGroupName string) (result network.InterfaceListResultIterator, err error)
 }
 
-type InterfacesClientImpl struct {
+type interfacesClientImpl struct {
        inner network.InterfacesClient
 }
 
-func (cl *InterfacesClientImpl) Delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
+func (cl *interfacesClientImpl) delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
        future, err := cl.inner.Delete(ctx, resourceGroupName, VMName)
        if err != nil {
-               return nil, WrapAzureError(err)
+               return nil, wrapAzureError(err)
        }
        err = future.WaitForCompletionRef(ctx, cl.inner.Client)
-       return future.Response(), WrapAzureError(err)
+       return future.Response(), wrapAzureError(err)
 }
 
-func (cl *InterfacesClientImpl) CreateOrUpdate(ctx context.Context,
+func (cl *interfacesClientImpl) createOrUpdate(ctx context.Context,
        resourceGroupName string,
        networkInterfaceName string,
        parameters network.Interface) (result network.Interface, err error) {
 
        future, err := cl.inner.CreateOrUpdate(ctx, resourceGroupName, networkInterfaceName, parameters)
        if err != nil {
-               return network.Interface{}, WrapAzureError(err)
+               return network.Interface{}, wrapAzureError(err)
        }
        future.WaitForCompletionRef(ctx, cl.inner.Client)
        r, err := future.Result(cl.inner)
-       return r, WrapAzureError(err)
+       return r, wrapAzureError(err)
 }
 
-func (cl *InterfacesClientImpl) ListComplete(ctx context.Context, resourceGroupName string) (result network.InterfaceListResultIterator, err error) {
+func (cl *interfacesClientImpl) listComplete(ctx context.Context, resourceGroupName string) (result network.InterfaceListResultIterator, err error) {
        r, err := cl.inner.ListComplete(ctx, resourceGroupName)
-       return r, WrapAzureError(err)
+       return r, wrapAzureError(err)
 }
 
 var quotaRe = regexp.MustCompile(`(?i:exceed|quota|limit)`)
 
-type AzureRateLimitError struct {
+type azureRateLimitError struct {
        azure.RequestError
-       earliestRetry time.Time
+       firstRetry time.Time
 }
 
-func (ar *AzureRateLimitError) EarliestRetry() time.Time {
-       return ar.earliestRetry
+func (ar *azureRateLimitError) EarliestRetry() time.Time {
+       return ar.firstRetry
 }
 
-type AzureQuotaError struct {
+type azureQuotaError struct {
        azure.RequestError
 }
 
-func (ar *AzureQuotaError) IsQuotaError() bool {
+func (ar *azureQuotaError) IsQuotaError() bool {
        return true
 }
 
-func WrapAzureError(err error) error {
+func wrapAzureError(err error) error {
        de, ok := err.(autorest.DetailedError)
        if !ok {
                return err
@@ -174,21 +180,21 @@ func WrapAzureError(err error) error {
                                earliestRetry = time.Now().Add(20 * time.Second)
                        }
                }
-               return &AzureRateLimitError{*rq, earliestRetry}
+               return &azureRateLimitError{*rq, earliestRetry}
        }
        if rq.ServiceError == nil {
                return err
        }
        if quotaRe.FindString(rq.ServiceError.Code) != "" || quotaRe.FindString(rq.ServiceError.Message) != "" {
-               return &AzureQuotaError{*rq}
+               return &azureQuotaError{*rq}
        }
        return err
 }
 
-type AzureInstanceSet struct {
-       azconfig          AzureInstanceSetConfig
-       vmClient          VirtualMachinesClientWrapper
-       netClient         InterfacesClientWrapper
+type azureInstanceSet struct {
+       azconfig          azureInstanceSetConfig
+       vmClient          virtualMachinesClientWrapper
+       netClient         interfacesClientWrapper
        storageAcctClient storageacct.AccountsClient
        azureEnv          azure.Environment
        interfaces        map[string]network.Interface
@@ -202,12 +208,14 @@ type AzureInstanceSet struct {
        logger            logrus.FieldLogger
 }
 
-func NewAzureInstanceSet(config map[string]interface{}, dispatcherID InstanceSetID, logger logrus.FieldLogger) (prv InstanceSet, err error) {
-       azcfg := AzureInstanceSetConfig{}
-       if err = mapstructure.Decode(config, &azcfg); err != nil {
+func newAzureInstanceSet(config json.RawMessage, dispatcherID cloud.InstanceSetID, logger logrus.FieldLogger) (prv cloud.InstanceSet, err error) {
+       azcfg := azureInstanceSetConfig{}
+       err = json.Unmarshal(config, &azcfg)
+       if err != nil {
                return nil, err
        }
-       ap := AzureInstanceSet{logger: logger}
+
+       ap := azureInstanceSet{logger: logger}
        err = ap.setup(azcfg, string(dispatcherID))
        if err != nil {
                return nil, err
@@ -215,13 +223,13 @@ func NewAzureInstanceSet(config map[string]interface{}, dispatcherID InstanceSet
        return &ap, nil
 }
 
-func (az *AzureInstanceSet) setup(azcfg AzureInstanceSetConfig, dispatcherID string) (err error) {
+func (az *azureInstanceSet) setup(azcfg azureInstanceSetConfig, dispatcherID string) (err error) {
        az.azconfig = azcfg
        vmClient := compute.NewVirtualMachinesClient(az.azconfig.SubscriptionID)
        netClient := network.NewInterfacesClient(az.azconfig.SubscriptionID)
        storageAcctClient := storageacct.NewAccountsClient(az.azconfig.SubscriptionID)
 
-       az.azureEnv, err = azure.EnvironmentFromName(az.azconfig.CloudEnv)
+       az.azureEnv, err = azure.EnvironmentFromName(az.azconfig.CloudEnvironment)
        if err != nil {
                return err
        }
@@ -241,8 +249,8 @@ func (az *AzureInstanceSet) setup(azcfg AzureInstanceSetConfig, dispatcherID str
        netClient.Authorizer = authorizer
        storageAcctClient.Authorizer = authorizer
 
-       az.vmClient = &VirtualMachinesClientImpl{vmClient}
-       az.netClient = &InterfacesClientImpl{netClient}
+       az.vmClient = &virtualMachinesClientImpl{vmClient}
+       az.netClient = &interfacesClientImpl{netClient}
        az.storageAcctClient = storageAcctClient
 
        az.dispatcherID = dispatcherID
@@ -260,7 +268,7 @@ func (az *AzureInstanceSet) setup(azcfg AzureInstanceSetConfig, dispatcherID str
                                tk.Stop()
                                return
                        case <-tk.C:
-                               az.ManageBlobs()
+                               az.manageBlobs()
                        }
                }
        }()
@@ -268,14 +276,14 @@ func (az *AzureInstanceSet) setup(azcfg AzureInstanceSetConfig, dispatcherID str
        az.deleteNIC = make(chan string)
        az.deleteBlob = make(chan storage.Blob)
 
-       for i := 0; i < 4; i += 1 {
+       for i := 0; i < 4; i++ {
                go func() {
                        for {
                                nicname, ok := <-az.deleteNIC
                                if !ok {
                                        return
                                }
-                               _, delerr := az.netClient.Delete(context.Background(), az.azconfig.ResourceGroup, nicname)
+                               _, delerr := az.netClient.delete(context.Background(), az.azconfig.ResourceGroup, nicname)
                                if delerr != nil {
                                        az.logger.WithError(delerr).Warnf("Error deleting %v", nicname)
                                } else {
@@ -302,19 +310,16 @@ func (az *AzureInstanceSet) setup(azcfg AzureInstanceSetConfig, dispatcherID str
        return nil
 }
 
-func (az *AzureInstanceSet) Create(
+func (az *azureInstanceSet) Create(
        instanceType arvados.InstanceType,
-       imageId ImageID,
-       newTags InstanceTags,
-       publicKey ssh.PublicKey) (Instance, error) {
+       imageID cloud.ImageID,
+       newTags cloud.InstanceTags,
+       initCommand cloud.InitCommand,
+       publicKey ssh.PublicKey) (cloud.Instance, error) {
 
        az.stopWg.Add(1)
        defer az.stopWg.Done()
 
-       if len(newTags["node-token"]) == 0 {
-               return nil, fmt.Errorf("Must provide tag 'node-token'")
-       }
-
        name, err := randutil.String(15, "abcdefghijklmnopqrstuvwxyz0123456789")
        if err != nil {
                return nil, err
@@ -331,8 +336,6 @@ func (az *AzureInstanceSet) Create(
                tags["dispatch-"+k] = &newstr
        }
 
-       tags["dispatch-instance-type"] = &instanceType.Name
-
        nicParameters := network.Interface{
                Location: &az.azconfig.Location,
                Tags:     tags,
@@ -355,19 +358,18 @@ func (az *AzureInstanceSet) Create(
                        },
                },
        }
-       nic, err := az.netClient.CreateOrUpdate(az.ctx, az.azconfig.ResourceGroup, name+"-nic", nicParameters)
+       nic, err := az.netClient.createOrUpdate(az.ctx, az.azconfig.ResourceGroup, name+"-nic", nicParameters)
        if err != nil {
-               return nil, WrapAzureError(err)
+               return nil, wrapAzureError(err)
        }
 
-       instance_vhd := fmt.Sprintf("https://%s.blob.%s/%s/%s-os.vhd",
+       instanceVhd := fmt.Sprintf("https://%s.blob.%s/%s/%s-os.vhd",
                az.azconfig.StorageAccount,
                az.azureEnv.StorageEndpointSuffix,
                az.azconfig.BlobContainer,
                name)
 
-       customData := base64.StdEncoding.EncodeToString([]byte(fmt.Sprintf(`#!/bin/sh
-echo '%s-%s' > /home/crunch/node-token`, name, newTags["node-token"])))
+       customData := base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\n" + initCommand + "\n"))
 
        vmParameters := compute.VirtualMachine{
                Location: &az.azconfig.Location,
@@ -382,10 +384,10 @@ echo '%s-%s' > /home/crunch/node-token`, name, newTags["node-token"])))
                                        Name:         to.StringPtr(name + "-os"),
                                        CreateOption: compute.FromImage,
                                        Image: &compute.VirtualHardDisk{
-                                               URI: to.StringPtr(string(imageId)),
+                                               URI: to.StringPtr(string(imageID)),
                                        },
                                        Vhd: &compute.VirtualHardDisk{
-                                               URI: &instance_vhd,
+                                               URI: &instanceVhd,
                                        },
                                },
                        },
@@ -401,13 +403,13 @@ echo '%s-%s' > /home/crunch/node-token`, name, newTags["node-token"])))
                        },
                        OsProfile: &compute.OSProfile{
                                ComputerName:  &name,
-                               AdminUsername: to.StringPtr("crunch"),
+                               AdminUsername: to.StringPtr(az.azconfig.AdminUsername),
                                LinuxConfiguration: &compute.LinuxConfiguration{
                                        DisablePasswordAuthentication: to.BoolPtr(true),
                                        SSH: &compute.SSHConfiguration{
                                                PublicKeys: &[]compute.SSHPublicKey{
-                                                       compute.SSHPublicKey{
-                                                               Path:    to.StringPtr("/home/crunch/.ssh/authorized_keys"),
+                                                       {
+                                                               Path:    to.StringPtr("/home/" + az.azconfig.AdminUsername + "/.ssh/authorized_keys"),
                                                                KeyData: to.StringPtr(string(ssh.MarshalAuthorizedKey(publicKey))),
                                                        },
                                                },
@@ -418,40 +420,40 @@ echo '%s-%s' > /home/crunch/node-token`, name, newTags["node-token"])))
                },
        }
 
-       vm, err := az.vmClient.CreateOrUpdate(az.ctx, az.azconfig.ResourceGroup, name, vmParameters)
+       vm, err := az.vmClient.createOrUpdate(az.ctx, az.azconfig.ResourceGroup, name, vmParameters)
        if err != nil {
-               return nil, WrapAzureError(err)
+               return nil, wrapAzureError(err)
        }
 
-       return &AzureInstance{
+       return &azureInstance{
                provider: az,
                nic:      nic,
                vm:       vm,
        }, nil
 }
 
-func (az *AzureInstanceSet) Instances(InstanceTags) ([]Instance, error) {
+func (az *azureInstanceSet) Instances(cloud.InstanceTags) ([]cloud.Instance, error) {
        az.stopWg.Add(1)
        defer az.stopWg.Done()
 
-       interfaces, err := az.ManageNics()
+       interfaces, err := az.manageNics()
        if err != nil {
                return nil, err
        }
 
-       result, err := az.vmClient.ListComplete(az.ctx, az.azconfig.ResourceGroup)
+       result, err := az.vmClient.listComplete(az.ctx, az.azconfig.ResourceGroup)
        if err != nil {
-               return nil, WrapAzureError(err)
+               return nil, wrapAzureError(err)
        }
 
-       instances := make([]Instance, 0)
+       instances := make([]cloud.Instance, 0)
 
        for ; result.NotDone(); err = result.Next() {
                if err != nil {
-                       return nil, WrapAzureError(err)
+                       return nil, wrapAzureError(err)
                }
                if strings.HasPrefix(*result.Value().Name, az.namePrefix) {
-                       instances = append(instances, &AzureInstance{
+                       instances = append(instances, &azureInstance{
                                provider: az,
                                vm:       result.Value(),
                                nic:      interfaces[*(*result.Value().NetworkProfile.NetworkInterfaces)[0].ID]})
@@ -465,13 +467,13 @@ func (az *AzureInstanceSet) Instances(InstanceTags) ([]Instance, error) {
 // not associated with a virtual machine and have a "created-at" time
 // more than DeleteDanglingResourcesAfter (to prevent racing and
 // deleting newly created NICs) in the past are deleted.
-func (az *AzureInstanceSet) ManageNics() (map[string]network.Interface, error) {
+func (az *azureInstanceSet) manageNics() (map[string]network.Interface, error) {
        az.stopWg.Add(1)
        defer az.stopWg.Done()
 
-       result, err := az.netClient.ListComplete(az.ctx, az.azconfig.ResourceGroup)
+       result, err := az.netClient.listComplete(az.ctx, az.azconfig.ResourceGroup)
        if err != nil {
-               return nil, WrapAzureError(err)
+               return nil, wrapAzureError(err)
        }
 
        interfaces := make(map[string]network.Interface)
@@ -487,10 +489,10 @@ func (az *AzureInstanceSet) ManageNics() (map[string]network.Interface, error) {
                                interfaces[*result.Value().ID] = result.Value()
                        } else {
                                if result.Value().Tags["created-at"] != nil {
-                                       created_at, err := time.Parse(time.RFC3339Nano, *result.Value().Tags["created-at"])
+                                       createdAt, err := time.Parse(time.RFC3339Nano, *result.Value().Tags["created-at"])
                                        if err == nil {
-                                               if timestamp.Sub(created_at).Seconds() > az.azconfig.DeleteDanglingResourcesAfter {
-                                                       az.logger.Printf("Will delete %v because it is older than %s", *result.Value().Name, az.azconfig.DeleteDanglingResourcesAfter)
+                                               if timestamp.Sub(createdAt) > az.azconfig.DeleteDanglingResourcesAfter.Duration() {
+                                                       az.logger.Printf("Will delete %v because it is older than %s", *result.Value().Name, az.azconfig.DeleteDanglingResourcesAfter)
                                                        az.deleteNIC <- *result.Value().Name
                                                }
                                        }
@@ -506,7 +508,7 @@ func (az *AzureInstanceSet) ManageNics() (map[string]network.Interface, error) {
 // have "namePrefix", are "available" (which means they are not
 // leased to a VM) and haven't been modified for longer than
 // DeleteDanglingResourcesAfter.
-func (az *AzureInstanceSet) ManageBlobs() {
+func (az *azureInstanceSet) manageBlobs() {
        result, err := az.storageAcctClient.ListKeys(az.ctx, az.azconfig.ResourceGroup, az.azconfig.StorageAccount)
        if err != nil {
                az.logger.WithError(err).Warn("Couldn't get account keys")
@@ -537,7 +539,7 @@ func (az *AzureInstanceSet) ManageBlobs() {
                        if b.Properties.BlobType == storage.BlobTypePage &&
                                b.Properties.LeaseState == "available" &&
                                b.Properties.LeaseStatus == "unlocked" &&
-                               age.Seconds() > az.azconfig.DeleteDanglingResourcesAfter {
+                               age.Seconds() > az.azconfig.DeleteDanglingResourcesAfter.Duration().Seconds() {
 
                                az.logger.Printf("Blob %v is unlocked and not modified for %v seconds, will delete", b.Name, age.Seconds())
                                az.deleteBlob <- b
@@ -551,32 +553,32 @@ func (az *AzureInstanceSet) ManageBlobs() {
        }
 }
 
-func (az *AzureInstanceSet) Stop() {
+func (az *azureInstanceSet) Stop() {
        az.stopFunc()
        az.stopWg.Wait()
        close(az.deleteNIC)
        close(az.deleteBlob)
 }
 
-type AzureInstance struct {
-       provider *AzureInstanceSet
+type azureInstance struct {
+       provider *azureInstanceSet
        nic      network.Interface
        vm       compute.VirtualMachine
 }
 
-func (ai *AzureInstance) ID() InstanceID {
-       return InstanceID(*ai.vm.ID)
+func (ai *azureInstance) ID() cloud.InstanceID {
+       return cloud.InstanceID(*ai.vm.ID)
 }
 
-func (ai *AzureInstance) String() string {
+func (ai *azureInstance) String() string {
        return *ai.vm.Name
 }
 
-func (ai *AzureInstance) ProviderType() string {
+func (ai *azureInstance) ProviderType() string {
        return string(ai.vm.VirtualMachineProperties.HardwareProfile.VMSize)
 }
 
-func (ai *AzureInstance) SetTags(newTags InstanceTags) error {
+func (ai *azureInstance) SetTags(newTags cloud.InstanceTags) error {
        ai.provider.stopWg.Add(1)
        defer ai.provider.stopWg.Done()
 
@@ -596,16 +598,16 @@ func (ai *AzureInstance) SetTags(newTags InstanceTags) error {
                Location: &ai.provider.azconfig.Location,
                Tags:     tags,
        }
-       vm, err := ai.provider.vmClient.CreateOrUpdate(ai.provider.ctx, ai.provider.azconfig.ResourceGroup, *ai.vm.Name, vmParameters)
+       vm, err := ai.provider.vmClient.createOrUpdate(ai.provider.ctx, ai.provider.azconfig.ResourceGroup, *ai.vm.Name, vmParameters)
        if err != nil {
-               return WrapAzureError(err)
+               return wrapAzureError(err)
        }
        ai.vm = vm
 
        return nil
 }
 
-func (ai *AzureInstance) Tags() InstanceTags {
+func (ai *azureInstance) Tags() cloud.InstanceTags {
        tags := make(map[string]string)
 
        for k, v := range ai.vm.Tags {
@@ -617,76 +619,22 @@ func (ai *AzureInstance) Tags() InstanceTags {
        return tags
 }
 
-func (ai *AzureInstance) Destroy() error {
+func (ai *azureInstance) Destroy() error {
        ai.provider.stopWg.Add(1)
        defer ai.provider.stopWg.Done()
 
-       _, err := ai.provider.vmClient.Delete(ai.provider.ctx, ai.provider.azconfig.ResourceGroup, *ai.vm.Name)
-       return WrapAzureError(err)
+       _, err := ai.provider.vmClient.delete(ai.provider.ctx, ai.provider.azconfig.ResourceGroup, *ai.vm.Name)
+       return wrapAzureError(err)
 }
 
-func (ai *AzureInstance) Address() string {
+func (ai *azureInstance) Address() string {
        return *(*ai.nic.IPConfigurations)[0].PrivateIPAddress
 }
 
-func (ai *AzureInstance) VerifyHostKey(receivedKey ssh.PublicKey, client *ssh.Client) error {
-       ai.provider.stopWg.Add(1)
-       defer ai.provider.stopWg.Done()
-
-       remoteFingerprint := ssh.FingerprintSHA256(receivedKey)
-
-       tags := ai.Tags()
-
-       tg := tags["ssh-pubkey-fingerprint"]
-       if tg != "" {
-               if remoteFingerprint == tg {
-                       return nil
-               } else {
-                       return fmt.Errorf("Key fingerprint did not match, expected %q got %q", tg, remoteFingerprint)
-               }
-       }
-
-       nodetokenTag := tags["node-token"]
-       if nodetokenTag == "" {
-               return fmt.Errorf("Missing node token tag")
-       }
-
-       sess, err := client.NewSession()
-       if err != nil {
-               return err
-       }
-
-       nodetokenbytes, err := sess.Output("cat /home/crunch/node-token")
-       if err != nil {
-               return err
-       }
-
-       nodetoken := strings.TrimSpace(string(nodetokenbytes))
-
-       expectedToken := fmt.Sprintf("%s-%s", *ai.vm.Name, nodetokenTag)
-
-       if strings.TrimSpace(nodetoken) != expectedToken {
-               return fmt.Errorf("Node token did not match, expected %q got %q", expectedToken, nodetoken)
-       }
-
-       sess, err = client.NewSession()
-       if err != nil {
-               return err
-       }
-
-       keyfingerprintbytes, err := sess.Output("ssh-keygen -E sha256 -l -f /etc/ssh/ssh_host_rsa_key.pub")
-       if err != nil {
-               return err
-       }
-
-       sp := strings.Split(string(keyfingerprintbytes), " ")
-
-       if remoteFingerprint != sp[1] {
-               return fmt.Errorf("Key fingerprint did not match, expected %q got %q", sp[1], remoteFingerprint)
-       }
+func (ai *azureInstance) RemoteUser() string {
+       return ai.provider.azconfig.AdminUsername
+}
 
-       tags["ssh-pubkey-fingerprint"] = sp[1]
-       delete(tags, "node-token")
-       ai.SetTags(tags)
-       return nil
+func (ai *azureInstance) VerifyHostKey(ssh.PublicKey, *ssh.Client) error {
+       return cloud.ErrNotImplemented
 }
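
For reference, the wrapped error types above exist so that generic dispatcher code can react to Azure throttling and quota exhaustion without importing this package; the test suite below asserts exactly that by type-asserting against cloud.RateLimitError and cloud.QuotaError. A minimal caller-side sketch (the helper name handleCreateError and the sleep-based backoff policy are illustrative assumptions, not part of this commit):

package example // illustrative sketch only

import (
	"time"

	"git.curoverse.com/arvados.git/lib/cloud"
)

// handleCreateError (hypothetical) reacts to an error returned by
// InstanceSet.Create after it has passed through wrapAzureError.
func handleCreateError(err error) {
	if rl, ok := err.(cloud.RateLimitError); ok {
		// Azure asked us to slow down; EarliestRetry carries the
		// deadline computed in wrapAzureError (20s by default).
		time.Sleep(time.Until(rl.EarliestRetry()))
		return
	}
	if qe, ok := err.(cloud.QuotaError); ok && qe.IsQuotaError() {
		// Quota exhausted: retrying immediately cannot succeed,
		// so pause instance creation until capacity is released.
		return
	}
	// Any other error is passed through by wrapAzureError unchanged.
}
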
similarity index 62%
rename from lib/cloud/azure_test.go
rename to lib/cloud/azure/azure_test.go
index f74688bb180c98cb867eba92ce619a6f0f21b30e..61649c39800d7ad8e31252313285e0e85fe7b16c 100644 (file)
@@ -3,31 +3,35 @@
 // SPDX-License-Identifier: AGPL-3.0
 //
 //
-// How to manually run individual tests against the real cloud
+// How to manually run individual tests against the real cloud:
 //
-// $ go test -v git.curoverse.com/arvados.git/lib/cloud -live-azure-cfg azconfig.yml -check.f=TestListInstances
+// $ go test -v git.curoverse.com/arvados.git/lib/cloud/azure -live-azure-cfg azconfig.yml -check.f=TestCreate
+//
+// Tests should be run individually and in the order they are listed in the file (later tests use resources created by earlier ones).
 //
 // Example azconfig.yml:
 //
-// subscription_id: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
-// key: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
-// region: centralus
-// cloud_environment: AzurePublicCloud
-// secret: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
-// tenant_id: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
-// resource_group: zzzzz
-// network: zzzzz
-// subnet: zzzzz-subnet-private
-// storage_account: example
-// blob_container: vhds
-// image: "https://example.blob.core.windows.net/system/Microsoft.Compute/Images/images/zzzzz-compute-osDisk.XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX.vhd"
-// delete_dangling_resources_after: 20
-// authorized_key: "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDLQS1ExT2+WjA0d/hntEAyAtgeN1W2ik2QX8c2zO6HjlPHWXL92r07W0WMuDib40Pcevpi1BXeBWXA9ZB5KKMJB+ukaAu22KklnQuUmNvk6ZXnPKSkGxuCYvPQb08WhHf3p1VxiKfP3iauedBDM4x9/bkJohlBBQiFXzNUcQ+a6rKiMzmJN2gbL8ncyUzc+XQ5q4JndTwTGtOlzDiGOc9O4z5Dd76wtAVJneOuuNpwfFRVHThpJM6VThpCZOnl8APaceWXKeuwOuCae3COZMz++xQfxOfZ9Z8aIwo+TlQhsRaNfZ4Vjrop6ej8dtfZtgUFKfbXEOYaHrGrWGotFDTD example@example"
-
-package cloud
+// ImageIDForTestSuite: "https://example.blob.core.windows.net/system/Microsoft.Compute/Images/images/zzzzz-compute-osDisk.XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX.vhd"
+// DriverParameters:
+//      SubscriptionID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+//      ClientID: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+//      Location: centralus
+//      CloudEnvironment: AzurePublicCloud
+//      ClientSecret: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
+//      TenantId: XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
+//      ResourceGroup: zzzzz
+//      Network: zzzzz
+//      Subnet: zzzzz-subnet-private
+//      StorageAccount: example
+//      BlobContainer: vhds
+//      DeleteDanglingResourcesAfter: 20s
+//      AdminUsername: crunch
+
+package azure
 
 import (
        "context"
+       "encoding/json"
        "errors"
        "flag"
        "io/ioutil"
@@ -35,8 +39,10 @@ import (
        "net"
        "net/http"
        "os"
+       "testing"
        "time"
 
+       "git.curoverse.com/arvados.git/lib/cloud"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/config"
        "github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2018-06-01/compute"
@@ -45,21 +51,25 @@ import (
        "github.com/Azure/go-autorest/autorest"
        "github.com/Azure/go-autorest/autorest/azure"
        "github.com/Azure/go-autorest/autorest/to"
-       "github.com/jmcvetta/randutil"
        "github.com/sirupsen/logrus"
        "golang.org/x/crypto/ssh"
        check "gopkg.in/check.v1"
 )
 
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       check.TestingT(t)
+}
+
 type AzureInstanceSetSuite struct{}
 
 var _ = check.Suite(&AzureInstanceSetSuite{})
 
 type VirtualMachinesClientStub struct{}
 
-var testKey []byte = []byte(`ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDLQS1ExT2+WjA0d/hntEAyAtgeN1W2ik2QX8c2zO6HjlPHWXL92r07W0WMuDib40Pcevpi1BXeBWXA9ZB5KKMJB+ukaAu22KklnQuUmNvk6ZXnPKSkGxuCYvPQb08WhHf3p1VxiKfP3iauedBDM4x9/bkJohlBBQiFXzNUcQ+a6rKiMzmJN2gbL8ncyUzc+XQ5q4JndTwTGtOlzDiGOc9O4z5Dd76wtAVJneOuuNpwfFRVHThpJM6VThpCZOnl8APaceWXKeuwOuCae3COZMz++xQfxOfZ9Z8aIwo+TlQhsRaNfZ4Vjrop6ej8dtfZtgUFKfbXEOYaHrGrWGotFDTD example@example`)
+var testKey = []byte(`ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDLQS1ExT2+WjA0d/hntEAyAtgeN1W2ik2QX8c2zO6HjlPHWXL92r07W0WMuDib40Pcevpi1BXeBWXA9ZB5KKMJB+ukaAu22KklnQuUmNvk6ZXnPKSkGxuCYvPQb08WhHf3p1VxiKfP3iauedBDM4x9/bkJohlBBQiFXzNUcQ+a6rKiMzmJN2gbL8ncyUzc+XQ5q4JndTwTGtOlzDiGOc9O4z5Dd76wtAVJneOuuNpwfFRVHThpJM6VThpCZOnl8APaceWXKeuwOuCae3COZMz++xQfxOfZ9Z8aIwo+TlQhsRaNfZ4Vjrop6ej8dtfZtgUFKfbXEOYaHrGrWGotFDTD example@example`)
 
-func (*VirtualMachinesClientStub) CreateOrUpdate(ctx context.Context,
+func (*VirtualMachinesClientStub) createOrUpdate(ctx context.Context,
        resourceGroupName string,
        VMName string,
        parameters compute.VirtualMachine) (result compute.VirtualMachine, err error) {
@@ -68,17 +78,17 @@ func (*VirtualMachinesClientStub) CreateOrUpdate(ctx context.Context,
        return parameters, nil
 }
 
-func (*VirtualMachinesClientStub) Delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
+func (*VirtualMachinesClientStub) delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
        return nil, nil
 }
 
-func (*VirtualMachinesClientStub) ListComplete(ctx context.Context, resourceGroupName string) (result compute.VirtualMachineListResultIterator, err error) {
+func (*VirtualMachinesClientStub) listComplete(ctx context.Context, resourceGroupName string) (result compute.VirtualMachineListResultIterator, err error) {
        return compute.VirtualMachineListResultIterator{}, nil
 }
 
 type InterfacesClientStub struct{}
 
-func (*InterfacesClientStub) CreateOrUpdate(ctx context.Context,
+func (*InterfacesClientStub) createOrUpdate(ctx context.Context,
        resourceGroupName string,
        nicName string,
        parameters network.Interface) (result network.Interface, err error) {
@@ -87,17 +97,22 @@ func (*InterfacesClientStub) CreateOrUpdate(ctx context.Context,
        return parameters, nil
 }
 
-func (*InterfacesClientStub) Delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
+func (*InterfacesClientStub) delete(ctx context.Context, resourceGroupName string, VMName string) (result *http.Response, err error) {
        return nil, nil
 }
 
-func (*InterfacesClientStub) ListComplete(ctx context.Context, resourceGroupName string) (result network.InterfaceListResultIterator, err error) {
+func (*InterfacesClientStub) listComplete(ctx context.Context, resourceGroupName string) (result network.InterfaceListResultIterator, err error) {
        return network.InterfaceListResultIterator{}, nil
 }
 
+type testConfig struct {
+       ImageIDForTestSuite string
+       DriverParameters    json.RawMessage
+}
+
 var live = flag.String("live-azure-cfg", "", "Test with real azure API, provide config file")
 
-func GetInstanceSet() (InstanceSet, ImageID, arvados.Cluster, error) {
+func GetInstanceSet() (cloud.InstanceSet, cloud.ImageID, arvados.Cluster, error) {
        cluster := arvados.Cluster{
                InstanceTypes: arvados.InstanceTypeMap(map[string]arvados.InstanceType{
                        "tiny": arvados.InstanceType{
@@ -111,29 +126,29 @@ func GetInstanceSet() (InstanceSet, ImageID, arvados.Cluster, error) {
                        },
                })}
        if *live != "" {
-               cfg := make(map[string]interface{})
-               err := config.LoadFile(&cfg, *live)
+               var exampleCfg testConfig
+               err := config.LoadFile(&exampleCfg, *live)
                if err != nil {
-                       return nil, ImageID(""), cluster, err
+                       return nil, cloud.ImageID(""), cluster, err
                }
-               ap, err := NewAzureInstanceSet(cfg, "test123", logrus.StandardLogger())
-               return ap, ImageID(cfg["image"].(string)), cluster, err
-       } else {
-               ap := AzureInstanceSet{
-                       azconfig: AzureInstanceSetConfig{
-                               BlobContainer: "vhds",
-                       },
-                       dispatcherID: "test123",
-                       namePrefix:   "compute-test123-",
-                       logger:       logrus.StandardLogger(),
-                       deleteNIC:    make(chan string),
-                       deleteBlob:   make(chan storage.Blob),
-               }
-               ap.ctx, ap.stopFunc = context.WithCancel(context.Background())
-               ap.vmClient = &VirtualMachinesClientStub{}
-               ap.netClient = &InterfacesClientStub{}
-               return &ap, ImageID("blob"), cluster, nil
+
+               ap, err := newAzureInstanceSet(exampleCfg.DriverParameters, "test123", logrus.StandardLogger())
+               return ap, cloud.ImageID(exampleCfg.ImageIDForTestSuite), cluster, err
        }
+       ap := azureInstanceSet{
+               azconfig: azureInstanceSetConfig{
+                       BlobContainer: "vhds",
+               },
+               dispatcherID: "test123",
+               namePrefix:   "compute-test123-",
+               logger:       logrus.StandardLogger(),
+               deleteNIC:    make(chan string),
+               deleteBlob:   make(chan storage.Blob),
+       }
+       ap.ctx, ap.stopFunc = context.WithCancel(context.Background())
+       ap.vmClient = &VirtualMachinesClientStub{}
+       ap.netClient = &InterfacesClientStub{}
+       return &ap, cloud.ImageID("blob"), cluster, nil
 }
 
 func (*AzureInstanceSetSuite) TestCreate(c *check.C) {
@@ -145,18 +160,16 @@ func (*AzureInstanceSetSuite) TestCreate(c *check.C) {
        pk, _, _, _, err := ssh.ParseAuthorizedKey(testKey)
        c.Assert(err, check.IsNil)
 
-       nodetoken, err := randutil.String(40, "abcdefghijklmnopqrstuvwxyz0123456789")
-       c.Assert(err, check.IsNil)
-
        inst, err := ap.Create(cluster.InstanceTypes["tiny"],
                img, map[string]string{
-                       "node-token": nodetoken},
-               pk)
+                       "TestTagName": "test tag value",
+               }, "umask 0600; echo -n test-file-data >/var/run/test-file", pk)
 
        c.Assert(err, check.IsNil)
 
-       tg := inst.Tags()
-       log.Printf("Result %v %v %v", inst.String(), inst.Address(), tg)
+       tags := inst.Tags()
+       c.Check(tags["TestTagName"], check.Equals, "test tag value")
+       c.Logf("inst.String()=%v Address()=%v Tags()=%v", inst.String(), inst.Address(), tags)
 
 }
 
@@ -182,7 +195,7 @@ func (*AzureInstanceSetSuite) TestManageNics(c *check.C) {
                c.Fatal("Error making provider", err)
        }
 
-       ap.(*AzureInstanceSet).ManageNics()
+       ap.(*azureInstanceSet).manageNics()
        ap.Stop()
 }
 
@@ -192,7 +205,7 @@ func (*AzureInstanceSetSuite) TestManageBlobs(c *check.C) {
                c.Fatal("Error making provider", err)
        }
 
-       ap.(*AzureInstanceSet).ManageBlobs()
+       ap.(*azureInstanceSet).manageBlobs()
        ap.Stop()
 }
 
@@ -216,7 +229,7 @@ func (*AzureInstanceSetSuite) TestDeleteFake(c *check.C) {
                c.Fatal("Error making provider", err)
        }
 
-       _, err = ap.(*AzureInstanceSet).netClient.Delete(context.Background(), "fakefakefake", "fakefakefake")
+       _, err = ap.(*azureInstanceSet).netClient.delete(context.Background(), "fakefakefake", "fakefakefake")
 
        de, ok := err.(autorest.DetailedError)
        if ok {
@@ -238,8 +251,8 @@ func (*AzureInstanceSetSuite) TestWrapError(c *check.C) {
                        ServiceError: &azure.ServiceError{},
                },
        }
-       wrapped := WrapAzureError(retryError)
-       _, ok := wrapped.(RateLimitError)
+       wrapped := wrapAzureError(retryError)
+       _, ok := wrapped.(cloud.RateLimitError)
        c.Check(ok, check.Equals, true)
 
        quotaError := autorest.DetailedError{
@@ -254,8 +267,8 @@ func (*AzureInstanceSetSuite) TestWrapError(c *check.C) {
                        },
                },
        }
-       wrapped = WrapAzureError(quotaError)
-       _, ok = wrapped.(QuotaError)
+       wrapped = wrapAzureError(quotaError)
+       _, ok = wrapped.(cloud.QuotaError)
        c.Check(ok, check.Equals, true)
 }
 
@@ -291,23 +304,26 @@ func (*AzureInstanceSetSuite) TestSSH(c *check.C) {
        c.Assert(err, check.IsNil)
 
        if len(l) > 0 {
-
                sshclient, err := SetupSSHClient(c, l[0])
                c.Assert(err, check.IsNil)
+               defer sshclient.Conn.Close()
 
                sess, err := sshclient.NewSession()
                c.Assert(err, check.IsNil)
-
-               out, err := sess.Output("cat /home/crunch/node-token")
+               defer sess.Close()
+               _, err = sess.Output("find /var/run/test-file -maxdepth 0 -user root -perm 0600")
                c.Assert(err, check.IsNil)
 
-               log.Printf("%v", string(out))
-
-               sshclient.Conn.Close()
+               sess, err = sshclient.NewSession()
+               c.Assert(err, check.IsNil)
+               defer sess.Close()
+               out, err := sess.Output("sudo cat /var/run/test-file")
+               c.Assert(err, check.IsNil)
+               c.Check(string(out), check.Equals, "test-file-data")
        }
 }
 
-func SetupSSHClient(c *check.C, inst Instance) (*ssh.Client, error) {
+func SetupSSHClient(c *check.C, inst cloud.Instance) (*ssh.Client, error) {
        addr := inst.Address() + ":2222"
        if addr == "" {
                return nil, errors.New("instance has no address")
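
One connection worth spelling out: TestCreate's init command and TestSSH's file checks are two ends of the new InitCommand mechanism. The driver wraps the command in a shell script and ships it as base64-encoded CustomData (see the customData computation in azure.go earlier in this diff); assuming the compute image processes CustomData at boot (e.g. via cloud-init), the script runs as root and leaves behind the file that TestSSH later inspects. A condensed sketch of the encoding step:

package example // illustrative sketch only

import (
	"encoding/base64"
	"fmt"

	"git.curoverse.com/arvados.git/lib/cloud"
)

func main() {
	// The same InitCommand the test passes to Create().
	cmd := cloud.InitCommand("umask 0600; echo -n test-file-data >/var/run/test-file")
	// Mirrors the driver's customData computation: a one-line
	// shell script, base64-encoded for the Azure CustomData field.
	payload := base64.StdEncoding.EncodeToString([]byte("#!/bin/sh\n" + string(cmd) + "\n"))
	fmt.Println(payload)
}
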
diff --git a/lib/cloud/gocheck_test.go b/lib/cloud/gocheck_test.go
deleted file mode 100644 (file)
index d839268..0000000
--- a/lib/cloud/gocheck_test.go
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package cloud
-
-import (
-       "testing"
-
-       check "gopkg.in/check.v1"
-)
-
-// Gocheck boilerplate
-func Test(t *testing.T) {
-       check.TestingT(t)
-}
index 969a4bc2ddeb4b6389bf80912f9826009a56400c..792e737a914a1ce7d39d98c05c1a9428e77fb1ff 100644 (file)
@@ -5,6 +5,8 @@
 package cloud
 
 import (
+       "encoding/json"
+       "errors"
        "io"
        "time"
 
@@ -56,17 +58,25 @@ type Executor interface {
        Execute(cmd string, stdin io.Reader) (stdout, stderr []byte, err error)
 }
 
+var ErrNotImplemented = errors.New("not implemented")
+
 // An ExecutorTarget is a remote command execution service.
 type ExecutorTarget interface {
        // SSH server hostname or IP address, or empty string if
        // unknown while instance is booting.
        Address() string
 
+       // Remote username to send during SSH authentication.
+       RemoteUser() string
+
        // Return nil if the given public key matches the instance's
        // SSH server key. If the provided Dialer is not nil,
        // VerifyHostKey can use it to make outgoing network
        // connections from the instance -- e.g., to use the cloud's
        // "this instance's metadata" API.
+       //
+       // Return ErrNotImplemented if no verification mechanism is
+       // available.
        VerifyHostKey(ssh.PublicKey, *ssh.Client) error
 }
 
@@ -101,12 +111,18 @@ type Instance interface {
 // All public methods of an InstanceSet, and all public methods of the
 // instances it returns, are goroutine safe.
 type InstanceSet interface {
-       // Create a new instance. If supported by the driver, add the
+       // Create a new instance with the given type, image, and
+       // initial set of tags. If supported by the driver, add the
        // provided public key to /root/.ssh/authorized_keys.
        //
+       // The given InitCommand should be executed on the newly
+       // created instance. This is optional for a driver whose
+       // instances' VerifyHostKey() method never returns
+       // ErrNotImplemented. InitCommand will be under 1 KiB.
+       //
        // The returned error should implement RateLimitError and
        // QuotaError where applicable.
-       Create(arvados.InstanceType, ImageID, InstanceTags, ssh.PublicKey) (Instance, error)
+       Create(arvados.InstanceType, ImageID, InstanceTags, InitCommand, ssh.PublicKey) (Instance, error)
 
        // Return all instances, including ones that are booting or
        // shutting down. Optionally, filter out nodes that don't have
@@ -124,6 +140,8 @@ type InstanceSet interface {
        Stop()
 }
 
+type InitCommand string
+
 // A Driver returns an InstanceSet that uses the given InstanceSetID
 // and driver-dependent configuration parameters.
 //
@@ -153,9 +171,9 @@ type InstanceSet interface {
 //
 //     type exampleDriver struct {}
 //
-//     func (*exampleDriver) InstanceSet(config map[string]interface{}, id InstanceSetID) (InstanceSet, error) {
+//     func (*exampleDriver) InstanceSet(config json.RawMessage, id InstanceSetID) (InstanceSet, error) {
 //             var is exampleInstanceSet
-//             if err := mapstructure.Decode(config, &is); err != nil {
+//             if err := json.Unmarshal(config, &is); err != nil {
 //                     return nil, err
 //             }
 //             is.ownID = id
@@ -164,17 +182,17 @@ type InstanceSet interface {
 //
 //     var _ = registerCloudDriver("example", &exampleDriver{})
 type Driver interface {
-       InstanceSet(config map[string]interface{}, id InstanceSetID, logger logrus.FieldLogger) (InstanceSet, error)
+       InstanceSet(config json.RawMessage, id InstanceSetID, logger logrus.FieldLogger) (InstanceSet, error)
 }
 
 // DriverFunc makes a Driver using the provided function as its
 // InstanceSet method. This is similar to http.HandlerFunc.
-func DriverFunc(fn func(config map[string]interface{}, id InstanceSetID, logger logrus.FieldLogger) (InstanceSet, error)) Driver {
+func DriverFunc(fn func(config json.RawMessage, id InstanceSetID, logger logrus.FieldLogger) (InstanceSet, error)) Driver {
        return driverFunc(fn)
 }
 
-type driverFunc func(config map[string]interface{}, id InstanceSetID, logger logrus.FieldLogger) (InstanceSet, error)
+type driverFunc func(config json.RawMessage, id InstanceSetID, logger logrus.FieldLogger) (InstanceSet, error)
 
-func (df driverFunc) InstanceSet(config map[string]interface{}, id InstanceSetID, logger logrus.FieldLogger) (InstanceSet, error) {
+func (df driverFunc) InstanceSet(config json.RawMessage, id InstanceSetID, logger logrus.FieldLogger) (InstanceSet, error) {
        return df(config, id, logger)
 }
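
The doc comment above implements a Driver as a struct type; DriverFunc covers the common case where the driver carries no state of its own. A minimal sketch mirroring the doc comment's example (exampleInstanceSet and ownID are the doc comment's own placeholders; the interface embedding is only there to keep the sketch self-contained):

package example // illustrative sketch only

import (
	"encoding/json"

	"git.curoverse.com/arvados.git/lib/cloud"
	"github.com/sirupsen/logrus"
)

type exampleInstanceSet struct {
	cloud.InstanceSet // embedded only so the sketch satisfies the interface
	ownID             cloud.InstanceSetID
}

var exampleDriver = cloud.DriverFunc(func(config json.RawMessage, id cloud.InstanceSetID, logger logrus.FieldLogger) (cloud.InstanceSet, error) {
	var is exampleInstanceSet
	if err := json.Unmarshal(config, &is); err != nil {
		return nil, err
	}
	is.ownID = id
	return &is, nil
})
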
index 94eb2580bd759c9c781dc2346353440124207899..c1d4657ba47b7801b63bad0222e4a2df71f7881d 100644 (file)
@@ -5,6 +5,8 @@
 package controller
 
 import (
+       "context"
+
        "git.curoverse.com/arvados.git/lib/cmd"
        "git.curoverse.com/arvados.git/lib/service"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
@@ -12,6 +14,6 @@ import (
 
 var Command cmd.Handler = service.Command(arvados.ServiceNameController, newHandler)
 
-func newHandler(cluster *arvados.Cluster, np *arvados.NodeProfile) service.Handler {
+func newHandler(_ context.Context, cluster *arvados.Cluster, np *arvados.NodeProfile) service.Handler {
        return &Handler{Cluster: cluster, NodeProfile: np}
 }
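
Both this factory and the dispatchcloud one below now take a context as their first argument, which implies the callback type accepted by service.Command changed in step. A sketch of the presumed shape, inferred from the two newHandler implementations in this commit (the type name NewHandlerFunc is an assumption; lib/service itself is not shown in this diff):

package example // illustrative sketch only

import (
	"context"

	"git.curoverse.com/arvados.git/lib/service"
	"git.curoverse.com/arvados.git/sdk/go/arvados"
)

// Presumed callback shape expected by service.Command after this
// change; both newHandler call sites in this commit match it.
type NewHandlerFunc func(ctx context.Context, cluster *arvados.Cluster, np *arvados.NodeProfile) service.Handler
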
index c935e20be6b29d90577640d643d74c73fa75c84e..62916acd2ac10be14d90d4e02e2703e77949e32b 100644 (file)
@@ -19,6 +19,7 @@ import (
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
        "github.com/sirupsen/logrus"
@@ -29,7 +30,7 @@ import (
 var _ = check.Suite(&FederationSuite{})
 
 type FederationSuite struct {
-       log *logrus.Logger
+       log logrus.FieldLogger
        // testServer and testHandler are the controller being tested,
        // "zhome".
        testServer  *httpserver.Server
@@ -44,9 +45,7 @@ type FederationSuite struct {
 }
 
 func (s *FederationSuite) SetUpTest(c *check.C) {
-       s.log = logrus.New()
-       s.log.Formatter = &logrus.JSONFormatter{}
-       s.log.Out = &logWriter{c.Log}
+       s.log = ctxlog.TestLogger(c)
 
        s.remoteServer = newServerFromIntegrationTestEnv(c)
        c.Assert(s.remoteServer.Start(), check.IsNil)
@@ -555,16 +554,20 @@ func (s *FederationSuite) TestGetRemoteContainerRequest(c *check.C) {
 
 func (s *FederationSuite) TestUpdateRemoteContainerRequest(c *check.C) {
        defer s.localServiceReturns404(c).Close()
-       req := httptest.NewRequest("PATCH", "/arvados/v1/container_requests/"+arvadostest.QueuedContainerRequestUUID,
-               strings.NewReader(`{"container_request": {"priority": 696}}`))
-       req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
-       req.Header.Set("Content-type", "application/json")
-       resp := s.testRequest(req)
-       c.Check(resp.StatusCode, check.Equals, http.StatusOK)
-       var cr arvados.ContainerRequest
-       c.Check(json.NewDecoder(resp.Body).Decode(&cr), check.IsNil)
-       c.Check(cr.UUID, check.Equals, arvadostest.QueuedContainerRequestUUID)
-       c.Check(cr.Priority, check.Equals, 696)
+       setPri := func(pri int) {
+               req := httptest.NewRequest("PATCH", "/arvados/v1/container_requests/"+arvadostest.QueuedContainerRequestUUID,
+                       strings.NewReader(fmt.Sprintf(`{"container_request": {"priority": %d}}`, pri)))
+               req.Header.Set("Authorization", "Bearer "+arvadostest.ActiveToken)
+               req.Header.Set("Content-type", "application/json")
+               resp := s.testRequest(req)
+               c.Check(resp.StatusCode, check.Equals, http.StatusOK)
+               var cr arvados.ContainerRequest
+               c.Check(json.NewDecoder(resp.Body).Decode(&cr), check.IsNil)
+               c.Check(cr.UUID, check.Equals, arvadostest.QueuedContainerRequestUUID)
+               c.Check(cr.Priority, check.Equals, pri)
+       }
+       setPri(696)
+       setPri(1) // Reset fixture so side effect doesn't break other tests.
 }
 
 func (s *FederationSuite) TestCreateRemoteContainerRequest(c *check.C) {
index 295dde7ca42821b1c8f904eec42ac7e7764812fa..53125ae5543b51287e5de80a8b442f2002972a86 100644 (file)
@@ -80,12 +80,10 @@ func (h *Handler) setup() {
        h.handlerStack = mux
 
        sc := *arvados.DefaultSecureClient
-       sc.Timeout = time.Duration(h.Cluster.HTTPRequestTimeout)
        sc.CheckRedirect = neverRedirect
        h.secureClient = &sc
 
        ic := *arvados.InsecureHTTPClient
-       ic.Timeout = time.Duration(h.Cluster.HTTPRequestTimeout)
        ic.CheckRedirect = neverRedirect
        h.insecureClient = &ic
 
index f11228a31350b93f2da70a7b5ab46b8926a47b06..dfe60d90a5f3119909658149b1017f3b782515f3 100644 (file)
@@ -5,6 +5,7 @@
 package controller
 
 import (
+       "context"
        "encoding/json"
        "net/http"
        "net/http/httptest"
@@ -16,6 +17,7 @@ import (
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
        check "gopkg.in/check.v1"
 )
@@ -30,9 +32,13 @@ var _ = check.Suite(&HandlerSuite{})
 type HandlerSuite struct {
        cluster *arvados.Cluster
        handler http.Handler
+       ctx     context.Context
+       cancel  context.CancelFunc
 }
 
 func (s *HandlerSuite) SetUpTest(c *check.C) {
+       s.ctx, s.cancel = context.WithCancel(context.Background())
+       s.ctx = ctxlog.Context(s.ctx, ctxlog.New(os.Stderr, "json", "debug"))
        s.cluster = &arvados.Cluster{
                ClusterID:  "zzzzz",
                PostgreSQL: integrationTestCluster().PostgreSQL,
@@ -44,7 +50,11 @@ func (s *HandlerSuite) SetUpTest(c *check.C) {
                },
        }
        node := s.cluster.NodeProfiles["*"]
-       s.handler = newHandler(s.cluster, &node)
+       s.handler = newHandler(s.ctx, s.cluster, &node)
+}
+
+func (s *HandlerSuite) TearDownTest(c *check.C) {
+       s.cancel()
 }
 
 func (s *HandlerSuite) TestProxyDiscoveryDoc(c *check.C) {
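
The test attaches its logger to the context with ctxlog.Context before building the handler; downstream code can then recover it from any request-scoped context. A small sketch of the read side, assuming ctxlog exposes a FromContext accessor (only ctxlog.New, ctxlog.Context, and ctxlog.TestLogger appear in this diff):

package example // illustrative sketch only

import (
	"context"

	"git.curoverse.com/arvados.git/sdk/go/ctxlog"
)

func doWork(ctx context.Context) {
	// FromContext is assumed here: the counterpart to the
	// ctxlog.Context call in SetUpTest above.
	logger := ctxlog.FromContext(ctx)
	logger.Info("handler ready")
}
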
index 95f17e79e6d8370e8c211f928264dff9869189c2..ae89c3d7ea4d073fa44885f193af138f81b85508 100644 (file)
@@ -5,28 +5,16 @@
 package controller
 
 import (
-       "bytes"
        "net/http"
        "os"
        "path/filepath"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
-       "github.com/sirupsen/logrus"
        check "gopkg.in/check.v1"
 )
 
-// logWriter is an io.Writer that writes by calling a "write log"
-// function, typically (*check.C)Log().
-type logWriter struct {
-       logfunc func(...interface{})
-}
-
-func (tl *logWriter) Write(buf []byte) (int, error) {
-       tl.logfunc(string(bytes.TrimRight(buf, "\n")))
-       return len(buf), nil
-}
-
 func integrationTestCluster() *arvados.Cluster {
        cfg, err := arvados.GetConfig(filepath.Join(os.Getenv("WORKSPACE"), "tmp", "arvados.yml"))
        if err != nil {
@@ -42,9 +30,7 @@ func integrationTestCluster() *arvados.Cluster {
 // Return a new unstarted controller server, using the Rails API
 // provided by the integration-testing environment.
 func newServerFromIntegrationTestEnv(c *check.C) *httpserver.Server {
-       log := logrus.New()
-       log.Formatter = &logrus.JSONFormatter{}
-       log.Out = &logWriter{c.Log}
+       log := ctxlog.TestLogger(c)
 
        nodeProfile := arvados.NodeProfile{
                Controller: arvados.SystemServiceInstance{Listen: ":"},
index 92948fb300e703971e59957b4f6f98db176a42ef..7231e839475639c2aa5e6c720091c15b4d4b5ed7 100644 (file)
@@ -5,6 +5,8 @@
 package dispatchcloud
 
 import (
+       "context"
+
        "git.curoverse.com/arvados.git/lib/cmd"
        "git.curoverse.com/arvados.git/lib/service"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
@@ -12,8 +14,8 @@ import (
 
 var Command cmd.Handler = service.Command(arvados.ServiceNameDispatchCloud, newHandler)
 
-func newHandler(cluster *arvados.Cluster, _ *arvados.NodeProfile) service.Handler {
-       d := &dispatcher{Cluster: cluster}
+func newHandler(ctx context.Context, cluster *arvados.Cluster, _ *arvados.NodeProfile) service.Handler {
+       d := &dispatcher{Cluster: cluster, Context: ctx}
        go d.Start()
        return d
 }
index 965407e518a1aaf16444d2b3ce6f84af90be3b30..bbe47625a893d6874d2c3c415952948f290de74f 100644 (file)
@@ -27,8 +27,8 @@ type APIClient interface {
 type QueueEnt struct {
        // The container to run. Only the UUID, State, Priority, and
        // RuntimeConstraints fields are populated.
-       Container    arvados.Container
-       InstanceType arvados.InstanceType
+       Container    arvados.Container    `json:"container"`
+       InstanceType arvados.InstanceType `json:"instance_type"`
 }
 
 // String implements fmt.Stringer by returning the queued container's
@@ -131,7 +131,7 @@ func (cq *Queue) Forget(uuid string) {
        defer cq.mtx.Unlock()
        ctr := cq.current[uuid].Container
        if ctr.State == arvados.ContainerStateComplete || ctr.State == arvados.ContainerStateCancelled {
-               delete(cq.current, uuid)
+               cq.delEnt(uuid, ctr.State)
        }
 }
 
@@ -184,7 +184,9 @@ func (cq *Queue) Update() error {
        cq.mtx.Lock()
        defer cq.mtx.Unlock()
        for uuid, ctr := range next {
-               if _, keep := cq.dontupdate[uuid]; keep {
+               if _, dontupdate := cq.dontupdate[uuid]; dontupdate {
+                       // Don't clobber a local update that happened
+                       // after we started polling.
                        continue
                }
                if cur, ok := cq.current[uuid]; !ok {
@@ -194,13 +196,18 @@ func (cq *Queue) Update() error {
                        cq.current[uuid] = cur
                }
        }
-       for uuid := range cq.current {
-               if _, keep := cq.dontupdate[uuid]; keep {
-                       continue
-               } else if _, keep = next[uuid]; keep {
+       for uuid, ent := range cq.current {
+               if _, dontupdate := cq.dontupdate[uuid]; dontupdate {
+                       // Don't expunge an entry that was
+                       // added/updated locally after we started
+                       // polling.
                        continue
-               } else {
-                       delete(cq.current, uuid)
+               } else if _, stillpresent := next[uuid]; !stillpresent {
+                       // Expunge an entry that no longer appears in
+                       // the poll response (evidently it's
+                       // cancelled, completed, deleted, or taken by
+                       // a different dispatcher).
+                       cq.delEnt(uuid, ent.Container.State)
                }
        }
        cq.dontupdate = nil
@@ -209,13 +216,74 @@ func (cq *Queue) Update() error {
        return nil
 }
 
+// Caller must have lock.
+func (cq *Queue) delEnt(uuid string, state arvados.ContainerState) {
+       cq.logger.WithFields(logrus.Fields{
+               "ContainerUUID": uuid,
+               "State":         state,
+       }).Info("dropping container from queue")
+       delete(cq.current, uuid)
+}
+
 func (cq *Queue) addEnt(uuid string, ctr arvados.Container) {
        it, err := cq.chooseType(&ctr)
-       if err != nil {
-               // FIXME: throttle warnings, cancel after timeout
-               cq.logger.Warnf("cannot run %s", &ctr)
+       if err != nil && (ctr.State == arvados.ContainerStateQueued || ctr.State == arvados.ContainerStateLocked) {
+               // We assume here that any chooseType error is a hard
+               // error: it wouldn't help to try again, or to leave
+               // it for a different dispatcher process to attempt.
+               errorString := err.Error()
+               logger := cq.logger.WithField("ContainerUUID", ctr.UUID)
+               logger.WithError(err).Warn("cancel container with no suitable instance type")
+               go func() {
+                       if ctr.State == arvados.ContainerStateQueued {
+                               // Can't set runtime error without
+                               // locking first. If Lock() is
+                               // successful, it will call addEnt()
+                               // again itself, and we'll fall
+                               // through to the
+                               // setRuntimeError/Cancel code below.
+                               err := cq.Lock(ctr.UUID)
+                               if err != nil {
+                                       logger.WithError(err).Warn("lock failed")
+                                       // ...and try again on the
+                                       // next Update, if the problem
+                                       // still exists.
+                               }
+                               return
+                       }
+                       var err error
+                       defer func() {
+                               if err == nil {
+                                       return
+                               }
+                               // On failure, check current container
+                               // state, and don't log the error if
+                               // the failure came from losing a
+                               // race.
+                               var latest arvados.Container
+                               cq.client.RequestAndDecode(&latest, "GET", "arvados/v1/containers/"+ctr.UUID, nil, map[string][]string{"select": {"state"}})
+                               if latest.State == arvados.ContainerStateCancelled {
+                                       return
+                               }
+                               logger.WithError(err).Warn("error while trying to cancel unsatisfiable container")
+                       }()
+                       err = cq.setRuntimeError(ctr.UUID, errorString)
+                       if err != nil {
+                               return
+                       }
+                       err = cq.Cancel(ctr.UUID)
+                       if err != nil {
+                               return
+                       }
+               }()
                return
        }
+       cq.logger.WithFields(logrus.Fields{
+               "ContainerUUID": ctr.UUID,
+               "State":         ctr.State,
+               "Priority":      ctr.Priority,
+               "InstanceType":  it.Name,
+       }).Info("adding container to queue")
        cq.current[uuid] = QueueEnt{Container: ctr, InstanceType: it}
 }
 
@@ -229,6 +297,18 @@ func (cq *Queue) Unlock(uuid string) error {
        return cq.apiUpdate(uuid, "unlock")
 }
 
+// setRuntimeError sets runtime_status["error"] to the given value.
+// Container should already have state==Locked or Running.
+func (cq *Queue) setRuntimeError(uuid, errorString string) error {
+       return cq.client.RequestAndDecode(nil, "PUT", "arvados/v1/containers/"+uuid, nil, map[string]map[string]map[string]interface{}{
+               "container": {
+                       "runtime_status": {
+                               "error": errorString,
+                       },
+               },
+       })
+}
+
 // Cancel cancels the given container.
 func (cq *Queue) Cancel(uuid string) error {
        err := cq.client.RequestAndDecode(nil, "PUT", "arvados/v1/containers/"+uuid, nil, map[string]map[string]interface{}{
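
Condensed, the unsatisfiable-container handling added to addEnt above is a three-step protocol: lock, record the reason, cancel. A sketch of the flow using the same Queue methods (a hypothetical helper; error handling and the lost-race check are elided for brevity):

package container // sketch within the same package, not part of this commit

import "git.curoverse.com/arvados.git/sdk/go/arvados"

// cancelUnsatisfiable (hypothetical) summarizes the goroutine in addEnt.
func (cq *Queue) cancelUnsatisfiable(ctr arvados.Container, reason string) {
	if ctr.State == arvados.ContainerStateQueued {
		// Step 1: runtime_status can only be set on a locked
		// container. A successful Lock() re-runs addEnt() with
		// state==Locked, which re-enters this path.
		cq.Lock(ctr.UUID)
		return
	}
	// Step 2: record why the container can never be scheduled...
	cq.setRuntimeError(ctr.UUID, reason) // runtime_status["error"] = reason
	// Step 3: ...then cancel it so it drops out of the queue.
	cq.Cancel(ctr.UUID)
}
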
diff --git a/lib/dispatchcloud/container/queue_test.go b/lib/dispatchcloud/container/queue_test.go
new file mode 100644 (file)
index 0000000..91d6535
--- /dev/null
+++ b/lib/dispatchcloud/container/queue_test.go
@@ -0,0 +1,132 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package container
+
+import (
+       "errors"
+       "os"
+       "sync"
+       "testing"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "github.com/sirupsen/logrus"
+       check "gopkg.in/check.v1"
+)
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       check.TestingT(t)
+}
+
+var _ = check.Suite(&IntegrationSuite{})
+
+func logger() logrus.FieldLogger {
+       logger := logrus.StandardLogger()
+       if os.Getenv("ARVADOS_DEBUG") != "" {
+               logger.SetLevel(logrus.DebugLevel)
+       }
+       return logger
+}
+
+type IntegrationSuite struct{}
+
+func (suite *IntegrationSuite) TearDownTest(c *check.C) {
+       err := arvados.NewClientFromEnv().RequestAndDecode(nil, "POST", "database/reset", nil, nil)
+       c.Check(err, check.IsNil)
+}
+
+func (suite *IntegrationSuite) TestGetLockUnlockCancel(c *check.C) {
+       typeChooser := func(ctr *arvados.Container) (arvados.InstanceType, error) {
+               return arvados.InstanceType{Name: "testType"}, nil
+       }
+
+       client := arvados.NewClientFromEnv()
+       cq := NewQueue(logger(), nil, typeChooser, client)
+
+       err := cq.Update()
+       c.Check(err, check.IsNil)
+
+       ents, threshold := cq.Entries()
+       c.Check(len(ents), check.Not(check.Equals), 0)
+       c.Check(time.Since(threshold) < time.Minute, check.Equals, true)
+       c.Check(time.Since(threshold) > 0, check.Equals, true)
+
+       _, ok := ents[arvadostest.QueuedContainerUUID]
+       c.Check(ok, check.Equals, true)
+
+       var wg sync.WaitGroup
+       for uuid, ent := range ents {
+               c.Check(ent.Container.UUID, check.Equals, uuid)
+               c.Check(ent.InstanceType.Name, check.Equals, "testType")
+               c.Check(ent.Container.State, check.Equals, arvados.ContainerStateQueued)
+               c.Check(ent.Container.Priority > 0, check.Equals, true)
+
+               ctr, ok := cq.Get(uuid)
+               c.Check(ok, check.Equals, true)
+               c.Check(ctr.UUID, check.Equals, uuid)
+
+               wg.Add(1)
+               go func(uuid string) {
+                       defer wg.Done()
+                       err := cq.Unlock(uuid)
+                       c.Check(err, check.NotNil)
+                       err = cq.Lock(uuid)
+                       c.Check(err, check.IsNil)
+                       ctr, ok := cq.Get(uuid)
+                       c.Check(ok, check.Equals, true)
+                       c.Check(ctr.State, check.Equals, arvados.ContainerStateLocked)
+                       err = cq.Lock(uuid)
+                       c.Check(err, check.NotNil)
+                       err = cq.Unlock(uuid)
+                       c.Check(err, check.IsNil)
+                       ctr, ok = cq.Get(uuid)
+                       c.Check(ok, check.Equals, true)
+                       c.Check(ctr.State, check.Equals, arvados.ContainerStateQueued)
+                       err = cq.Unlock(uuid)
+                       c.Check(err, check.NotNil)
+               }(uuid)
+       }
+       wg.Wait()
+
+       err = cq.Cancel(arvadostest.CompletedContainerUUID)
+       c.Check(err, check.ErrorMatches, `.*State cannot change from Complete to Cancelled.*`)
+}
+
+func (suite *IntegrationSuite) TestCancelIfNoInstanceType(c *check.C) {
+       errorTypeChooser := func(ctr *arvados.Container) (arvados.InstanceType, error) {
+               return arvados.InstanceType{}, errors.New("no suitable instance type")
+       }
+
+       client := arvados.NewClientFromEnv()
+       cq := NewQueue(logger(), nil, errorTypeChooser, client)
+
+       var ctr arvados.Container
+       err := client.RequestAndDecode(&ctr, "GET", "arvados/v1/containers/"+arvadostest.QueuedContainerUUID, nil, nil)
+       c.Check(err, check.IsNil)
+       c.Check(ctr.State, check.Equals, arvados.ContainerStateQueued)
+
+       cq.Update()
+
+       // Wait for the cancel operation to take effect. Container
+       // will have state=Cancelled or just disappear from the queue.
+       suite.waitfor(c, time.Second, func() bool {
+               err := client.RequestAndDecode(&ctr, "GET", "arvados/v1/containers/"+arvadostest.QueuedContainerUUID, nil, nil)
+               return err == nil && ctr.State == arvados.ContainerStateCancelled
+       })
+       c.Check(ctr.RuntimeStatus["error"], check.Equals, `no suitable instance type`)
+}
+
+func (suite *IntegrationSuite) waitfor(c *check.C, timeout time.Duration, fn func() bool) {
+       defer func() {
+               c.Check(fn(), check.Equals, true)
+       }()
+       deadline := time.Now().Add(timeout)
+       for !fn() && time.Now().Before(deadline) {
+               time.Sleep(timeout / 1000)
+       }
+}
index 2415094ac00290625794d8204449052ad749ad85..adf1028b35fe16ab13afbfcb4f0c91672ec17849 100644 (file)
@@ -5,6 +5,7 @@
 package dispatchcloud
 
 import (
+       "context"
        "crypto/md5"
        "encoding/json"
        "fmt"
@@ -20,7 +21,9 @@ import (
        "git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/auth"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
+       "github.com/julienschmidt/httprouter"
        "github.com/prometheus/client_golang/prometheus"
        "github.com/prometheus/client_golang/prometheus/promhttp"
        "github.com/sirupsen/logrus"
@@ -35,11 +38,13 @@ const (
 type pool interface {
        scheduler.WorkerPool
        Instances() []worker.InstanceView
+       SetIdleBehavior(cloud.InstanceID, worker.IdleBehavior) error
        Stop()
 }
 
 type dispatcher struct {
        Cluster       *arvados.Cluster
+       Context       context.Context
        InstanceSetID cloud.InstanceSetID
 
        logger      logrus.FieldLogger
@@ -87,6 +92,7 @@ func (disp *dispatcher) Close() {
 // Make a worker.Executor for the given instance.
 func (disp *dispatcher) newExecutor(inst cloud.Instance) worker.Executor {
        exr := ssh_executor.New(inst)
+       exr.SetTargetPort(disp.Cluster.CloudVMs.SSHPort)
        exr.SetSigners(disp.sshKey)
        return exr
 }
@@ -113,9 +119,9 @@ func (disp *dispatcher) initialize() {
        }
        disp.stop = make(chan struct{}, 1)
        disp.stopped = make(chan struct{})
-       disp.logger = logrus.StandardLogger()
+       disp.logger = ctxlog.FromContext(disp.Context)
 
-       if key, err := ssh.ParsePrivateKey(disp.Cluster.Dispatch.PrivateKey); err != nil {
+       if key, err := ssh.ParsePrivateKey([]byte(disp.Cluster.Dispatch.PrivateKey)); err != nil {
                disp.logger.Fatalf("error parsing configured Dispatch.PrivateKey: %s", err)
        } else {
                disp.sshKey = key
@@ -125,9 +131,9 @@ func (disp *dispatcher) initialize() {
        if err != nil {
                disp.logger.Fatalf("error initializing driver: %s", err)
        }
-       disp.instanceSet = &instanceSetProxy{instanceSet}
+       disp.instanceSet = instanceSet
        disp.reg = prometheus.NewRegistry()
-       disp.pool = worker.NewPool(disp.logger, disp.reg, disp.instanceSet, disp.newExecutor, disp.Cluster)
+       disp.pool = worker.NewPool(disp.logger, arvClient, disp.reg, disp.instanceSet, disp.newExecutor, disp.sshKey.PublicKey(), disp.Cluster)
        disp.queue = container.NewQueue(disp.logger, disp.reg, disp.typeChooser, arvClient)
 
        if disp.Cluster.ManagementToken == "" {
@@ -135,14 +141,17 @@ func (disp *dispatcher) initialize() {
                        http.Error(w, "Management API authentication is not configured", http.StatusForbidden)
                })
        } else {
-               mux := http.NewServeMux()
-               mux.HandleFunc("/arvados/v1/dispatch/containers", disp.apiContainers)
-               mux.HandleFunc("/arvados/v1/dispatch/instances", disp.apiInstances)
+               mux := httprouter.New()
+               mux.HandlerFunc("GET", "/arvados/v1/dispatch/containers", disp.apiContainers)
+               mux.HandlerFunc("GET", "/arvados/v1/dispatch/instances", disp.apiInstances)
+               mux.HandlerFunc("POST", "/arvados/v1/dispatch/instances/hold", disp.apiInstanceHold)
+               mux.HandlerFunc("POST", "/arvados/v1/dispatch/instances/drain", disp.apiInstanceDrain)
+               mux.HandlerFunc("POST", "/arvados/v1/dispatch/instances/run", disp.apiInstanceRun)
                metricsH := promhttp.HandlerFor(disp.reg, promhttp.HandlerOpts{
                        ErrorLog: disp.logger,
                })
-               mux.Handle("/metrics", metricsH)
-               mux.Handle("/metrics.json", metricsH)
+               mux.Handler("GET", "/metrics", metricsH)
+               mux.Handler("GET", "/metrics.json", metricsH)
                disp.httpHandler = auth.RequireLiteralToken(disp.Cluster.ManagementToken, mux)
        }
 }
@@ -160,7 +169,7 @@ func (disp *dispatcher) run() {
        if pollInterval <= 0 {
                pollInterval = defaultPollInterval
        }
-       sched := scheduler.New(disp.logger, disp.queue, disp.pool, staleLockTimeout, pollInterval)
+       sched := scheduler.New(disp.Context, disp.queue, disp.pool, staleLockTimeout, pollInterval)
        sched.Start()
        defer sched.Stop()
 
@@ -169,12 +178,8 @@ func (disp *dispatcher) run() {
 
 // Management API: all active and queued containers.
 func (disp *dispatcher) apiContainers(w http.ResponseWriter, r *http.Request) {
-       if r.Method != "GET" {
-               httpserver.Error(w, "method not allowed", http.StatusMethodNotAllowed)
-               return
-       }
        var resp struct {
-               Items []container.QueueEnt
+               Items []container.QueueEnt `json:"items"`
        }
        qEntries, _ := disp.queue.Entries()
        for _, ent := range qEntries {
@@ -185,13 +190,37 @@ func (disp *dispatcher) apiContainers(w http.ResponseWriter, r *http.Request) {
 
 // Management API: all active instances (cloud VMs).
 func (disp *dispatcher) apiInstances(w http.ResponseWriter, r *http.Request) {
-       if r.Method != "GET" {
-               httpserver.Error(w, "method not allowed", http.StatusMethodNotAllowed)
-               return
-       }
        var resp struct {
-               Items []worker.InstanceView
+               Items []worker.InstanceView `json:"items"`
        }
        resp.Items = disp.pool.Instances()
        json.NewEncoder(w).Encode(resp)
 }
+
+// Management API: set idle behavior to "hold" for specified instance.
+func (disp *dispatcher) apiInstanceHold(w http.ResponseWriter, r *http.Request) {
+       disp.apiInstanceIdleBehavior(w, r, worker.IdleBehaviorHold)
+}
+
+// Management API: set idle behavior to "drain" for specified instance.
+func (disp *dispatcher) apiInstanceDrain(w http.ResponseWriter, r *http.Request) {
+       disp.apiInstanceIdleBehavior(w, r, worker.IdleBehaviorDrain)
+}
+
+// Management API: set idle behavior to "run" for specified instance.
+func (disp *dispatcher) apiInstanceRun(w http.ResponseWriter, r *http.Request) {
+       disp.apiInstanceIdleBehavior(w, r, worker.IdleBehaviorRun)
+}
+
+func (disp *dispatcher) apiInstanceIdleBehavior(w http.ResponseWriter, r *http.Request, want worker.IdleBehavior) {
+       id := cloud.InstanceID(r.FormValue("instance_id"))
+       if id == "" {
+               httpserver.Error(w, "instance_id parameter not provided", http.StatusBadRequest)
+               return
+       }
+       err := disp.pool.SetIdleBehavior(id, want)
+       if err != nil {
+               httpserver.Error(w, err.Error(), http.StatusNotFound)
+               return
+       }
+}
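
Editor's note: a minimal client sketch for the new idle-behavior endpoints.
It assumes the management token is accepted as a Bearer token (per
auth.RequireLiteralToken); dispatchURL and the instance ID are placeholders:

    package main

    import (
        "fmt"
        "net/http"
        "net/url"
        "strings"
    )

    func main() {
        dispatchURL := "http://localhost:9006" // hypothetical dispatcher address
        token := "example-management-token"    // the cluster's ManagementToken

        form := url.Values{"instance_id": {"stub-instance-123"}}
        req, err := http.NewRequest("POST",
            dispatchURL+"/arvados/v1/dispatch/instances/hold",
            strings.NewReader(form.Encode()))
        if err != nil {
            panic(err)
        }
        req.Header.Set("Authorization", "Bearer "+token)
        req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

        resp, err := http.DefaultClient.Do(req)
        if err != nil {
            panic(err)
        }
        defer resp.Body.Close()
        // 400 if instance_id is missing, 404 if the pool doesn't know it.
        fmt.Println(resp.Status)
    }
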
index 1f94577434c4076fa0eca25cb2432d579631c657..36b06020748f43f5f4c7bbdefb5302935dedb861 100644 (file)
@@ -5,6 +5,7 @@
 package dispatchcloud
 
 import (
+       "context"
        "encoding/json"
        "io/ioutil"
        "math/rand"
@@ -16,7 +17,7 @@ import (
 
        "git.curoverse.com/arvados.git/lib/dispatchcloud/test"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "github.com/sirupsen/logrus"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "golang.org/x/crypto/ssh"
        check "gopkg.in/check.v1"
 )
@@ -24,41 +25,39 @@ import (
 var _ = check.Suite(&DispatcherSuite{})
 
 type DispatcherSuite struct {
-       cluster     *arvados.Cluster
-       instanceSet *test.LameInstanceSet
-       stubDriver  *test.StubDriver
-       disp        *dispatcher
-}
-
-func (s *DispatcherSuite) SetUpSuite(c *check.C) {
-       if os.Getenv("ARVADOS_DEBUG") != "" {
-               logrus.StandardLogger().SetLevel(logrus.DebugLevel)
-       }
+       ctx        context.Context
+       cancel     context.CancelFunc
+       cluster    *arvados.Cluster
+       stubDriver *test.StubDriver
+       disp       *dispatcher
 }
 
 func (s *DispatcherSuite) SetUpTest(c *check.C) {
+       s.ctx, s.cancel = context.WithCancel(context.Background())
+       s.ctx = ctxlog.Context(s.ctx, ctxlog.TestLogger(c))
        dispatchpub, _ := test.LoadTestKey(c, "test/sshkey_dispatch")
        dispatchprivraw, err := ioutil.ReadFile("test/sshkey_dispatch")
        c.Assert(err, check.IsNil)
 
        _, hostpriv := test.LoadTestKey(c, "test/sshkey_vm")
        s.stubDriver = &test.StubDriver{
-               HostKey:          hostpriv,
-               AuthorizedKeys:   []ssh.PublicKey{dispatchpub},
-               ErrorRateDestroy: 0.1,
+               HostKey:                   hostpriv,
+               AuthorizedKeys:            []ssh.PublicKey{dispatchpub},
+               ErrorRateDestroy:          0.1,
+               MinTimeBetweenCreateCalls: time.Millisecond,
        }
 
        s.cluster = &arvados.Cluster{
                CloudVMs: arvados.CloudVMs{
                        Driver:          "test",
                        SyncInterval:    arvados.Duration(10 * time.Millisecond),
-                       TimeoutIdle:     arvados.Duration(30 * time.Millisecond),
-                       TimeoutBooting:  arvados.Duration(30 * time.Millisecond),
+                       TimeoutIdle:     arvados.Duration(150 * time.Millisecond),
+                       TimeoutBooting:  arvados.Duration(150 * time.Millisecond),
                        TimeoutProbe:    arvados.Duration(15 * time.Millisecond),
                        TimeoutShutdown: arvados.Duration(5 * time.Millisecond),
                },
                Dispatch: arvados.Dispatch{
-                       PrivateKey:         dispatchprivraw,
+                       PrivateKey:         string(dispatchprivraw),
                        PollInterval:       arvados.Duration(5 * time.Millisecond),
                        ProbeInterval:      arvados.Duration(5 * time.Millisecond),
                        StaleLockTimeout:   arvados.Duration(5 * time.Millisecond),
@@ -80,13 +79,17 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) {
                        },
                },
        }
-       s.disp = &dispatcher{Cluster: s.cluster}
+       s.disp = &dispatcher{
+               Cluster: s.cluster,
+               Context: s.ctx,
+       }
        // Test cases can modify s.cluster before calling
        // initialize(), and then modify private state before calling
        // go run().
 }
 
 func (s *DispatcherSuite) TearDownTest(c *check.C) {
+       s.cancel()
        s.disp.Close()
 }
 
@@ -163,7 +166,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
                c.Fatalf("timed out; still waiting for %d containers: %q", len(waiting), waiting)
        }
 
-       deadline := time.Now().Add(time.Second)
+       deadline := time.Now().Add(5 * time.Second)
        for range time.NewTicker(10 * time.Millisecond).C {
                insts, err := s.stubDriver.InstanceSets()[0].Instances(nil)
                c.Check(err, check.IsNil)
@@ -228,11 +231,11 @@ func (s *DispatcherSuite) TestInstancesAPI(c *check.C) {
 
        type instance struct {
                Instance             string
-               WorkerState          string
+               WorkerState          string `json:"worker_state"`
                Price                float64
-               LastContainerUUID    string
-               ArvadosInstanceType  string
-               ProviderInstanceType string
+               LastContainerUUID    string `json:"last_container_uuid"`
+               ArvadosInstanceType  string `json:"arvados_instance_type"`
+               ProviderInstanceType string `json:"provider_instance_type"`
        }
        type instancesResponse struct {
                Items []instance
@@ -254,8 +257,8 @@ func (s *DispatcherSuite) TestInstancesAPI(c *check.C) {
 
        ch := s.disp.pool.Subscribe()
        defer s.disp.pool.Unsubscribe(ch)
-       err := s.disp.pool.Create(test.InstanceType(1))
-       c.Check(err, check.IsNil)
+       ok := s.disp.pool.Create(test.InstanceType(1))
+       c.Check(ok, check.Equals, true)
        <-ch
 
        sr = getInstances()
index a6e62e05bb1a4cb611d6698812ddc459585f24fe..2ac69e04c17bb94ce706979547c8501b3f80b609 100644 (file)
@@ -8,12 +8,13 @@ import (
        "fmt"
 
        "git.curoverse.com/arvados.git/lib/cloud"
+       "git.curoverse.com/arvados.git/lib/cloud/azure"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "github.com/sirupsen/logrus"
 )
 
 var drivers = map[string]cloud.Driver{
-       "azure": cloud.DriverFunc(cloud.NewAzureInstanceSet),
+       "azure": azure.Driver,
 }
 
 func newInstanceSet(cluster *arvados.Cluster, setID cloud.InstanceSetID, logger logrus.FieldLogger) (cloud.InstanceSet, error) {
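
Editor's note: with the Azure code moved into its own package, each
provider now registers a cloud.Driver in this map. A sketch of how a
hypothetical additional driver would plug in; newExampleInstanceSet is
invented for illustration and would need whatever constructor signature
cloud.DriverFunc wraps (the shape the former cloud.NewAzureInstanceSet
had):

    var drivers = map[string]cloud.Driver{
        "azure": azure.Driver,
        // Hypothetical second provider, wrapped the way the old
        // azure entry was before this change:
        "example": cloud.DriverFunc(newExampleInstanceSet),
    }
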
diff --git a/lib/dispatchcloud/instance_set_proxy.go b/lib/dispatchcloud/instance_set_proxy.go
deleted file mode 100644 (file)
index e728b67..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package dispatchcloud
-
-import (
-       "git.curoverse.com/arvados.git/lib/cloud"
-       "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "golang.org/x/crypto/ssh"
-)
-
-type instanceSetProxy struct {
-       cloud.InstanceSet
-}
-
-func (is *instanceSetProxy) Create(it arvados.InstanceType, id cloud.ImageID, tags cloud.InstanceTags, pk ssh.PublicKey) (cloud.Instance, error) {
-       // TODO: return if Create failed recently with a RateLimitError or QuotaError
-       return is.InstanceSet.Create(it, id, tags, pk)
-}
-
-func (is *instanceSetProxy) Instances(tags cloud.InstanceTags) ([]cloud.Instance, error) {
-       // TODO: return if Instances failed recently with a RateLimitError
-       return is.InstanceSet.Instances(tags)
-}
diff --git a/lib/dispatchcloud/readme_states.txt b/lib/dispatchcloud/readme_states.txt
new file mode 100644 (file)
index 0000000..b654bbf
--- /dev/null
@@ -0,0 +1,31 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+# cpan -I -T install Graph::Easy
+# (eval `perl -I ~/perl5/lib/perl5 -Mlocal::lib`; cpan -T install Graph::Easy)
+# graph-easy --as=svg < readme_states.txt
+
+[Nonexistent] - appears in cloud list -> [Unknown]
+[Nonexistent] - create() returns ID -> [Booting]
+[Unknown] - create() returns ID -> [Booting]
+[Unknown] - boot timeout -> [Shutdown]
+[Booting] - boot+run probes succeed -> [Idle]
+[Idle] - idle timeout -> [Shutdown]
+[Idle] - probe timeout -> [Shutdown]
+[Idle] - want=drain -> [Shutdown]
+[Idle] - container starts -> [Running]
+[Running] - container ends -> [Idle]
+[Running] - container ends, want=drain -> [Shutdown]
+[Shutdown] - instance disappears from cloud -> [Gone]
+
+# Layouter fails if we add these
+#[Hold] - want=run -> [Booting]
+#[Hold] - want=drain -> [Shutdown]
+#[Running] - container ends, want=hold -> [Hold]
+#[Unknown] - want=hold -> [Hold]
+#[Booting] - want=hold -> [Hold]
+#[Idle] - want=hold -> [Hold]
+
+# Not worth saying?
+#[Booting] - boot probe succeeds, run probe fails -> [Booting]
index 264f9e4ec6bbc3747401858a37c3f70b259116c1..148b653c2e52305b2ece2255c49d98bf6cb72f50 100644 (file)
@@ -19,24 +19,15 @@ import (
 func (sch *Scheduler) fixStaleLocks() {
        wp := sch.pool.Subscribe()
        defer sch.pool.Unsubscribe(wp)
+
+       var stale []string
        timeout := time.NewTimer(sch.staleLockTimeout)
 waiting:
-       for {
-               unlock := false
-               select {
-               case <-wp:
-                       // If all workers have been contacted, unlock
-                       // containers that aren't claimed by any
-                       // worker.
-                       unlock = sch.pool.CountWorkers()[worker.StateUnknown] == 0
-               case <-timeout.C:
-                       // Give up and unlock the containers, even
-                       // though they might be working.
-                       unlock = true
-               }
-
+       for sch.pool.CountWorkers()[worker.StateUnknown] > 0 {
                running := sch.pool.Running()
                qEntries, _ := sch.queue.Entries()
+
+               stale = nil
                for uuid, ent := range qEntries {
                        if ent.Container.State != arvados.ContainerStateLocked {
                                continue
@@ -44,14 +35,25 @@ waiting:
                        if _, running := running[uuid]; running {
                                continue
                        }
-                       if !unlock {
-                               continue waiting
-                       }
-                       err := sch.queue.Unlock(uuid)
-                       if err != nil {
-                               sch.logger.Warnf("Unlock %s: %s", uuid, err)
-                       }
+                       stale = append(stale, uuid)
+               }
+               if len(stale) == 0 {
+                       return
+               }
+
+               select {
+               case <-wp:
+               case <-timeout.C:
+                       // Give up.
+                       break waiting
+               }
+
+       }
+
+       for _, uuid := range stale {
+               err := sch.queue.Unlock(uuid)
+               if err != nil {
+                       sch.logger.Warnf("Unlock %s: %s", uuid, err)
                }
-               return
        }
 }
index 59700c393523844094a0845922b767cd7da56e31..18cdc94fa52156ceab01d7dbe135d8db20029176 100644 (file)
@@ -13,7 +13,8 @@ import (
 )
 
 // A ContainerQueue is a set of containers that need to be started or
-// stopped. Implemented by container.Queue and test stubs.
+// stopped. Implemented by container.Queue and test stubs. See
+// container.Queue method documentation for details.
 type ContainerQueue interface {
        Entries() (entries map[string]container.QueueEnt, updated time.Time)
        Lock(uuid string) error
@@ -28,13 +29,13 @@ type ContainerQueue interface {
 
 // A WorkerPool asynchronously starts and stops worker VMs, and starts
 // and stops containers on them. Implemented by worker.Pool and test
-// stubs.
+// stubs. See worker.Pool method documentation for details.
 type WorkerPool interface {
        Running() map[string]time.Time
        Unallocated() map[arvados.InstanceType]int
        CountWorkers() map[worker.State]int
        AtQuota() bool
-       Create(arvados.InstanceType) error
+       Create(arvados.InstanceType) bool
        Shutdown(arvados.InstanceType) bool
        StartContainer(arvados.InstanceType, arvados.Container) bool
        KillContainer(uuid string)
index 8e74caef0d565c81e23a3a148f4f53a379450ac9..d102d2fd2041c71d8a7a60f0d5fed4730119875b 100644 (file)
@@ -6,8 +6,8 @@ package scheduler
 
 import (
        "sort"
+       "time"
 
-       "git.curoverse.com/arvados.git/lib/cloud"
        "git.curoverse.com/arvados.git/lib/dispatchcloud/container"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "github.com/sirupsen/logrus"
@@ -51,7 +51,7 @@ tryrun:
                                overquota = sorted[i:]
                                break tryrun
                        }
-                       sch.bgLock(logger, ctr.UUID)
+                       go sch.lockContainer(logger, ctr.UUID)
                        unalloc[it]--
                case arvados.ContainerStateLocked:
                        if unalloc[it] > 0 {
@@ -62,11 +62,13 @@ tryrun:
                                break tryrun
                        } else {
                                logger.Info("creating new instance")
-                               err := sch.pool.Create(it)
-                               if err != nil {
-                                       if _, ok := err.(cloud.QuotaError); !ok {
-                                               logger.WithError(err).Warn("error creating worker")
-                                       }
+                               if !sch.pool.Create(it) {
+                                       // (Note pool.Create works
+                                       // asynchronously and logs its
+                                       // own failures, so we don't
+                                       // need to log this as a
+                                       // failure.)
+
                                        sch.queue.Unlock(ctr.UUID)
                                        // Don't let lower-priority
                                        // containers starve this one
@@ -119,22 +121,16 @@ tryrun:
        }
 }
 
-// Start an API call to lock the given container, and return
-// immediately while waiting for the response in a new goroutine. Do
-// nothing if a lock request is already in progress for this
-// container.
-func (sch *Scheduler) bgLock(logger logrus.FieldLogger, uuid string) {
-       logger.Debug("locking")
-       sch.mtx.Lock()
-       defer sch.mtx.Unlock()
-       if sch.locking[uuid] {
-               logger.Debug("locking in progress, doing nothing")
+// Lock the given container. Should be called in a new goroutine.
+func (sch *Scheduler) lockContainer(logger logrus.FieldLogger, uuid string) {
+       if !sch.uuidLock(uuid, "lock") {
                return
        }
+       defer sch.uuidUnlock(uuid)
        if ctr, ok := sch.queue.Get(uuid); !ok || ctr.State != arvados.ContainerStateQueued {
                // This happens if the container has been cancelled or
                // locked since runQueue called sch.queue.Entries(),
-               // possibly by a bgLock() call from a previous
+               // possibly by a lockContainer() call from a previous
                // runQueue iteration. In any case, we will respond
                // appropriately on the next runQueue iteration, which
                // will have already been triggered by the queue
@@ -142,24 +138,50 @@ func (sch *Scheduler) bgLock(logger logrus.FieldLogger, uuid string) {
                logger.WithField("State", ctr.State).Debug("container no longer queued by the time we decided to lock it, doing nothing")
                return
        }
-       sch.locking[uuid] = true
-       go func() {
-               defer func() {
-                       sch.mtx.Lock()
-                       defer sch.mtx.Unlock()
-                       delete(sch.locking, uuid)
-               }()
-               err := sch.queue.Lock(uuid)
-               if err != nil {
-                       logger.WithError(err).Warn("error locking container")
-                       return
-               }
-               logger.Debug("lock succeeded")
-               ctr, ok := sch.queue.Get(uuid)
-               if !ok {
-                       logger.Error("(BUG?) container disappeared from queue after Lock succeeded")
-               } else if ctr.State != arvados.ContainerStateLocked {
-                       logger.Warnf("(race?) container has state=%q after Lock succeeded", ctr.State)
-               }
-       }()
+       err := sch.queue.Lock(uuid)
+       if err != nil {
+               logger.WithError(err).Warn("error locking container")
+               return
+       }
+       logger.Debug("lock succeeded")
+       ctr, ok := sch.queue.Get(uuid)
+       if !ok {
+               logger.Error("(BUG?) container disappeared from queue after Lock succeeded")
+       } else if ctr.State != arvados.ContainerStateLocked {
+               logger.Warnf("(race?) container has state=%q after Lock succeeded", ctr.State)
+       }
+}
+
+// Acquire a non-blocking lock for the specified UUID, returning true
+// if successful. The op argument is used only for debug logs.
+//
+// If the lock is not available, uuidLock arranges to wake up the
+// scheduler after a short delay, so it can retry whatever operation
+// is trying to get the lock (if that operation is still worth doing).
+//
+// This mechanism helps avoid spamming the controller/database with
+// concurrent updates for any single container, even when the
+// scheduler loop is running frequently.
+func (sch *Scheduler) uuidLock(uuid, op string) bool {
+       sch.mtx.Lock()
+       defer sch.mtx.Unlock()
+       logger := sch.logger.WithFields(logrus.Fields{
+               "ContainerUUID": uuid,
+               "Op":            op,
+       })
+       if op, locked := sch.uuidOp[uuid]; locked {
+               logger.Debugf("uuidLock not available, Op=%s in progress", op)
+               // Make sure the scheduler loop wakes up to retry.
+               sch.wakeup.Reset(time.Second / 4)
+               return false
+       }
+       logger.Debug("uuidLock acquired")
+       sch.uuidOp[uuid] = op
+       return true
+}
+
+func (sch *Scheduler) uuidUnlock(uuid string) {
+       sch.mtx.Lock()
+       defer sch.mtx.Unlock()
+       delete(sch.uuidOp, uuid)
 }
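
Editor's note: cancel and requeue in sync.go (below) both follow the same
acquire/defer-release pattern around uuidLock. A minimal sketch of a
caller, with the operation body elided:

    func (sch *Scheduler) opSketch(uuid string) {
        if !sch.uuidLock(uuid, "sketch") {
            // Another operation on this container is in flight;
            // uuidLock has already reset sch.wakeup, so runQueue
            // will retry shortly if the operation still applies.
            return
        }
        defer sch.uuidUnlock(uuid)
        // ... perform one controller API call for uuid here ...
    }
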
index 8945f88a14385af1961f080f0e05601a994badeb..4296a1364c911fc94d44af28512ecac195b4e5f5 100644 (file)
@@ -5,19 +5,18 @@
 package scheduler
 
 import (
-       "errors"
+       "context"
+       "sync"
        "time"
 
        "git.curoverse.com/arvados.git/lib/dispatchcloud/test"
        "git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "github.com/sirupsen/logrus"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        check "gopkg.in/check.v1"
 )
 
 var (
-       logger = logrus.StandardLogger()
-
        // arbitrary example container UUIDs
        uuids = func() (r []string) {
                for i := 0; i < 16; i++ {
@@ -43,36 +42,53 @@ type stubPool struct {
        creates   []arvados.InstanceType
        starts    []string
        shutdowns int
+       sync.Mutex
 }
 
-func (p *stubPool) AtQuota() bool                 { return p.atQuota }
-func (p *stubPool) Subscribe() <-chan struct{}    { return p.notify }
-func (p *stubPool) Unsubscribe(<-chan struct{})   {}
-func (p *stubPool) Running() map[string]time.Time { return p.running }
+func (p *stubPool) AtQuota() bool               { return p.atQuota }
+func (p *stubPool) Subscribe() <-chan struct{}  { return p.notify }
+func (p *stubPool) Unsubscribe(<-chan struct{}) {}
+func (p *stubPool) Running() map[string]time.Time {
+       p.Lock()
+       defer p.Unlock()
+       r := map[string]time.Time{}
+       for k, v := range p.running {
+               r[k] = v
+       }
+       return r
+}
 func (p *stubPool) Unallocated() map[arvados.InstanceType]int {
+       p.Lock()
+       defer p.Unlock()
        r := map[arvados.InstanceType]int{}
        for it, n := range p.unalloc {
                r[it] = n
        }
        return r
 }
-func (p *stubPool) Create(it arvados.InstanceType) error {
+func (p *stubPool) Create(it arvados.InstanceType) bool {
+       p.Lock()
+       defer p.Unlock()
        p.creates = append(p.creates, it)
        if p.canCreate < 1 {
-               return stubQuotaError{errors.New("quota")}
+               return false
        }
        p.canCreate--
        p.unalloc[it]++
-       return nil
+       return true
 }
 func (p *stubPool) KillContainer(uuid string) {
-       p.running[uuid] = time.Now()
+       p.Lock()
+       defer p.Unlock()
+       delete(p.running, uuid)
 }
 func (p *stubPool) Shutdown(arvados.InstanceType) bool {
        p.shutdowns++
        return false
 }
 func (p *stubPool) CountWorkers() map[worker.State]int {
+       p.Lock()
+       defer p.Unlock()
        return map[worker.State]int{
                worker.StateBooting: len(p.unalloc) - len(p.idle),
                worker.StateIdle:    len(p.idle),
@@ -80,6 +96,8 @@ func (p *stubPool) CountWorkers() map[worker.State]int {
        }
 }
 func (p *stubPool) StartContainer(it arvados.InstanceType, ctr arvados.Container) bool {
+       p.Lock()
+       defer p.Unlock()
        p.starts = append(p.starts, ctr.UUID)
        if p.idle[it] == 0 {
                return false
@@ -90,6 +108,10 @@ func (p *stubPool) StartContainer(it arvados.InstanceType, ctr arvados.Container
        return true
 }
 
+func chooseType(ctr *arvados.Container) (arvados.InstanceType, error) {
+       return test.InstanceType(ctr.RuntimeConstraints.VCPUs), nil
+}
+
 var _ = check.Suite(&SchedulerSuite{})
 
 type SchedulerSuite struct{}
@@ -100,10 +122,9 @@ type SchedulerSuite struct{}
 // immediately. Don't try to create any other nodes after the failed
 // create.
 func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
+       ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
        queue := test.Queue{
-               ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
-                       return test.InstanceType(ctr.RuntimeConstraints.VCPUs), nil
-               },
+               ChooseType: chooseType,
                Containers: []arvados.Container{
                        {
                                UUID:     test.ContainerUUID(1),
@@ -156,7 +177,7 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
                running:   map[string]time.Time{},
                canCreate: 0,
        }
-       New(logger, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
+       New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
        c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(1)})
        c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(4)})
        c.Check(pool.running, check.HasLen, 1)
@@ -168,6 +189,7 @@ func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
 // If Create() fails, shutdown some nodes, and don't call Create()
 // again.  Don't call Create() at all if AtQuota() is true.
 func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
+       ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
        for quota := 0; quota < 2; quota++ {
                c.Logf("quota=%d", quota)
                shouldCreate := []arvados.InstanceType{}
@@ -175,9 +197,7 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
                        shouldCreate = append(shouldCreate, test.InstanceType(3))
                }
                queue := test.Queue{
-                       ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
-                               return test.InstanceType(ctr.RuntimeConstraints.VCPUs), nil
-                       },
+                       ChooseType: chooseType,
                        Containers: []arvados.Container{
                                {
                                        UUID:     test.ContainerUUID(2),
@@ -213,7 +233,7 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
                        starts:    []string{},
                        canCreate: 0,
                }
-               New(logger, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
+               New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
                c.Check(pool.creates, check.DeepEquals, shouldCreate)
                c.Check(pool.starts, check.DeepEquals, []string{})
                c.Check(pool.shutdowns, check.Not(check.Equals), 0)
@@ -223,6 +243,7 @@ func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
 // Start lower-priority containers while waiting for new/existing
 // workers to come up for higher-priority containers.
 func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
+       ctx := ctxlog.Context(context.Background(), ctxlog.TestLogger(c))
        pool := stubPool{
                unalloc: map[arvados.InstanceType]int{
                        test.InstanceType(1): 2,
@@ -236,9 +257,7 @@ func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
                canCreate: 4,
        }
        queue := test.Queue{
-               ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
-                       return test.InstanceType(ctr.RuntimeConstraints.VCPUs), nil
-               },
+               ChooseType: chooseType,
                Containers: []arvados.Container{
                        {
                                // create a new worker
@@ -303,7 +322,7 @@ func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
                },
        }
        queue.Update()
-       New(logger, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
+       New(ctx, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
        c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(2), test.InstanceType(1)})
        c.Check(pool.starts, check.DeepEquals, []string{uuids[6], uuids[5], uuids[3], uuids[2]})
        running := map[string]bool{}
index 83fc08a9ffdb28c285965ca7a3f6cd41aba4dd7d..eb82c488390e3751fd7d3383acb1c1cf72af5e37 100644 (file)
@@ -7,9 +7,11 @@
 package scheduler
 
 import (
+       "context"
        "sync"
        "time"
 
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "github.com/sirupsen/logrus"
 )
 
@@ -32,8 +34,9 @@ type Scheduler struct {
        staleLockTimeout    time.Duration
        queueUpdateInterval time.Duration
 
-       locking map[string]bool
-       mtx     sync.Mutex
+       uuidOp map[string]string // operation in progress: "lock", "cancel", ...
+       mtx    sync.Mutex
+       wakeup *time.Timer
 
        runOnce sync.Once
        stop    chan struct{}
@@ -44,16 +47,17 @@ type Scheduler struct {
 //
 // Any given queue and pool should not be used by more than one
 // scheduler at a time.
-func New(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool, staleLockTimeout, queueUpdateInterval time.Duration) *Scheduler {
+func New(ctx context.Context, queue ContainerQueue, pool WorkerPool, staleLockTimeout, queueUpdateInterval time.Duration) *Scheduler {
        return &Scheduler{
-               logger:              logger,
+               logger:              ctxlog.FromContext(ctx),
                queue:               queue,
                pool:                pool,
                staleLockTimeout:    staleLockTimeout,
                queueUpdateInterval: queueUpdateInterval,
+               wakeup:              time.NewTimer(time.Second),
                stop:                make(chan struct{}),
                stopped:             make(chan struct{}),
-               locking:             map[string]bool{},
+               uuidOp:              map[string]string{},
        }
 }
 
@@ -75,7 +79,10 @@ func (sch *Scheduler) run() {
        // Ensure the queue is fetched once before attempting anything.
        for err := sch.queue.Update(); err != nil; err = sch.queue.Update() {
                sch.logger.Errorf("error updating queue: %s", err)
-               d := sch.queueUpdateInterval / 60
+               d := sch.queueUpdateInterval / 10
+               if d < time.Second {
+                       d = time.Second
+               }
                sch.logger.Infof("waiting %s before retry", d)
                time.Sleep(d)
        }
@@ -111,6 +118,7 @@ func (sch *Scheduler) run() {
                        return
                case <-queueNotify:
                case <-poolNotify:
+               case <-sch.wakeup.C:
                }
        }
 }
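
Editor's note: callers now attach the logger to a context instead of
passing it as an argument, matching the ctxlog usage in the tests above.
A construction sketch, assuming queue and pool satisfy ContainerQueue and
WorkerPool; the intervals are arbitrary:

    ctx := ctxlog.Context(context.Background(), logrus.StandardLogger())
    sched := scheduler.New(ctx, queue, pool, time.Minute, 10*time.Second)
    sched.Start()
    defer sched.Stop()
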
index 47c754e243dab20ae127cd8741c9decb9eea9688..23fc621dea26c76be659ddc4f88bea7565f4bd4c 100644 (file)
@@ -6,7 +6,6 @@ package scheduler
 
 import (
        "fmt"
-       "time"
 
        "git.curoverse.com/arvados.git/lib/dispatchcloud/container"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
@@ -25,32 +24,17 @@ import (
 // cancelled.
 func (sch *Scheduler) sync() {
        running := sch.pool.Running()
-       cancel := func(ent container.QueueEnt, reason string) {
-               uuid := ent.Container.UUID
-               logger := sch.logger.WithField("ContainerUUID", uuid)
-               logger.Infof("cancelling container because %s", reason)
-               err := sch.queue.Cancel(uuid)
-               if err != nil {
-                       logger.WithError(err).Print("error cancelling container")
-               }
-       }
-       kill := func(ent container.QueueEnt, reason string) {
-               uuid := ent.Container.UUID
-               logger := sch.logger.WithField("ContainerUUID", uuid)
-               logger.Debugf("killing crunch-run process because %s", reason)
-               sch.pool.KillContainer(uuid)
-       }
        qEntries, qUpdated := sch.queue.Entries()
        for uuid, ent := range qEntries {
                exited, running := running[uuid]
                switch ent.Container.State {
                case arvados.ContainerStateRunning:
                        if !running {
-                               go cancel(ent, "not running on any worker")
+                               go sch.cancel(ent, "not running on any worker")
                        } else if !exited.IsZero() && qUpdated.After(exited) {
-                               go cancel(ent, "state=\"Running\" after crunch-run exited")
+                               go sch.cancel(ent, "state=\"Running\" after crunch-run exited")
                        } else if ent.Container.Priority == 0 {
-                               go kill(ent, fmt.Sprintf("priority=%d", ent.Container.Priority))
+                               go sch.kill(ent, "priority=0")
                        }
                case arvados.ContainerStateComplete, arvados.ContainerStateCancelled:
                        if running {
@@ -62,7 +46,7 @@ func (sch *Scheduler) sync() {
                                // of kill() will be to make the
                                // worker available for the next
                                // container.
-                               go kill(ent, fmt.Sprintf("state=%q", ent.Container.State))
+                               go sch.kill(ent, fmt.Sprintf("state=%q", ent.Container.State))
                        } else {
                                sch.logger.WithFields(logrus.Fields{
                                        "ContainerUUID": uuid,
@@ -76,22 +60,60 @@ func (sch *Scheduler) sync() {
                                // a network outage and is still
                                // preparing to run a container that
                                // has already been unlocked/requeued.
-                               go kill(ent, fmt.Sprintf("state=%q", ent.Container.State))
+                               go sch.kill(ent, fmt.Sprintf("state=%q", ent.Container.State))
                        }
                case arvados.ContainerStateLocked:
                        if running && !exited.IsZero() && qUpdated.After(exited) {
-                               logger := sch.logger.WithFields(logrus.Fields{
-                                       "ContainerUUID": uuid,
-                                       "Exited":        time.Since(exited).Seconds(),
-                               })
-                               logger.Infof("requeueing container because state=%q after crunch-run exited", ent.Container.State)
-                               err := sch.queue.Unlock(uuid)
-                               if err != nil {
-                                       logger.WithError(err).Info("error requeueing container")
-                               }
+                               go sch.requeue(ent, "crunch-run exited")
+                       } else if running && exited.IsZero() && ent.Container.Priority == 0 {
+                               go sch.kill(ent, "priority=0")
+                       } else if !running && ent.Container.Priority == 0 {
+                               go sch.requeue(ent, "priority=0")
                        }
                default:
-                       sch.logger.WithField("ContainerUUID", uuid).Errorf("BUG: unexpected state %q", ent.Container.State)
+                       sch.logger.WithFields(logrus.Fields{
+                               "ContainerUUID": uuid,
+                               "State":         ent.Container.State,
+                       }).Error("BUG: unexpected state")
                }
        }
 }
+
+func (sch *Scheduler) cancel(ent container.QueueEnt, reason string) {
+       uuid := ent.Container.UUID
+       if !sch.uuidLock(uuid, "cancel") {
+               return
+       }
+       defer sch.uuidUnlock(uuid)
+       logger := sch.logger.WithField("ContainerUUID", uuid)
+       logger.Infof("cancelling container because %s", reason)
+       err := sch.queue.Cancel(uuid)
+       if err != nil {
+               logger.WithError(err).Print("error cancelling container")
+       }
+}
+
+func (sch *Scheduler) kill(ent container.QueueEnt, reason string) {
+       uuid := ent.Container.UUID
+       logger := sch.logger.WithField("ContainerUUID", uuid)
+       logger.Debugf("killing crunch-run process because %s", reason)
+       sch.pool.KillContainer(uuid)
+}
+
+func (sch *Scheduler) requeue(ent container.QueueEnt, reason string) {
+       uuid := ent.Container.UUID
+       if !sch.uuidLock(uuid, "requeue") {
+               return
+       }
+       defer sch.uuidUnlock(uuid)
+       logger := sch.logger.WithFields(logrus.Fields{
+               "ContainerUUID": uuid,
+               "State":         ent.Container.State,
+               "Priority":      ent.Container.Priority,
+       })
+       logger.Infof("requeueing locked container because %s", reason)
+       err := sch.queue.Unlock(uuid)
+       if err != nil {
+               logger.WithError(err).Error("error requeueing container")
+       }
+}
index b5dba9870dc041bf730e71e2a3f0dd2bca1ab7fc..feed1c2a78b82a84821f22eee99e39e960dbd431 100644 (file)
@@ -36,9 +36,11 @@ func New(t cloud.ExecutorTarget) *Executor {
 //
 // An Executor must not be copied.
 type Executor struct {
-       target  cloud.ExecutorTarget
-       signers []ssh.Signer
-       mtx     sync.RWMutex // controls access to instance after creation
+       target     cloud.ExecutorTarget
+       targetPort string
+       targetUser string
+       signers    []ssh.Signer
+       mtx        sync.RWMutex // controls access to instance after creation
 
        client      *ssh.Client
        clientErr   error
@@ -67,6 +69,17 @@ func (exr *Executor) SetTarget(t cloud.ExecutorTarget) {
        exr.target = t
 }
 
+// SetTargetPort sets the default port (name or number) to connect
+// to. This is used only when the address returned by the target's
+// Address() method does not specify a port. If the given port is
+// empty (or SetTargetPort is not called at all), the default port is
+// "ssh".
+func (exr *Executor) SetTargetPort(port string) {
+       exr.mtx.Lock()
+       defer exr.mtx.Unlock()
+       exr.targetPort = port
+}
+
 // Target returns the current target.
 func (exr *Executor) Target() cloud.ExecutorTarget {
        exr.mtx.RLock()
@@ -76,12 +89,18 @@ func (exr *Executor) Target() cloud.ExecutorTarget {
 
 // Execute runs cmd on the target. If an existing connection is not
 // usable, it sets up a new connection to the current target.
-func (exr *Executor) Execute(cmd string, stdin io.Reader) ([]byte, []byte, error) {
+func (exr *Executor) Execute(env map[string]string, cmd string, stdin io.Reader) ([]byte, []byte, error) {
        session, err := exr.newSession()
        if err != nil {
                return nil, nil, err
        }
        defer session.Close()
+       for k, v := range env {
+               err = session.Setenv(k, v)
+               if err != nil {
+                       return nil, nil, err
+               }
+       }
        var stdout, stderr bytes.Buffer
        session.Stdin = stdin
        session.Stdout = &stdout
@@ -161,9 +180,20 @@ func (exr *Executor) setupSSHClient() (*ssh.Client, error) {
        if addr == "" {
                return nil, errors.New("instance has no address")
        }
+       if h, p, err := net.SplitHostPort(addr); err != nil || p == "" {
+               // Target address does not specify a port.  Use
+               // targetPort, or "ssh".
+               if h == "" {
+                       h = addr
+               }
+               if p = exr.targetPort; p == "" {
+                       p = "ssh"
+               }
+               addr = net.JoinHostPort(h, p)
+       }
        var receivedKey ssh.PublicKey
        client, err := ssh.Dial("tcp", addr, &ssh.ClientConfig{
-               User: "root",
+               User: target.RemoteUser(),
                Auth: []ssh.AuthMethod{
                        ssh.PublicKeys(exr.signers...),
                },
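
Editor's note: a usage sketch of the widened Execute signature and the new
port fallback; target, signer, and the command are illustrative:

    exr := ssh_executor.New(target) // target: a cloud.ExecutorTarget
    exr.SetSigners(signer)          // signer: an ssh.Signer for the dispatch key
    exr.SetTargetPort("2222")       // used only if target.Address() has no port

    stdout, stderr, err := exr.Execute(
        map[string]string{"TESTVAR": "test value"}, // sent as SSH "env" requests
        "uname -a", // any remote command
        nil)        // stdin
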
index 8dabfecad86451d6d3b178b50becffff46fa179e..e7c023586b4bb3c09ac8968c35c2cc3f1ed01ee2 100644 (file)
@@ -6,8 +6,10 @@ package ssh_executor
 
 import (
        "bytes"
+       "fmt"
        "io"
        "io/ioutil"
+       "net"
        "sync"
        "testing"
        "time"
@@ -32,17 +34,72 @@ func (*testTarget) VerifyHostKey(ssh.PublicKey, *ssh.Client) error {
        return nil
 }
 
+// Address returns the wrapped SSHService's host, with the port
+// stripped. This ensures the executor won't work until
+// SetTargetPort() is called -- see (*testTarget)Port().
+func (tt *testTarget) Address() string {
+       h, _, err := net.SplitHostPort(tt.SSHService.Address())
+       if err != nil {
+               panic(err)
+       }
+       return h
+}
+
+func (tt *testTarget) Port() string {
+       _, p, err := net.SplitHostPort(tt.SSHService.Address())
+       if err != nil {
+               panic(err)
+       }
+       return p
+}
+
+type mitmTarget struct {
+       test.SSHService
+}
+
+func (*mitmTarget) VerifyHostKey(key ssh.PublicKey, client *ssh.Client) error {
+       return fmt.Errorf("host key failed verification: %#v", key)
+}
+
 type ExecutorSuite struct{}
 
+func (s *ExecutorSuite) TestBadHostKey(c *check.C) {
+       _, hostpriv := test.LoadTestKey(c, "../test/sshkey_vm")
+       clientpub, clientpriv := test.LoadTestKey(c, "../test/sshkey_dispatch")
+       target := &mitmTarget{
+               SSHService: test.SSHService{
+                       Exec: func(map[string]string, string, io.Reader, io.Writer, io.Writer) uint32 {
+                               c.Error("Target Exec func called even though host key verification failed")
+                               return 0
+                       },
+                       HostKey:        hostpriv,
+                       AuthorizedUser: "username",
+                       AuthorizedKeys: []ssh.PublicKey{clientpub},
+               },
+       }
+
+       err := target.Start()
+       c.Check(err, check.IsNil)
+       c.Logf("target address %q", target.Address())
+       defer target.Close()
+
+       exr := New(target)
+       exr.SetSigners(clientpriv)
+
+       _, _, err = exr.Execute(nil, "true", nil)
+       c.Check(err, check.ErrorMatches, "host key failed verification: .*")
+}
+
 func (s *ExecutorSuite) TestExecute(c *check.C) {
        command := `foo 'bar' "baz"`
        stdinData := "foobar\nbaz\n"
        _, hostpriv := test.LoadTestKey(c, "../test/sshkey_vm")
        clientpub, clientpriv := test.LoadTestKey(c, "../test/sshkey_dispatch")
        for _, exitcode := range []int{0, 1, 2} {
-               srv := &testTarget{
+               target := &testTarget{
                        SSHService: test.SSHService{
-                               Exec: func(cmd string, stdin io.Reader, stdout, stderr io.Writer) uint32 {
+                               Exec: func(env map[string]string, cmd string, stdin io.Reader, stdout, stderr io.Writer) uint32 {
+                                       c.Check(env["TESTVAR"], check.Equals, "test value")
                                        c.Check(cmd, check.Equals, command)
                                        var wg sync.WaitGroup
                                        wg.Add(2)
@@ -65,20 +122,37 @@ func (s *ExecutorSuite) TestExecute(c *check.C) {
                                        return uint32(exitcode)
                                },
                                HostKey:        hostpriv,
+                               AuthorizedUser: "username",
                                AuthorizedKeys: []ssh.PublicKey{clientpub},
                        },
                }
-               err := srv.Start()
+               err := target.Start()
                c.Check(err, check.IsNil)
-               c.Logf("srv address %q", srv.Address())
-               defer srv.Close()
+               c.Logf("target address %q", target.Address())
+               defer target.Close()
 
-               exr := New(srv)
+               exr := New(target)
                exr.SetSigners(clientpriv)
 
+               // Use the default target port (ssh). Execute will
+               // return a connection error or an authentication
+               // error, depending on whether the test host is
+               // running an SSH server.
+               _, _, err = exr.Execute(nil, command, nil)
+               c.Check(err, check.ErrorMatches, `.*(unable to authenticate|connection refused).*`)
+
+               // Use a bogus target port. Execute will return a
+               // connection error.
+               exr.SetTargetPort("0")
+               _, _, err = exr.Execute(nil, command, nil)
+               c.Check(err, check.ErrorMatches, `.*connection refused.*`)
+
+               // Use the test server's listening port.
+               exr.SetTargetPort(target.Port())
+
                done := make(chan bool)
                go func() {
-                       stdout, stderr, err := exr.Execute(command, bytes.NewBufferString(stdinData))
+                       stdout, stderr, err := exr.Execute(map[string]string{"TESTVAR": "test value"}, command, bytes.NewBufferString(stdinData))
                        if exitcode == 0 {
                                c.Check(err, check.IsNil)
                        } else {
diff --git a/lib/dispatchcloud/test/lame_instance_set.go b/lib/dispatchcloud/test/lame_instance_set.go
deleted file mode 100644 (file)
index baab407..0000000
+++ /dev/null
@@ -1,118 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package test
-
-import (
-       "fmt"
-       "math/rand"
-       "sync"
-
-       "git.curoverse.com/arvados.git/lib/cloud"
-       "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "golang.org/x/crypto/ssh"
-)
-
-// LameInstanceSet creates instances that boot but can't run
-// containers.
-type LameInstanceSet struct {
-       Hold chan bool // set to make(chan bool) to hold operations until Release is called
-
-       mtx       sync.Mutex
-       instances map[*lameInstance]bool
-}
-
-// Create returns a new instance.
-func (p *LameInstanceSet) Create(instType arvados.InstanceType, imageID cloud.ImageID, tags cloud.InstanceTags, pubkey ssh.PublicKey) (cloud.Instance, error) {
-       inst := &lameInstance{
-               p:            p,
-               id:           cloud.InstanceID(fmt.Sprintf("lame-%x", rand.Uint64())),
-               providerType: instType.ProviderType,
-       }
-       inst.SetTags(tags)
-       if p.Hold != nil {
-               p.Hold <- true
-       }
-       p.mtx.Lock()
-       defer p.mtx.Unlock()
-       if p.instances == nil {
-               p.instances = map[*lameInstance]bool{}
-       }
-       p.instances[inst] = true
-       return inst, nil
-}
-
-// Instances returns the instances that haven't been destroyed.
-func (p *LameInstanceSet) Instances(cloud.InstanceTags) ([]cloud.Instance, error) {
-       p.mtx.Lock()
-       defer p.mtx.Unlock()
-       var instances []cloud.Instance
-       for i := range p.instances {
-               instances = append(instances, i)
-       }
-       return instances, nil
-}
-
-// Stop is a no-op, but exists to satisfy cloud.InstanceSet.
-func (p *LameInstanceSet) Stop() {
-}
-
-// Release n held calls. Blocks if n calls aren't already
-// waiting. Blocks forever if Hold is nil.
-func (p *LameInstanceSet) Release(n int) {
-       for i := 0; i < n; i++ {
-               <-p.Hold
-       }
-}
-
-type lameInstance struct {
-       p            *LameInstanceSet
-       id           cloud.InstanceID
-       providerType string
-       tags         cloud.InstanceTags
-}
-
-func (inst *lameInstance) ID() cloud.InstanceID {
-       return inst.id
-}
-
-func (inst *lameInstance) String() string {
-       return fmt.Sprint(inst.id)
-}
-
-func (inst *lameInstance) ProviderType() string {
-       return inst.providerType
-}
-
-func (inst *lameInstance) Address() string {
-       return "0.0.0.0:1234"
-}
-
-func (inst *lameInstance) SetTags(tags cloud.InstanceTags) error {
-       inst.p.mtx.Lock()
-       defer inst.p.mtx.Unlock()
-       inst.tags = cloud.InstanceTags{}
-       for k, v := range tags {
-               inst.tags[k] = v
-       }
-       return nil
-}
-
-func (inst *lameInstance) Destroy() error {
-       if inst.p.Hold != nil {
-               inst.p.Hold <- true
-       }
-       inst.p.mtx.Lock()
-       defer inst.p.mtx.Unlock()
-       delete(inst.p.instances, inst)
-       return nil
-}
-
-func (inst *lameInstance) Tags() cloud.InstanceTags {
-       return inst.tags
-}
-
-func (inst *lameInstance) VerifyHostKey(ssh.PublicKey, *ssh.Client) error {
-       return nil
-}
index b1e4e03b12ea142e925b45fe689217a499e59bb9..f1fde4f422ce55198742871883f8a0bbd7c682d3 100644 (file)
@@ -32,13 +32,14 @@ func LoadTestKey(c *check.C, fnm string) (ssh.PublicKey, ssh.Signer) {
 
 // An SSHExecFunc handles an "exec" session on a multiplexed SSH
 // connection.
-type SSHExecFunc func(command string, stdin io.Reader, stdout, stderr io.Writer) uint32
+type SSHExecFunc func(env map[string]string, command string, stdin io.Reader, stdout, stderr io.Writer) uint32
 
 // An SSHService accepts SSH connections on an available TCP port and
 // passes clients' "exec" sessions to the provided SSHExecFunc.
 type SSHService struct {
        Exec           SSHExecFunc
        HostKey        ssh.Signer
+       AuthorizedUser string
        AuthorizedKeys []ssh.PublicKey
 
        listener net.Listener
@@ -64,6 +65,11 @@ func (ss *SSHService) Address() string {
        return ln.Addr().String()
 }
 
+// RemoteUser returns the username that will be accepted.
+func (ss *SSHService) RemoteUser() string {
+       return ss.AuthorizedUser
+}
+
 // Close shuts down the server and releases resources. Established
 // connections are unaffected.
 func (ss *SSHService) Close() {
@@ -103,7 +109,7 @@ func (ss *SSHService) run() {
        }
        config.AddHostKey(ss.HostKey)
 
-       listener, err := net.Listen("tcp", ":")
+       listener, err := net.Listen("tcp", "127.0.0.1:")
        if err != nil {
                ss.err = err
                return
@@ -146,22 +152,37 @@ func (ss *SSHService) serveConn(nConn net.Conn, config *ssh.ServerConfig) {
                        log.Printf("accept channel: %s", err)
                        return
                }
-               var execReq struct {
-                       Command string
-               }
+               didExec := false
+               sessionEnv := map[string]string{}
                go func() {
                        for req := range reqs {
-                               if req.Type == "exec" && execReq.Command == "" {
+                               switch {
+                               case didExec:
+                                       // Reject anything after exec
+                                       req.Reply(false, nil)
+                               case req.Type == "exec":
+                                       var execReq struct {
+                                               Command string
+                                       }
                                        req.Reply(true, nil)
                                        ssh.Unmarshal(req.Payload, &execReq)
                                        go func() {
                                                var resp struct {
                                                        Status uint32
                                                }
-                                               resp.Status = ss.Exec(execReq.Command, ch, ch, ch.Stderr())
+                                               resp.Status = ss.Exec(sessionEnv, execReq.Command, ch, ch, ch.Stderr())
                                                ch.SendRequest("exit-status", false, ssh.Marshal(&resp))
                                                ch.Close()
                                        }()
+                                       didExec = true
+                               case req.Type == "env":
+                                       var envReq struct {
+                                               Name  string
+                                               Value string
+                                       }
+                                       req.Reply(true, nil)
+                                       ssh.Unmarshal(req.Payload, &envReq)
+                                       sessionEnv[envReq.Name] = envReq.Value
                                }
                        }
                }()
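
The "env" requests above let a client stage environment variables before its single permitted "exec"; each client-side Setenv arrives as one "env" request and is collected into sessionEnv. A minimal client-side sketch using golang.org/x/crypto/ssh — the signer, host value, and command are assumptions for illustration:

    config := &ssh.ClientConfig{
            User:            "root",
            Auth:            []ssh.AuthMethod{ssh.PublicKeys(signer)}, // signer: e.g. from LoadTestKey
            HostKeyCallback: ssh.InsecureIgnoreHostKey(),              // acceptable for tests only
    }
    client, err := ssh.Dial("tcp", ss.Address(), config)
    if err != nil {
            log.Fatal(err)
    }
    defer client.Close()
    session, err := client.NewSession()
    if err != nil {
            log.Fatal(err)
    }
    defer session.Close()
    // Sent as an "env" request; the server stores it in sessionEnv.
    session.Setenv("ARVADOS_API_HOST", "zzzzz.example.com")
    // The first (and only accepted) "exec" request.
    out, err := session.Output("crunch-run --list")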
index f738e206641edebffb8337bc7361511375ed8683..5873e492213b86f58eaa98850c5c00c073cd2aee 100644 (file)
@@ -6,9 +6,11 @@ package test
 
 import (
        "crypto/rand"
+       "encoding/json"
        "errors"
        "fmt"
        "io"
+       "io/ioutil"
        math_rand "math/rand"
        "regexp"
        "strings"
@@ -17,7 +19,6 @@ import (
 
        "git.curoverse.com/arvados.git/lib/cloud"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "github.com/mitchellh/mapstructure"
        "github.com/sirupsen/logrus"
        "golang.org/x/crypto/ssh"
 )
@@ -41,19 +42,36 @@ type StubDriver struct {
        // Destroy. 0=always succeed, 1=always fail.
        ErrorRateDestroy float64
 
+       // If Create() or Instances() is called too frequently, return
+       // rate-limiting errors.
+       MinTimeBetweenCreateCalls    time.Duration
+       MinTimeBetweenInstancesCalls time.Duration
+
+       // If true, Create and Destroy calls block until
+       // ReleaseCloudOps() is called.
+       HoldCloudOps bool
+
        instanceSets []*StubInstanceSet
+       holdCloudOps chan bool
 }
 
 // InstanceSet returns a new *StubInstanceSet.
-func (sd *StubDriver) InstanceSet(params map[string]interface{}, id cloud.InstanceSetID,
-       logger logrus.FieldLogger) (cloud.InstanceSet, error) {
-
+func (sd *StubDriver) InstanceSet(params json.RawMessage, id cloud.InstanceSetID, logger logrus.FieldLogger) (cloud.InstanceSet, error) {
+       if sd.holdCloudOps == nil {
+               sd.holdCloudOps = make(chan bool)
+       }
        sis := StubInstanceSet{
                driver:  sd,
+               logger:  logger,
                servers: map[cloud.InstanceID]*StubVM{},
        }
        sd.instanceSets = append(sd.instanceSets, &sis)
-       return &sis, mapstructure.Decode(params, &sis)
+
+       var err error
+       if params != nil {
+               err = json.Unmarshal(params, &sis)
+       }
+       return &sis, err
 }
 
 // InstanceSets returns all instances that have been created by the
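
Replacing the mapstructure-decoded map[string]interface{} with json.RawMessage lets each driver unmarshal its own config struct directly. The pool tests below exercise the simplest case; a hedged sketch:

    // No driver-specific config: pass nil, as the tests do.
    driver := &test.StubDriver{}
    is, err := driver.InstanceSet(nil, "", logger)
    if err != nil {
            log.Fatal(err)
    }
    // A real driver would instead receive raw JSON here and decode
    // its own configuration struct via json.Unmarshal.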
@@ -63,19 +81,41 @@ func (sd *StubDriver) InstanceSets() []*StubInstanceSet {
        return sd.instanceSets
 }
 
+// ReleaseCloudOps releases n pending Create/Destroy calls. If there
+// are fewer than n blocked calls pending, it waits for the rest to
+// arrive.
+func (sd *StubDriver) ReleaseCloudOps(n int) {
+       for i := 0; i < n; i++ {
+               <-sd.holdCloudOps
+       }
+}
+
 type StubInstanceSet struct {
        driver  *StubDriver
+       logger  logrus.FieldLogger
        servers map[cloud.InstanceID]*StubVM
        mtx     sync.RWMutex
        stopped bool
+
+       allowCreateCall    time.Time
+       allowInstancesCall time.Time
 }
 
-func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, tags cloud.InstanceTags, authKey ssh.PublicKey) (cloud.Instance, error) {
+func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, tags cloud.InstanceTags, cmd cloud.InitCommand, authKey ssh.PublicKey) (cloud.Instance, error) {
+       if sis.driver.HoldCloudOps {
+               sis.driver.holdCloudOps <- true
+       }
        sis.mtx.Lock()
        defer sis.mtx.Unlock()
        if sis.stopped {
                return nil, errors.New("StubInstanceSet: Create called after Stop")
        }
+       if sis.allowCreateCall.After(time.Now()) {
+               return nil, RateLimitError{sis.allowCreateCall}
+       }
+       sis.allowCreateCall = time.Now().Add(sis.driver.MinTimeBetweenCreateCalls)
+
        ak := sis.driver.AuthorizedKeys
        if authKey != nil {
                ak = append([]ssh.PublicKey{authKey}, ak...)
@@ -85,9 +125,11 @@ func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID,
                id:           cloud.InstanceID(fmt.Sprintf("stub-%s-%x", it.ProviderType, math_rand.Int63())),
                tags:         copyTags(tags),
                providerType: it.ProviderType,
+               initCommand:  cmd,
        }
        svm.SSHService = SSHService{
                HostKey:        sis.driver.HostKey,
+               AuthorizedUser: "root",
                AuthorizedKeys: ak,
                Exec:           svm.Exec,
        }
@@ -101,6 +143,11 @@ func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID,
 func (sis *StubInstanceSet) Instances(cloud.InstanceTags) ([]cloud.Instance, error) {
        sis.mtx.RLock()
        defer sis.mtx.RUnlock()
+       if sis.allowInstancesCall.After(time.Now()) {
+               return nil, RateLimitError{sis.allowInstancesCall}
+       }
+       sis.allowInstancesCall = time.Now().Add(sis.driver.MinTimeBetweenInstancesCalls)
        var r []cloud.Instance
        for _, ss := range sis.servers {
                r = append(r, ss.Instance())
@@ -117,6 +164,11 @@ func (sis *StubInstanceSet) Stop() {
        sis.stopped = true
 }
 
+type RateLimitError struct{ Retry time.Time }
+
+func (e RateLimitError) Error() string            { return fmt.Sprintf("rate limited until %s", e.Retry) }
+func (e RateLimitError) EarliestRetry() time.Time { return e.Retry }
+
 // StubVM is a fake server that runs an SSH service. It represents a
 // VM running in a fake cloud.
 //
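
RateLimitError implements the cloud.RateLimitError interface (an error carrying an EarliestRetry time), which is what the worker pool's throttle inspects (see throttle.go further down). A sketch of a consumer honoring it directly:

    _, err := sis.Instances(cloud.InstanceTags{})
    if rle, ok := err.(cloud.RateLimitError); ok {
            // Back off until the provider says retrying is safe.
            time.Sleep(time.Until(rle.EarliestRetry()))
    }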
@@ -135,6 +187,7 @@ type StubVM struct {
        sis          *StubInstanceSet
        id           cloud.InstanceID
        tags         cloud.InstanceTags
+       initCommand  cloud.InitCommand
        providerType string
        SSHService   SSHService
        running      map[string]bool
@@ -157,7 +210,12 @@ func (svm *StubVM) Instance() stubInstance {
        }
 }
 
-func (svm *StubVM) Exec(command string, stdin io.Reader, stdout, stderr io.Writer) uint32 {
+func (svm *StubVM) Exec(env map[string]string, command string, stdin io.Reader, stdout, stderr io.Writer) uint32 {
+       stdinData, err := ioutil.ReadAll(stdin)
+       if err != nil {
+               fmt.Fprintf(stderr, "error reading stdin: %s\n", err)
+               return 1
+       }
        queue := svm.sis.driver.Queue
        uuid := regexp.MustCompile(`.{5}-dz642-.{15}`).FindString(command)
        if eta := svm.Boot.Sub(time.Now()); eta > 0 {
@@ -172,7 +230,19 @@ func (svm *StubVM) Exec(command string, stdin io.Reader, stdout, stderr io.Write
                fmt.Fprint(stderr, "crunch-run: command not found\n")
                return 1
        }
-       if strings.HasPrefix(command, "crunch-run --detach ") {
+       if strings.HasPrefix(command, "crunch-run --detach --stdin-env ") {
+               var stdinKV map[string]string
+               err := json.Unmarshal(stdinData, &stdinKV)
+               if err != nil {
+                       fmt.Fprintf(stderr, "unmarshal stdin: %s (stdin was: %q)\n", err, stdinData)
+                       return 1
+               }
+               for _, name := range []string{"ARVADOS_API_HOST", "ARVADOS_API_TOKEN"} {
+                       if stdinKV[name] == "" {
+                               fmt.Fprintf(stderr, "%s env var missing from stdin %q\n", name, stdinData)
+                               return 1
+                       }
+               }
                svm.Lock()
                if svm.running == nil {
                        svm.running = map[string]bool{}
@@ -181,7 +251,10 @@ func (svm *StubVM) Exec(command string, stdin io.Reader, stdout, stderr io.Write
                svm.Unlock()
                time.Sleep(svm.CrunchRunDetachDelay)
                fmt.Fprintf(stderr, "starting %s\n", uuid)
-               logger := logrus.WithField("ContainerUUID", uuid)
+               logger := svm.sis.logger.WithFields(logrus.Fields{
+                       "Instance":      svm.id,
+                       "ContainerUUID": uuid,
+               })
                logger.Printf("[test] starting crunch-run stub")
                go func() {
                        crashluck := math_rand.Float64()
@@ -263,12 +336,19 @@ func (si stubInstance) Address() string {
        return si.addr
 }
 
+func (si stubInstance) RemoteUser() string {
+       return si.svm.SSHService.AuthorizedUser
+}
+
 func (si stubInstance) Destroy() error {
+       sis := si.svm.sis
+       if sis.driver.HoldCloudOps {
+               sis.driver.holdCloudOps <- true
+       }
        if math_rand.Float64() < si.svm.sis.driver.ErrorRateDestroy {
                return errors.New("instance could not be destroyed")
        }
        si.svm.SSHService.Close()
-       sis := si.svm.sis
        sis.mtx.Lock()
        defer sis.mtx.Unlock()
        delete(sis.servers, si.svm.id)
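
HoldCloudOps gives tests a rendezvous point: with it set, every Create and Destroy parks on the driver's holdCloudOps channel until ReleaseCloudOps consumes the corresponding sends. The pattern, as used in the pool tests below:

    driver := test.StubDriver{HoldCloudOps: true}
    // ...start Create/Destroy calls in goroutines...
    go driver.ReleaseCloudOps(3) // let exactly three pending cloud ops proceed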
index fc3301d8636593cfd79aee008061c9cfafab577a..14f6a3efced3815f11b19b6e08612ead4326e4f6 100644 (file)
@@ -5,6 +5,9 @@
 package worker
 
 import (
+       "crypto/rand"
+       "errors"
+       "fmt"
        "io"
        "sort"
        "strings"
@@ -15,28 +18,32 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "github.com/prometheus/client_golang/prometheus"
        "github.com/sirupsen/logrus"
+       "golang.org/x/crypto/ssh"
 )
 
 const (
-       tagKeyInstanceType = "InstanceType"
-       tagKeyHold         = "Hold"
+       tagKeyInstanceType   = "InstanceType"
+       tagKeyIdleBehavior   = "IdleBehavior"
+       tagKeyInstanceSecret = "InstanceSecret"
 )
 
 // An InstanceView shows a worker's current state and recent activity.
 type InstanceView struct {
-       Instance             string
-       Price                float64
-       ArvadosInstanceType  string
-       ProviderInstanceType string
-       LastContainerUUID    string
-       LastBusy             time.Time
-       WorkerState          string
+       Instance             cloud.InstanceID `json:"instance"`
+       Address              string           `json:"address"`
+       Price                float64          `json:"price"`
+       ArvadosInstanceType  string           `json:"arvados_instance_type"`
+       ProviderInstanceType string           `json:"provider_instance_type"`
+       LastContainerUUID    string           `json:"last_container_uuid"`
+       LastBusy             time.Time        `json:"last_busy"`
+       WorkerState          string           `json:"worker_state"`
+       IdleBehavior         IdleBehavior     `json:"idle_behavior"`
 }
 
 // An Executor executes shell commands on a remote host.
 type Executor interface {
        // Run cmd on the current target.
-       Execute(cmd string, stdin io.Reader) (stdout, stderr []byte, err error)
+       Execute(env map[string]string, cmd string, stdin io.Reader) (stdout, stderr []byte, err error)
 
        // Use the given target for subsequent operations. The new
        // target is the same host as the previous target, but it
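
Any test double must now accept the session environment as Execute's first argument. A minimal stub satisfying the updated interface — a sketch only; the real test helper (stubExecutor/stubResp) lives in the pool test file:

    type nullExecutor struct{}

    func (nullExecutor) SetTarget(cloud.ExecutorTarget) {}
    func (nullExecutor) Close()                         {}
    func (nullExecutor) Execute(env map[string]string, cmd string, stdin io.Reader) ([]byte, []byte, error) {
            // A fuller stub could record env and cmd for assertions.
            return nil, nil, nil
    }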
@@ -61,6 +68,13 @@ const (
        defaultTimeoutBooting     = time.Minute * 10
        defaultTimeoutProbe       = time.Minute * 10
        defaultTimeoutShutdown    = time.Second * 10
+
+       // Time to wait after a quota error before trying again
+       // anyway, even if no instances have been shut down.
+       quotaErrorTTL = time.Minute
+
+       // Minimum time between "X failed because of rate limiting"
+       // log messages.
+       logRateLimitErrorInterval = time.Second * 10
 )
 
 func duration(conf arvados.Duration, def time.Duration) time.Duration {
@@ -75,10 +89,11 @@ func duration(conf arvados.Duration, def time.Duration) time.Duration {
 //
 // New instances are configured and set up according to the given
 // cluster configuration.
-func NewPool(logger logrus.FieldLogger, reg *prometheus.Registry, instanceSet cloud.InstanceSet, newExecutor func(cloud.Instance) Executor, cluster *arvados.Cluster) *Pool {
+func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *prometheus.Registry, instanceSet cloud.InstanceSet, newExecutor func(cloud.Instance) Executor, installPublicKey ssh.PublicKey, cluster *arvados.Cluster) *Pool {
        wp := &Pool{
                logger:             logger,
-               instanceSet:        instanceSet,
+               arvClient:          arvClient,
+               instanceSet:        &throttledInstanceSet{InstanceSet: instanceSet},
                newExecutor:        newExecutor,
                bootProbeCommand:   cluster.CloudVMs.BootProbeCommand,
                imageID:            cloud.ImageID(cluster.CloudVMs.ImageID),
@@ -90,6 +105,7 @@ func NewPool(logger logrus.FieldLogger, reg *prometheus.Registry, instanceSet cl
                timeoutBooting:     duration(cluster.CloudVMs.TimeoutBooting, defaultTimeoutBooting),
                timeoutProbe:       duration(cluster.CloudVMs.TimeoutProbe, defaultTimeoutProbe),
                timeoutShutdown:    duration(cluster.CloudVMs.TimeoutShutdown, defaultTimeoutShutdown),
+               installPublicKey:   installPublicKey,
                stop:               make(chan bool),
        }
        wp.registerMetrics(reg)
@@ -107,7 +123,8 @@ func NewPool(logger logrus.FieldLogger, reg *prometheus.Registry, instanceSet cl
 type Pool struct {
        // configuration
        logger             logrus.FieldLogger
-       instanceSet        cloud.InstanceSet
+       arvClient          *arvados.Client
+       instanceSet        *throttledInstanceSet
        newExecutor        func(cloud.Instance) Executor
        bootProbeCommand   string
        imageID            cloud.ImageID
@@ -119,6 +136,7 @@ type Pool struct {
        timeoutBooting     time.Duration
        timeoutProbe       time.Duration
        timeoutShutdown    time.Duration
+       installPublicKey   ssh.PublicKey
 
        // private state
        subscribers  map[<-chan struct{}]chan<- struct{}
@@ -132,23 +150,31 @@ type Pool struct {
        mtx          sync.RWMutex
        setupOnce    sync.Once
 
-       mInstances         prometheus.Gauge
+       throttleCreate    throttle
+       throttleInstances throttle
+
        mContainersRunning prometheus.Gauge
-       mVCPUs             prometheus.Gauge
-       mVCPUsInuse        prometheus.Gauge
-       mMemory            prometheus.Gauge
-       mMemoryInuse       prometheus.Gauge
+       mInstances         *prometheus.GaugeVec
+       mInstancesPrice    *prometheus.GaugeVec
+       mVCPUs             *prometheus.GaugeVec
+       mMemory            *prometheus.GaugeVec
 }
 
-// Subscribe returns a channel that becomes ready whenever a worker's
-// state changes.
+// Subscribe returns a buffered channel that becomes ready after any
+// change to the pool's state that could have scheduling implications:
+// a worker's state changes, a new worker appears, the cloud
+// provider's API rate limiting period ends, etc.
+//
+// Additional events that occur while the channel is already ready
+// will be dropped, so it is OK if the caller services the channel
+// slowly.
 //
 // Example:
 //
 //     ch := wp.Subscribe()
 //     defer wp.Unsubscribe(ch)
 //     for range ch {
-//             // ...try scheduling some work...
+//             tryScheduling(wp)
 //             if done {
 //                     break
 //             }
@@ -171,7 +197,8 @@ func (wp *Pool) Unsubscribe(ch <-chan struct{}) {
 }
 
 // Unallocated returns the number of unallocated (creating + booting +
-// idle + unknown) workers for each instance type.
+// idle + unknown) workers for each instance type.  Workers in
+// hold/drain mode are not included.
 func (wp *Pool) Unallocated() map[arvados.InstanceType]int {
        wp.setupOnce.Do(wp.setup)
        wp.mtx.RLock()
@@ -182,7 +209,14 @@ func (wp *Pool) Unallocated() map[arvados.InstanceType]int {
                creating[it] = len(times)
        }
        for _, wkr := range wp.workers {
-               if !(wkr.state == StateIdle || wkr.state == StateBooting || wkr.state == StateUnknown) {
+               // Skip workers that are not expected to become
+               // available soon. Note len(wkr.running)>0 is not
+               // redundant here: it can be true even in
+               // StateUnknown.
+               if wkr.state == StateShutdown ||
+                       wkr.state == StateRunning ||
+                       wkr.idleBehavior != IdleBehaviorRun ||
+                       len(wkr.running) > 0 {
                        continue
                }
                it := wkr.instType
@@ -212,20 +246,31 @@ func (wp *Pool) Unallocated() map[arvados.InstanceType]int {
 // Create a new instance with the given type, and add it to the worker
 // pool. The worker is added immediately; instance creation runs in
 // the background.
-func (wp *Pool) Create(it arvados.InstanceType) error {
+//
+// Create returns false if a pre-existing error state prevents it from
+// even attempting to create a new instance. Those errors are logged
+// by the Pool, so the caller does not need to log anything in such
+// cases.
+func (wp *Pool) Create(it arvados.InstanceType) bool {
        logger := wp.logger.WithField("InstanceType", it.Name)
        wp.setupOnce.Do(wp.setup)
        wp.mtx.Lock()
        defer wp.mtx.Unlock()
-       if time.Now().Before(wp.atQuotaUntil) {
-               return wp.atQuotaErr
+       if time.Now().Before(wp.atQuotaUntil) || wp.throttleCreate.Error() != nil {
+               return false
        }
-       tags := cloud.InstanceTags{tagKeyInstanceType: it.Name}
        now := time.Now()
        wp.creating[it] = append(wp.creating[it], now)
        go func() {
                defer wp.notify()
-               inst, err := wp.instanceSet.Create(it, wp.imageID, tags, nil)
+               secret := randomHex(instanceSecretLength)
+               tags := cloud.InstanceTags{
+                       tagKeyInstanceType:   it.Name,
+                       tagKeyIdleBehavior:   string(IdleBehaviorRun),
+                       tagKeyInstanceSecret: secret,
+               }
+               initCmd := cloud.InitCommand(fmt.Sprintf("umask 0177 && echo -n %q >%s", secret, instanceSecretFilename))
+               inst, err := wp.instanceSet.Create(it, wp.imageID, tags, initCmd, wp.installPublicKey)
                wp.mtx.Lock()
                defer wp.mtx.Unlock()
                // Remove our timestamp marker from wp.creating
@@ -236,17 +281,19 @@ func (wp *Pool) Create(it arvados.InstanceType) error {
                                break
                        }
                }
-               if err, ok := err.(cloud.QuotaError); ok && err.IsQuotaError() {
-                       wp.atQuotaErr = err
-                       wp.atQuotaUntil = time.Now().Add(time.Minute)
-               }
                if err != nil {
+                       if err, ok := err.(cloud.QuotaError); ok && err.IsQuotaError() {
+                               wp.atQuotaErr = err
+                               wp.atQuotaUntil = time.Now().Add(quotaErrorTTL)
+                               time.AfterFunc(quotaErrorTTL, wp.notify)
+                       }
                        logger.WithError(err).Error("create failed")
+                       wp.instanceSet.throttleCreate.CheckRateLimitError(err, wp.logger, "create instance", wp.notify)
                        return
                }
                wp.updateWorker(inst, it, StateBooting)
        }()
-       return nil
+       return true
 }
 
 // AtQuota returns true if Create is not expected to work at the
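
Since Create now returns bool instead of error, a caller's only job on failure is to try again later; the pool has already logged the cause and will notify subscribers when the quota or rate-limit holdoff expires. A hedged caller sketch:

    if !wp.Create(it) {
            // Holdoff in effect; wait for the next Subscribe()
            // notification instead of retrying in a tight loop.
            return
    }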
@@ -257,6 +304,21 @@ func (wp *Pool) AtQuota() bool {
        return time.Now().Before(wp.atQuotaUntil)
 }
 
+// SetIdleBehavior determines how the indicated instance will behave
+// when it has no containers running.
+func (wp *Pool) SetIdleBehavior(id cloud.InstanceID, idleBehavior IdleBehavior) error {
+       wp.mtx.Lock()
+       defer wp.mtx.Unlock()
+       wkr, ok := wp.workers[id]
+       if !ok {
+               return errors.New("requested instance does not exist")
+       }
+       wkr.idleBehavior = idleBehavior
+       wkr.saveTags()
+       wkr.shutdownIfIdle()
+       return nil
+}
+
 // Add or update worker attached to the given instance. Use
 // initialState if a new worker is created.
 //
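
SetIdleBehavior is the hook behind admin hold/drain; presumably it is wired to a management endpoint elsewhere in this branch (an assumption, not shown in this diff). Usage sketch:

    // Keep the instance but stop scheduling new containers on it.
    if err := wp.SetIdleBehavior(instID, worker.IdleBehaviorHold); err != nil {
            log.Printf("hold failed: %s", err) // e.g. instance no longer exists
    }
    // Later, return it to normal rotation.
    _ = wp.SetIdleBehavior(instID, worker.IdleBehaviorRun)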
@@ -264,6 +326,7 @@ func (wp *Pool) AtQuota() bool {
 //
 // Caller must have lock.
 func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initialState State) (*worker, bool) {
+       inst = tagVerifier{inst}
        id := inst.ID()
        if wkr := wp.workers[id]; wkr != nil {
                wkr.executor.SetTarget(inst)
@@ -272,32 +335,47 @@ func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initi
                if initialState == StateBooting && wkr.state == StateUnknown {
                        wkr.state = StateBooting
                }
+               wkr.saveTags()
                return wkr, false
        }
-       if initialState == StateUnknown && inst.Tags()[tagKeyHold] != "" {
-               initialState = StateHold
+
+       // If an instance has a valid IdleBehavior tag when it first
+       // appears, initialize the new worker accordingly (this is how
+       // we restore IdleBehavior that was set by a prior dispatch
+       // process); otherwise, default to "run". After this,
+       // wkr.idleBehavior is the source of truth, and will only be
+       // changed via SetIdleBehavior().
+       idleBehavior := IdleBehavior(inst.Tags()[tagKeyIdleBehavior])
+       if !validIdleBehavior[idleBehavior] {
+               idleBehavior = IdleBehaviorRun
        }
+
        logger := wp.logger.WithFields(logrus.Fields{
                "InstanceType": it.Name,
-               "Instance":     inst,
+               "Instance":     inst.ID(),
+               "Address":      inst.Address(),
        })
-       logger.WithField("State", initialState).Infof("instance appeared in cloud")
+       logger.WithFields(logrus.Fields{
+               "State":        initialState,
+               "IdleBehavior": idleBehavior,
+       }).Infof("instance appeared in cloud")
        now := time.Now()
        wkr := &worker{
-               mtx:      &wp.mtx,
-               wp:       wp,
-               logger:   logger,
-               executor: wp.newExecutor(inst),
-               state:    initialState,
-               instance: inst,
-               instType: it,
-               appeared: now,
-               probed:   now,
-               busy:     now,
-               updated:  now,
-               running:  make(map[string]struct{}),
-               starting: make(map[string]struct{}),
-               probing:  make(chan struct{}, 1),
+               mtx:          &wp.mtx,
+               wp:           wp,
+               logger:       logger,
+               executor:     wp.newExecutor(inst),
+               state:        initialState,
+               idleBehavior: idleBehavior,
+               instance:     inst,
+               instType:     it,
+               appeared:     now,
+               probed:       now,
+               busy:         now,
+               updated:      now,
+               running:      make(map[string]struct{}),
+               starting:     make(map[string]struct{}),
+               probing:      make(chan struct{}, 1),
        }
        wp.workers[id] = wkr
        return wkr, true
@@ -320,7 +398,7 @@ func (wp *Pool) Shutdown(it arvados.InstanceType) bool {
                // TODO: shutdown the worker with the longest idle
                // time (Idle) or the earliest create time (Booting)
                for _, wkr := range wp.workers {
-                       if wkr.state == tryState && wkr.instType == it {
+                       if wkr.idleBehavior != IdleBehaviorHold && wkr.state == tryState && wkr.instType == it {
                                logger.WithField("Instance", wkr.instance).Info("shutting down")
                                wkr.shutdown()
                                return true
@@ -343,6 +421,12 @@ func (wp *Pool) CountWorkers() map[State]int {
 }
 
 // Running returns the container UUIDs being prepared/run on workers.
+//
+// In the returned map, the time value indicates when the Pool
+// observed that the container process had exited. A container that
+// has not yet exited has a zero time value. The caller should use
+// KillContainer() to garbage-collect the entries for exited
+// containers.
 func (wp *Pool) Running() map[string]time.Time {
        wp.setupOnce.Do(wp.setup)
        wp.mtx.Lock()
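
The zero-versus-nonzero convention lets a scheduler use Running() both as a busy list and as a garbage-collection worklist. A sketch of the caller-side cleanup the comment above describes:

    for uuid, exited := range wp.Running() {
            if !exited.IsZero() {
                    // Process has exited; reap the stale entry.
                    wp.KillContainer(uuid)
            }
    }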
@@ -408,10 +492,14 @@ func (wp *Pool) KillContainer(uuid string) {
 func (wp *Pool) kill(wkr *worker, uuid string) {
        logger := wp.logger.WithFields(logrus.Fields{
                "ContainerUUID": uuid,
-               "Instance":      wkr.instance,
+               "Instance":      wkr.instance.ID(),
        })
        logger.Debug("killing process")
-       stdout, stderr, err := wkr.executor.Execute("crunch-run --kill 15 "+uuid, nil)
+       cmd := "crunch-run --kill 15 " + uuid
+       if u := wkr.instance.RemoteUser(); u != "root" {
+               cmd = "sudo " + cmd
+       }
+       stdout, stderr, err := wkr.executor.Execute(nil, cmd, nil)
        if err != nil {
                logger.WithFields(logrus.Fields{
                        "stderr": string(stderr),
@@ -437,13 +525,6 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
        if reg == nil {
                reg = prometheus.NewRegistry()
        }
-       wp.mInstances = prometheus.NewGauge(prometheus.GaugeOpts{
-               Namespace: "arvados",
-               Subsystem: "dispatchcloud",
-               Name:      "instances_total",
-               Help:      "Number of cloud VMs including pending, booting, running, held, and shutting down.",
-       })
-       reg.MustRegister(wp.mInstances)
        wp.mContainersRunning = prometheus.NewGauge(prometheus.GaugeOpts{
                Namespace: "arvados",
                Subsystem: "dispatchcloud",
@@ -451,35 +532,34 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
                Help:      "Number of containers reported running by cloud VMs.",
        })
        reg.MustRegister(wp.mContainersRunning)
-
-       wp.mVCPUs = prometheus.NewGauge(prometheus.GaugeOpts{
+       wp.mInstances = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+               Namespace: "arvados",
+               Subsystem: "dispatchcloud",
+               Name:      "instances_total",
+               Help:      "Number of cloud VMs.",
+       }, []string{"category"})
+       reg.MustRegister(wp.mInstances)
+       wp.mInstancesPrice = prometheus.NewGaugeVec(prometheus.GaugeOpts{
+               Namespace: "arvados",
+               Subsystem: "dispatchcloud",
+               Name:      "instances_price",
+               Help:      "Price of cloud VMs.",
+       }, []string{"category"})
+       reg.MustRegister(wp.mInstancesPrice)
+       wp.mVCPUs = prometheus.NewGaugeVec(prometheus.GaugeOpts{
                Namespace: "arvados",
                Subsystem: "dispatchcloud",
                Name:      "vcpus_total",
                Help:      "Total VCPUs on all cloud VMs.",
-       })
+       }, []string{"category"})
        reg.MustRegister(wp.mVCPUs)
-       wp.mVCPUsInuse = prometheus.NewGauge(prometheus.GaugeOpts{
-               Namespace: "arvados",
-               Subsystem: "dispatchcloud",
-               Name:      "vcpus_inuse",
-               Help:      "VCPUs on cloud VMs that are running containers.",
-       })
-       reg.MustRegister(wp.mVCPUsInuse)
-       wp.mMemory = prometheus.NewGauge(prometheus.GaugeOpts{
+       wp.mMemory = prometheus.NewGaugeVec(prometheus.GaugeOpts{
                Namespace: "arvados",
                Subsystem: "dispatchcloud",
                Name:      "memory_bytes_total",
                Help:      "Total memory on all cloud VMs.",
-       })
+       }, []string{"category"})
        reg.MustRegister(wp.mMemory)
-       wp.mMemoryInuse = prometheus.NewGauge(prometheus.GaugeOpts{
-               Namespace: "arvados",
-               Subsystem: "dispatchcloud",
-               Name:      "memory_bytes_inuse",
-               Help:      "Memory on cloud VMs that are running containers.",
-       })
-       reg.MustRegister(wp.mMemoryInuse)
 }
 
 func (wp *Pool) runMetrics() {
@@ -494,23 +574,38 @@ func (wp *Pool) updateMetrics() {
        wp.mtx.RLock()
        defer wp.mtx.RUnlock()
 
-       var alloc, cpu, cpuInuse, mem, memInuse int64
+       instances := map[string]int64{}
+       price := map[string]float64{}
+       cpu := map[string]int64{}
+       mem := map[string]int64{}
+       var running int64
        for _, wkr := range wp.workers {
-               cpu += int64(wkr.instType.VCPUs)
-               mem += int64(wkr.instType.RAM)
-               if len(wkr.running)+len(wkr.starting) == 0 {
-                       continue
+               var cat string
+               switch {
+               case len(wkr.running)+len(wkr.starting) > 0:
+                       cat = "inuse"
+               case wkr.idleBehavior == IdleBehaviorHold:
+                       cat = "hold"
+               case wkr.state == StateBooting:
+                       cat = "booting"
+               case wkr.state == StateUnknown:
+                       cat = "unknown"
+               default:
+                       cat = "idle"
                }
-               alloc += int64(len(wkr.running) + len(wkr.starting))
-               cpuInuse += int64(wkr.instType.VCPUs)
-               memInuse += int64(wkr.instType.RAM)
-       }
-       wp.mInstances.Set(float64(len(wp.workers)))
-       wp.mContainersRunning.Set(float64(alloc))
-       wp.mVCPUs.Set(float64(cpu))
-       wp.mMemory.Set(float64(mem))
-       wp.mVCPUsInuse.Set(float64(cpuInuse))
-       wp.mMemoryInuse.Set(float64(memInuse))
+               instances[cat]++
+               price[cat] += wkr.instType.Price
+               cpu[cat] += int64(wkr.instType.VCPUs)
+               mem[cat] += int64(wkr.instType.RAM)
+               running += int64(len(wkr.running) + len(wkr.starting))
+       }
+       for _, cat := range []string{"inuse", "hold", "booting", "unknown", "idle"} {
+               wp.mInstances.WithLabelValues(cat).Set(float64(instances[cat]))
+               wp.mInstancesPrice.WithLabelValues(cat).Set(price[cat])
+               wp.mVCPUs.WithLabelValues(cat).Set(float64(cpu[cat]))
+               wp.mMemory.WithLabelValues(cat).Set(float64(mem[cat]))
+       }
+       wp.mContainersRunning.Set(float64(running))
 }
 
 func (wp *Pool) runProbes() {
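
Splitting the gauges by a "category" label supersedes the old *_inuse twins: the previous totals are recoverable by summing across labels, and hold/booting/unknown instances become visible for the first time. A test-style sketch reading one label — prometheus/client_golang's testutil package is an assumption, not imported in this diff:

    import "github.com/prometheus/client_golang/prometheus/testutil"

    idle := testutil.ToFloat64(wp.mInstances.WithLabelValues("idle"))
    inuse := testutil.ToFloat64(wp.mInstances.WithLabelValues("inuse"))
    fmt.Printf("idle=%v inuse=%v\n", idle, inuse)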
@@ -588,18 +683,20 @@ func (wp *Pool) Instances() []InstanceView {
        wp.mtx.Lock()
        for _, w := range wp.workers {
                r = append(r, InstanceView{
-                       Instance:             w.instance.String(),
+                       Instance:             w.instance.ID(),
+                       Address:              w.instance.Address(),
                        Price:                w.instType.Price,
                        ArvadosInstanceType:  w.instType.Name,
                        ProviderInstanceType: w.instType.ProviderType,
                        LastContainerUUID:    w.lastUUID,
                        LastBusy:             w.busy,
                        WorkerState:          w.state.String(),
+                       IdleBehavior:         w.idleBehavior,
                })
        }
        wp.mtx.Unlock()
        sort.Slice(r, func(i, j int) bool {
-               return strings.Compare(r[i].Instance, r[j].Instance) < 0
+               return strings.Compare(string(r[i].Instance), string(r[j].Instance)) < 0
        })
        return r
 }
@@ -624,10 +721,14 @@ func (wp *Pool) notify() {
 
 func (wp *Pool) getInstancesAndSync() error {
        wp.setupOnce.Do(wp.setup)
+       if err := wp.instanceSet.throttleInstances.Error(); err != nil {
+               return err
+       }
        wp.logger.Debug("getting instance list")
        threshold := time.Now()
        instances, err := wp.instanceSet.Instances(cloud.InstanceTags{})
        if err != nil {
+               wp.instanceSet.throttleInstances.CheckRateLimitError(err, wp.logger, "list instances", wp.notify)
                return err
        }
        wp.sync(threshold, instances)
@@ -664,7 +765,7 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
                        continue
                }
                logger := wp.logger.WithFields(logrus.Fields{
-                       "Instance":    wkr.instance,
+                       "Instance":    wkr.instance.ID(),
                        "WorkerState": wkr.state,
                })
                logger.Info("instance disappeared in cloud")
@@ -682,3 +783,14 @@ func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
                go wp.notify()
        }
 }
+
+// randomHex returns a string of n random hexadecimal digits (n*4
+// random bits). n must be even.
+func randomHex(n int) string {
+       buf := make([]byte, n/2)
+       _, err := rand.Read(buf)
+       if err != nil {
+               panic(err)
+       }
+       return fmt.Sprintf("%x", buf)
+}
index 7551caff9547178338e5ba7c900dadfde2c88184..da9e650b8121889511886e9b16dc8eb827fcee14 100644 (file)
@@ -5,13 +5,15 @@
 package worker
 
 import (
-       "io"
+       "sort"
+       "strings"
        "time"
 
        "git.curoverse.com/arvados.git/lib/cloud"
        "git.curoverse.com/arvados.git/lib/dispatchcloud/test"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
-       "github.com/sirupsen/logrus"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
+       "github.com/prometheus/client_golang/prometheus"
        check "gopkg.in/check.v1"
 )
 
@@ -31,29 +33,114 @@ var less = &lessChecker{&check.CheckerInfo{Name: "less", Params: []string{"obtai
 
 type PoolSuite struct{}
 
-func (suite *PoolSuite) SetUpSuite(c *check.C) {
-       logrus.StandardLogger().SetLevel(logrus.DebugLevel)
-}
+func (suite *PoolSuite) TestResumeAfterRestart(c *check.C) {
+       type1 := test.InstanceType(1)
+       type2 := test.InstanceType(2)
+       type3 := test.InstanceType(3)
+       waitForIdle := func(pool *Pool, notify <-chan struct{}) {
+               timeout := time.NewTimer(time.Second)
+               for {
+                       instances := pool.Instances()
+                       sort.Slice(instances, func(i, j int) bool {
+                               return strings.Compare(instances[i].ArvadosInstanceType, instances[j].ArvadosInstanceType) < 0
+                       })
+                       if len(instances) == 3 &&
+                               instances[0].ArvadosInstanceType == type1.Name &&
+                               instances[0].WorkerState == StateIdle.String() &&
+                               instances[1].ArvadosInstanceType == type1.Name &&
+                               instances[1].WorkerState == StateIdle.String() &&
+                               instances[2].ArvadosInstanceType == type2.Name &&
+                               instances[2].WorkerState == StateIdle.String() {
+                               return
+                       }
+                       select {
+                       case <-timeout.C:
+                               c.Logf("pool.Instances() == %#v", instances)
+                               c.Error("timed out")
+                               return
+                       case <-notify:
+                       }
+               }
+       }
 
-func (suite *PoolSuite) TestStartContainer(c *check.C) {
-       // TODO: use an instanceSet stub with an SSH server
-}
+       logger := ctxlog.TestLogger(c)
+       driver := &test.StubDriver{}
+       is, err := driver.InstanceSet(nil, "", logger)
+       c.Assert(err, check.IsNil)
+
+       newExecutor := func(cloud.Instance) Executor {
+               return stubExecutor{
+                       "crunch-run --list": stubResp{},
+                       "true":              stubResp{},
+               }
+       }
+
+       cluster := &arvados.Cluster{
+               Dispatch: arvados.Dispatch{
+                       MaxProbesPerSecond: 1000,
+                       ProbeInterval:      arvados.Duration(time.Millisecond * 10),
+               },
+               CloudVMs: arvados.CloudVMs{
+                       BootProbeCommand: "true",
+                       SyncInterval:     arvados.Duration(time.Millisecond * 10),
+               },
+               InstanceTypes: arvados.InstanceTypeMap{
+                       type1.Name: type1,
+                       type2.Name: type2,
+                       type3.Name: type3,
+               },
+       }
 
-func (suite *PoolSuite) TestVerifyHostKey(c *check.C) {
-       // TODO: use an instanceSet stub with an SSH server
+       pool := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, nil, cluster)
+       notify := pool.Subscribe()
+       defer pool.Unsubscribe(notify)
+       pool.Create(type1)
+       pool.Create(type1)
+       pool.Create(type2)
+       waitForIdle(pool, notify)
+       var heldInstanceID cloud.InstanceID
+       for _, inst := range pool.Instances() {
+               if inst.ArvadosInstanceType == type2.Name {
+                       heldInstanceID = cloud.InstanceID(inst.Instance)
+                       pool.SetIdleBehavior(heldInstanceID, IdleBehaviorHold)
+               }
+       }
+       pool.Stop()
+
+       c.Log("------- starting new pool, waiting to recover state")
+
+       pool2 := NewPool(logger, arvados.NewClientFromEnv(), prometheus.NewRegistry(), is, newExecutor, nil, cluster)
+       notify2 := pool2.Subscribe()
+       defer pool2.Unsubscribe(notify2)
+       waitForIdle(pool2, notify2)
+       for _, inst := range pool2.Instances() {
+               if inst.ArvadosInstanceType == type2.Name {
+                       c.Check(inst.Instance, check.Equals, heldInstanceID)
+                       c.Check(inst.IdleBehavior, check.Equals, IdleBehaviorHold)
+               } else {
+                       c.Check(inst.IdleBehavior, check.Equals, IdleBehaviorRun)
+               }
+       }
+       pool2.Stop()
 }
 
 func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) {
-       lameInstanceSet := &test.LameInstanceSet{Hold: make(chan bool)}
+       logger := ctxlog.TestLogger(c)
+       driver := test.StubDriver{HoldCloudOps: true}
+       instanceSet, err := driver.InstanceSet(nil, "", logger)
+       c.Assert(err, check.IsNil)
+
        type1 := arvados.InstanceType{Name: "a1s", ProviderType: "a1.small", VCPUs: 1, RAM: 1 * GiB, Price: .01}
        type2 := arvados.InstanceType{Name: "a2m", ProviderType: "a2.medium", VCPUs: 2, RAM: 2 * GiB, Price: .02}
+       type3 := arvados.InstanceType{Name: "a2l", ProviderType: "a2.large", VCPUs: 4, RAM: 4 * GiB, Price: .04}
        pool := &Pool{
-               logger:      logrus.StandardLogger(),
-               newExecutor: func(cloud.Instance) Executor { return &stubExecutor{} },
-               instanceSet: lameInstanceSet,
+               logger:      logger,
+               newExecutor: func(cloud.Instance) Executor { return stubExecutor{} },
+               instanceSet: &throttledInstanceSet{InstanceSet: instanceSet},
                instanceTypes: arvados.InstanceTypeMap{
                        type1.Name: type1,
                        type2.Name: type2,
+                       type3.Name: type3,
                },
        }
        notify := pool.Subscribe()
@@ -63,23 +150,42 @@ func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) {
 
        c.Check(pool.Unallocated()[type1], check.Equals, 0)
        c.Check(pool.Unallocated()[type2], check.Equals, 0)
+       c.Check(pool.Unallocated()[type3], check.Equals, 0)
        pool.Create(type2)
        pool.Create(type1)
        pool.Create(type2)
+       pool.Create(type3)
        c.Check(pool.Unallocated()[type1], check.Equals, 1)
        c.Check(pool.Unallocated()[type2], check.Equals, 2)
+       c.Check(pool.Unallocated()[type3], check.Equals, 1)
 
        // Unblock the pending Create calls.
-       go lameInstanceSet.Release(3)
+       go driver.ReleaseCloudOps(4)
 
        // Wait for each instance to either return from its Create
        // call, or show up in a poll.
        suite.wait(c, pool, notify, func() bool {
                pool.mtx.RLock()
                defer pool.mtx.RUnlock()
-               return len(pool.workers) == 3
+               return len(pool.workers) == 4
        })
 
+       // Place type3 node on admin-hold
+       ivs := suite.instancesByType(pool, type3)
+       c.Assert(ivs, check.HasLen, 1)
+       type3instanceID := ivs[0].Instance
+       err = pool.SetIdleBehavior(type3instanceID, IdleBehaviorHold)
+       c.Check(err, check.IsNil)
+
+       // Check admin-hold behavior: refuse to shut down, and don't
+       // report as Unallocated ("available now or soon").
+       c.Check(pool.Shutdown(type3), check.Equals, false)
+       suite.wait(c, pool, notify, func() bool {
+               return pool.Unallocated()[type3] == 0
+       })
+       c.Check(suite.instancesByType(pool, type3), check.HasLen, 1)
+
+       // Shutdown both type2 nodes
        c.Check(pool.Shutdown(type2), check.Equals, true)
        suite.wait(c, pool, notify, func() bool {
                return pool.Unallocated()[type1] == 1 && pool.Unallocated()[type2] == 1
@@ -99,16 +205,58 @@ func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) {
                }
                break
        }
+
+       // Shutdown type1 node
        c.Check(pool.Shutdown(type1), check.Equals, true)
        suite.wait(c, pool, notify, func() bool {
-               return pool.Unallocated()[type1] == 0 && pool.Unallocated()[type2] == 0
+               return pool.Unallocated()[type1] == 0 && pool.Unallocated()[type2] == 0 && pool.Unallocated()[type3] == 0
        })
        select {
        case <-notify2:
        case <-time.After(time.Second):
                c.Error("notify did not receive")
        }
-       go lameInstanceSet.Release(3) // unblock Destroy calls
+
+       // Put type3 node back in service.
+       err = pool.SetIdleBehavior(type3instanceID, IdleBehaviorRun)
+       c.Check(err, check.IsNil)
+       suite.wait(c, pool, notify, func() bool {
+               return pool.Unallocated()[type3] == 1
+       })
+
+       // Check admin-drain behavior: shut down right away, and don't
+       // report as Unallocated.
+       err = pool.SetIdleBehavior(type3instanceID, IdleBehaviorDrain)
+       c.Check(err, check.IsNil)
+       suite.wait(c, pool, notify, func() bool {
+               return pool.Unallocated()[type3] == 0
+       })
+       suite.wait(c, pool, notify, func() bool {
+               ivs := suite.instancesByType(pool, type3)
+               return len(ivs) == 1 && ivs[0].WorkerState == StateShutdown.String()
+       })
+
+       // Unblock all pending Destroy calls. Pool calls Destroy again
+       // if a node still appears in the provider list after a
+       // previous attempt, so there might be more than 4 Destroy
+       // calls to unblock.
+       go driver.ReleaseCloudOps(4444)
+
+       // Sync until all instances disappear from the provider list.
+       suite.wait(c, pool, notify, func() bool {
+               pool.getInstancesAndSync()
+               return len(pool.Instances()) == 0
+       })
+}
+
+func (suite *PoolSuite) instancesByType(pool *Pool, it arvados.InstanceType) []InstanceView {
+       var ivs []InstanceView
+       for _, iv := range pool.Instances() {
+               if iv.ArvadosInstanceType == it.Name {
+                       ivs = append(ivs, iv)
+               }
+       }
+       return ivs
 }
 
 func (suite *PoolSuite) wait(c *check.C, pool *Pool, notify <-chan struct{}, ready func() bool) {
@@ -123,13 +271,3 @@ func (suite *PoolSuite) wait(c *check.C, pool *Pool, notify <-chan struct{}, rea
        }
        c.Check(ready(), check.Equals, true)
 }
-
-type stubExecutor struct{}
-
-func (*stubExecutor) SetTarget(cloud.ExecutorTarget) {}
-
-func (*stubExecutor) Execute(cmd string, stdin io.Reader) ([]byte, []byte, error) {
-       return nil, nil, nil
-}
-
-func (*stubExecutor) Close() {}
diff --git a/lib/dispatchcloud/worker/throttle.go b/lib/dispatchcloud/worker/throttle.go
new file mode 100644 (file)
index 0000000..c5ea793
--- /dev/null
@@ -0,0 +1,68 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package worker
+
+import (
+       "fmt"
+       "sync"
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/cloud"
+       "github.com/sirupsen/logrus"
+)
+
+type throttle struct {
+       err   error
+       until time.Time
+       mtx   sync.Mutex
+}
+
+// CheckRateLimitError checks whether the given error is a
+// cloud.RateLimitError, and if so, ensures Error() returns a non-nil
+// error until the rate limiting holdoff period expires.
+//
+// If a notify func is given, it will be called after the holdoff
+// period expires.
+func (thr *throttle) CheckRateLimitError(err error, logger logrus.FieldLogger, callType string, notify func()) {
+       rle, ok := err.(cloud.RateLimitError)
+       if !ok {
+               return
+       }
+       until := rle.EarliestRetry()
+       if !until.After(time.Now()) {
+               return
+       }
+       dur := until.Sub(time.Now())
+       logger.WithFields(logrus.Fields{
+               "CallType": callType,
+               "Duration": dur,
+               "ResumeAt": until,
+       }).Info("suspending remote calls due to rate-limit error")
+       thr.ErrorUntil(fmt.Errorf("remote calls are suspended for %s, until %s", dur, until), until, notify)
+}
+
+func (thr *throttle) ErrorUntil(err error, until time.Time, notify func()) {
+       thr.mtx.Lock()
+       defer thr.mtx.Unlock()
+       thr.err, thr.until = err, until
+       if notify != nil {
+               time.AfterFunc(until.Sub(time.Now()), notify)
+       }
+}
+
+func (thr *throttle) Error() error {
+       thr.mtx.Lock()
+       defer thr.mtx.Unlock()
+       if thr.err != nil && time.Now().After(thr.until) {
+               thr.err = nil
+       }
+       return thr.err
+}
+
+type throttledInstanceSet struct {
+       cloud.InstanceSet
+       throttleCreate    throttle
+       throttleInstances throttle
+}
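
The throttle is used the same way everywhere in pool.go: check Error() before a cloud call, and feed any failure to CheckRateLimitError afterwards so the holdoff window starts and notify() fires when it ends. A condensed sketch of that gate (an illustrative wrapper, not a method in this diff):

    func (wp *Pool) listInstances() ([]cloud.Instance, error) {
            if err := wp.instanceSet.throttleInstances.Error(); err != nil {
                    return nil, err // still inside a rate-limit holdoff window
            }
            insts, err := wp.instanceSet.Instances(cloud.InstanceTags{})
            if err != nil {
                    wp.instanceSet.throttleInstances.CheckRateLimitError(err, wp.logger, "list instances", wp.notify)
            }
            return insts, err
    }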
diff --git a/lib/dispatchcloud/worker/throttle_test.go b/lib/dispatchcloud/worker/throttle_test.go
new file mode 100644 (file)
index 0000000..045b617
--- /dev/null
@@ -0,0 +1,32 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package worker
+
+import (
+       "errors"
+       "time"
+
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&ThrottleSuite{})
+
+type ThrottleSuite struct{}
+
+func (s *ThrottleSuite) TestRateLimitError(c *check.C) {
+       var t throttle
+       c.Check(t.Error(), check.IsNil)
+       t.ErrorUntil(errors.New("wait"), time.Now().Add(time.Second), nil)
+       c.Check(t.Error(), check.NotNil)
+       t.ErrorUntil(nil, time.Now(), nil)
+       c.Check(t.Error(), check.IsNil)
+
+       notified := false
+       t.ErrorUntil(errors.New("wait"), time.Now().Add(time.Millisecond), func() { notified = true })
+       c.Check(t.Error(), check.NotNil)
+       time.Sleep(time.Millisecond * 10)
+       c.Check(t.Error(), check.IsNil)
+       c.Check(notified, check.Equals, true)
+}
diff --git a/lib/dispatchcloud/worker/verify.go b/lib/dispatchcloud/worker/verify.go
new file mode 100644 (file)
index 0000000..e22c85d
--- /dev/null
@@ -0,0 +1,56 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package worker
+
+import (
+       "bytes"
+       "errors"
+       "fmt"
+
+       "git.curoverse.com/arvados.git/lib/cloud"
+       "golang.org/x/crypto/ssh"
+)
+
+var (
+       errBadInstanceSecret = errors.New("bad instance secret")
+
+       // Path of the secret file on the instance. It is interpolated
+       // into shell commands unquoted, so it must not need quoting.
+       instanceSecretFilename = "/var/run/arvados-instance-secret"
+       instanceSecretLength   = 40 // hex digits
+)
+
+type tagVerifier struct {
+       cloud.Instance
+}
+
+func (tv tagVerifier) VerifyHostKey(pubKey ssh.PublicKey, client *ssh.Client) error {
+       expectSecret := tv.Instance.Tags()[tagKeyInstanceSecret]
+       if err := tv.Instance.VerifyHostKey(pubKey, client); err != cloud.ErrNotImplemented || expectSecret == "" {
+               // If the wrapped instance can verify the key itself,
+               // or there is no secret to check against, return its
+               // decision.
+               return err
+       }
+       session, err := client.NewSession()
+       if err != nil {
+               return err
+       }
+       defer session.Close()
+       var stdout, stderr bytes.Buffer
+       session.Stdin = bytes.NewBuffer(nil)
+       session.Stdout = &stdout
+       session.Stderr = &stderr
+       cmd := fmt.Sprintf("cat %s", instanceSecretFilename)
+       if u := tv.RemoteUser(); u != "root" {
+               cmd = "sudo " + cmd
+       }
+       err = session.Run(cmd)
+       if err != nil {
+               return err
+       }
+       if stdout.String() != expectSecret {
+               return errBadInstanceSecret
+       }
+       return nil
+}
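
End to end, the instance secret round-trips as follows: Pool.Create tags the new instance and, via the boot init command, writes the same value to /var/run/arvados-instance-secret; on first SSH contact, tagVerifier cats the file back (with sudo for non-root remote users) and compares. A compressed sketch of the flow using the identifiers above:

    secret := randomHex(instanceSecretLength)
    tags := cloud.InstanceTags{tagKeyInstanceSecret: secret}
    initCmd := cloud.InitCommand(fmt.Sprintf("umask 0177 && echo -n %q >%s", secret, instanceSecretFilename))
    inst, _ := instanceSet.Create(it, imageID, tags, initCmd, pubKey)
    // Later, the SSH dialer's host key callback consults the wrapper:
    err := tagVerifier{inst}.VerifyHostKey(hostPubKey, sshClient)
    // errBadInstanceSecret here means we reached a live host that is
    // not the instance we created (e.g. a recycled IP address).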
index db6bc185b1af2895117046c6bbd9254f25b9ea0a..9be9f41f43b7ef51cbb1d1257e4ac39f642472aa 100644 (file)
@@ -6,15 +6,23 @@ package worker
 
 import (
        "bytes"
+       "encoding/json"
+       "fmt"
        "strings"
        "sync"
        "time"
 
        "git.curoverse.com/arvados.git/lib/cloud"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/stats"
        "github.com/sirupsen/logrus"
 )
 
+const (
+       // TODO: configurable
+       maxPingFailTime = 10 * time.Minute
+)
+
 // State indicates whether a worker is available to do work, and (if
 // not) whether/when it is expected to become ready.
 type State int
@@ -25,12 +33,6 @@ const (
        StateIdle                  // instance booted, no containers are running
        StateRunning               // instance is running one or more containers
        StateShutdown              // worker has stopped monitoring the instance
-       StateHold                  // running, but not available to run new containers
-)
-
-const (
-       // TODO: configurable
-       maxPingFailTime = 10 * time.Minute
 )
 
 var stateString = map[State]string{
@@ -39,7 +41,6 @@ var stateString = map[State]string{
        StateIdle:     "idle",
        StateRunning:  "running",
        StateShutdown: "shutdown",
-       StateHold:     "hold",
 }
 
 // String implements fmt.Stringer.
@@ -53,26 +54,42 @@ func (s State) MarshalText() ([]byte, error) {
        return []byte(stateString[s]), nil
 }
 
+// IdleBehavior indicates the behavior desired when a node becomes idle.
+type IdleBehavior string
+
+const (
+       IdleBehaviorRun   IdleBehavior = "run"   // run containers, or shutdown on idle timeout
+       IdleBehaviorHold  IdleBehavior = "hold"  // don't shutdown or run more containers
+       IdleBehaviorDrain IdleBehavior = "drain" // shutdown immediately when idle
+)
+
+var validIdleBehavior = map[IdleBehavior]bool{
+       IdleBehaviorRun:   true,
+       IdleBehaviorHold:  true,
+       IdleBehaviorDrain: true,
+}
+
 type worker struct {
        logger   logrus.FieldLogger
        executor Executor
        wp       *Pool
 
-       mtx       sync.Locker // must be wp's Locker.
-       state     State
-       instance  cloud.Instance
-       instType  arvados.InstanceType
-       vcpus     int64
-       memory    int64
-       appeared  time.Time
-       probed    time.Time
-       updated   time.Time
-       busy      time.Time
-       destroyed time.Time
-       lastUUID  string
-       running   map[string]struct{} // remember to update state idle<->running when this changes
-       starting  map[string]struct{} // remember to update state idle<->running when this changes
-       probing   chan struct{}
+       mtx          sync.Locker // must be wp's Locker.
+       state        State
+       idleBehavior IdleBehavior
+       instance     cloud.Instance
+       instType     arvados.InstanceType
+       vcpus        int64
+       memory       int64
+       appeared     time.Time
+       probed       time.Time
+       updated      time.Time
+       busy         time.Time
+       destroyed    time.Time
+       lastUUID     string
+       running      map[string]struct{} // remember to update state idle<->running when this changes
+       starting     map[string]struct{} // remember to update state idle<->running when this changes
+       probing      chan struct{}
 }
 
 // caller must have lock.
@@ -81,12 +98,28 @@ func (wkr *worker) startContainer(ctr arvados.Container) {
                "ContainerUUID": ctr.UUID,
                "Priority":      ctr.Priority,
        })
-       logger = logger.WithField("Instance", wkr.instance)
+       logger = logger.WithField("Instance", wkr.instance.ID())
        logger.Debug("starting container")
        wkr.starting[ctr.UUID] = struct{}{}
        wkr.state = StateRunning
        go func() {
-               stdout, stderr, err := wkr.executor.Execute("crunch-run --detach '"+ctr.UUID+"'", nil)
+               env := map[string]string{
+                       "ARVADOS_API_HOST":  wkr.wp.arvClient.APIHost,
+                       "ARVADOS_API_TOKEN": wkr.wp.arvClient.AuthToken,
+               }
+               if wkr.wp.arvClient.Insecure {
+                       env["ARVADOS_API_HOST_INSECURE"] = "1"
+               }
+               envJSON, err := json.Marshal(env)
+               if err != nil {
+                       panic(err)
+               }
+               stdin := bytes.NewBuffer(envJSON)
+               cmd := "crunch-run --detach --stdin-env '" + ctr.UUID + "'"
+               if u := wkr.instance.RemoteUser(); u != "root" {
+                       cmd = "sudo " + cmd
+               }
+               stdout, stderr, err := wkr.executor.Execute(nil, cmd, stdin)
                wkr.mtx.Lock()
                defer wkr.mtx.Unlock()
                now := time.Now()
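
The startup path above now pipes API credentials to crunch-run on stdin (presumably to keep them out of command lines and process listings) and wraps the command in sudo when the image's remote user isn't root. A minimal sketch of what a --stdin-env style receiver could look like; this is illustrative only, and crunch-run's actual implementation is not part of this diff:

package main

import (
	"encoding/json"
	"fmt"
	"os"
)

func main() {
	// Decode the flat JSON object of environment variables that the
	// dispatcher marshals and writes to our stdin (see envJSON above).
	var env map[string]string
	if err := json.NewDecoder(os.Stdin).Decode(&env); err != nil {
		fmt.Fprintln(os.Stderr, "reading env from stdin:", err)
		os.Exit(1)
	}
	for k, v := range env {
		os.Setenv(k, v) // apply before making any Arvados API calls
	}
	fmt.Println("ARVADOS_API_HOST =", os.Getenv("ARVADOS_API_HOST"))
}
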
@@ -126,57 +159,94 @@ func (wkr *worker) ProbeAndUpdate() {
        }
 }
 
-// should be called in a new goroutine
+// probeAndUpdate calls probeBooted and/or probeRunning if needed, and
+// updates state accordingly.
+//
+// In StateUnknown: Call both probeBooted and probeRunning.
+// In StateBooting: Call probeBooted; if successful, call probeRunning.
+// In StateRunning: Call probeRunning.
+// In StateIdle: Call probeRunning.
+// In StateShutdown: Do nothing.
+//
+// If both probes succeed, wkr.state changes to
+// StateIdle/StateRunning.
+//
+// If probeRunning succeeds, wkr.running is updated. (This means
+// wkr.running might be non-empty even in StateUnknown, if the boot
+// probe failed.)
+//
+// probeAndUpdate should be called in a new goroutine.
 func (wkr *worker) probeAndUpdate() {
        wkr.mtx.Lock()
        updated := wkr.updated
-       needProbeRunning := wkr.state == StateRunning || wkr.state == StateIdle
-       needProbeBooted := wkr.state == StateUnknown || wkr.state == StateBooting
+       initialState := wkr.state
        wkr.mtx.Unlock()
-       if !needProbeBooted && !needProbeRunning {
-               return
-       }
 
        var (
+               booted   bool
                ctrUUIDs []string
                ok       bool
-               stderr   []byte
+               stderr   []byte // from probeBooted
        )
-       if needProbeBooted {
-               ok, stderr = wkr.probeBooted()
-               wkr.mtx.Lock()
-               if ok || wkr.state == StateRunning || wkr.state == StateIdle {
-                       wkr.logger.Info("instance booted; will try probeRunning")
-                       needProbeRunning = true
+
+       switch initialState {
+       case StateShutdown:
+               return
+       case StateIdle, StateRunning:
+               booted = true
+       case StateUnknown, StateBooting:
+       default:
+               panic(fmt.Sprintf("unknown state %s", initialState))
+       }
+
+       probeStart := time.Now()
+       logger := wkr.logger.WithField("ProbeStart", probeStart)
+
+       if !booted {
+               booted, stderr = wkr.probeBooted()
+               if !booted {
+                       // Pretend this probe succeeded if another
+                       // concurrent attempt succeeded.
+                       wkr.mtx.Lock()
+                       booted = wkr.state == StateRunning || wkr.state == StateIdle
+                       wkr.mtx.Unlock()
+               }
+               if booted {
+                       logger.Info("instance booted; will try probeRunning")
                }
-               wkr.mtx.Unlock()
        }
-       if needProbeRunning {
-               ctrUUIDs, ok, stderr = wkr.probeRunning()
+       if booted || wkr.state == StateUnknown {
+               ctrUUIDs, ok = wkr.probeRunning()
        }
-       logger := wkr.logger.WithField("stderr", string(stderr))
        wkr.mtx.Lock()
        defer wkr.mtx.Unlock()
-       if !ok {
+       if !ok || (!booted && len(ctrUUIDs) == 0 && len(wkr.running) == 0) {
                if wkr.state == StateShutdown && wkr.updated.After(updated) {
                        // Skip the logging noise if shutdown was
                        // initiated during probe.
                        return
                }
-               dur := time.Since(wkr.probed)
-               logger := logger.WithFields(logrus.Fields{
-                       "Duration": dur,
-                       "State":    wkr.state,
-               })
-               if wkr.state == StateBooting && !needProbeRunning {
-                       // If we know the instance has never passed a
-                       // boot probe, it's not noteworthy that it
-                       // hasn't passed this probe.
-                       logger.Debug("new instance not responding")
-               } else {
-                       logger.Info("instance not responding")
+               // Using the start time of the probe as the timeout
+               // threshold ensures we always initiate at least one
+               // probe attempt after the boot/probe timeout expires
+               // (otherwise, a slow probe failure could cause us to
+               // shut down an instance even though it did in fact
+               // boot/recover before the timeout expired).
+               dur := probeStart.Sub(wkr.probed)
+               if wkr.shutdownIfBroken(dur) {
+                       // stderr from failed run-probes will have
+                       // been logged already, but boot-probe
+                       // failures are normal so they are logged only
+                       // at Debug level. This is our chance to log
+                       // some evidence about why the node never
+                       // booted, even in non-debug mode.
+                       if !booted {
+                               logger.WithFields(logrus.Fields{
+                                       "Duration": dur,
+                                       "stderr":   string(stderr),
+                               }).Info("boot failed")
+                       }
                }
-               wkr.shutdownIfBroken(dur)
                return
        }
 
@@ -201,11 +271,21 @@ func (wkr *worker) probeAndUpdate() {
                // advantage of the non-busy state, though.
                wkr.busy = updateTime
        }
-       running := map[string]struct{}{}
        changed := false
+
+       // Build a new "running" map. Set changed=true if it differs
+       // from the existing map (wkr.running) to ensure the scheduler
+       // gets notified below.
+       running := map[string]struct{}{}
        for _, uuid := range ctrUUIDs {
                running[uuid] = struct{}{}
                if _, ok := wkr.running[uuid]; !ok {
+                       if _, ok := wkr.starting[uuid]; !ok {
+                               // We didn't start it -- it must have
+                               // been started by a previous
+                               // dispatcher process.
+                               logger.WithField("ContainerUUID", uuid).Info("crunch-run process detected")
+                       }
                        changed = true
                }
        }
@@ -216,38 +296,66 @@ func (wkr *worker) probeAndUpdate() {
                        changed = true
                }
        }
-       if wkr.state == StateUnknown || wkr.state == StateBooting {
+
+       // Update state if this was the first successful boot-probe.
+       if booted && (wkr.state == StateUnknown || wkr.state == StateBooting) {
+               // Note: this will change again below if
+               // len(wkr.starting)+len(wkr.running) > 0.
                wkr.state = StateIdle
                changed = true
        }
-       if changed {
-               wkr.running = running
-               if wkr.state == StateIdle && len(wkr.starting)+len(wkr.running) > 0 {
-                       wkr.state = StateRunning
-               } else if wkr.state == StateRunning && len(wkr.starting)+len(wkr.running) == 0 {
-                       wkr.state = StateIdle
-               }
-               wkr.updated = updateTime
-               go wkr.wp.notify()
+
+       // If wkr.state and wkr.running aren't changing then there's
+       // no need to log anything, notify the scheduler, move state
+       // back and forth between idle/running, etc.
+       if !changed {
+               return
        }
+
+       // Log whenever a run-probe reveals crunch-run processes
+       // appearing/disappearing before boot-probe succeeds.
+       if wkr.state == StateUnknown && len(running) != len(wkr.running) {
+               logger.WithFields(logrus.Fields{
+                       "RunningContainers": len(running),
+                       "State":             wkr.state,
+               }).Info("crunch-run probe succeeded, but boot probe is still failing")
+       }
+
+       wkr.running = running
+       if wkr.state == StateIdle && len(wkr.starting)+len(wkr.running) > 0 {
+               wkr.state = StateRunning
+       } else if wkr.state == StateRunning && len(wkr.starting)+len(wkr.running) == 0 {
+               wkr.state = StateIdle
+       }
+       wkr.updated = updateTime
+       if booted && (initialState == StateUnknown || initialState == StateBooting) {
+               logger.WithFields(logrus.Fields{
+                       "RunningContainers": len(running),
+                       "State":             wkr.state,
+               }).Info("probes succeeded, instance is in service")
+       }
+       go wkr.wp.notify()
 }
 
-func (wkr *worker) probeRunning() (running []string, ok bool, stderr []byte) {
+func (wkr *worker) probeRunning() (running []string, ok bool) {
        cmd := "crunch-run --list"
-       stdout, stderr, err := wkr.executor.Execute(cmd, nil)
+       if u := wkr.instance.RemoteUser(); u != "root" {
+               cmd = "sudo " + cmd
+       }
+       stdout, stderr, err := wkr.executor.Execute(nil, cmd, nil)
        if err != nil {
                wkr.logger.WithFields(logrus.Fields{
                        "Command": cmd,
                        "stdout":  string(stdout),
                        "stderr":  string(stderr),
                }).WithError(err).Warn("probe failed")
-               return nil, false, stderr
+               return nil, false
        }
        stdout = bytes.TrimRight(stdout, "\n")
        if len(stdout) == 0 {
-               return nil, true, stderr
+               return nil, true
        }
-       return strings.Split(string(stdout), "\n"), true, stderr
+       return strings.Split(string(stdout), "\n"), true
 }
 
 func (wkr *worker) probeBooted() (ok bool, stderr []byte) {
@@ -255,7 +363,7 @@ func (wkr *worker) probeBooted() (ok bool, stderr []byte) {
        if cmd == "" {
                cmd = "true"
        }
-       stdout, stderr, err := wkr.executor.Execute(cmd, nil)
+       stdout, stderr, err := wkr.executor.Execute(nil, cmd, nil)
        logger := wkr.logger.WithFields(logrus.Fields{
                "Command": cmd,
                "stdout":  string(stdout),
@@ -270,16 +378,17 @@ func (wkr *worker) probeBooted() (ok bool, stderr []byte) {
 }
 
 // caller must have lock.
-func (wkr *worker) shutdownIfBroken(dur time.Duration) {
-       if wkr.state == StateHold {
-               return
+func (wkr *worker) shutdownIfBroken(dur time.Duration) bool {
+       if wkr.idleBehavior == IdleBehaviorHold {
+               // Never shut down.
+               return false
        }
        label, threshold := "", wkr.wp.timeoutProbe
-       if wkr.state == StateBooting {
+       if wkr.state == StateUnknown || wkr.state == StateBooting {
                label, threshold = "new ", wkr.wp.timeoutBooting
        }
        if dur < threshold {
-               return
+               return false
        }
        wkr.logger.WithFields(logrus.Fields{
                "Duration": dur,
@@ -287,23 +396,35 @@ func (wkr *worker) shutdownIfBroken(dur time.Duration) {
                "State":    wkr.state,
        }).Warnf("%sinstance unresponsive, shutting down", label)
        wkr.shutdown()
+       return true
 }
 
 // caller must have lock.
 func (wkr *worker) shutdownIfIdle() bool {
-       if wkr.state != StateIdle {
+       if wkr.idleBehavior == IdleBehaviorHold {
+               // Never shut down.
                return false
        }
        age := time.Since(wkr.busy)
-       if age < wkr.wp.timeoutIdle {
+
+       old := age >= wkr.wp.timeoutIdle
+       draining := wkr.idleBehavior == IdleBehaviorDrain
+       shouldShutdown := ((old || draining) && wkr.state == StateIdle) ||
+               (draining && wkr.state == StateBooting)
+       if !shouldShutdown {
                return false
        }
-       wkr.logger.WithField("Age", age).Info("shutdown idle worker")
+
+       wkr.logger.WithFields(logrus.Fields{
+               "State":        wkr.state,
+               "IdleDuration": stats.Duration(age),
+               "IdleBehavior": wkr.idleBehavior,
+       }).Info("shutdown idle worker")
        wkr.shutdown()
        return true
 }
 
-// caller must have lock
+// caller must have lock.
 func (wkr *worker) shutdown() {
        now := time.Now()
        wkr.updated = now
@@ -318,3 +439,29 @@ func (wkr *worker) shutdown() {
                }
        }()
 }
+
+// Save worker tags to cloud provider metadata if they don't already
+// match. Caller must have lock.
+func (wkr *worker) saveTags() {
+       instance := wkr.instance
+       tags := instance.Tags()
+       update := cloud.InstanceTags{
+               tagKeyInstanceType: wkr.instType.Name,
+               tagKeyIdleBehavior: string(wkr.idleBehavior),
+       }
+       save := false
+       for k, v := range update {
+               if tags[k] != v {
+                       tags[k] = v
+                       save = true
+               }
+       }
+       if save {
+               go func() {
+                       err := instance.SetTags(tags)
+                       if err != nil {
+                               wkr.wp.logger.WithField("Instance", instance.ID()).WithError(err).Warnf("error updating tags")
+                       }
+               }()
+       }
+}
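
With IdleBehavior now persisted in instance tags by saveTags(), a restarted dispatcher can recover each worker's desired behavior from the cloud provider. A minimal sketch of the read-back side, assuming the tagKeyIdleBehavior constant used above (the pool's actual instance-adoption code lives elsewhere in this changeset):

// idleBehaviorFromTags is a sketch of recovering IdleBehavior from the
// tags written by saveTags(). Unknown or missing values fall back to
// IdleBehaviorRun, the default behavior.
func idleBehaviorFromTags(tags cloud.InstanceTags) IdleBehavior {
	ib := IdleBehavior(tags[tagKeyIdleBehavior])
	if !validIdleBehavior[ib] {
		ib = IdleBehaviorRun
	}
	return ib
}
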
diff --git a/lib/dispatchcloud/worker/worker_test.go b/lib/dispatchcloud/worker/worker_test.go
new file mode 100644 (file)
index 0000000..3bc33b6
--- /dev/null
@@ -0,0 +1,240 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package worker
+
+import (
+       "errors"
+       "io"
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/cloud"
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/test"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&WorkerSuite{})
+
+type WorkerSuite struct{}
+
+func (suite *WorkerSuite) TestProbeAndUpdate(c *check.C) {
+       logger := ctxlog.TestLogger(c)
+       bootTimeout := time.Minute
+       probeTimeout := time.Second
+
+       is, err := (&test.StubDriver{}).InstanceSet(nil, "", logger)
+       c.Assert(err, check.IsNil)
+       inst, err := is.Create(arvados.InstanceType{}, "", nil, "echo InitCommand", nil)
+       c.Assert(err, check.IsNil)
+
+       type trialT struct {
+               testCaseComment string // displayed in test output to help identify failure case
+               age             time.Duration
+               state           State
+               running         int
+               starting        int
+               respBoot        stubResp // zero value is success
+               respRun         stubResp // zero value is success + nothing running
+               expectState     State
+               expectRunning   int
+       }
+
+       errFail := errors.New("failed")
+       respFail := stubResp{"", "command failed\n", errFail}
+       respContainerRunning := stubResp{"zzzzz-dz642-abcdefghijklmno\n", "", nil}
+       for _, trial := range []trialT{
+               {
+                       testCaseComment: "Unknown, probes fail",
+                       state:           StateUnknown,
+                       respBoot:        respFail,
+                       respRun:         respFail,
+                       expectState:     StateUnknown,
+               },
+               {
+                       testCaseComment: "Unknown, boot probe fails, but one container is running",
+                       state:           StateUnknown,
+                       respBoot:        respFail,
+                       respRun:         respContainerRunning,
+                       expectState:     StateUnknown,
+                       expectRunning:   1,
+               },
+               {
+                       testCaseComment: "Unknown, boot probe fails, previously running container has exited",
+                       state:           StateUnknown,
+                       running:         1,
+                       respBoot:        respFail,
+                       expectState:     StateUnknown,
+                       expectRunning:   0,
+               },
+               {
+                       testCaseComment: "Unknown, boot timeout exceeded, boot probe fails",
+                       state:           StateUnknown,
+                       age:             bootTimeout + time.Second,
+                       respBoot:        respFail,
+                       respRun:         respFail,
+                       expectState:     StateShutdown,
+               },
+               {
+                       testCaseComment: "Unknown, boot timeout exceeded, boot probe succeeds but crunch-run fails",
+                       state:           StateUnknown,
+                       age:             bootTimeout * 2,
+                       respRun:         respFail,
+                       expectState:     StateShutdown,
+               },
+               {
+                       testCaseComment: "Unknown, boot timeout exceeded, boot probe fails but crunch-run succeeds",
+                       state:           StateUnknown,
+                       age:             bootTimeout * 2,
+                       respBoot:        respFail,
+                       expectState:     StateShutdown,
+               },
+               {
+                       testCaseComment: "Unknown, boot timeout exceeded, boot probe fails but container is running",
+                       state:           StateUnknown,
+                       age:             bootTimeout * 2,
+                       respBoot:        respFail,
+                       respRun:         respContainerRunning,
+                       expectState:     StateUnknown,
+                       expectRunning:   1,
+               },
+               {
+                       testCaseComment: "Booting, boot probe fails, run probe fails",
+                       state:           StateBooting,
+                       respBoot:        respFail,
+                       respRun:         respFail,
+                       expectState:     StateBooting,
+               },
+               {
+                       testCaseComment: "Booting, boot probe fails, run probe succeeds (but isn't expected to be called)",
+                       state:           StateBooting,
+                       respBoot:        respFail,
+                       expectState:     StateBooting,
+               },
+               {
+                       testCaseComment: "Booting, boot probe succeeds, run probe fails",
+                       state:           StateBooting,
+                       respRun:         respFail,
+                       expectState:     StateBooting,
+               },
+               {
+                       testCaseComment: "Booting, boot probe succeeds, run probe succeeds",
+                       state:           StateBooting,
+                       expectState:     StateIdle,
+               },
+               {
+                       testCaseComment: "Booting, boot probe succeeds, run probe succeeds, container is running",
+                       state:           StateBooting,
+                       respRun:         respContainerRunning,
+                       expectState:     StateRunning,
+                       expectRunning:   1,
+               },
+               {
+                       testCaseComment: "Booting, boot timeout exceeded",
+                       state:           StateBooting,
+                       age:             bootTimeout * 2,
+                       respRun:         respFail,
+                       expectState:     StateShutdown,
+               },
+               {
+                       testCaseComment: "Idle, probe timeout exceeded, one container running",
+                       state:           StateIdle,
+                       age:             probeTimeout * 2,
+                       respRun:         respContainerRunning,
+                       expectState:     StateRunning,
+                       expectRunning:   1,
+               },
+               {
+                       testCaseComment: "Idle, probe timeout exceeded, one container running, probe fails",
+                       state:           StateIdle,
+                       age:             probeTimeout * 2,
+                       running:         1,
+                       respRun:         respFail,
+                       expectState:     StateShutdown,
+                       expectRunning:   1,
+               },
+               {
+                       testCaseComment: "Idle, probe timeout exceeded, nothing running, probe fails",
+                       state:           StateIdle,
+                       age:             probeTimeout * 2,
+                       respRun:         respFail,
+                       expectState:     StateShutdown,
+               },
+               {
+                       testCaseComment: "Running, one container still running",
+                       state:           StateRunning,
+                       running:         1,
+                       respRun:         respContainerRunning,
+                       expectState:     StateRunning,
+                       expectRunning:   1,
+               },
+               {
+                       testCaseComment: "Running, container has exited",
+                       state:           StateRunning,
+                       running:         1,
+                       expectState:     StateIdle,
+                       expectRunning:   0,
+               },
+               {
+                       testCaseComment: "Running, probe timeout exceeded, nothing running, new container being started",
+                       state:           StateRunning,
+                       age:             probeTimeout * 2,
+                       starting:        1,
+                       expectState:     StateRunning,
+               },
+       } {
+               c.Logf("------- %#v", trial)
+               ctime := time.Now().Add(-trial.age)
+               exr := stubExecutor{
+                       "bootprobe":         trial.respBoot,
+                       "crunch-run --list": trial.respRun,
+               }
+               wp := &Pool{
+                       newExecutor:      func(cloud.Instance) Executor { return exr },
+                       bootProbeCommand: "bootprobe",
+                       timeoutBooting:   bootTimeout,
+                       timeoutProbe:     probeTimeout,
+                       exited:           map[string]time.Time{},
+               }
+               wkr := &worker{
+                       logger:   logger,
+                       executor: exr,
+                       wp:       wp,
+                       mtx:      &wp.mtx,
+                       state:    trial.state,
+                       instance: inst,
+                       appeared: ctime,
+                       busy:     ctime,
+                       probed:   ctime,
+                       updated:  ctime,
+               }
+               if trial.running > 0 {
+                       wkr.running = map[string]struct{}{"zzzzz-dz642-abcdefghijklmno": struct{}{}}
+               }
+               if trial.starting > 0 {
+                       wkr.starting = map[string]struct{}{"zzzzz-dz642-abcdefghijklmno": struct{}{}}
+               }
+               wkr.probeAndUpdate()
+               c.Check(wkr.state, check.Equals, trial.expectState)
+               c.Check(len(wkr.running), check.Equals, trial.expectRunning)
+       }
+}
+
+type stubResp struct {
+       stdout string
+       stderr string
+       err    error
+}
+type stubExecutor map[string]stubResp
+
+func (se stubExecutor) SetTarget(cloud.ExecutorTarget) {}
+func (se stubExecutor) Close()                         {}
+func (se stubExecutor) Execute(env map[string]string, cmd string, stdin io.Reader) (stdout, stderr []byte, err error) {
+       resp, ok := se[cmd]
+       if !ok {
+               return nil, []byte("command not found\n"), errors.New("command not found")
+       }
+       return []byte(resp.stdout), []byte(resp.stderr), resp.err
+}
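
The stubExecutor above doubles as a compact statement of the revised Executor contract: Execute now takes an env map as its first argument, ahead of the command and stdin. For illustration only (the production executor is the SSH-based one elsewhere in the tree), a local-process implementation of the same shape might look like this:

import (
	"bytes"
	"io"
	"os"
	"os/exec"
)

// localExecutor runs each command through /bin/sh on the local host.
// It is a sketch of the Execute(env, cmd, stdin) shape only, not the
// production SSH executor.
type localExecutor struct{}

func (localExecutor) Execute(env map[string]string, cmd string, stdin io.Reader) (stdout, stderr []byte, err error) {
	c := exec.Command("/bin/sh", "-c", cmd)
	c.Env = os.Environ()
	for k, v := range env {
		c.Env = append(c.Env, k+"="+v)
	}
	c.Stdin = stdin
	var outbuf, errbuf bytes.Buffer
	c.Stdout, c.Stderr = &outbuf, &errbuf
	err = c.Run()
	return outbuf.Bytes(), errbuf.Bytes(), err
}
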
index 7f6b0236cb9571cd3ca30420cb6d41af6d787bd3..d99af0eea15428054fd5adc16596ca89b1de7820 100644 (file)
@@ -6,6 +6,7 @@
 package service
 
 import (
+       "context"
        "flag"
        "fmt"
        "io"
@@ -14,6 +15,7 @@ import (
 
        "git.curoverse.com/arvados.git/lib/cmd"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
        "github.com/coreos/go-systemd/daemon"
        "github.com/sirupsen/logrus"
@@ -24,7 +26,7 @@ type Handler interface {
        CheckHealth() error
 }
 
-type NewHandlerFunc func(*arvados.Cluster, *arvados.NodeProfile) Handler
+type NewHandlerFunc func(context.Context, *arvados.Cluster, *arvados.NodeProfile) Handler
 
 type command struct {
        newHandler NewHandlerFunc
@@ -45,11 +47,7 @@ func Command(svcName arvados.ServiceName, newHandler NewHandlerFunc) cmd.Handler
 }
 
 func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
-       log := logrus.New()
-       log.Formatter = &logrus.JSONFormatter{
-               TimestampFormat: rfc3339NanoFixed,
-       }
-       log.Out = stderr
+       log := ctxlog.New(stderr, "json", "info")
 
        var err error
        defer func() {
@@ -76,6 +74,10 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
        if err != nil {
                return 1
        }
+       log = ctxlog.New(stderr, cluster.Logging.Format, cluster.Logging.Level).WithFields(logrus.Fields{
+               "PID": os.Getpid(),
+       })
+       ctx := ctxlog.Context(context.Background(), log)
        profileName := *nodeProfile
        if profileName == "" {
                profileName = os.Getenv("ARVADOS_NODE_PROFILE")
@@ -89,7 +91,7 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
                err = fmt.Errorf("configuration does not enable the %s service on this host", c.svcName)
                return 1
        }
-       handler := c.newHandler(cluster, profile)
+       handler := c.newHandler(ctx, cluster, profile)
        if err = handler.CheckHealth(); err != nil {
                return 1
        }
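
The service command now derives its logger from the cluster's Logging configuration, tags it with the process ID, and hands it to the handler constructor through a context. A minimal sketch of the consuming side, assuming the SDK's ctxlog.FromContext accessor (only ctxlog.New and ctxlog.Context appear in the hunk above):

import (
	"context"

	"git.curoverse.com/arvados.git/lib/service"
	"git.curoverse.com/arvados.git/sdk/go/arvados"
	"git.curoverse.com/arvados.git/sdk/go/ctxlog"
)

func newExampleHandler(ctx context.Context, cluster *arvados.Cluster, np *arvados.NodeProfile) service.Handler {
	// Retrieve the logger that RunCommand attached to the context.
	logger := ctxlog.FromContext(ctx) // assumed accessor in sdk/go/ctxlog
	logger.WithField("ClusterID", cluster.ClusterID).Info("handler starting")
	return nil // a real handler would return its ServeHTTP/CheckHealth implementation
}
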
index 80abc9c497f2da9f0f72ebd022e47ae4fb07a14e..c7e20e2a72947d2e74f147e6a6c0fd68d14254f8 100644 (file)
@@ -30,7 +30,7 @@ Gem::Specification.new do |s|
   s.executables << "arv-crunch-job"
   s.executables << "arv-tag"
   s.required_ruby_version = '>= 2.1.0'
-  s.add_runtime_dependency 'arvados', '~> 1.2.0', '>= 1.2.0'
+  s.add_runtime_dependency 'arvados', '~> 1.3.0', '>= 1.3.0'
   # Our google-api-client dependency used to be < 0.9, but that could be
   # satisfied by the buggy 0.9.pre*.  https://dev.arvados.org/issues/9213
   s.add_runtime_dependency 'cure-google-api-client', '~> 0.6', '>= 0.6.3', '<0.8.9'
index b8afe638ac3c6a517058fd3e85a49b90607f150c..b98df8a66faf66afd479647bb9ebb210c2a022ed 100755 (executable)
@@ -2156,6 +2156,7 @@ sub find_docker_image {
           return (undef, undef);  # More than one file in the Collection.
         } else {
           $filename = (split(/:/, $filedata, 3))[2];
+          $filename =~ s/\\([0-3][0-7][0-7])/chr(oct($1))/ge;
         }
       }
     }
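
Keep manifests encode unsafe bytes in file names as backslash-octal escapes (for example, \040 for a space); the substitution added above decodes them before the name is used. The same transform, sketched in Go for reference:

import (
	"regexp"
	"strconv"
)

// unescapeManifestName decodes manifest-style octal escapes such as
// "\040" (space), mirroring the Perl substitution above.
var octalEscape = regexp.MustCompile(`\\([0-3][0-7][0-7])`)

func unescapeManifestName(s string) string {
	return octalEscape.ReplaceAllStringFunc(s, func(m string) string {
		n, _ := strconv.ParseUint(m[1:], 8, 8) // m is "\NNN"; parse the octal digits
		return string(rune(n))
	})
}
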
index 7e149528308fc9c6e38e0021af858da5450b58f8..52fd4d21a115f29ee7fb388c11ebeb0564b40ae2 100644 (file)
@@ -6,6 +6,9 @@
 # Implement cwl-runner interface for submitting and running work on Arvados, using
 # either the Crunch jobs API or Crunch containers API.
 
+from future.utils import viewitems
+from builtins import str
+
 import argparse
 import logging
 import os
@@ -66,9 +69,9 @@ def versionstring():
 def arg_parser():  # type: () -> argparse.ArgumentParser
     parser = argparse.ArgumentParser(description='Arvados executor for Common Workflow Language')
 
-    parser.add_argument("--basedir", type=str,
+    parser.add_argument("--basedir",
                         help="Base directory used to resolve relative references in the input, default to directory of input object file or current directory (if inputs piped/provided on command line).")
-    parser.add_argument("--outdir", type=str, default=os.path.abspath('.'),
+    parser.add_argument("--outdir", default=os.path.abspath('.'),
                         help="Output directory, default current directory")
 
     parser.add_argument("--eval-timeout",
@@ -99,9 +102,9 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                         default=True, dest="enable_reuse",
                         help="Disable job or container reuse")
 
-    parser.add_argument("--project-uuid", type=str, metavar="UUID", help="Project that will own the workflow jobs, if not provided, will go to home project.")
-    parser.add_argument("--output-name", type=str, help="Name to use for collection that stores the final output.", default=None)
-    parser.add_argument("--output-tags", type=str, help="Tags for the final output collection separated by commas, e.g., '--output-tags tag0,tag1,tag2'.", default=None)
+    parser.add_argument("--project-uuid", metavar="UUID", help="Project that will own the workflow jobs, if not provided, will go to home project.")
+    parser.add_argument("--output-name", help="Name to use for collection that stores the final output.", default=None)
+    parser.add_argument("--output-tags", help="Tags for the final output collection separated by commas, e.g., '--output-tags tag0,tag1,tag2'.", default=None)
     parser.add_argument("--ignore-docker-for-reuse", action="store_true",
                         help="Ignore Docker image version when deciding whether to reuse past jobs.",
                         default=False)
@@ -114,7 +117,7 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
     exgroup.add_argument("--create-template", action="store_true", help="(Deprecated) synonym for --create-workflow.",
                          dest="create_workflow")
     exgroup.add_argument("--create-workflow", action="store_true", help="Create an Arvados workflow (if using the 'containers' API) or pipeline template (if using the 'jobs' API). See --api.")
-    exgroup.add_argument("--update-workflow", type=str, metavar="UUID", help="Update an existing Arvados workflow or pipeline template with the given UUID.")
+    exgroup.add_argument("--update-workflow", metavar="UUID", help="Update an existing Arvados workflow or pipeline template with the given UUID.")
 
     exgroup = parser.add_mutually_exclusive_group()
     exgroup.add_argument("--wait", action="store_true", help="After submitting workflow runner job, wait for completion.",
@@ -128,7 +131,7 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
     exgroup.add_argument("--no-log-timestamps", action="store_false", help="No timestamp on logging lines",
                         default=True, dest="log_timestamps")
 
-    parser.add_argument("--api", type=str,
+    parser.add_argument("--api",
                         default=None, dest="work_api",
                         choices=("jobs", "containers"),
                         help="Select work submission API.  Default is 'jobs' if that API is available, otherwise 'containers'.")
@@ -141,7 +144,7 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                         help="RAM (in MiB) required for the workflow runner job (default 1024)",
                         default=None)
 
-    parser.add_argument("--submit-runner-image", type=str,
+    parser.add_argument("--submit-runner-image",
                         help="Docker image for workflow runner job, default arvados/jobs:%s" % __version__,
                         default=None)
 
@@ -150,11 +153,11 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                         default=False)
 
     exgroup = parser.add_mutually_exclusive_group()
-    exgroup.add_argument("--submit-request-uuid", type=str,
+    exgroup.add_argument("--submit-request-uuid",
                          default=None,
                          help="Update and commit to supplied container request instead of creating a new one (containers API only).",
                          metavar="UUID")
-    exgroup.add_argument("--submit-runner-cluster", type=str,
+    exgroup.add_argument("--submit-runner-cluster",
                          help="Submit workflow runner to a remote cluster (containers API only)",
                          default=None,
                          metavar="CLUSTER_ID")
@@ -163,7 +166,7 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                         default=None,
                         help="Collection cache size (in MiB, default 256).")
 
-    parser.add_argument("--name", type=str,
+    parser.add_argument("--name",
                         help="Name to use for workflow execution instance.",
                         default=None)
 
@@ -175,7 +178,7 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
     parser.add_argument("--enable-dev", action="store_true",
                         help="Enable loading and running development versions "
                              "of CWL spec.", default=False)
-    parser.add_argument('--storage-classes', default="default", type=str,
+    parser.add_argument('--storage-classes', default="default",
                         help="Specify comma separated list of storage classes to be used when saving workflow output to Keep.")
 
     parser.add_argument("--intermediate-output-ttl", type=int, metavar="N",
@@ -208,7 +211,7 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                         default=False, dest="trash_intermediate",
                         help="Do not trash intermediate outputs (default).")
 
-    parser.add_argument("workflow", type=str, default=None, help="The workflow to execute")
+    parser.add_argument("workflow", default=None, help="The workflow to execute")
     parser.add_argument("job_order", nargs=argparse.REMAINDER, help="The input object to the workflow.")
 
     return parser
@@ -232,7 +235,7 @@ def add_arv_hints():
     ])
 
 def exit_signal_handler(sigcode, frame):
-    logger.error("Caught signal {}, exiting.".format(sigcode))
+    logger.error(str(u"Caught signal {}, exiting.").format(sigcode))
     sys.exit(-sigcode)
 
 def main(args, stdout, stderr, api_client=None, keep_client=None,
@@ -243,7 +246,7 @@ def main(args, stdout, stderr, api_client=None, keep_client=None,
     arvargs = parser.parse_args(args)
 
     if len(arvargs.storage_classes.strip().split(',')) > 1:
-        logger.error("Multiple storage classes are not supported currently.")
+        logger.error(str(u"Multiple storage classes are not supported currently."))
         return 1
 
     arvargs.use_container = True
@@ -261,7 +264,7 @@ def main(args, stdout, stderr, api_client=None, keep_client=None,
         else:
             want_api = None
         if want_api and arvargs.work_api and want_api != arvargs.work_api:
-            logger.error('--update-workflow arg {!r} uses {!r} API, but --api={!r} specified'.format(
+            logger.error(str(u'--update-workflow arg {!r} uses {!r} API, but --api={!r} specified').format(
                 arvargs.update_workflow, want_api, arvargs.work_api))
             return 1
         arvargs.work_api = want_api
@@ -271,7 +274,7 @@ def main(args, stdout, stderr, api_client=None, keep_client=None,
 
     add_arv_hints()
 
-    for key, val in cwltool.argparser.get_default_args().items():
+    for key, val in viewitems(cwltool.argparser.get_default_args()):
         if not hasattr(arvargs, key):
             setattr(arvargs, key, val)
 
index 6a91d6ff3c5076a28e06a685ed8c73bf45a84218..af7c02a8f30010bfe85e51a6928e63a5a617d37e 100644 (file)
@@ -2,10 +2,14 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from future import standard_library
+standard_library.install_aliases()
+from builtins import str
+
 import logging
 import json
 import os
-import urllib
+import urllib.request, urllib.parse, urllib.error
 import time
 import datetime
 import ciso8601
@@ -136,7 +140,7 @@ class ArvadosContainer(JobBase):
                 generatemapper = NoFollowPathMapper(self.generatefiles["listing"], "", "",
                                                     separateDirs=False)
 
-                sorteditems = sorted(generatemapper.items(), None, key=lambda n: n[1].target)
+                sorteditems = sorted(generatemapper.items(), key=lambda n: n[1].target)
 
                 logger.debug("generatemapper is %s", sorteditems)
 
@@ -158,7 +162,7 @@ class ArvadosContainer(JobBase):
                                 }
                             else:
                                 with vwd.open(p.target, "w") as n:
-                                    n.write(p.resolved.encode("utf-8"))
+                                    n.write(p.resolved)
 
                 def keepemptydirs(p):
                     if isinstance(p, arvados.collection.RichCollectionBase):
@@ -495,6 +499,9 @@ class RunnerContainer(Runner):
             extra_submit_params["cluster_id"] = runtimeContext.submit_runner_cluster
 
         if runtimeContext.submit_request_uuid:
+            if "cluster_id" in extra_submit_params:
+                # Doesn't make sense for "update" and actually fails
+                del extra_submit_params["cluster_id"]
             response = self.arvrunner.api.container_requests().update(
                 uuid=runtimeContext.submit_request_uuid,
                 body=job_spec,
index 87d6d1049a08d349d8d45b41b47a5fccb99f237c..69fe7e2a8f1b632675b16eeb3f6ad07ee76c00e8 100644 (file)
@@ -2,6 +2,10 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from past.builtins import basestring
+from builtins import object
+from future.utils import viewitems
+
 import logging
 import re
 import copy
@@ -243,7 +247,7 @@ class ArvadosJob(JobBase):
                                                                    api_client=self.arvrunner.api,
                                                                    keep_client=self.arvrunner.keep_client,
                                                                    num_retries=self.arvrunner.num_retries)
-                        log = logc.open(logc.keys()[0])
+                        log = logc.open(list(logc.keys())[0])
                         dirs = {
                             "tmpdir": "/tmpdir",
                             "outdir": "/outdir",
@@ -343,7 +347,7 @@ class RunnerJob(Runner):
             find_or_create=self.enable_reuse
         ).execute(num_retries=self.arvrunner.num_retries)
 
-        for k,v in job_spec["script_parameters"].items():
+        for k,v in viewitems(job_spec["script_parameters"]):
             if v is False or v is None or isinstance(v, dict):
                 job_spec["script_parameters"][k] = {"value": v}
 
index c4e9f44abb0b20ecb66a7bdc13c5240beaaeeccb..31e6be12b533cb9d96c65f74189935fad8c3fcbe 100644 (file)
@@ -16,7 +16,7 @@ def validate_cluster_target(arvrunner, runtimeContext):
         runtimeContext.submit_runner_cluster not in arvrunner.api._rootDesc["remoteHosts"] and
         runtimeContext.submit_runner_cluster != arvrunner.api._rootDesc["uuidPrefix"]):
         raise WorkflowException("Unknown or invalid cluster id '%s' known remote clusters are %s" % (runtimeContext.submit_runner_cluster,
-                                                                                                  ", ".join(arvrunner.api._rootDesc["remoteHosts"].keys())))
+                                                                                                  ", ".join(list(arvrunner.api._rootDesc["remoteHosts"].keys()))))
 def set_cluster_target(tool, arvrunner, builder, runtimeContext):
     cluster_target_req = None
     for field in ("hints", "requirements"):
index ea167d4044d76fa91953eb401962107afd6b878e..8e6bff4f04cb525106ee18a62cfb6146542f3c18 100644 (file)
@@ -2,6 +2,9 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from past.builtins import basestring
+from future.utils import viewitems
+
 import os
 import json
 import copy
@@ -135,7 +138,7 @@ class ArvadosWorkflowStep(WorkflowStep):
         runtimeContext = runtimeContext.copy()
         runtimeContext.toplevel = True  # Preserve behavior for #13365
 
-        builder = make_builder({shortname(k): v for k,v in joborder.items()}, self.hints, self.requirements, runtimeContext)
+        builder = make_builder({shortname(k): v for k,v in viewitems(joborder)}, self.hints, self.requirements, runtimeContext)
         runtimeContext = set_cluster_target(self.tool, self.arvrunner, builder, runtimeContext)
         return super(ArvadosWorkflowStep, self).job(joborder, output_callback, runtimeContext)
 
index 61f9cbbe0dc80a7ce7c4894ccb2697c0b0310652..c886550d41606e60e04f0142e55519e5ea33c89a 100644 (file)
@@ -10,6 +10,9 @@
 # tool.  When the workflow completes, record the output object in an output
 # collection for this runner job.
 
+from past.builtins import basestring
+from future.utils import viewitems
+
 import arvados
 import arvados_cwl
 import arvados.collection
@@ -61,7 +64,7 @@ def run():
             if "location" in v:
                 v["location"] = keeppath(v["location"])
 
-        for k,v in job_order_object.items():
+        for k,v in viewitems(job_order_object):
             if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
                 job_order_object[k] = {
                     "class": "File",
index 6d46e79cb84b33bee3af508c86498201aa37acd1..9b26ad7064207e8e76e4f819604833ab7b7a1dd7 100644 (file)
@@ -2,6 +2,8 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from future.utils import viewvalues
+
 import re
 from cwltool.errors import WorkflowException
 from collections import deque
@@ -63,7 +65,7 @@ def logtail(logcollection, logfunc, header, maxlen=25):
     containersapi = ("crunch-run.txt" in logcollection)
     mergelogs = {}
 
-    for log in logcollection.keys():
+    for log in list(logcollection):
         if not containersapi or log in ("crunch-run.txt", "stdout.txt", "stderr.txt"):
             logname = log[:-4]
             logt = deque([], maxlen)
@@ -77,7 +79,7 @@ def logtail(logcollection, logfunc, header, maxlen=25):
                         logt.append(l)
 
     if containersapi:
-        keys = mergelogs.keys()
+        keys = list(mergelogs)
         loglines = []
         while True:
             earliest = None
@@ -91,7 +93,7 @@ def logtail(logcollection, logfunc, header, maxlen=25):
             loglines.append("%s %s %s" % (ts, earliest, msg))
         loglines = loglines[-maxlen:]
     else:
-        loglines = mergelogs.values()[0]
+        loglines = mergelogs[list(mergelogs)[0]]
 
     logtxt = "\n  ".join(l.strip() for l in loglines)
     logfunc("%s\n\n  %s", header, logtxt)
index 27774b2f7cf6bd1fbb9bd8474f5dde4e7e4d6d51..535cfd7582b985ad806d659f518f9da9ce0e6fbc 100644 (file)
@@ -2,6 +2,12 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from __future__ import division
+from builtins import next
+from builtins import object
+from builtins import str
+from future.utils import viewvalues
+
 import argparse
 import logging
 import os
@@ -157,7 +163,7 @@ class ArvCwlExecutor(object):
                 raise Exception("Unsupported API '%s', expected one of %s" % (arvargs.work_api, expected_api))
 
         if self.work_api == "jobs":
-            logger.warn("""
+            logger.warning("""
 *******************************
 Using the deprecated 'jobs' API.
 
@@ -180,6 +186,11 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
         # if running inside a container
         if arvados_cwl.util.get_current_container(self.api, self.num_retries, logger):
             root_logger = logging.getLogger('')
+
+            # Remove existing RuntimeStatusLoggingHandlers if they exist
+            handlers = [h for h in root_logger.handlers if not isinstance(h, RuntimeStatusLoggingHandler)]
+            root_logger.handlers = handlers
+
             handler = RuntimeStatusLoggingHandler(self.runtime_status_update)
             root_logger.addHandler(handler)
 
@@ -332,7 +343,7 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                 if self.stop_polling.is_set():
                     break
                 with self.workflow_eval_lock:
-                    keys = list(self.processes.keys())
+                    keys = list(self.processes)
                 if not keys:
                     remain_wait = self.poll_interval
                     continue
@@ -351,7 +362,7 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                     try:
                         proc_states = table.list(filters=[["uuid", "in", page]]).execute(num_retries=self.num_retries)
                     except Exception as e:
-                        logger.warn("Error checking states on API server: %s", e)
+                        logger.warning("Error checking states on API server: %s", e)
                         remain_wait = self.poll_interval
                         continue
 
@@ -383,7 +394,7 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
             try:
                 self.api.collections().delete(uuid=i).execute(num_retries=self.num_retries)
             except:
-                logger.warn("Failed to delete intermediate output: %s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
+                logger.warning("Failed to delete intermediate output: %s", sys.exc_info()[1], exc_info=(sys.exc_info()[1] if self.debug else False))
             if sys.exc_info()[0] is KeyboardInterrupt or sys.exc_info()[0] is SystemExit:
                 break
 
@@ -401,7 +412,7 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                             "Option 'dockerOutputDirectory' must be an absolute path.")
             if obj.get("class") == "http://commonwl.org/cwltool#Secrets" and self.work_api != "containers":
                 raise SourceLine(obj, "class", UnsupportedRequirement).makeError("Secrets not supported with --api=jobs")
-            for v in obj.itervalues():
+            for v in viewvalues(obj):
                 self.check_features(v)
         elif isinstance(obj, list):
             for i,v in enumerate(obj):
@@ -445,7 +456,7 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                 logger.error("Creating CollectionReader for '%s' '%s': %s", k, v, e)
                 raise
             except IOError as e:
-                logger.warn("While preparing output collection: %s", e)
+                logger.warning("While preparing output collection: %s", e)
 
         def rewrite(fileobj):
             fileobj["location"] = generatemapper.mapper(fileobj["location"]).target
@@ -457,7 +468,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
         adjustFileObjs(outputObj, rewrite)
 
         with final.open("cwl.output.json", "w") as f:
-            json.dump(outputObj, f, sort_keys=True, indent=4, separators=(',',': '))
+            res = str(json.dumps(outputObj, sort_keys=True, indent=4, separators=(',',': '), ensure_ascii=False))
+            f.write(res)
 
         final.save_new(name=name, owner_uuid=self.project_uuid, storage_classes=storage_classes, ensure_unique_name=True)
 
@@ -608,7 +620,7 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                         visited.add(m.group(1))
                         estimated_size[0] += int(m.group(2))
             visit_class(job_order, ("File", "Directory"), estimate_collection_cache)
-            runtimeContext.collection_cache_size = max(((estimated_size[0]*192) / (1024*1024))+1, 256)
+            runtimeContext.collection_cache_size = max(((estimated_size[0]*192) // (1024*1024))+1, 256)
             self.collection_cache.set_cap(runtimeContext.collection_cache_size*1024*1024)
 
         logger.info("Using collection cache size %s MiB", runtimeContext.collection_cache_size)
@@ -660,7 +672,7 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                            runtimeContext)
 
         if runtimeContext.submit and not runtimeContext.wait:
-            runnerjob = jobiter.next()
+            runnerjob = next(jobiter)
             runnerjob.run(runtimeContext)
             return (runnerjob.uuid, "success")
 
index 0816ee8fc05b74198ae9abad69887905bf8113ee..fc7cc42d15c61f19021f006cb1eec18ab94178cd 100644 (file)
@@ -2,10 +2,16 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from future import standard_library
+standard_library.install_aliases()
+from builtins import object
+from builtins import str
+from future.utils import viewvalues
+
 import fnmatch
 import os
 import errno
-import urlparse
+import urllib.parse
 import re
 import logging
 import threading
@@ -48,7 +54,7 @@ class CollectionCache(object):
 
     def cap_cache(self, required):
         # ordered dict iterates from oldest to newest
-        for pdh, v in self.collections.items():
+        for pdh, v in list(self.collections.items()):
             available = self.cap - self.total
             if available >= required or len(self.collections) < self.min_entries:
                 return
@@ -90,7 +96,7 @@ class CollectionFsAccess(cwltool.stdfsaccess.StdFsAccess):
         p = sp[0]
         if p.startswith("keep:") and arvados.util.keep_locator_pattern.match(p[5:]):
             pdh = p[5:]
-            return (self.collection_cache.get(pdh), urlparse.unquote(sp[1]) if len(sp) == 2 else None)
+            return (self.collection_cache.get(pdh), urllib.parse.unquote(sp[1]) if len(sp) == 2 else None)
         else:
             return (None, path)
 
@@ -188,7 +194,7 @@ class CollectionFsAccess(cwltool.stdfsaccess.StdFsAccess):
                 raise IOError(errno.ENOENT, "Directory '%s' in '%s' not found" % (rest, collection.portable_data_hash()))
             if not isinstance(dir, arvados.collection.RichCollectionBase):
                 raise IOError(errno.ENOENT, "Path '%s' in '%s' is not a Directory" % (rest, collection.portable_data_hash()))
-            return [abspath(l, fn) for l in dir.keys()]
+            return [abspath(l, fn) for l in list(dir.keys())]
         else:
             return super(CollectionFsAccess, self).listdir(fn)
 
@@ -243,11 +249,11 @@ class CollectionFetcher(DefaultFetcher):
         if not url:
             return base_url
 
-        urlsp = urlparse.urlsplit(url)
+        urlsp = urllib.parse.urlsplit(url)
         if urlsp.scheme or not base_url:
             return url
 
-        basesp = urlparse.urlsplit(base_url)
+        basesp = urllib.parse.urlsplit(base_url)
         if basesp.scheme in ("keep", "arvwf"):
             if not basesp.path:
                 raise IOError(errno.EINVAL, "Invalid Keep locator", base_url)
@@ -268,7 +274,7 @@ class CollectionFetcher(DefaultFetcher):
                 baseparts.pop()
 
             path = "/".join([pdh] + baseparts + urlparts)
-            return urlparse.urlunsplit((basesp.scheme, "", path, "", urlsp.fragment))
+            return urllib.parse.urlunsplit((basesp.scheme, "", path, "", urlsp.fragment))
 
         return super(CollectionFetcher, self).urljoin(base_url, url)
 
@@ -283,21 +289,21 @@ pipeline_template_uuid_pattern = re.compile(r'[a-z0-9]{5}-p5p6p-[a-z0-9]{15}')
 
 def collectionResolver(api_client, document_loader, uri, num_retries=4):
     if uri.startswith("keep:") or uri.startswith("arvwf:"):
-        return uri
+        return str(uri)
 
     if workflow_uuid_pattern.match(uri):
-        return "arvwf:%s#main" % (uri)
+        return u"arvwf:%s#main" % (uri)
 
     if pipeline_template_uuid_pattern.match(uri):
         pt = api_client.pipeline_templates().get(uuid=uri).execute(num_retries=num_retries)
-        return "keep:" + pt["components"].values()[0]["script_parameters"]["cwl:tool"]
+        return u"keep:" + list(viewvalues(pt["components"]))[0]["script_parameters"]["cwl:tool"]
 
     p = uri.split("/")
     if arvados.util.keep_locator_pattern.match(p[0]):
-        return "keep:%s" % (uri)
+        return u"keep:%s" % (uri)
 
     if arvados.util.collection_uuid_pattern.match(p[0]):
-        return "keep:%s%s" % (api_client.collections().
+        return u"keep:%s%s" % (api_client.collections().
                               get(uuid=p[0]).execute()["portable_data_hash"],
                               uri[len(p[0]):])
 
index 4516de021b8522f25990cf2988b7b4345f13849d..47a304372c58a27ecde8d8c13bb55d6435f9cf79 100644 (file)
@@ -2,6 +2,10 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from __future__ import division
+from future import standard_library
+standard_library.install_aliases()
+
 import requests
 import email.utils
 import time
@@ -9,7 +13,7 @@ import datetime
 import re
 import arvados
 import arvados.collection
-import urlparse
+import urllib.parse
 import logging
 import calendar
 
@@ -91,13 +95,13 @@ def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
         if fresh_cache(url, properties, now):
             # Do nothing
             cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
-            return "keep:%s/%s" % (item["portable_data_hash"], cr.keys()[0])
+            return "keep:%s/%s" % (item["portable_data_hash"], list(cr.keys())[0])
 
         if not changed(url, properties, now):
             # ETag didn't change, same content, just update headers
             api.collections().update(uuid=item["uuid"], body={"collection":{"properties": properties}}).execute()
             cr = arvados.collection.CollectionReader(item["portable_data_hash"], api_client=api)
-            return "keep:%s/%s" % (item["portable_data_hash"], cr.keys()[0])
+            return "keep:%s/%s" % (item["portable_data_hash"], list(cr.keys())[0])
 
     properties = {}
     req = requests.get(url, stream=True, allow_redirects=True)
@@ -123,25 +127,25 @@ def http_to_keep(api, project_uuid, url, utcnow=datetime.datetime.utcnow):
         else:
             name = grp.group(4)
     else:
-        name = urlparse.urlparse(url).path.split("/")[-1]
+        name = urllib.parse.urlparse(url).path.split("/")[-1]
 
     count = 0
     start = time.time()
     checkpoint = start
-    with c.open(name, "w") as f:
+    with c.open(name, "wb") as f:
         for chunk in req.iter_content(chunk_size=1024):
             count += len(chunk)
             f.write(chunk)
             loopnow = time.time()
             if (loopnow - checkpoint) > 20:
-                bps = (float(count)/float(loopnow - start))
+                bps = count / (loopnow - start)
                 if cl is not None:
                     logger.info("%2.1f%% complete, %3.2f MiB/s, %1.0f seconds left",
-                                float(count * 100) / float(cl),
-                                bps/(1024*1024),
-                                (cl-count)/bps)
+                                ((count * 100) / cl),
+                                (bps / (1024*1024)),
+                                ((cl-count) // bps))
                 else:
-                    logger.info("%d downloaded, %3.2f MiB/s", count, bps/(1024*1024))
+                    logger.info("%d downloaded, %3.2f MiB/s", count, (bps / (1024*1024)))
                 checkpoint = loopnow
 
     c.save_new(name="Downloaded from %s" % url, owner_uuid=project_uuid, ensure_unique_name=True)
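
http_to_keep caches each download as a collection and revalidates it on later calls: within the freshness window (fresh_cache, defined earlier in this file) the stored copy is reused outright, and otherwise the saved validators are checked (changed, also defined earlier) so that unchanged content costs only a header round-trip. A sketch of that conditional-request idea, in Go to match the other examples here; the function and parameter names are illustrative, not part of this changeset:

import "net/http"

// fetchIfChanged issues a conditional GET: a 304 Not Modified reply
// means the cached collection is still current and only its stored
// headers need refreshing. etag and lastModified would come from the
// collection properties saved on the previous fetch.
func fetchIfChanged(url, etag, lastModified string) (*http.Response, bool, error) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, false, err
	}
	if etag != "" {
		req.Header.Set("If-None-Match", etag)
	}
	if lastModified != "" {
		req.Header.Set("If-Modified-Since", lastModified)
	}
	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, false, err
	}
	return resp, resp.StatusCode == http.StatusNotModified, nil
}
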
index 0b2a22788e6f98537b0f5a3437a2d540a57d47ee..e0445febdc9a0731314607417739747c8f0e632c 100644 (file)
@@ -2,11 +2,17 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from future import standard_library
+standard_library.install_aliases()
+from builtins import str
+from past.builtins import basestring
+from future.utils import viewitems
+
 import re
 import logging
 import uuid
 import os
-import urllib
+import urllib.request, urllib.parse, urllib.error
 
 import arvados_cwl.util
 import arvados.commands.run
@@ -60,7 +66,7 @@ class ArvPathMapper(PathMapper):
             src = src[:src.index("#")]
 
         if isinstance(src, basestring) and ArvPathMapper.pdh_dirpath.match(src):
-            self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.unquote(src[5:]), srcobj["class"], True)
+            self._pathmap[src] = MapperEnt(src, self.collection_pattern % urllib.parse.unquote(src[5:]), srcobj["class"], True)
 
         debug = logger.isEnabledFor(logging.DEBUG)
 
@@ -77,7 +83,7 @@ class ArvPathMapper(PathMapper):
                     if isinstance(st, arvados.commands.run.UploadFile):
                         uploadfiles.add((src, ab, st))
                     elif isinstance(st, arvados.commands.run.ArvFile):
-                        self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % urllib.unquote(st.fn[5:]), "File", True)
+                        self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % urllib.parse.unquote(st.fn[5:]), "File", True)
                     else:
                         raise WorkflowException("Input file path '%s' is invalid" % st)
             elif src.startswith("_:"):
@@ -114,7 +120,7 @@ class ArvPathMapper(PathMapper):
             remap.append((obj["location"], path + "/" + obj["basename"]))
         elif obj["location"].startswith("_:") and "contents" in obj:
             with c.open(path + "/" + obj["basename"], "w") as f:
-                f.write(obj["contents"].encode("utf-8"))
+                f.write(obj["contents"])
             remap.append((obj["location"], path + "/" + obj["basename"]))
         else:
             raise SourceLine(obj, "location", WorkflowException).makeError("Don't know what to do with '%s'" % obj["location"])
@@ -176,7 +182,7 @@ class ArvPathMapper(PathMapper):
                                          packed=False)
 
         for src, ab, st in uploadfiles:
-            self._pathmap[src] = MapperEnt(urllib.quote(st.fn, "/:+@"), self.collection_pattern % st.fn[5:],
+            self._pathmap[src] = MapperEnt(urllib.parse.quote(st.fn, "/:+@"), self.collection_pattern % st.fn[5:],
                                            "Directory" if os.path.isdir(ab) else "File", True)
 
         for srcobj in referenced_files:
@@ -228,7 +234,7 @@ class ArvPathMapper(PathMapper):
                                                               ab, "File", True)
                 if srcobj.get("secondaryFiles"):
                     ab = self.collection_pattern % c.portable_data_hash()
-                    self._pathmap["_:" + unicode(uuid.uuid4())] = MapperEnt("keep:"+c.portable_data_hash(), ab, "Directory", True)
+                    self._pathmap["_:" + str(uuid.uuid4())] = MapperEnt("keep:"+c.portable_data_hash(), ab, "Directory", True)
 
             if remap:
                 for loc, sub in remap:
@@ -301,7 +307,7 @@ class VwdPathMapper(StagingPathMapper):
         # with any secondary files.
         self.visitlisting(referenced_files, self.stagedir, basedir)
 
-        for path, (ab, tgt, type, staged) in self._pathmap.items():
+        for path, (ab, tgt, type, staged) in viewitems(self._pathmap):
             if type in ("File", "Directory") and ab.startswith("keep:"):
                 self._pathmap[path] = MapperEnt("$(task.keep)/%s" % ab[5:], tgt, type, staged)
 
index 39f475fe8d30e6fda700e2f3c965577745b0638f..cc3ea969df99cb000119cc60ffd5c29c28656d01 100644 (file)
@@ -2,6 +2,8 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from builtins import object
+
 import time
 import uuid
 
index 4b3275fa36b4ee497ec83c3de2b3e8a0c938fb62..c0d165aa9eed4e5cbaeaf9d365a302957a3921b1 100644 (file)
@@ -2,15 +2,23 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from future import standard_library
+standard_library.install_aliases()
+from future.utils import viewvalues, viewitems
+
 import os
-import urlparse
+import sys
+import urllib.parse
 from functools import partial
 import logging
 import json
-import subprocess32 as subprocess
 from collections import namedtuple
+from io import StringIO
 
-from StringIO import StringIO
+if os.name == "posix" and sys.version_info[0] < 3:
+    import subprocess32 as subprocess
+else:
+    import subprocess
 
 from schema_salad.sourceline import SourceLine, cmap
 
@@ -61,7 +69,7 @@ def find_defaults(d, op):
         if "default" in d:
             op(d)
         else:
-            for i in d.itervalues():
+            for i in viewvalues(d):
                 find_defaults(i, op)
 
 def setSecondary(t, fileobj, discovered):
@@ -98,7 +106,7 @@ def upload_dependencies(arvrunner, name, document_loader,
     loaded = set()
     def loadref(b, u):
         joined = document_loader.fetcher.urljoin(b, u)
-        defrg, _ = urlparse.urldefrag(joined)
+        defrg, _ = urllib.parse.urldefrag(joined)
         if defrg not in loaded:
             loaded.add(defrg)
             # Use fetch_text to get raw file (before preprocessing).
@@ -171,7 +179,7 @@ def upload_dependencies(arvrunner, name, document_loader,
 
     visit_class(workflowobj, ("CommandLineTool", "Workflow"), discover_default_secondary_files)
 
-    for d in list(discovered.keys()):
+    for d in list(discovered):
         # Only interested in discovered secondaryFiles which are local
         # files that need to be uploaded.
         if d.startswith("file:"):
@@ -232,7 +240,7 @@ def packed_workflow(arvrunner, tool, merged_map):
     packed = pack(tool.doc_loader, tool.doc_loader.fetch(tool.tool["id"]),
                   tool.tool["id"], tool.metadata, rewrite_out=rewrites)
 
-    rewrite_to_orig = {v: k for k,v in rewrites.items()}
+    rewrite_to_orig = {v: k for k,v in viewitems(rewrites)}
 
     def visit(v, cur_id):
         if isinstance(v, dict):
@@ -463,7 +471,7 @@ class Runner(Process):
             if "cwl.output.json" in outc:
                 with outc.open("cwl.output.json", "rb") as f:
                     if f.size() > 0:
-                        outputs = json.load(f)
+                        outputs = json.loads(f.read().decode())
             def keepify(fileobj):
                 path = fileobj["location"]
                 if not path.startswith("keep:"):
index 1c233fac0ad98f4b0421a4e0856b00fd19d1422f..d75fec6c63e719949d6f19b7d2813d9f828262a6 100644 (file)
@@ -2,7 +2,12 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-import Queue
+from future import standard_library
+standard_library.install_aliases()
+from builtins import range
+from builtins import object
+
+import queue
 import threading
 import logging
 
@@ -11,13 +16,13 @@ logger = logging.getLogger('arvados.cwl-runner')
 class TaskQueue(object):
     def __init__(self, lock, thread_count):
         self.thread_count = thread_count
-        self.task_queue = Queue.Queue(maxsize=self.thread_count)
+        self.task_queue = queue.Queue(maxsize=self.thread_count)
         self.task_queue_threads = []
         self.lock = lock
         self.in_flight = 0
         self.error = None
 
-        for r in xrange(0, self.thread_count):
+        for r in range(0, self.thread_count):
             t = threading.Thread(target=self.task_queue_func)
             self.task_queue_threads.append(t)
             t.start()
@@ -51,7 +56,7 @@ class TaskQueue(object):
                     return
                 self.task_queue.put(task, block=True, timeout=3)
                 return
-            except Queue.Full:
+            except queue.Full:
                 pass
             finally:
                 unlock.acquire()
@@ -62,7 +67,7 @@ class TaskQueue(object):
             # Drain queue
             while not self.task_queue.empty():
                 self.task_queue.get(True, .1)
-        except Queue.Empty:
+        except queue.Empty:
             pass
 
     def join(self):
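
The TaskQueue port is mostly the Py3 module rename: `Queue` becomes `queue` (covered on Py2 by `install_aliases()`), including the `Full`/`Empty` exception lookups, plus `range` in place of `xrange`. A minimal sketch of the put-with-timeout pattern used in `add()` above:

import queue

q = queue.Queue(maxsize=1)
q.put("task", block=True, timeout=1)
try:
    q.put("overflow", block=True, timeout=0.1)
except queue.Full:
    print("queue full, retry later")     # add() loops and retries here
print(q.get())                           # task
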
index 98a2a89a1d281e056ee3067752e6094349123115..776fc6bc25dae06e232e2546cab501246d6cd6b3 100644 (file)
@@ -26,6 +26,9 @@ def get_current_container(api, num_retries=0, logger=None):
         current_container = api.containers().current().execute(num_retries=num_retries)
     except ApiError as e:
         # Status code 404 just means we're not running in a container.
-        if e.resp.status != 404 and logger:
-            logger.info("Getting current container: %s", e)
+        if e.resp.status != 404:
+            if logger:
+                logger.info("Getting current container: %s", e)
+            raise
+
     return current_container
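
The behavioral change above: a non-404 ApiError is now logged and re-raised instead of silently swallowed, while 404 still just means "not running inside a container". A sketch of that control flow, with hypothetical stand-ins for ApiError and the API call:

class FakeApiError(Exception):           # stand-in; the real code catches arvados ApiError
    def __init__(self, status):
        self.resp = type("Resp", (), {"status": status})()

def get_current_container(fetch, logger=None):
    try:
        return fetch()
    except FakeApiError as e:
        if e.resp.status != 404:
            if logger:
                logger.info("Getting current container: %s", e)
            raise
        return None                      # 404: not running in a container
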
similarity index 51%
rename from backports/python-pycrypto/fpm-info.sh
rename to sdk/cwl/fpm-info.sh
index 85c401f4aefc27f74e1b8e3740c3eff0ea03afa9..5c47532db9ac9efa12ff9cc10b4e17b9d8ec9ae1 100644 (file)
@@ -3,13 +3,12 @@
 # SPDX-License-Identifier: Apache-2.0
 
 case "$TARGET" in
-    centos*)
-        fpm_depends+=(glibc)
-        ;;
     debian8)
-        fpm_depends+=(libc6 libgmp10)
+        fpm_depends+=(libgnutls-deb0-28 libcurl3-gnutls)
         ;;
     debian* | ubuntu*)
-        fpm_depends+=(libc6)
+        fpm_depends+=(libcurl3-gnutls libpython2.7)
         ;;
 esac
+
+fpm_args+=(--conflicts=python-cwltool --conflicts=cwltool)
index 4dc8448476123934dae7193fe680141671a2b7ec..d6a4c24a785e0ca6258550dee4b41c2cc01b9ad7 100644 (file)
@@ -2,6 +2,9 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from builtins import str
+from builtins import next
+
 from setuptools.command.egg_info import egg_info
 import subprocess
 import time
index 3bd62532a8b80b3d3513107470b827ba6ca4d02c..a815dedaf3a30924145939598b476c1302915bcb 100644 (file)
@@ -37,14 +37,20 @@ setup(name='arvados-cwl-runner',
           'schema-salad==3.0.20181129082112',
           'typing >= 3.6.4',
           'ruamel.yaml >=0.15.54, <=0.15.77',
-          'arvados-python-client>=1.2.1.20181130020805',
+          'arvados-python-client>=1.3.0.20190205182514',
           'setuptools',
           'ciso8601 >=1.0.6, <2.0.0',
-          'subprocess32>=3.5.1',
       ],
+      extras_require={
+          ':os.name=="posix" and python_version<"3"': ['subprocess32 >= 3.5.1'],
+      },
       data_files=[
           ('share/doc/arvados-cwl-runner', ['LICENSE-2.0.txt', 'README.rst']),
       ],
+      classifiers=[
+          'Programming Language :: Python :: 2',
+          'Programming Language :: Python :: 3',
+      ],
       test_suite='tests',
       tests_require=[
           'mock>=1.0',
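
The setup.py change replaces the unconditional `subprocess32` requirement with a setuptools environment marker, so the backport is only pulled in on POSIX Python 2. The `':<marker>'` key in `extras_require` is the idiom; a stripped-down sketch with illustrative package names:

from setuptools import setup

setup(
    name="example-pkg",                  # illustrative, not the real package
    version="0.0.1",
    install_requires=["requests"],
    extras_require={
        ':os.name=="posix" and python_version<"3"': ["subprocess32 >= 3.5.1"],
    },
)
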
index a6f4022cc3559adb40f7a5891495de2fbfd78c2a..37eb5171ebbc55779f86e0e275e4eb8d7a490cd6 100755 (executable)
@@ -13,6 +13,10 @@ reset_container=1
 leave_running=0
 config=dev
 tag="latest"
+pythoncmd=python
+suite=conformance
+runapi=containers
+build=
 
 while test -n "$1" ; do
     arg="$1"
@@ -33,8 +36,24 @@ while test -n "$1" ; do
             tag=$2
             shift ; shift
             ;;
+        --build)
+            build=1
+            shift
+            ;;
+        --pythoncmd)
+            pythoncmd=$2
+            shift ; shift
+            ;;
+        --suite)
+            suite=$2
+            shift ; shift
+            ;;
+        --api)
+            runapi=$2
+            shift ; shift
+            ;;
         -h|--help)
-            echo "$0 [--no-reset-container] [--leave-running] [--config dev|localdemo] [--tag docker_tag]"
+            echo "$0 [--no-reset-container] [--leave-running] [--config dev|localdemo] [--tag docker_tag] [--build] [--pythoncmd python(2|3)] [--suite (integration|conformance)]"
             exit
             ;;
         *)
@@ -60,13 +79,21 @@ set -eu -o pipefail
 
 . /usr/local/lib/arvbox/common.sh
 
+export PYCMD=$pythoncmd
+
 if test $config = dev ; then
   cd /usr/src/arvados/sdk/cwl
-  python setup.py sdist
+  \$PYCMD setup.py sdist
   pip_install \$(ls -r dist/arvados-cwl-runner-*.tar.gz | head -n1)
 fi
 
-pip install cwltest
+set -x
+
+if [ \$PYCMD = "python3" ]; then
+    pip3 install cwltest
+else
+    pip install cwltest
+fi
 
 mkdir -p /tmp/cwltest
 cd /tmp/cwltest
@@ -80,7 +107,9 @@ export ARVADOS_API_HOST_INSECURE=1
 export ARVADOS_API_TOKEN=\$(cat /var/lib/arvados/superuser_token)
 
 
-if test "$tag" = "latest" ; then
+if test -n "$build" ; then
+   /usr/src/arvados/build/build-dev-docker-jobs-image.sh
+elif test "$tag" = "latest" ; then
   arv-keepdocker --pull arvados/jobs $tag
 else
   jobsimg=\$(curl https://versions.arvados.org/v1/commit/$tag | python -c "import json; import sys; sys.stdout.write(json.load(sys.stdin)['Versions']['Docker']['arvados/jobs'])")
@@ -102,7 +131,12 @@ EOF2
 chmod +x /tmp/cwltest/arv-cwl-containers
 
 env
-exec ./run_test.sh RUNNER=/tmp/cwltest/arv-cwl-containers EXTRA=--compute-checksum $@
+if [[ "$suite" = "conformance" ]] ; then
+   exec ./run_test.sh RUNNER=/tmp/cwltest/arv-cwl-${runapi} EXTRA=--compute-checksum $@
+elif [[ "$suite" = "integration" ]] ; then
+   cd /usr/src/arvados/sdk/cwl/tests
+   exec ./arvados-tests.sh $@
+fi
 EOF
 
 CODE=$?
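
When a specific --tag is given, the script above pipes versions.arvados.org output through an inline `python -c` to find the matching arvados/jobs image. The same lookup as a readable sketch, assuming only the JSON shape the script already relies on:

import json
import sys

def jobs_image_tag(stream):
    # expects {"Versions": {"Docker": {"arvados/jobs": "<tag>"}}}
    return json.load(stream)["Versions"]["Docker"]["arvados/jobs"]

if __name__ == "__main__":
    sys.stdout.write(jobs_image_tag(sys.stdin))
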
index a7445449af6030e7afee4bdb524ac55afc90b8ec..697f38017d4f0060afdae7915f472e90a9652770 100644 (file)
@@ -14,6 +14,7 @@ expression: |
   ${
     var samples = {};
     var pattern = /^(.+)(_S[0-9]{1,3}_)(.+)$/;
+    inputs.dir.listing = inputs.dir.listing.sort(function(a, b) { return a.basename.localeCompare(b.basename); });
     for (var i = 0; i < inputs.dir.listing.length; i++) {
       var file = inputs.dir.listing[i];
       var groups = file.basename.match(pattern);
@@ -32,4 +33,4 @@ expression: |
                  "listing": samples[sampleid]});
     });
     return {"out": dirs};
-  }
\ No newline at end of file
+  }
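
The added sort makes the expression's output independent of directory listing order, which is also why the expected fixture below changes. The same determinism fix in Python terms:

listing = [{"basename": "sample2_S01_R1_001.fastq.txt"},
           {"basename": "sample1_S01_R1_001.fastq.txt"}]
listing.sort(key=lambda f: f["basename"])
print([f["basename"] for f in listing])   # sample1 first, regardless of input order
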
index e51c7a2531dbea456112ae577ec4698c88883e09..8b8ff28c789d771ba0c2e54855beb82381ee6a83 100644 (file)
             "size": 32
         },
         {
-            "checksum": "sha1$83483b9c65d99967aecc794c14f9f4743314d186",
-            "location": "sample2_S01_R3_001.fastq.txt",
+            "checksum": "sha1$5f3b4df1b0f7fdced751fc6079778600ad9fdb45",
+            "location": "sample2_S01_R1_001.fastq.txt",
             "class": "File",
             "size": 32
         }
index b3338939edd983850ec7d426fa32619176cc4d84..1458772a3f65f22dfe494df7ddbe55d7ca308f2e 100644 (file)
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from __future__ import print_function
 import arvados
 import json
 
index 6fe90813e7a720c9ba5c11d9650af34e49ee9cdd..40bb843b2980877a0dbe10f18b41463c255609e5 100644 (file)
@@ -2,6 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from __future__ import print_function
 import arvados
 import json
 
index 99c26523ed471dc6d14b165aee9cceba5d9e57b5..e45bd72642df7e735bdc0d5529286536bf536de8 100644 (file)
@@ -2,4 +2,5 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-print "Hello world"
+from __future__ import print_function
+print("Hello world")
index 50c0c60b1d0a337527ddd2342ae1ed46e22b1708..04e67b7dbd48aaefec075e77c2a8836d81cb2631 100644 (file)
@@ -2,6 +2,8 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from builtins import object
+
 import difflib
 import json
 import re
index a34d550f62ad2b24a1ff156d7b740377c811cab5..de21fc0b92d1c7437978be0a5018c3ea51fd76c3 100644 (file)
@@ -2,6 +2,9 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from builtins import str
+from builtins import object
+
 import arvados_cwl
 import arvados_cwl.context
 import arvados_cwl.util
index 0c66c39c0b4607e492725a280d8dfca3aa52ec19..4119fee383e27bcfe30a97d3de754d1879c067a9 100644 (file)
@@ -2,8 +2,11 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from future import standard_library
+standard_library.install_aliases()
+
 import copy
-import cStringIO
+import io
 import functools
 import hashlib
 import json
@@ -57,7 +60,7 @@ class TestHttpToKeep(unittest.TestCase):
 
         getmock.assert_called_with("http://example.com/file1.txt", stream=True, allow_redirects=True)
 
-        cm.open.assert_called_with("file1.txt", "w")
+        cm.open.assert_called_with("file1.txt", "wb")
         cm.save_new.assert_called_with(name="Downloaded from http://example.com/file1.txt",
                                        owner_uuid=None, ensure_unique_name=True)
 
@@ -185,7 +188,7 @@ class TestHttpToKeep(unittest.TestCase):
 
         getmock.assert_called_with("http://example.com/file1.txt", stream=True, allow_redirects=True)
 
-        cm.open.assert_called_with("file1.txt", "w")
+        cm.open.assert_called_with("file1.txt", "wb")
         cm.save_new.assert_called_with(name="Downloaded from http://example.com/file1.txt",
                                        owner_uuid=None, ensure_unique_name=True)
 
@@ -276,7 +279,7 @@ class TestHttpToKeep(unittest.TestCase):
 
         getmock.assert_called_with("http://example.com/download?fn=/file1.txt", stream=True, allow_redirects=True)
 
-        cm.open.assert_called_with("file1.txt", "w")
+        cm.open.assert_called_with("file1.txt", "wb")
         cm.save_new.assert_called_with(name="Downloaded from http://example.com/download?fn=/file1.txt",
                                        owner_uuid=None, ensure_unique_name=True)
 
index 2aaac0ae50699f5c012f36ba2f28eee1ccd281c4..022d75be11a09031746adf7a39975793ec293e83 100644 (file)
@@ -2,6 +2,11 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from future import standard_library
+standard_library.install_aliases()
+from builtins import str
+from builtins import next
+
 import functools
 import json
 import logging
@@ -9,7 +14,7 @@ import mock
 import os
 import unittest
 import copy
-import StringIO
+import io
 
 import arvados
 import arvados_cwl
@@ -126,7 +131,7 @@ class TestJob(unittest.TestCase):
                     # sharing link on the job
                     runner.api.links().create.side_effect = ApiError(
                         mock.MagicMock(return_value={'status': 403}),
-                        'Permission denied')
+                        bytes(b'Permission denied'))
                     j.run(runtimeContext)
                 else:
                     assert not runner.api.links().create.called
@@ -213,11 +218,12 @@ class TestJob(unittest.TestCase):
         runner.num_retries = 0
         runner.ignore_docker_for_reuse = False
 
-        reader().open.return_value = StringIO.StringIO(
-            """2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.tmpdir)=/tmp/crunch-job-task-work/compute3.1/tmpdir
+        reader().keys.return_value = "log.txt"
+        reader().open.return_value = io.StringIO(
+            str(u"""2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.tmpdir)=/tmp/crunch-job-task-work/compute3.1/tmpdir
 2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.outdir)=/tmp/crunch-job-task-work/compute3.1/outdir
 2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.keep)=/keep
-        """)
+        """))
         api.collections().list().execute.side_effect = ({"items": []},
                                                         {"items": [{"manifest_text": "XYZ"}]},
                                                         {"items": []},
@@ -286,11 +292,12 @@ class TestJob(unittest.TestCase):
         runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
         runner.num_retries = 0
 
-        reader().open.return_value = StringIO.StringIO(
-            """2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.tmpdir)=/tmp/crunch-job-task-work/compute3.1/tmpdir
+        reader().keys.return_value = "log.txt"
+        reader().open.return_value = io.StringIO(
+            str(u"""2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.tmpdir)=/tmp/crunch-job-task-work/compute3.1/tmpdir
 2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.outdir)=/tmp/crunch-job-task-work/compute3.1/outdir
 2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.keep)=/keep
-        """)
+        """))
 
         api.collections().list().execute.side_effect = (
             {"items": [{"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz2"}]},
@@ -398,8 +405,8 @@ class TestWorkflow(unittest.TestCase):
         arvtool.formatgraph = None
         it = arvtool.job({}, mock.MagicMock(), runtimeContext)
 
-        it.next().run(runtimeContext)
-        it.next().run(runtimeContext)
+        next(it).run(runtimeContext)
+        next(it).run(runtimeContext)
 
         with open("tests/wf/scatter2_subwf.cwl") as f:
             subwf = StripYAMLComments(f.read())
@@ -435,7 +442,7 @@ class TestWorkflow(unittest.TestCase):
 
         mockc.open().__enter__().write.assert_has_calls([mock.call(subwf)])
         mockc.open().__enter__().write.assert_has_calls([mock.call(
-'''{
+bytes(b'''{
   "fileblub": {
     "basename": "token.txt",
     "class": "File",
@@ -443,7 +450,7 @@ class TestWorkflow(unittest.TestCase):
     "size": 0
   },
   "sleeptime": 5
-}''')])
+}'''))])
 
     # The test passes no builder.resources
     # Hence the default resources will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
@@ -477,8 +484,9 @@ class TestWorkflow(unittest.TestCase):
         arvtool = arvados_cwl.ArvadosWorkflow(runner, tool, loadingContext)
         arvtool.formatgraph = None
         it = arvtool.job({}, mock.MagicMock(), runtimeContext)
-        it.next().run(runtimeContext)
-        it.next().run(runtimeContext)
+
+        next(it).run(runtimeContext)
+        next(it).run(runtimeContext)
 
         with open("tests/wf/echo-subwf.cwl") as f:
             subwf = StripYAMLComments(f.read())
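
Two test-port idioms from this file in one sketch: the `next()` builtin replaces Python 2's `it.next()` method, and `io.StringIO` holds text, so the crunchrunner log fixtures are fed in as unicode literals:

import io

it = iter(["first", "second"])
print(next(it), next(it))                 # first second

log = io.StringIO(u"line 1\nline 2\n")    # passing bytes here raises TypeError
print(log.readline().strip())             # line 1
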
index baeb4145ee6dbc5ba4db326f88acd54ce04352f4..562d1765daa24254cf4ad1a33b03af1129f57eff 100644 (file)
@@ -2,12 +2,15 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from future import standard_library
+standard_library.install_aliases()
+
 import functools
 import json
 import logging
 import mock
 import os
-import StringIO
+import io
 import unittest
 
 import arvados
@@ -35,7 +38,7 @@ class TestMakeOutput(unittest.TestCase):
         final_uuid = final.manifest_locator()
         num_retries = runner.num_retries
 
-        cwlout = StringIO.StringIO()
+        cwlout = io.StringIO()
         openmock = mock.MagicMock()
         final.open.return_value = openmock
         openmock.__enter__.return_value = cwlout
index 90dab01471ef61ab380955e6301a73306648edef..39117d86e3ca976ffaa19e1e5596e37bf018b842 100644 (file)
@@ -2,8 +2,14 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from future import standard_library
+standard_library.install_aliases()
+from builtins import object
+from builtins import str
+from future.utils import viewvalues
+
 import copy
-import cStringIO
+import io
 import functools
 import hashlib
 import json
@@ -12,6 +18,8 @@ import mock
 import sys
 import unittest
 
+from io import BytesIO, StringIO
+
 import arvados
 import arvados.collection
 import arvados_cwl
@@ -33,7 +41,7 @@ def stubs(func):
     @mock.patch("arvados.keep.KeepClient")
     @mock.patch("arvados.events.subscribe")
     def wrapped(self, events, keep_client1, keep_client2, keepdocker, *args, **kwargs):
-        class Stubs:
+        class Stubs(object):
             pass
         stubs = Stubs()
         stubs.events = events
@@ -60,6 +68,11 @@ def stubs(func):
         stubs.fake_user_uuid = "zzzzz-tpzed-zzzzzzzzzzzzzzz"
         stubs.fake_container_uuid = "zzzzz-dz642-zzzzzzzzzzzzzzz"
 
+        if sys.version_info[0] < 3:
+            stubs.capture_stdout = BytesIO()
+        else:
+            stubs.capture_stdout = StringIO()
+
         stubs.api = mock.MagicMock()
         stubs.api._rootDesc = get_rootDesc()
 
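
`arvados_cwl.main()` writes the submitted UUID to the captured stdout, which is byte-oriented on Python 2 and text on Python 3, hence the version-gated buffer above. The gate in isolation:

import sys
from io import BytesIO, StringIO

capture_stdout = BytesIO() if sys.version_info[0] < 3 else StringIO()
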
@@ -78,18 +91,18 @@ def stubs(func):
                 return self.exe
 
         def collection_createstub(created_collections, body, ensure_unique_name=None):
-            mt = body["manifest_text"]
+            mt = body["manifest_text"].encode('utf-8')
             uuid = "zzzzz-4zz18-zzzzzzzzzzzzzx%d" % len(created_collections)
             pdh = "%s+%i" % (hashlib.md5(mt).hexdigest(), len(mt))
             created_collections[uuid] = {
                 "uuid": uuid,
                 "portable_data_hash": pdh,
-                "manifest_text": mt
+                "manifest_text": mt.decode('utf-8')
             }
             return CollectionExecute(created_collections[uuid])
 
         def collection_getstub(created_collections, uuid):
-            for v in created_collections.itervalues():
+            for v in viewvalues(created_collections):
                 if uuid in (v["uuid"], v["portable_data_hash"]):
                     return CollectionExecute(v)
 
@@ -318,12 +331,10 @@ class TestSubmit(unittest.TestCase):
                 return '999999999999999999999999999999d4+99'
         arvdock.side_effect = get_image
 
-        capture_stdout = cStringIO.StringIO()
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--api=jobs", "--debug",
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         stubs.api.collections().create.assert_has_calls([
             mock.call(body=JsonDiffMatcher({
@@ -354,19 +365,17 @@ class TestSubmit(unittest.TestCase):
         expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
         stubs.api.pipeline_instances().create.assert_called_with(
             body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_pipeline_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @mock.patch("time.sleep")
     @stubs
     def test_submit_no_reuse(self, stubs, tm):
-        capture_stdout = cStringIO.StringIO()
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--api=jobs", "--debug", "--disable-reuse",
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
         expect_pipeline["components"]["cwl-runner"]["script_parameters"]["arv:enable_reuse"] = {"value": False}
@@ -374,8 +383,9 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.pipeline_instances().create.assert_called_with(
             body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_pipeline_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_error_when_multiple_storage_classes_specified(self, stubs):
@@ -389,49 +399,44 @@ class TestSubmit(unittest.TestCase):
     @mock.patch("time.sleep")
     @stubs
     def test_submit_on_error(self, stubs, tm):
-        capture_stdout = cStringIO.StringIO()
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--api=jobs", "--debug", "--on-error=stop",
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
         expect_pipeline["components"]["cwl-runner"]["script_parameters"]["arv:on_error"] = "stop"
 
         stubs.api.pipeline_instances().create.assert_called_with(
             body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_pipeline_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @mock.patch("time.sleep")
     @stubs
     def test_submit_runner_ram(self, stubs, tm):
-        capture_stdout = cStringIO.StringIO()
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--debug", "--submit-runner-ram=2048",
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
         expect_pipeline["components"]["cwl-runner"]["runtime_constraints"]["min_ram_mb_per_node"] = 2048
 
         stubs.api.pipeline_instances().create.assert_called_with(
             body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_pipeline_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @mock.patch("time.sleep")
     @stubs
     def test_submit_invalid_runner_ram(self, stubs, tm):
-        capture_stdout = cStringIO.StringIO()
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--debug", "--submit-runner-ram=-2048",
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
         self.assertEqual(exited, 1)
 
     @mock.patch("time.sleep")
@@ -439,30 +444,27 @@ class TestSubmit(unittest.TestCase):
     def test_submit_output_name(self, stubs, tm):
         output_name = "test_output_name"
 
-        capture_stdout = cStringIO.StringIO()
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--debug", "--output-name", output_name,
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
         expect_pipeline["components"]["cwl-runner"]["script_parameters"]["arv:output_name"] = output_name
 
         stubs.api.pipeline_instances().create.assert_called_with(
             body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_pipeline_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @mock.patch("time.sleep")
     @stubs
     def test_submit_pipeline_name(self, stubs, tm):
-        capture_stdout = cStringIO.StringIO()
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--debug", "--name=hello job 123",
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
         self.assertEqual(exited, 0)
 
         expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
@@ -470,7 +472,7 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.pipeline_instances().create.assert_called_with(
             body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_pipeline_uuid + '\n')
 
     @mock.patch("time.sleep")
@@ -478,11 +480,10 @@ class TestSubmit(unittest.TestCase):
     def test_submit_output_tags(self, stubs, tm):
         output_tags = "tag0,tag1,tag2"
 
-        capture_stdout = cStringIO.StringIO()
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--debug", "--output-tags", output_tags,
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
         self.assertEqual(exited, 0)
 
         expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
@@ -490,7 +491,7 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.pipeline_instances().create.assert_called_with(
             body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_pipeline_uuid + '\n')
 
     @mock.patch("time.sleep")
@@ -512,15 +513,10 @@ class TestSubmit(unittest.TestCase):
 
     @stubs
     def test_submit_container(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         stubs.api.collections().create.assert_has_calls([
             mock.call(body=JsonDiffMatcher({
@@ -539,20 +535,16 @@ class TestSubmit(unittest.TestCase):
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_container_no_reuse(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--disable-reuse",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--disable-reuse",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = [
@@ -566,18 +558,16 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_container_reuse_disabled_by_workflow(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--api=containers", "--debug",
              "tests/wf/submit_wf_no_reuse.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
         self.assertEqual(exited, 0)
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
@@ -602,21 +592,16 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
 
 
     @stubs
     def test_submit_container_on_error(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--on-error=stop",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--on-error=stop",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
@@ -628,22 +613,18 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_container_output_name(self, stubs):
         output_name = "test_output_name"
 
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--output-name", output_name,
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--output-name", output_name,
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
@@ -656,20 +637,16 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_storage_classes(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--debug", "--submit", "--no-wait", "--api=containers", "--storage-classes=foo",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--debug", "--submit", "--no-wait", "--api=containers", "--storage-classes=foo",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
@@ -681,8 +658,9 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @mock.patch("arvados_cwl.task_queue.TaskQueue")
     @mock.patch("arvados_cwl.arvworkflow.ArvadosWorkflow.job")
@@ -694,16 +672,13 @@ class TestSubmit(unittest.TestCase):
             return []
         job.side_effect = set_final_output
 
-        try:
-            exited = arvados_cwl.main(
-                ["--debug", "--local", "--storage-classes=foo",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                sys.stdin, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--debug", "--local", "--storage-classes=foo",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            sys.stdin, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         make_output.assert_called_with(u'Output of submit_wf.cwl', ['foo'], '', 'zzzzz-4zz18-zzzzzzzzzzzzzzzz')
+        self.assertEqual(exited, 0)
 
     @mock.patch("arvados_cwl.task_queue.TaskQueue")
     @mock.patch("arvados_cwl.arvworkflow.ArvadosWorkflow.job")
@@ -715,28 +690,20 @@ class TestSubmit(unittest.TestCase):
             return []
         job.side_effect = set_final_output
 
-        try:
-            exited = arvados_cwl.main(
-                ["--debug", "--local",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                sys.stdin, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--debug", "--local",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            sys.stdin, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         make_output.assert_called_with(u'Output of submit_wf.cwl', ['default'], '', 'zzzzz-4zz18-zzzzzzzzzzzzzzzz')
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_container_output_ttl(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--intermediate-output-ttl", "3600",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--intermediate-output-ttl", "3600",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
@@ -749,20 +716,17 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_container_trash_intermediate(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--trash-intermediate",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--trash-intermediate",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
@@ -775,22 +739,18 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_container_output_tags(self, stubs):
         output_tags = "tag0,tag1,tag2"
 
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--output-tags", output_tags,
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--output-tags", output_tags,
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
@@ -802,56 +762,48 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_container_runner_ram(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--submit-runner-ram=2048",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--submit-runner-ram=2048",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["runtime_constraints"]["ram"] = (2048+256)*1024*1024
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @mock.patch("arvados.collection.CollectionReader")
     @mock.patch("time.sleep")
     @stubs
     def test_submit_file_keepref(self, stubs, tm, collectionReader):
-        capture_stdout = cStringIO.StringIO()
         collectionReader().find.return_value = arvados.arvfile.ArvadosFile(mock.MagicMock(), "blorp.txt")
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--api=containers", "--debug",
              "tests/wf/submit_keepref_wf.cwl"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
         self.assertEqual(exited, 0)
 
-
     @mock.patch("arvados.collection.CollectionReader")
     @mock.patch("time.sleep")
     @stubs
     def test_submit_keepref(self, stubs, tm, reader):
-        capture_stdout = cStringIO.StringIO()
-
         with open("tests/wf/expect_arvworkflow.cwl") as f:
             reader().open().__enter__().read.return_value = f.read()
 
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--api=containers", "--debug",
              "keep:99999999999999999999999999999994+99/expect_arvworkflow.cwl#main", "-x", "XxX"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         expect_container = {
             'priority': 500,
@@ -896,24 +848,21 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @mock.patch("arvados.collection.CollectionReader")
     @mock.patch("time.sleep")
     @stubs
     def test_submit_jobs_keepref(self, stubs, tm, reader):
-        capture_stdout = cStringIO.StringIO()
-
         with open("tests/wf/expect_arvworkflow.cwl") as f:
             reader().open().__enter__().read.return_value = f.read()
 
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--api=jobs", "--debug",
              "keep:99999999999999999999999999999994+99/expect_arvworkflow.cwl#main", "-x", "XxX"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
         expect_pipeline["components"]["cwl-runner"]["script_parameters"]["x"] = "XxX"
@@ -923,20 +872,18 @@ class TestSubmit(unittest.TestCase):
         expect_pipeline["name"] = "expect_arvworkflow.cwl#main"
         stubs.api.pipeline_instances().create.assert_called_with(
             body=JsonDiffMatcher(expect_pipeline))
+        self.assertEqual(exited, 0)
 
     @mock.patch("time.sleep")
     @stubs
     def test_submit_arvworkflow(self, stubs, tm):
-        capture_stdout = cStringIO.StringIO()
-
         with open("tests/wf/expect_arvworkflow.cwl") as f:
             stubs.api.workflows().get().execute.return_value = {"definition": f.read(), "name": "a test workflow"}
 
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--api=containers", "--debug",
              "962eh-7fd4e-gkbzl62qqtfig37", "-x", "XxX"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         expect_container = {
             'priority': 500,
@@ -1020,60 +967,47 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_container_name(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--name=hello container 123",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--name=hello container 123",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["name"] = "hello container 123"
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_missing_input(self, stubs):
-        capture_stdout = cStringIO.StringIO()
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--api=containers", "--debug",
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
         self.assertEqual(exited, 0)
 
-        capture_stdout = cStringIO.StringIO()
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--api=containers", "--debug",
              "tests/wf/submit_wf.cwl", "tests/submit_test_job_missing.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
         self.assertEqual(exited, 1)
 
-
     @stubs
     def test_submit_container_project(self, stubs):
         project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--project-uuid="+project_uuid,
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--project-uuid="+project_uuid,
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["owner_uuid"] = project_uuid
@@ -1087,21 +1021,16 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_container_eval_timeout(self, stubs):
-        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--eval-timeout=60",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--eval-timeout=60",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
@@ -1113,21 +1042,16 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_container_collection_cache(self, stubs):
-        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--collection-cache-size=500",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--collection-cache-size=500",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
@@ -1140,22 +1064,16 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_container_thread_count(self, stubs):
-        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--thread-count=20",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--thread-count=20",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["command"] = ['arvados-cwl-runner', '--local', '--api=containers',
@@ -1167,82 +1085,64 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_job_runner_image(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=jobs", "--debug", "--submit-runner-image=arvados/jobs:123",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=jobs", "--debug", "--submit-runner-image=arvados/jobs:123",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         stubs.expect_pipeline_instance["components"]["cwl-runner"]["runtime_constraints"]["docker_image"] = "999999999999999999999999999999d5+99"
 
         expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
         stubs.api.pipeline_instances().create.assert_called_with(
             body=JsonDiffMatcher(expect_pipeline))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_pipeline_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_container_runner_image(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--submit-runner-image=arvados/jobs:123",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--submit-runner-image=arvados/jobs:123",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         stubs.expect_container_spec["container_image"] = "999999999999999999999999999999d5+99"
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_priority(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--priority=669",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--priority=669",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         stubs.expect_container_spec["priority"] = 669
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_wf_runner_resources(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug",
-                 "tests/wf/submit_wf_runner_resources.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug",
+                "tests/wf/submit_wf_runner_resources.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
         expect_container["runtime_constraints"] = {
@@ -1270,8 +1170,9 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     def tearDown(self):
         arvados_cwl.arvdocker.arv_docker_clear_cache()
@@ -1315,16 +1216,10 @@ class TestSubmit(unittest.TestCase):
 
     @stubs
     def test_submit_secrets(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug",
-                 "tests/wf/secret_wf.cwl", "tests/secret_test_job.yml"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
-
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug",
+                "tests/wf/secret_wf.cwl", "tests/secret_test_job.yml"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         expect_container = {
             "command": [
@@ -1479,8 +1374,9 @@ class TestSubmit(unittest.TestCase):
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_request_uuid(self, stubs):
@@ -1492,50 +1388,41 @@ class TestSubmit(unittest.TestCase):
             "state": "Queued"
         }
 
-        capture_stdout = cStringIO.StringIO()
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--submit-request-uuid=zzzzz-xvhdp-yyyyyyyyyyyyyyy",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--submit-request-uuid=zzzzz-xvhdp-yyyyyyyyyyyyyyy",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         stubs.api.container_requests().update.assert_called_with(
-            uuid="zzzzz-xvhdp-yyyyyyyyyyyyyyy", body=JsonDiffMatcher(stubs.expect_container_spec), cluster_id="zzzzz")
-        self.assertEqual(capture_stdout.getvalue(),
+            uuid="zzzzz-xvhdp-yyyyyyyyyyyyyyy", body=JsonDiffMatcher(stubs.expect_container_spec))
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_container_cluster_id(self, stubs):
-        capture_stdout = cStringIO.StringIO()
         stubs.api._rootDesc["remoteHosts"]["zbbbb"] = "123"
-        try:
-            exited = arvados_cwl.main(
-                ["--submit", "--no-wait", "--api=containers", "--debug", "--submit-runner-cluster=zbbbb",
-                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
-            self.assertEqual(exited, 0)
-        except:
-            logging.exception("")
+
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug", "--submit-runner-cluster=zbbbb",
+                "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
 
         expect_container = copy.deepcopy(stubs.expect_container_spec)
 
         stubs.api.container_requests().create.assert_called_with(
             body=JsonDiffMatcher(expect_container), cluster_id="zbbbb")
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_submit_validate_cluster_id(self, stubs):
-        capture_stdout = cStringIO.StringIO()
         stubs.api._rootDesc["remoteHosts"]["zbbbb"] = "123"
         exited = arvados_cwl.main(
             ["--submit", "--no-wait", "--api=containers", "--debug", "--submit-runner-cluster=zcccc",
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
         self.assertEqual(exited, 1)
 
 
@@ -1565,15 +1452,12 @@ class TestCreateTemplate(unittest.TestCase):
     def test_create(self, stubs):
         project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
 
-        capture_stdout = cStringIO.StringIO()
-
         exited = arvados_cwl.main(
             ["--create-workflow", "--debug",
              "--api=jobs",
              "--project-uuid", project_uuid,
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         stubs.api.pipeline_instances().create.refute_called()
         stubs.api.jobs().create.refute_called()
@@ -1590,24 +1474,21 @@ class TestCreateTemplate(unittest.TestCase):
         stubs.api.pipeline_templates().create.assert_called_with(
             body=JsonDiffMatcher(expect_template), ensure_unique_name=True)
 
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_pipeline_template_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_create_name(self, stubs):
         project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
 
-        capture_stdout = cStringIO.StringIO()
-
         exited = arvados_cwl.main(
             ["--create-workflow", "--debug",
              "--project-uuid", project_uuid,
              "--api=jobs",
              "--name", "testing 123",
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         stubs.api.pipeline_instances().create.refute_called()
         stubs.api.jobs().create.refute_called()
@@ -1624,16 +1505,14 @@ class TestCreateTemplate(unittest.TestCase):
         stubs.api.pipeline_templates().create.assert_called_with(
             body=JsonDiffMatcher(expect_template), ensure_unique_name=True)
 
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_pipeline_template_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_update_name(self, stubs):
         project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
 
-        capture_stdout = cStringIO.StringIO()
-
         exited = arvados_cwl.main(
             ["--update-workflow", self.existing_template_uuid,
              "--debug",
@@ -1641,8 +1520,7 @@ class TestCreateTemplate(unittest.TestCase):
              "--api=jobs",
              "--name", "testing 123",
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         stubs.api.pipeline_instances().create.refute_called()
         stubs.api.jobs().create.refute_called()
@@ -1660,8 +1538,9 @@ class TestCreateTemplate(unittest.TestCase):
         stubs.api.pipeline_templates().update.assert_called_with(
             body=JsonDiffMatcher(expect_template), uuid=self.existing_template_uuid)
 
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          self.existing_template_uuid + '\n')
+        self.assertEqual(exited, 0)
 
 
 class TestCreateWorkflow(unittest.TestCase):
@@ -1673,15 +1552,12 @@ class TestCreateWorkflow(unittest.TestCase):
     def test_create(self, stubs):
         project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
 
-        capture_stdout = cStringIO.StringIO()
-
         exited = arvados_cwl.main(
             ["--create-workflow", "--debug",
              "--api=containers",
              "--project-uuid", project_uuid,
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         stubs.api.pipeline_templates().create.refute_called()
         stubs.api.container_requests().create.refute_called()
@@ -1697,24 +1573,21 @@ class TestCreateWorkflow(unittest.TestCase):
         stubs.api.workflows().create.assert_called_with(
             body=JsonDiffMatcher(body))
 
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_workflow_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_create_name(self, stubs):
         project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
 
-        capture_stdout = cStringIO.StringIO()
-
         exited = arvados_cwl.main(
             ["--create-workflow", "--debug",
              "--api=containers",
              "--project-uuid", project_uuid,
              "--name", "testing 123",
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         stubs.api.pipeline_templates().create.refute_called()
         stubs.api.container_requests().create.refute_called()
@@ -1730,14 +1603,16 @@ class TestCreateWorkflow(unittest.TestCase):
         stubs.api.workflows().create.assert_called_with(
             body=JsonDiffMatcher(body))
 
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_workflow_uuid + '\n')
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_incompatible_api(self, stubs):
-        capture_stderr = cStringIO.StringIO()
-        logging.getLogger('arvados.cwl-runner').addHandler(
-            logging.StreamHandler(capture_stderr))
+        capture_stderr = io.StringIO()
+        acr_logger = logging.getLogger('arvados.cwl-runner')
+        stderr_logger = logging.StreamHandler(capture_stderr)
+        acr_logger.addHandler(stderr_logger)
 
         exited = arvados_cwl.main(
             ["--update-workflow", self.existing_workflow_uuid,
@@ -1749,17 +1624,15 @@ class TestCreateWorkflow(unittest.TestCase):
         self.assertRegexpMatches(
             capture_stderr.getvalue(),
             "--update-workflow arg '{}' uses 'containers' API, but --api='jobs' specified".format(self.existing_workflow_uuid))
+        acr_logger.removeHandler(stderr_logger)
 
     @stubs
     def test_update(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-
         exited = arvados_cwl.main(
             ["--update-workflow", self.existing_workflow_uuid,
              "--debug",
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         body = {
             "workflow": {
@@ -1771,20 +1644,17 @@ class TestCreateWorkflow(unittest.TestCase):
         stubs.api.workflows().update.assert_called_with(
             uuid=self.existing_workflow_uuid,
             body=JsonDiffMatcher(body))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          self.existing_workflow_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_update_name(self, stubs):
-        capture_stdout = cStringIO.StringIO()
-
         exited = arvados_cwl.main(
             ["--update-workflow", self.existing_workflow_uuid,
              "--debug", "--name", "testing 123",
              "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         body = {
             "workflow": {
@@ -1796,23 +1666,20 @@ class TestCreateWorkflow(unittest.TestCase):
         stubs.api.workflows().update.assert_called_with(
             uuid=self.existing_workflow_uuid,
             body=JsonDiffMatcher(body))
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          self.existing_workflow_uuid + '\n')
-
+        self.assertEqual(exited, 0)
 
     @stubs
     def test_create_collection_per_tool(self, stubs):
         project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
 
-        capture_stdout = cStringIO.StringIO()
-
         exited = arvados_cwl.main(
             ["--create-workflow", "--debug",
              "--api=containers",
              "--project-uuid", project_uuid,
              "tests/collection_per_tool/collection_per_tool.cwl"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         toolfile = "tests/collection_per_tool/collection_per_tool_packed.cwl"
         expect_workflow = StripYAMLComments(open(toolfile).read())
@@ -1828,8 +1695,9 @@ class TestCreateWorkflow(unittest.TestCase):
         stubs.api.workflows().create.assert_called_with(
             body=JsonDiffMatcher(body))
 
-        self.assertEqual(capture_stdout.getvalue(),
+        self.assertEqual(stubs.capture_stdout.getvalue(),
                          stubs.expect_workflow_uuid + '\n')
+        self.assertEqual(exited, 0)
 
 class TestTemplateInputs(unittest.TestCase):
     expect_template = {
@@ -1884,19 +1752,19 @@ class TestTemplateInputs(unittest.TestCase):
         exited = arvados_cwl.main(
             ["--create-template",
              "tests/wf/inputs_test.cwl", "tests/order/empty_order.json"],
-            cStringIO.StringIO(), sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         stubs.api.pipeline_templates().create.assert_called_with(
             body=JsonDiffMatcher(self.expect_template), ensure_unique_name=True)
 
+        self.assertEqual(exited, 0)
+
     @stubs
     def test_inputs(self, stubs):
         exited = arvados_cwl.main(
             ["--create-template",
              "tests/wf/inputs_test.cwl", "tests/order/inputs_test_order.json"],
-            cStringIO.StringIO(), sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
+            stubs.capture_stdout, sys.stderr, api_client=stubs.api)
 
         expect_template = copy.deepcopy(self.expect_template)
         params = expect_template[
@@ -1908,3 +1776,4 @@ class TestTemplateInputs(unittest.TestCase):
 
         stubs.api.pipeline_templates().create.assert_called_with(
             body=JsonDiffMatcher(expect_template), ensure_unique_name=True)
+        self.assertEqual(exited, 0)
index 948f31d4f9815f3deadf595c1f5cbd3e1a464c29..86a053ea484835980c791d4a728824919d01f78f 100644 (file)
@@ -23,37 +23,37 @@ class TestUrljoin(unittest.TestCase):
 
         cf = CollectionFetcher({}, None)
 
-        self.assertEquals("keep:99999999999999999999999999999991+99/hw.py",
+        self.assertEqual("keep:99999999999999999999999999999991+99/hw.py",
                           cf.urljoin("keep:99999999999999999999999999999991+99", "hw.py"))
 
-        self.assertEquals("keep:99999999999999999999999999999991+99/hw.py",
+        self.assertEqual("keep:99999999999999999999999999999991+99/hw.py",
                           cf.urljoin("keep:99999999999999999999999999999991+99/", "hw.py"))
 
-        self.assertEquals("keep:99999999999999999999999999999991+99/hw.py#main",
+        self.assertEqual("keep:99999999999999999999999999999991+99/hw.py#main",
                           cf.urljoin("keep:99999999999999999999999999999991+99", "hw.py#main"))
 
-        self.assertEquals("keep:99999999999999999999999999999991+99/hw.py#main",
+        self.assertEqual("keep:99999999999999999999999999999991+99/hw.py#main",
                           cf.urljoin("keep:99999999999999999999999999999991+99/hw.py", "#main"))
 
-        self.assertEquals("keep:99999999999999999999999999999991+99/dir/hw.py#main",
+        self.assertEqual("keep:99999999999999999999999999999991+99/dir/hw.py#main",
                           cf.urljoin("keep:99999999999999999999999999999991+99/dir/hw.py", "#main"))
 
-        self.assertEquals("keep:99999999999999999999999999999991+99/dir/wh.py",
+        self.assertEqual("keep:99999999999999999999999999999991+99/dir/wh.py",
                           cf.urljoin("keep:99999999999999999999999999999991+99/dir/hw.py", "wh.py"))
 
-        self.assertEquals("keep:99999999999999999999999999999991+99/wh.py",
+        self.assertEqual("keep:99999999999999999999999999999991+99/wh.py",
                           cf.urljoin("keep:99999999999999999999999999999991+99/dir/hw.py", "/wh.py"))
 
-        self.assertEquals("keep:99999999999999999999999999999991+99/wh.py#main",
+        self.assertEqual("keep:99999999999999999999999999999991+99/wh.py#main",
                           cf.urljoin("keep:99999999999999999999999999999991+99/dir/hw.py", "/wh.py#main"))
 
-        self.assertEquals("keep:99999999999999999999999999999991+99/wh.py",
+        self.assertEqual("keep:99999999999999999999999999999991+99/wh.py",
                           cf.urljoin("keep:99999999999999999999999999999991+99/hw.py#main", "wh.py"))
 
-        self.assertEquals("keep:99999999999999999999999999999992+99",
+        self.assertEqual("keep:99999999999999999999999999999992+99",
                           cf.urljoin("keep:99999999999999999999999999999991+99", "keep:99999999999999999999999999999992+99"))
 
-        self.assertEquals("keep:99999999999999999999999999999991+99/dir/wh.py",
+        self.assertEqual("keep:99999999999999999999999999999991+99/dir/wh.py",
                           cf.urljoin("keep:99999999999999999999999999999991+99/dir/", "wh.py"))
 
     def test_resolver(self):
index 2532bd596c0f3dfac3554d3b5fe31ffb9d7e50d4..3ca02c7df65f023c32dab341224534f588d32e67 100644 (file)
@@ -2,6 +2,8 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from builtins import bytes
+
 import unittest
 import mock
 import datetime
@@ -39,7 +41,16 @@ class TestUtil(unittest.TestCase):
 
     def test_get_current_container_error(self):
         api = mock.MagicMock()
-        api.containers().current().execute.side_effect = ApiError(httplib2.Response({"status": 300}), "")
+        api.containers().current().execute.side_effect = ApiError(httplib2.Response({"status": 300}), bytes(b""))
+        logger = mock.MagicMock()
+
+        with self.assertRaises(ApiError):
+            get_current_container(api, num_retries=0, logger=logger)
+
+    def test_get_current_container_404_error(self):
+        api = mock.MagicMock()
+        api.containers().current().execute.side_effect = ApiError(httplib2.Response({"status": 404}), bytes(b""))
         logger = mock.MagicMock()
 
-        self.assertRaises(ApiError, get_current_container(api, num_retries=0, logger=logger))
+        current_container = get_current_container(api, num_retries=0, logger=logger)
+        self.assertEqual(current_container, None)
\ No newline at end of file
index 355872232bc7f430a8b61f7e8f8dffbe09cc5530..b4322a809320b7be5823296f6cb72a39d4273f24 100644 (file)
@@ -2,6 +2,9 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+from __future__ import print_function
+from __future__ import division
+
 import arvados
 import sys
 import os
@@ -9,7 +12,7 @@ import os
 if "JOB_UUID" in os.environ:
     requested = arvados.api().jobs().get(uuid=os.environ["JOB_UUID"]).execute()["runtime_constraints"]["min_ram_mb_per_node"]
 else:
-    requested = arvados.api().containers().current().execute()["runtime_constraints"]["ram"]/(1024*1024)
+    requested = arvados.api().containers().current().execute()["runtime_constraints"]["ram"] // (1024*1024)
 
 print("Requested %d expected %d" % (requested, int(sys.argv[1])))
 
index d33956ccc3f74caa7d6b64958b4c9863f09bbd70..f9e370794d4e90d87a12deb54167c1d700ba16f1 100644 (file)
@@ -18,9 +18,17 @@ MAINTAINER Ward Vandewege <ward@curoverse.com>
 
 ENV DEBIAN_FRONTEND noninteractive
 
-RUN apt-get update -q && apt-get install -qy git python-pip python-virtualenv python-dev libcurl4-gnutls-dev libgnutls28-dev nodejs python-pyasn1-modules
+ARG pythoncmd=python
 
-RUN pip install -U setuptools six requests
+RUN apt-get update -q && apt-get install -qy --no-install-recommends \
+    git ${pythoncmd}-pip ${pythoncmd}-virtualenv ${pythoncmd}-dev libcurl4-gnutls-dev \
+    libgnutls28-dev nodejs ${pythoncmd}-pyasn1-modules build-essential
+
+RUN if [ "$pythoncmd" = "python3" ]; then \
+       pip3 install -U setuptools six requests ; \
+    else \
+       pip install -U setuptools six requests ; \
+    fi
 
 ARG sdk
 ARG runner
@@ -32,10 +40,10 @@ ADD cwl/salad_dist/$salad /tmp/
 ADD cwl/cwltool_dist/$cwltool /tmp/
 ADD cwl/dist/$runner /tmp/
 
-RUN cd /tmp/arvados-python-client-* && python setup.py install
-RUN if test -d /tmp/schema-salad-* ; then cd /tmp/schema-salad-* && python setup.py install ; fi
-RUN if test -d /tmp/cwltool-* ; then cd /tmp/cwltool-* && python setup.py install ; fi
-RUN cd /tmp/arvados-cwl-runner-* && python setup.py install
+RUN cd /tmp/arvados-python-client-* && $pythoncmd setup.py install
+RUN if test -d /tmp/schema-salad-* ; then cd /tmp/schema-salad-* && $pythoncmd setup.py install ; fi
+RUN if test -d /tmp/cwltool-* ; then cd /tmp/cwltool-* && $pythoncmd setup.py install ; fi
+RUN cd /tmp/arvados-cwl-runner-* && $pythoncmd setup.py install
 
 # Install dependencies and set up system.
 RUN /usr/sbin/adduser --disabled-password \
index cca9f9bf1be8e946b7b9594f1ed839e92aa73485..787e01ab8f7dc8be892e7c754bca4a29cba84b13 100644 (file)
@@ -210,14 +210,19 @@ func (c *Client) RequestAndDecode(dst interface{}, method, path string, body io.
        if err != nil {
                return err
        }
-       if (method == "GET" || body != nil) && urlValues != nil {
-               // FIXME: what if params don't fit in URL
+       if urlValues == nil {
+               // Nothing to send
+       } else if method == "GET" || method == "HEAD" || body != nil {
+               // Must send params in query part of URL (FIXME: what
+               // if resulting URL is too long?)
                u, err := url.Parse(urlString)
                if err != nil {
                        return err
                }
                u.RawQuery = urlValues.Encode()
                urlString = u.String()
+       } else {
+               body = strings.NewReader(urlValues.Encode())
        }
        req, err := http.NewRequest(method, urlString, body)
        if err != nil {
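
The hunk above makes RequestAndDecode put the encoded parameters in the query string for GET and HEAD (or whenever the caller already supplied a body), and otherwise send them form-encoded as the request body. A minimal standalone sketch of the same branching, using a hypothetical buildRequest helper that is not part of the Arvados SDK:

    package example

    import (
        "io"
        "net/http"
        "net/url"
        "strings"
    )

    // buildRequest mirrors the logic above: params ride in the query
    // string for GET/HEAD or when a body already exists; otherwise they
    // become a form-encoded request body.
    func buildRequest(method, urlString string, params url.Values, body io.Reader) (*http.Request, error) {
        if params == nil {
            // nothing extra to send
        } else if method == "GET" || method == "HEAD" || body != nil {
            u, err := url.Parse(urlString)
            if err != nil {
                return nil, err
            }
            u.RawQuery = params.Encode()
            urlString = u.String()
        } else {
            body = strings.NewReader(params.Encode())
        }
        return http.NewRequest(method, urlString, body)
    }

Note that a real client taking the form-body branch would also need to set a Content-Type: application/x-www-form-urlencoded header, which this hunk does not show.
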
index bfa86abf6a48a1fcf30eda2618bdfaf28b0a2efb..c2154d0f29cd1dbb6decad7962036dd9073bd24e 100644 (file)
@@ -66,6 +66,12 @@ type Cluster struct {
        RemoteClusters     map[string]RemoteCluster
        PostgreSQL         PostgreSQL
        RequestLimits      RequestLimits
+       Logging            Logging
+}
+
+type Logging struct {
+       Level  string
+       Format string
 }
 
 type PostgreSQL struct {
@@ -100,7 +106,7 @@ type InstanceType struct {
 type Dispatch struct {
        // PEM encoded SSH key (RSA, DSA, or ECDSA) able to log in to
        // cloud VMs.
-       PrivateKey []byte
+       PrivateKey string
 
        // Max time for workers to come up before abandoning stale
        // locks from previous run
@@ -121,7 +127,12 @@ type CloudVMs struct {
        // and ready to run containers, e.g., "mount | grep
        // /encrypted-tmp"
        BootProbeCommand string
-       SyncInterval     Duration
+
+       // Listening port (name or number) of SSH servers on worker
+       // VMs
+       SSHPort string
+
+       SyncInterval Duration
 
        // Maximum idle time before automatic shutdown
        TimeoutIdle Duration
@@ -138,7 +149,7 @@ type CloudVMs struct {
        ImageID string
 
        Driver           string
-       DriverParameters map[string]interface{}
+       DriverParameters json.RawMessage
 }
 
 type InstanceTypeMap map[string]InstanceType
@@ -163,6 +174,9 @@ func (it *InstanceTypeMap) UnmarshalJSON(data []byte) error {
                        if _, ok := (*it)[t.Name]; ok {
                                return errDuplicateInstanceTypeName
                        }
+                       if t.ProviderType == "" {
+                               t.ProviderType = t.Name
+                       }
                        (*it)[t.Name] = t
                }
                return nil
@@ -172,10 +186,14 @@ func (it *InstanceTypeMap) UnmarshalJSON(data []byte) error {
        if err != nil {
                return err
        }
-       // Fill in Name field using hash key.
+       // Fill in Name field (and ProviderType field, if not
+       // specified) using hash key.
        *it = InstanceTypeMap(hash)
        for name, t := range *it {
                t.Name = name
+               if t.ProviderType == "" {
+                       t.ProviderType = name
+               }
                (*it)[name] = t
        }
        return nil
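
Both decoding paths now fall back to the map key when ProviderType is omitted from the config. Reduced to a self-contained sketch (simplified struct, not the SDK's InstanceType):

    package example

    type instanceType struct {
        Name         string
        ProviderType string
    }

    // fillDefaults copies each map key into Name and, when the config
    // omitted it, into ProviderType as well. Ranging over a map yields
    // copies, so the modified struct must be written back.
    func fillDefaults(types map[string]instanceType) {
        for name, t := range types {
            t.Name = name
            if t.ProviderType == "" {
                t.ProviderType = name
            }
            types[name] = t
        }
    }

The write-back on the last line is the same reason the hunk above ends with (*it)[name] = t.
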
index 02a0d76decbad272baee737282b5087a72a33c60..fb095481bb07b2aa97489a17347e56c65166b356 100644 (file)
@@ -8,21 +8,22 @@ import "time"
 
 // Container is an arvados#container resource.
 type Container struct {
-       UUID                 string               `json:"uuid"`
-       CreatedAt            time.Time            `json:"created_at"`
-       Command              []string             `json:"command"`
-       ContainerImage       string               `json:"container_image"`
-       Cwd                  string               `json:"cwd"`
-       Environment          map[string]string    `json:"environment"`
-       LockedByUUID         string               `json:"locked_by_uuid"`
-       Mounts               map[string]Mount     `json:"mounts"`
-       Output               string               `json:"output"`
-       OutputPath           string               `json:"output_path"`
-       Priority             int64                `json:"priority"`
-       RuntimeConstraints   RuntimeConstraints   `json:"runtime_constraints"`
-       State                ContainerState       `json:"state"`
-       SchedulingParameters SchedulingParameters `json:"scheduling_parameters"`
-       ExitCode             int                  `json:"exit_code"`
+       UUID                 string                 `json:"uuid"`
+       CreatedAt            time.Time              `json:"created_at"`
+       Command              []string               `json:"command"`
+       ContainerImage       string                 `json:"container_image"`
+       Cwd                  string                 `json:"cwd"`
+       Environment          map[string]string      `json:"environment"`
+       LockedByUUID         string                 `json:"locked_by_uuid"`
+       Mounts               map[string]Mount       `json:"mounts"`
+       Output               string                 `json:"output"`
+       OutputPath           string                 `json:"output_path"`
+       Priority             int64                  `json:"priority"`
+       RuntimeConstraints   RuntimeConstraints     `json:"runtime_constraints"`
+       State                ContainerState         `json:"state"`
+       SchedulingParameters SchedulingParameters   `json:"scheduling_parameters"`
+       ExitCode             int                    `json:"exit_code"`
+       RuntimeStatus        map[string]interface{} `json:"runtime_status"`
 }
 
 // Container is an arvados#container resource.
index d2a19a024c1aaecb9150d998bc79e6f3281a7b1b..25eed010f26c534ef8e36dfa119065731d1e2ac4 100644 (file)
@@ -14,7 +14,7 @@ import (
 // a number of nanoseconds.
 type Duration time.Duration
 
-// UnmarshalJSON implements json.Unmarshaler
+// UnmarshalJSON implements json.Unmarshaler.
 func (d *Duration) UnmarshalJSON(data []byte) error {
        if data[0] == '"' {
                return d.Set(string(data[1 : len(data)-1]))
@@ -22,22 +22,22 @@ func (d *Duration) UnmarshalJSON(data []byte) error {
        return fmt.Errorf("duration must be given as a string like \"600s\" or \"1h30m\"")
 }
 
-// MarshalJSON implements json.Marshaler
+// MarshalJSON implements json.Marshaler.
 func (d *Duration) MarshalJSON() ([]byte, error) {
        return json.Marshal(d.String())
 }
 
-// String implements fmt.Stringer
+// String implements fmt.Stringer.
 func (d Duration) String() string {
        return time.Duration(d).String()
 }
 
-// Duration returns a time.Duration
+// Duration returns a time.Duration.
 func (d Duration) Duration() time.Duration {
        return time.Duration(d)
 }
 
-// Value implements flag.Value
+// Set implements flag.Value by parsing the given string with time.ParseDuration.
 func (d *Duration) Set(s string) error {
        dur, err := time.ParseDuration(s)
        *d = Duration(dur)
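
With UnmarshalJSON, MarshalJSON, String, and Set all defined, one Duration type can back both a JSON config field and a command-line flag. A usage sketch, assuming this tree's import path (the -timeout flag is invented for illustration):

    package main

    import (
        "encoding/json"
        "flag"
        "fmt"
        "time"

        "git.curoverse.com/arvados.git/sdk/go/arvados"
    )

    func main() {
        // As a flag: Set parses values like "600s" or "1h30m".
        d := arvados.Duration(10 * time.Second)
        flag.Var(&d, "timeout", "e.g. 600s or 1h30m")
        flag.Parse()

        // In JSON configs, only the quoted string form is accepted.
        var cfg struct{ TimeoutIdle arvados.Duration }
        if err := json.Unmarshal([]byte(`{"TimeoutIdle":"90s"}`), &cfg); err != nil {
            panic(err)
        }
        fmt.Println(d, cfg.TimeoutIdle.Duration())
    }
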
index e0f2483131a98a64856116bda8c14b4de7bd7051..4f648e9b437e7b5eead7abf4b0db302011725cb7 100644 (file)
@@ -41,6 +41,10 @@ const (
        QueuedContainerRequestUUID = "zzzzz-xvhdp-cr4queuedcontnr"
        QueuedContainerUUID        = "zzzzz-dz642-queuedcontainer"
 
+       RunningContainerUUID = "zzzzz-dz642-runningcontainr"
+
+       CompletedContainerUUID = "zzzzz-dz642-compltcontainer"
+
        ArvadosRepoUUID = "zzzzz-s0uqq-arvadosrepo0123"
        ArvadosRepoName = "arvados"
        FooRepoUUID     = "zzzzz-s0uqq-382brsig8rp3666"
index 45e4efdbeff2c5e5e507e92c94c85c8a189d8263..e66eeadee1e1fc8d6b50cd3e10fa59e8a5a66a80 100644 (file)
@@ -5,9 +5,13 @@
 package ctxlog
 
 import (
+       "bytes"
        "context"
+       "io"
+       "os"
 
        "github.com/sirupsen/logrus"
+       check "gopkg.in/check.v1"
 )
 
 var (
@@ -19,45 +23,87 @@ const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
 
 // Context returns a new child context such that FromContext(child)
 // returns the given logger.
-func Context(ctx context.Context, logger *logrus.Entry) context.Context {
+func Context(ctx context.Context, logger logrus.FieldLogger) context.Context {
        return context.WithValue(ctx, loggerCtxKey, logger)
 }
 
 // FromContext returns the logger suitable for the given context -- the one
 // attached by contextWithLogger() if applicable, otherwise the
 // top-level logger with no fields/values.
-func FromContext(ctx context.Context) *logrus.Entry {
+func FromContext(ctx context.Context) logrus.FieldLogger {
        if ctx != nil {
-               if logger, ok := ctx.Value(loggerCtxKey).(*logrus.Entry); ok {
+               if logger, ok := ctx.Value(loggerCtxKey).(logrus.FieldLogger); ok {
                        return logger
                }
        }
        return rootLogger.WithFields(nil)
 }
 
+// New returns a new logger with the indicated format and
+// level.
+func New(out io.Writer, format, level string) logrus.FieldLogger {
+       logger := logrus.New()
+       logger.Out = out
+       setFormat(logger, format)
+       setLevel(logger, level)
+       return logger
+}
+
+func TestLogger(c *check.C) logrus.FieldLogger {
+       logger := logrus.New()
+       logger.Out = &logWriter{c.Log}
+       setFormat(logger, "text")
+       if d := os.Getenv("ARVADOS_DEBUG"); d != "0" && d != "" {
+               setLevel(logger, "debug")
+       } else {
+               setLevel(logger, "info")
+       }
+       return logger
+}
+
 // SetLevel sets the current logging level. See logrus for level
 // names.
 func SetLevel(level string) {
-       lvl, err := logrus.ParseLevel(level)
-       if err != nil {
-               logrus.Fatal(err)
+       setLevel(rootLogger, level)
+}
+
+func setLevel(logger *logrus.Logger, level string) {
+       if level == "" {
+       } else if lvl, err := logrus.ParseLevel(level); err != nil {
+               logrus.WithField("Level", level).Fatal("unknown log level")
+       } else {
+               logger.Level = lvl
        }
-       rootLogger.Level = lvl
 }
 
 // SetFormat sets the current logging format to "json" or "text".
 func SetFormat(format string) {
+       setFormat(rootLogger, format)
+}
+
+func setFormat(logger *logrus.Logger, format string) {
        switch format {
        case "text":
-               rootLogger.Formatter = &logrus.TextFormatter{
+               logger.Formatter = &logrus.TextFormatter{
                        FullTimestamp:   true,
                        TimestampFormat: rfc3339NanoFixed,
                }
-       case "json":
-               rootLogger.Formatter = &logrus.JSONFormatter{
+       case "json", "":
+               logger.Formatter = &logrus.JSONFormatter{
                        TimestampFormat: rfc3339NanoFixed,
                }
        default:
-               logrus.WithField("LogFormat", format).Fatal("unknown log format")
+               logrus.WithField("Format", format).Fatal("unknown log format")
        }
 }
+
+// logWriter is an io.Writer that writes by calling a "write log"
+// function, typically (*check.C).Log().
+type logWriter struct {
+       logfunc func(...interface{})
+}
+
+func (tl *logWriter) Write(buf []byte) (int, error) {
+       tl.logfunc(string(bytes.TrimRight(buf, "\n")))
+       return len(buf), nil
+}
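
Switching the exported API from *logrus.Entry to the logrus.FieldLogger interface is what lets tests drop in a gocheck-backed logger. A short usage sketch (import paths as laid out in this tree):

    package example

    import (
        "context"

        "git.curoverse.com/arvados.git/sdk/go/ctxlog"
        check "gopkg.in/check.v1"
    )

    // useLogger threads a test logger through a context; code that only
    // receives ctx can still log via ctxlog.FromContext.
    func useLogger(c *check.C) {
        logger := ctxlog.TestLogger(c) // writes to c.Log; level honors ARVADOS_DEBUG
        ctx := ctxlog.Context(context.Background(), logger)

        ctxlog.FromContext(ctx).WithField("state", "Queued").Info("dequeued container")
    }
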
index 6440bdfbd4795756ac6d059738b443e5e8b06d97..6c323f5a453086615051b78a5744e071d127f8fc 100644 (file)
@@ -4,7 +4,10 @@
 
 case "$TARGET" in
     debian* | ubuntu*)
-        fpm_depends+=('libpam-python')
+        fpm_depends+=('libpam-python' 'libcurl3-gnutls')
+        ;;
+    centos*)
+        fpm_depends+=('python-pam')
         ;;
     *)
         echo >&2 "ERROR: $PACKAGE: pam_python.so dependency unavailable in $TARGET."
index 2c0045d6a0d60d9bb71514d3eb2ab0b7a7d3336b..7c3406d30040228e9d5e9a333ee56e9459605eb2 100644 (file)
@@ -2,4 +2,6 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
+import sys
+sys.path.append('/usr/share/python2.7/dist/libpam-arvados/lib/python2.7/site-packages')
 from arvados_pam import *
index 55fc6b626d5de9798b989d2b252d158ca89baab6..ac387a6f6c7a29ea56e143279f20aca5c166947f 100644 (file)
@@ -10,17 +10,20 @@ import errno
 import json
 import os
 import re
-import subprocess32 as subprocess
 import sys
 import tarfile
 import tempfile
 import shutil
 import _strptime
 import fcntl
-
 from operator import itemgetter
 from stat import *
 
+if os.name == "posix" and sys.version_info[0] < 3:
+    import subprocess32 as subprocess
+else:
+    import subprocess
+
 import arvados
 import arvados.util
 import arvados.commands._util as arv_cmd
@@ -133,6 +136,7 @@ def docker_images():
     next(list_output)  # Ignore the header line
     for line in list_output:
         words = line.split()
+        words = [word.decode() for word in words]
         size_index = len(words) - 2
         repo, tag, imageid = words[:3]
         ctime = ' '.join(words[3:size_index])
@@ -503,7 +507,7 @@ def main(arguments=None, stdout=sys.stdout, install_sig_handlers=True, api=None)
         else:
             json_filename = raw_image_hash + '/json'
         json_file = image_tar.extractfile(image_tar.getmember(json_filename))
-        image_metadata = json.load(json_file)
+        image_metadata = json.loads(json_file.read().decode())
         json_file.close()
         image_tar.close()
         link_base = {'head_uuid': coll_uuid, 'properties': {}}
similarity index 68%
rename from backports/deb-fuse/fpm-info.sh
rename to sdk/python/fpm-info.sh
index 46088c0c6dc88d77addda471e2617768f355a0cd..7a89cf03a0935c109bb7dbfa4af7d27968dcc84f 100644 (file)
@@ -3,7 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 case "$TARGET" in
-    ubuntu1204)
-        fpm_depends+=('libfuse2 = 2.9.2-5')
+    debian* | ubuntu*)
+        fpm_depends+=(libcurl3-gnutls)
         ;;
 esac
index 9b38f07140049807947c8c3f3221966136a7a3d9..aaa07103381ccb8dd78d321cb90907bec727ec75 100644 (file)
@@ -54,7 +54,13 @@ setup(name='arvados-python-client',
           'ruamel.yaml >=0.15.54, <=0.15.77',
           'setuptools',
           'ws4py >=0.4.2',
-          'subprocess32 >=3.5.1',
+      ],
+      extras_require={
+          ':os.name=="posix" and python_version<"3"': ['subprocess32 >= 3.5.1'],
+      },
+      classifiers=[
+          'Programming Language :: Python :: 2',
+          'Programming Language :: Python :: 3',
       ],
       test_suite='tests',
       tests_require=['pbr<1.7.0', 'mock>=1.0', 'PyYAML'],
index da4c39d9014431fe750a78fa3c1bdb07232a0bff..2cda8bcb1441967135b1feb5c2adf344e970da81 100644 (file)
@@ -30,3 +30,6 @@
 
 # Generated git-commit.version file
 /git-commit.version
+
+# Generated when building distribution packages
+/package-build.version
index 39253e1036ba9a52b2070f9e0a7d4043fecb2d43..38538cb4ffbe8d6db29fcc430cc67620f25641b4 100644 (file)
@@ -155,6 +155,12 @@ class ApiClientAuthorization < ArvadosModel
         clnt = HTTPClient.new
         if Rails.configuration.sso_insecure
           clnt.ssl_config.verify_mode = OpenSSL::SSL::VERIFY_NONE
+        else
+          # Use system CA certificates
+          ["/etc/ssl/certs/ca-certificates.crt",
+           "/etc/pki/tls/certs/ca-bundle.crt"]
+            .select { |ca_path| File.readable?(ca_path) }
+            .each { |ca_path| clnt.ssl_config.add_trust_ca(ca_path) }
         end
         remote_user = SafeJSON.load(
           clnt.get_content('https://' + host + '/arvados/v1/users/current',
index 33cc686d4f5a14f3a432bc3df077ab258797a4a8..6147b79f9f5aa16c6b9e24ef5164bab43139373a 100644 (file)
@@ -262,6 +262,7 @@ class Collection < ArvadosModel
       sync_past_versions if syncable_updates.any?
       if snapshot
         snapshot.attributes = self.syncable_updates
+        snapshot.manifest_text = snapshot.signed_manifest_text
         snapshot.save
       end
     end
index bd586907ee2eaf205616251be126bc7cf9c94b09..abcfdbd296b3ab71cf7e8466e7c9279076f2c93f 100644 (file)
@@ -346,7 +346,7 @@ class Container < ArvadosModel
     transaction do
       reload
       check_lock_fail
-      update_attributes!(state: Locked)
+      update_attributes!(state: Locked, lock_count: self.lock_count+1)
     end
   end
 
@@ -364,7 +364,14 @@ class Container < ArvadosModel
     transaction do
       reload(lock: 'FOR UPDATE')
       check_unlock_fail
-      update_attributes!(state: Queued)
+      if self.lock_count < Rails.configuration.max_container_dispatch_attempts
+        update_attributes!(state: Queued)
+      else
+        update_attributes!(state: Cancelled,
+                           runtime_status: {
+                             error: "Container exceeded 'max_container_dispatch_attempts' (lock_count=#{self.lock_count})."
+                           })
+      end
     end
   end
 
@@ -375,6 +382,9 @@ class Container < ArvadosModel
     else
       kwargs = {}
     end
+    if users_list.select { |u| u.is_admin }.any?
+      return super
+    end
     Container.where(ContainerRequest.readable_by(*users_list).where("containers.uuid = container_requests.container_uuid").exists)
   end
 
@@ -454,7 +464,7 @@ class Container < ArvadosModel
 
     case self.state
     when Locked
-      permitted.push :priority, :runtime_status, :log
+      permitted.push :priority, :runtime_status, :log, :lock_count
 
     when Queued
       permitted.push :priority
@@ -475,7 +485,7 @@ class Container < ArvadosModel
       when Running
         permitted.push :finished_at, *progress_attrs
       when Queued, Locked
-        permitted.push :finished_at, :log
+        permitted.push :finished_at, :log, :runtime_status
       end
 
     else
index dcf270e3fb5d1a59a25e9858fc65e2eb2b901c42..d0f3a4caeb11d9f931772a8b1036fb257c2632fe 100644 (file)
@@ -529,6 +529,10 @@ common:
   # > 0 = auto-create a new version when older than the specified number of seconds.
   preserve_version_if_idle: -1
 
+  # Number of times a container can be unlocked before being
+  # automatically cancelled.
+  max_container_dispatch_attempts: 5
+
 development:
   force_ssl: false
   cache_classes: false
diff --git a/services/api/db/migrate/20190214214814_add_container_lock_count.rb b/services/api/db/migrate/20190214214814_add_container_lock_count.rb
new file mode 100644 (file)
index 0000000..a496eb0
--- /dev/null
@@ -0,0 +1,5 @@
+class AddContainerLockCount < ActiveRecord::Migration
+  def change
+    add_column :containers, :lock_count, :int, :null => false, :default => 0
+  end
+end
index 211fa5043fda2aedc33646f8c98dff863bec8d7a..f766f33e1b35e1f85a64aa2c5d87bf85e2bb6d0f 100644 (file)
@@ -362,7 +362,8 @@ CREATE TABLE public.containers (
     runtime_status jsonb DEFAULT '{}'::jsonb,
     runtime_user_uuid text,
     runtime_auth_scopes jsonb,
-    runtime_token text
+    runtime_token text,
+    lock_count integer DEFAULT 0 NOT NULL
 );
 
 
@@ -3217,3 +3218,5 @@ INSERT INTO schema_migrations (version) VALUES ('20181011184200');
 
 INSERT INTO schema_migrations (version) VALUES ('20181213183234');
 
+INSERT INTO schema_migrations (version) VALUES ('20190214214814');
+
index 26b8290e6961452e97f505ad3b239f6ef5a28596..997d89d5cd13d72c97dd3edcaa177bca36e1efed 100644 (file)
@@ -1279,7 +1279,6 @@ EOS
         version: 42,
         current_version_uuid: collections(:collection_owned_by_active).uuid,
         manifest_text: manifest_text,
-        # portable_data_hash: "d30fe8ae534397864cb96c544f4cf102+47"
       }
     }
     assert_response :success
@@ -1287,4 +1286,28 @@ EOS
     assert_equal 1, resp['version']
     assert_equal resp['uuid'], resp['current_version_uuid']
   end
+
+  test "update collection with versioning enabled" do
+    Rails.configuration.collection_versioning = true
+    Rails.configuration.preserve_version_if_idle = 1 # 1 second
+
+    col = collections(:collection_owned_by_active)
+    assert_equal 2, col.version
+    assert col.modified_at < Time.now - 1.second
+
+    token = api_client_authorizations(:active).v2token
+    signed = Blob.sign_locator(
+      'acbd18db4cc2f85cedef654fccc4a4d8+3',
+      key: Rails.configuration.blob_signing_key,
+      api_token: token)
+    authorize_with_token token
+    put :update, {
+          id: col.uuid,
+          collection: {
+            manifest_text: ". #{signed} 0:3:foo.txt\n",
+          },
+        }
+    assert_response :success
+    assert_equal 3, json_response['version']
+  end
 end
index 2a9ff5bf4cc6985a413f62a03d7b9555e9c0f938..178135ead87098b23874b3eeb607437458ee2eb0 100644 (file)
@@ -663,6 +663,52 @@ class ContainerTest < ActiveSupport::TestCase
     assert_operator auth_exp, :<, db_current_time
   end
 
+  test "Exceed maximum lock-unlock cycles" do
+    Rails.configuration.max_container_dispatch_attempts = 3
+
+    set_user_from_auth :active
+    c, cr = minimal_new
+
+    set_user_from_auth :dispatch1
+    assert_equal Container::Queued, c.state
+    assert_equal 0, c.lock_count
+
+    c.lock
+    c.reload
+    assert_equal 1, c.lock_count
+    assert_equal Container::Locked, c.state
+
+    c.unlock
+    c.reload
+    assert_equal 1, c.lock_count
+    assert_equal Container::Queued, c.state
+
+    c.lock
+    c.reload
+    assert_equal 2, c.lock_count
+    assert_equal Container::Locked, c.state
+
+    c.unlock
+    c.reload
+    assert_equal 2, c.lock_count
+    assert_equal Container::Queued, c.state
+
+    c.lock
+    c.reload
+    assert_equal 3, c.lock_count
+    assert_equal Container::Locked, c.state
+
+    c.unlock
+    c.reload
+    assert_equal 3, c.lock_count
+    assert_equal Container::Cancelled, c.state
+
+    assert_raise(ArvadosModel::LockFailedError) do
+      # Cancelled to Locked is not allowed
+      c.lock
+    end
+  end
+
   test "Container queued cancel" do
     set_user_from_auth :active
     c, cr = minimal_new({container_count_max: 1})
@@ -677,6 +723,14 @@ class ContainerTest < ActiveSupport::TestCase
     assert_equal 1, Container.readable_by(users(:active)).where(state: "Queued").count
   end
 
+  test "Containers with no matching request are readable by admin" do
+    uuids = Container.includes('container_requests').where(container_requests: {uuid: nil}).collect(&:uuid)
+    assert_not_empty uuids
+    assert_empty Container.readable_by(users(:active)).where(uuid: uuids)
+    assert_not_empty Container.readable_by(users(:admin)).where(uuid: uuids)
+    assert_equal uuids.count, Container.readable_by(users(:admin)).where(uuid: uuids).count
+  end
+
   test "Container locked cancel" do
     set_user_from_auth :active
     c, _ = minimal_new
index a50853837085f6b7a6fd89bb61eba381dc9f6098..b3c530e69013e91dfb7599c677347ecef3856d78 100644 (file)
@@ -8,7 +8,6 @@ import (
        "encoding/json"
        "fmt"
        "io"
-       "io/ioutil"
        "os"
        "os/exec"
        "path/filepath"
@@ -25,16 +24,17 @@ var (
 
 // procinfo is saved in each process's lockfile.
 type procinfo struct {
-       UUID   string
-       PID    int
-       Stdout string
-       Stderr string
+       UUID string
+       PID  int
 }
 
 // Detach acquires a lock for the given uuid, and starts the current
 // program as a child process (with -no-detach prepended to the given
 // arguments so the child knows not to detach again). The lock is
 // passed along to the child process.
+//
+// Stdout and stderr in the child process are sent to the systemd
+// journal using the systemd-cat program.
 func Detach(uuid string, args []string, stdout, stderr io.Writer) int {
        return exitcode(stderr, detach(uuid, args, stdout, stderr))
 }
@@ -49,14 +49,15 @@ func detach(uuid string, args []string, stdout, stderr io.Writer) error {
                        return nil, err
                }
                defer dirlock.Close()
-               lockfile, err := os.OpenFile(filepath.Join(lockdir, lockprefix+uuid+locksuffix), os.O_CREATE|os.O_RDWR, 0700)
+               lockfilename := filepath.Join(lockdir, lockprefix+uuid+locksuffix)
+               lockfile, err := os.OpenFile(lockfilename, os.O_CREATE|os.O_RDWR, 0700)
                if err != nil {
-                       return nil, err
+                       return nil, fmt.Errorf("open %s: %s", lockfilename, err)
                }
                err = syscall.Flock(int(lockfile.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
                if err != nil {
                        lockfile.Close()
-                       return nil, err
+                       return nil, fmt.Errorf("lock %s: %s", lockfilename, err)
                }
                return lockfile, nil
        }()
@@ -66,21 +67,7 @@ func detach(uuid string, args []string, stdout, stderr io.Writer) error {
        defer lockfile.Close()
        lockfile.Truncate(0)
 
-       outfile, err := ioutil.TempFile("", "crunch-run-"+uuid+"-stdout-")
-       if err != nil {
-               return err
-       }
-       defer outfile.Close()
-       errfile, err := ioutil.TempFile("", "crunch-run-"+uuid+"-stderr-")
-       if err != nil {
-               os.Remove(outfile.Name())
-               return err
-       }
-       defer errfile.Close()
-
-       cmd := exec.Command(args[0], append([]string{"-no-detach"}, args[1:]...)...)
-       cmd.Stdout = outfile
-       cmd.Stderr = errfile
+       cmd := exec.Command("systemd-cat", append([]string{"--identifier=crunch-run", args[0], "-no-detach"}, args[1:]...)...)
        // Child inherits lockfile.
        cmd.ExtraFiles = []*os.File{lockfile}
        // Ensure child isn't interrupted even if we receive signals
@@ -89,24 +76,14 @@ func detach(uuid string, args []string, stdout, stderr io.Writer) error {
        cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
        err = cmd.Start()
        if err != nil {
-               os.Remove(outfile.Name())
-               os.Remove(errfile.Name())
-               return err
+               return fmt.Errorf("exec %s: %s", cmd.Path, err)
        }
 
        w := io.MultiWriter(stdout, lockfile)
-       err = json.NewEncoder(w).Encode(procinfo{
-               UUID:   uuid,
-               PID:    cmd.Process.Pid,
-               Stdout: outfile.Name(),
-               Stderr: errfile.Name(),
+       return json.NewEncoder(w).Encode(procinfo{
+               UUID: uuid,
+               PID:  cmd.Process.Pid,
        })
-       if err != nil {
-               os.Remove(outfile.Name())
-               os.Remove(errfile.Name())
-               return err
-       }
-       return nil
 }
 
 // KillProcess finds the crunch-run process corresponding to the given
@@ -123,14 +100,14 @@ func kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) error {
        if os.IsNotExist(err) {
                return nil
        } else if err != nil {
-               return err
+               return fmt.Errorf("open %s: %s", path, err)
        }
        defer f.Close()
 
        var pi procinfo
        err = json.NewDecoder(f).Decode(&pi)
        if err != nil {
-               return fmt.Errorf("%s: %s\n", path, err)
+		return fmt.Errorf("decode %s: %s", path, err)
        }
 
        if pi.UUID != uuid || pi.PID == 0 {
@@ -139,7 +116,7 @@ func kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) error {
 
        proc, err := os.FindProcess(pi.PID)
        if err != nil {
-               return err
+               return fmt.Errorf("%s: find process %d: %s", uuid, pi.PID, err)
        }
 
        err = proc.Signal(signal)
@@ -147,16 +124,19 @@ func kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) error {
                err = proc.Signal(syscall.Signal(0))
        }
        if err == nil {
-               return fmt.Errorf("pid %d: sent signal %d (%s) but process is still alive", pi.PID, signal, signal)
+               return fmt.Errorf("%s: pid %d: sent signal %d (%s) but process is still alive", uuid, pi.PID, signal, signal)
        }
-       fmt.Fprintf(stderr, "pid %d: %s\n", pi.PID, err)
+       fmt.Fprintf(stderr, "%s: pid %d: %s\n", uuid, pi.PID, err)
        return nil
 }
 
 // List UUIDs of active crunch-run processes.
 func ListProcesses(stdout, stderr io.Writer) int {
-       return exitcode(stderr, filepath.Walk(lockdir, func(path string, info os.FileInfo, err error) error {
-               if info.IsDir() {
+       // filepath.Walk does not follow symlinks, so we must walk
+       // lockdir+"/." in case lockdir itself is a symlink.
+       walkdir := lockdir + "/."
+       return exitcode(stderr, filepath.Walk(walkdir, func(path string, info os.FileInfo, err error) error {
+               if info.IsDir() && path != walkdir {
                        return filepath.SkipDir
                }
                if name := info.Name(); !strings.HasPrefix(name, lockprefix) || !strings.HasSuffix(name, locksuffix) {
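
As an aside on the hunk above: filepath.Walk lstat()s its root and will not traverse a symlink, so walking lockdir+"/." forces resolution of a symlinked lockdir. A minimal, self-contained Go sketch of the same trick; the /tmp/locks path is a placeholder, not the real crunch-run lock directory.

    // Walk dir+"/." so a symlinked directory is still traversed;
    // filepath.Walk alone would stop at the link itself.
    package main

    import (
        "fmt"
        "os"
        "path/filepath"
    )

    func main() {
        lockdir := "/tmp/locks" // placeholder; may itself be a symlink
        walkdir := lockdir + "/."
        filepath.Walk(walkdir, func(path string, info os.FileInfo, err error) error {
            if err != nil {
                return err
            }
            if info.IsDir() && path != walkdir {
                // Only scan the top-level directory.
                return filepath.SkipDir
            }
            fmt.Println(path)
            return nil
        })
    }
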
@@ -186,7 +166,7 @@ func ListProcesses(stdout, stderr io.Writer) int {
                        err := os.Remove(path)
                        dirlock.Close()
                        if err != nil {
-                               fmt.Fprintln(stderr, err)
+                               fmt.Fprintf(stderr, "unlink %s: %s\n", f.Name(), err)
                        }
                        return nil
                }
@@ -224,14 +204,15 @@ func exitcode(stderr io.Writer, err error) int {
 //
 // Caller releases the lock by closing the returned file.
 func lockall() (*os.File, error) {
-       f, err := os.OpenFile(filepath.Join(lockdir, lockprefix+"all"+locksuffix), os.O_CREATE|os.O_RDWR, 0700)
+       lockfile := filepath.Join(lockdir, lockprefix+"all"+locksuffix)
+       f, err := os.OpenFile(lockfile, os.O_CREATE|os.O_RDWR, 0700)
        if err != nil {
-               return nil, err
+               return nil, fmt.Errorf("open %s: %s", lockfile, err)
        }
        err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
        if err != nil {
                f.Close()
-               return nil, err
+               return nil, fmt.Errorf("lock %s: %s", lockfile, err)
        }
        return f, nil
 }
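
A compact Go sketch of the lock-and-record pattern the detach() and lockall() changes above revolve around: create a lockfile, take an exclusive flock (non-blocking for the per-UUID case), and record the owner as JSON. Directory, filename, and UUID below are illustrative placeholders.

    package main

    import (
        "encoding/json"
        "fmt"
        "os"
        "path/filepath"
        "syscall"
    )

    // procinfo mirrors the structure written to each lockfile above.
    type procinfo struct {
        UUID string
        PID  int
    }

    func lockAndRecord(dir, uuid string) (*os.File, error) {
        name := filepath.Join(dir, "crunch-run-"+uuid+".lock") // illustrative name
        f, err := os.OpenFile(name, os.O_CREATE|os.O_RDWR, 0700)
        if err != nil {
            return nil, fmt.Errorf("open %s: %s", name, err)
        }
        // LOCK_NB makes Flock fail immediately if another process
        // already holds the lock, instead of blocking.
        if err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil {
            f.Close()
            return nil, fmt.Errorf("lock %s: %s", name, err)
        }
        f.Truncate(0)
        return f, json.NewEncoder(f).Encode(procinfo{UUID: uuid, PID: os.Getpid()})
    }

    func main() {
        f, err := lockAndRecord(os.TempDir(), "zzzzz-dz642-000000000000000")
        if err != nil {
            fmt.Fprintln(os.Stderr, err)
            os.Exit(1)
        }
        defer f.Close() // closing the file releases the flock
    }

Because the detached child inherits the open lockfile descriptor (via cmd.ExtraFiles above), the flock outlives the parent; the lockfile, not a PID file alone, is the source of truth.
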
index 2b9a119581dfd7c4f3245b1e57317ae95155f5b9..0576337aa13c280841187db3a7aea2dcf4af65c0 100644 (file)
@@ -1737,6 +1737,7 @@ func main() {
        cgroupParentSubsystem := flag.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container")
        caCertsPath := flag.String("ca-certs", "", "Path to TLS root certificates")
        detach := flag.Bool("detach", false, "Detach from parent process and run in the background")
+       stdinEnv := flag.Bool("stdin-env", false, "Load environment variables from JSON message on stdin")
        sleep := flag.Duration("sleep", 0, "Delay before starting (testing use only)")
        kill := flag.Int("kill", -1, "Send signal to an existing crunch-run process for given UUID")
        list := flag.Bool("list", false, "List UUIDs of existing crunch-run processes")
@@ -1766,6 +1767,13 @@ func main() {
 
        flag.Parse()
 
+       if *stdinEnv && !ignoreDetachFlag {
+		// Load environment variables from stdin if requested
+		// (but not in a detached child process, where stdin is
+		// /dev/null).
+               loadEnv(os.Stdin)
+       }
+
        switch {
        case *detach && !ignoreDetachFlag:
                os.Exit(Detach(flag.Arg(0), os.Args, os.Stdout, os.Stderr))
@@ -1856,3 +1864,21 @@ func main() {
                log.Fatalf("%s: %v", containerId, runerr)
        }
 }
+
+func loadEnv(rdr io.Reader) {
+       buf, err := ioutil.ReadAll(rdr)
+       if err != nil {
+               log.Fatalf("read stdin: %s", err)
+       }
+       var env map[string]string
+       err = json.Unmarshal(buf, &env)
+       if err != nil {
+               log.Fatalf("decode stdin: %s", err)
+       }
+       for k, v := range env {
+               err = os.Setenv(k, v)
+               if err != nil {
+                       log.Fatalf("setenv(%q): %s", k, err)
+               }
+       }
+}
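
To show how the new -stdin-env flag is meant to be driven, here is a hedged Go sketch of a parent process feeding crunch-run a JSON object of environment variables on stdin; the UUID and variable values are placeholders.

    package main

    import (
        "bytes"
        "encoding/json"
        "log"
        "os/exec"
    )

    func main() {
        env := map[string]string{
            "ARVADOS_API_HOST":  "zzzzz.example.org", // placeholder
            "ARVADOS_API_TOKEN": "placeholdertoken",  // placeholder
        }
        buf, err := json.Marshal(env)
        if err != nil {
            log.Fatal(err)
        }
        // loadEnv() above expects exactly this: a single JSON object
        // of string keys and string values on stdin.
        cmd := exec.Command("crunch-run", "-stdin-env", "zzzzz-dz642-000000000000000")
        cmd.Stdin = bytes.NewReader(buf)
        if err := cmd.Run(); err != nil {
            log.Fatal(err)
        }
    }
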
diff --git a/services/dockercleaner/README.rst b/services/dockercleaner/README.rst
new file mode 100644 (file)
index 0000000..dd2b7e9
--- /dev/null
@@ -0,0 +1,5 @@
+.. Copyright (C) The Arvados Authors. All rights reserved.
+..
+.. SPDX-License-Identifier: Apache-2.0
+
+Arvados Docker Cleaner.
index 0221707cf410f7a0e417a42ccfda8afa27d5bf70..29697e440a3dfb5a76f3752b97185301248cdb70 100644 (file)
@@ -23,7 +23,7 @@ RestartPreventExitStatus=2
 # This unwieldy ExecStart command detects at runtime whether
 # arvados-docker-cleaner is installed with the Python 3.3 Software
 # Collection, and if so, invokes it with the "scl" wrapper.
-ExecStart=/bin/sh -c 'if [ -e /opt/rh/python33/root/bin/arvados-docker-cleaner ]; then exec scl enable python33 arvados-docker-cleaner; else exec arvados-docker-cleaner; fi'
+ExecStart=/bin/sh -c 'if [ -e /opt/rh/rh-python35/root/bin/arvados-docker-cleaner ]; then exec scl enable rh-python35 arvados-docker-cleaner; else exec arvados-docker-cleaner; fi'
 
 # systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
 StartLimitInterval=0
diff --git a/services/dockercleaner/bin/arvados-docker-cleaner b/services/dockercleaner/bin/arvados-docker-cleaner
new file mode 100755 (executable)
index 0000000..c00593f
--- /dev/null
@@ -0,0 +1,9 @@
+#!/usr/bin/env python
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+from __future__ import absolute_import, print_function
+
+from arvados_docker.cleaner import main
+main()
index 6cf69fb782ee5ce876fe9ae6a64a6c7dbce3239d..fd94ef7afa3340edea84a486e4abd03810fe1b8d 100644 (file)
@@ -3,3 +3,12 @@
 # SPDX-License-Identifier: AGPL-3.0
 
 fpm_depends+=(fuse)
+
+case "$TARGET" in
+    centos*)
+        fpm_depends+=(fuse-libs)
+        ;;
+    debian* | ubuntu*)
+        fpm_depends+=(libcurl3-gnutls libpython2.7)
+        ;;
+esac
index 5da2055b7736d117f6a7015a8486a948ee80a4d7..4f7339facf4ace001ac886a5076afc217e040c18 100644 (file)
@@ -603,6 +603,9 @@ func (v *AzureBlobVolume) translateError(err error) error {
        switch {
        case err == nil:
                return err
+       case strings.Contains(err.Error(), "StatusCode=503"):
+               // "storage: service returned error: StatusCode=503, ErrorCode=ServerBusy, ErrorMessage=The server is busy" (See #14804)
+               return VolumeBusyError
        case strings.Contains(err.Error(), "Not Found"):
                // "storage: service returned without a response body (404 Not Found)"
                return os.ErrNotExist
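
The hunk above extends keepstore's string-matching translation of storage-provider errors. A minimal sketch of the same technique, with ErrBusy standing in for VolumeBusyError (the real error values live elsewhere in keepstore):

    package main

    import (
        "errors"
        "fmt"
        "os"
        "strings"
    )

    var ErrBusy = errors.New("volume backend busy") // stand-in for VolumeBusyError

    func translate(err error) error {
        switch {
        case err == nil:
            return nil
        case strings.Contains(err.Error(), "StatusCode=503"):
            // Transient ServerBusy responses become a retryable error.
            return ErrBusy
        case strings.Contains(err.Error(), "Not Found"):
            return os.ErrNotExist
        default:
            return err
        }
    }

    func main() {
        err := errors.New("storage: service returned error: StatusCode=503, ErrorCode=ServerBusy")
        fmt.Println(translate(err) == ErrBusy) // true
    }
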
index c37a4d112fb8b86aaa076431f08524930ce83d0b..32b360b1276940c9da69bc4b44b02785ffefc97f 100644 (file)
@@ -49,6 +49,7 @@ type RequestTester struct {
 //   - permissions on, authenticated request, unsigned locator
 //   - permissions on, unauthenticated request, signed locator
 //   - permissions on, authenticated request, expired locator
+//   - permissions on, authenticated request, signed locator, transient error from backend
 //
 func TestGetHandler(t *testing.T) {
        defer teardown()
@@ -151,6 +152,23 @@ func TestGetHandler(t *testing.T) {
        ExpectStatusCode(t,
                "Authenticated request, expired locator",
                ExpiredError.HTTPCode, response)
+
+       // Authenticated request, signed locator
+       // => 503 Server busy (transient error)
+
+	// Set up the block-owning volume to respond with errors
+       vols[0].(*MockVolume).Bad = true
+       vols[0].(*MockVolume).BadVolumeError = VolumeBusyError
+       response = IssueRequest(&RequestTester{
+               method:   "GET",
+               uri:      signedLocator,
+               apiToken: knownToken,
+       })
+       // A transient error from one volume while the other doesn't find the block
+       // should make the service return a 503 so that clients can retry.
+       ExpectStatusCode(t,
+               "Volume backend busy",
+               503, response)
 }
 
 // Test PutBlockHandler on the following situations:
index e079b96784a16b985ed6ce47f99655e39a571ce9..2a1bbc972ffa6e4fe0675291b0c923efc4d4ac8d 100644 (file)
@@ -20,11 +20,10 @@ import (
        "sync"
        "time"
 
-       "github.com/gorilla/mux"
-
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/health"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
+       "github.com/gorilla/mux"
 )
 
 type router struct {
@@ -669,6 +668,11 @@ func GetBlock(ctx context.Context, hash string, buf []byte, resp http.ResponseWr
                        if !os.IsNotExist(err) {
                                log.Printf("%s: Get(%s): %s", vol, hash, err)
                        }
+			// If a volume returns a transient error, return it to the
+			// caller instead of "Not found" so the caller can retry.
+                       if err == VolumeBusyError {
+                               errorToCaller = err.(*KeepError)
+                       }
                        continue
                }
                // Check the file checksum.
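
Propagating VolumeBusyError as a 503 only helps if clients treat 503 as retryable. A hedged sketch of such a client-side loop; the URL, locator, and retry policy are illustrative, not taken from the Keep client library.

    package main

    import (
        "fmt"
        "net/http"
        "os"
        "time"
    )

    func getBlock(url string) (*http.Response, error) {
        backoff := time.Second
        for tries := 0; ; tries++ {
            resp, err := http.Get(url)
            if err != nil {
                return nil, err
            }
            if resp.StatusCode != http.StatusServiceUnavailable || tries >= 3 {
                return resp, nil
            }
            // 503 means "volume backend busy": close this response
            // and retry after a delay instead of reporting failure.
            resp.Body.Close()
            time.Sleep(backoff)
            backoff *= 2
        }
    }

    func main() {
        resp, err := getBlock("http://keep.example/acbd18db4cc2f85cedef654fccc4a4d8+3") // placeholder locator
        if err != nil {
            fmt.Fprintln(os.Stderr, err)
            return
        }
        defer resp.Body.Close()
        fmt.Println(resp.Status)
    }
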
index 6ae414bf931ce9164f7beefcc0d9be294da6e9c5..a6c8cd99545c24fdc2a56f6c2ff1866682a6ed6d 100644 (file)
@@ -50,6 +50,7 @@ var (
        DiskHashError       = &KeepError{500, "Hash mismatch in stored data"}
        ExpiredError        = &KeepError{401, "Expired permission signature"}
        NotFoundError       = &KeepError{404, "Not Found"}
+       VolumeBusyError     = &KeepError{503, "Volume backend busy"}
        GenericError        = &KeepError{500, "Fail"}
        FullError           = &KeepError{503, "Full"}
        SizeRequiredError   = &KeepError{411, "Missing Content-Length"}
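
For readers outside this file: each KeepError value pairs a message with the HTTP status the handler should send, which is what lets GetBlock simply return errorToCaller. A sketch of the idea, with field names assumed from the literals above:

    package main

    import (
        "fmt"
        "net/http"
    )

    // KeepError carries the HTTP status code to report alongside the
    // message; field names here are assumptions based on the literals
    // shown in the hunk above.
    type KeepError struct {
        HTTPCode int
        ErrMsg   string
    }

    func (e *KeepError) Error() string { return e.ErrMsg }

    var VolumeBusyError = &KeepError{503, "Volume backend busy"}

    // sendError lets a handler report the error's own status code
    // without a separate error-to-status mapping table.
    func sendError(w http.ResponseWriter, err *KeepError) {
        http.Error(w, err.ErrMsg, err.HTTPCode)
    }

    func main() {
        fmt.Println(VolumeBusyError.HTTPCode, VolumeBusyError.Error())
    }
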
index 26d49946a4555647f88e41658d2f7a2f949c830b..d1d380466ba5983d4a7752c95ff47cf3e9312a75 100644 (file)
@@ -7,6 +7,7 @@ package main
 import (
        "bytes"
        "context"
+       "errors"
        "fmt"
        "io/ioutil"
        "os"
@@ -165,6 +166,7 @@ func TestPutBlockOneVol(t *testing.T) {
 
        vols := KeepVM.AllWritable()
        vols[0].(*MockVolume).Bad = true
+       vols[0].(*MockVolume).BadVolumeError = errors.New("Bad volume")
 
        // Check that PutBlock stores the data as expected.
        if n, err := PutBlock(context.Background(), TestBlock, TestHash); err != nil || n < 1 {
index 43ddd090cc1cfd22419e80aa86f1e838ffebd479..046f3fac2e0c8c27081c22fea69a0aae7f02acda 100644 (file)
@@ -40,7 +40,8 @@ type MockVolume struct {
        Timestamps map[string]time.Time
 
        // Bad volumes return an error for every operation.
-       Bad bool
+       Bad            bool
+       BadVolumeError error
 
        // Touchable volumes' Touch() method succeeds for a locator
        // that has been Put().
@@ -104,7 +105,7 @@ func (v *MockVolume) Compare(ctx context.Context, loc string, buf []byte) error
        v.gotCall("Compare")
        <-v.Gate
        if v.Bad {
-               return errors.New("Bad volume")
+               return v.BadVolumeError
        } else if block, ok := v.Store[loc]; ok {
                if fmt.Sprintf("%x", md5.Sum(block)) != loc {
                        return DiskHashError
@@ -122,7 +123,7 @@ func (v *MockVolume) Get(ctx context.Context, loc string, buf []byte) (int, erro
        v.gotCall("Get")
        <-v.Gate
        if v.Bad {
-               return 0, errors.New("Bad volume")
+               return 0, v.BadVolumeError
        } else if block, ok := v.Store[loc]; ok {
                copy(buf[:len(block)], block)
                return len(block), nil
@@ -134,7 +135,7 @@ func (v *MockVolume) Put(ctx context.Context, loc string, block []byte) error {
        v.gotCall("Put")
        <-v.Gate
        if v.Bad {
-               return errors.New("Bad volume")
+               return v.BadVolumeError
        }
        if v.Readonly {
                return MethodDisabledError
@@ -162,7 +163,7 @@ func (v *MockVolume) Mtime(loc string) (time.Time, error) {
        var mtime time.Time
        var err error
        if v.Bad {
-               err = errors.New("Bad volume")
+               err = v.BadVolumeError
        } else if t, ok := v.Timestamps[loc]; ok {
                mtime = t
        } else {
index 605e8540ee1df59d3b96618ebef50f4b39567384..b64aab2dc6cb0e189341ab93d175e27d38a659ce 100644 (file)
@@ -24,7 +24,7 @@ Gem::Specification.new do |s|
   s.files       = ["bin/arvados-login-sync", "agpl-3.0.txt"]
   s.executables << "arvados-login-sync"
   s.required_ruby_version = '>= 2.1.0'
-  s.add_runtime_dependency 'arvados', '~> 1.2.0', '>= 1.2.0'
+  s.add_runtime_dependency 'arvados', '~> 1.3.0', '>= 1.3.0'
   s.homepage    =
     'https://arvados.org'
 end
diff --git a/services/nodemanager/fpm-info.sh b/services/nodemanager/fpm-info.sh
new file mode 100644 (file)
index 0000000..c4a9dbb
--- /dev/null
@@ -0,0 +1,9 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+case "$TARGET" in
+    debian* | ubuntu*)
+        fpm_depends+=(libcurl3-gnutls libpython2.7)
+        ;;
+esac
index 7bd1498158304ea3ab8a969c0c90129241ab1028..63bdb49e5b6ad8675b26077deba8e29ead2e5b0f 100644 (file)
@@ -44,7 +44,7 @@ type v0session struct {
        permChecker   permChecker
        subscriptions []v0subscribe
        lastMsgID     uint64
-       log           *logrus.Entry
+       log           logrus.FieldLogger
        mtx           sync.Mutex
        setupOnce     sync.Once
 }
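
Widening the field from *logrus.Entry to the logrus.FieldLogger interface lets callers supply either a bare *logrus.Logger or a *logrus.Entry with fields already bound. A small sketch (import path assumed to be the upstream github.com/sirupsen/logrus):

    package main

    import (
        "github.com/sirupsen/logrus"
    )

    // session's log field accepts any FieldLogger: both *logrus.Logger
    // and *logrus.Entry satisfy the interface.
    type session struct {
        log logrus.FieldLogger
    }

    func main() {
        base := logrus.New()

        // A bare logger...
        s1 := session{log: base}
        s1.log.Info("plain logger")

        // ...or an entry with fields bound ahead of time.
        s2 := session{log: base.WithField("client", "127.0.0.1")}
        s2.log.Info("entry with fields")
    }
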
index a258b8e2329eeb2aa1c6b6707678e89d750849df..66aebf80d4236d6b575a178641bd5eae8c14a1bc 100755 (executable)
@@ -361,7 +361,12 @@ case "$subcmd" in
         ;;
 
     sh*)
-        exec docker exec -ti -e LINES=$(tput lines) -e COLUMNS=$(tput cols) -e TERM=$TERM -e GEM_HOME=/var/lib/gems $ARVBOX_CONTAINER /bin/bash
+        exec docker exec -ti \
+              -e LINES=$(tput lines) \
+              -e COLUMNS=$(tput cols) \
+              -e TERM=$TERM \
+              -e GEM_HOME=/var/lib/gems \
+              $ARVBOX_CONTAINER /bin/bash
         ;;
 
     pipe)
index 1c1ad17814b5e4b8f12ce51450afee5dcf8da42a..162edc927fe04a566422a53aca7735ee1bd31096 100644 (file)
@@ -19,7 +19,8 @@ RUN apt-get update && \
     apt-transport-https ca-certificates slurm-wlm \
     linkchecker python3-virtualenv python-virtualenv xvfb iceweasel \
     libgnutls28-dev python3-dev vim cadaver cython gnupg dirmngr \
-    libsecret-1-dev r-base r-cran-testthat libxml2-dev pandoc && \
+    libsecret-1-dev r-base r-cran-testthat libxml2-dev pandoc \
+    python3-setuptools python3-pip && \
     apt-get clean
 
 ENV RUBYVERSION_MINOR 2.3
index 56d0fa01351c20e02039ca0d801dcf3e8ca10cbd..bbd11f03416a9783904a48cb6823136ceb5c0686 100644 (file)
@@ -66,6 +66,7 @@ run_bundler() {
     fi
 }
 
+PYCMD=""
 pip_install() {
     pushd /var/lib/pip
     for p in $(ls http*.tar.gz) $(ls http*.tar.bz2) $(ls http*.whl) $(ls http*.zip) ; do
@@ -75,7 +76,13 @@ pip_install() {
     done
     popd
 
-    if ! pip install --no-index --find-links /var/lib/pip $1 ; then
-        pip install $1
+    if [ "$PYCMD" = "python3" ]; then
+       if ! pip3 install --no-index --find-links /var/lib/pip $1 ; then
+            pip3 install $1
+       fi
+    else
+       if ! pip install --no-index --find-links /var/lib/pip $1 ; then
+            pip install $1
+       fi
     fi
 }
index 2b802f2ab09c0be2b4aa43c934970f0e6706dd92..1b062ad8d131c141dd55a18bf0a474a6991a0186 100755 (executable)
@@ -37,6 +37,13 @@ if test ! -s /var/lib/arvados/root-cert.pem ; then
 fi
 
 if test ! -s /var/lib/arvados/server-cert-${localip}.pem ; then
+
+    if [[ $localip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
+       san=IP:$localip
+    else
+       san=DNS:$localip
+    fi
+
     # req           signing request sub-command
     # -new          new certificate request
     # -nodes        "no des" don't encrypt key
@@ -56,7 +63,7 @@ if test ! -s /var/lib/arvados/server-cert-${localip}.pem ; then
            -reqexts x509_ext \
            -extensions x509_ext \
            -config <(cat /etc/ssl/openssl.cnf \
-                         <(printf "\n[x509_ext]\nkeyUsage=critical,digitalSignature,keyEncipherment\nsubjectAltName=DNS:localhost,IP:$localip")) \
+                         <(printf "\n[x509_ext]\nkeyUsage=critical,digitalSignature,keyEncipherment\nsubjectAltName=DNS:localhost,$san")) \
             -out /var/lib/arvados/server-cert-${localip}.csr \
             -keyout /var/lib/arvados/server-cert-${localip}.key \
             -days 365
@@ -69,7 +76,7 @@ if test ! -s /var/lib/arvados/server-cert-${localip}.pem ; then
            -out /var/lib/arvados/server-cert-${localip}.pem \
            -set_serial $RANDOM$RANDOM \
            -extfile <(cat /etc/ssl/openssl.cnf \
-                         <(printf "\n[x509_ext]\nkeyUsage=critical,digitalSignature,keyEncipherment\nsubjectAltName=DNS:localhost,IP:$localip")) \
+                         <(printf "\n[x509_ext]\nkeyUsage=critical,digitalSignature,keyEncipherment\nsubjectAltName=DNS:localhost,$san")) \
            -extensions x509_ext
 
     chown arvbox:arvbox /var/lib/arvados/server-cert-${localip}.*
@@ -78,4 +85,4 @@ fi
 cp /var/lib/arvados/root-cert.pem /usr/local/share/ca-certificates/arvados-testing-cert.crt
 update-ca-certificates
 
-sv stop certificate
\ No newline at end of file
+sv stop certificate
index 68c87233f0001b25a05e38917a3b1356fa49822c..6f13ee0278f8c67c333b03f338c998c741a8d9a8 100755 (executable)
@@ -44,6 +44,7 @@ $RAILS_ENV:
   arvados_docsite: http://$localip:${services[doc]}/
   force_ssl: false
   composer_url: http://$localip:${services[composer]}
+  workbench2_url: https://$localip:${services[workbench2-ssl]}
 EOF
 
 bundle exec rake assets:precompile
diff --git a/tools/crunchstat-summary/README.rst b/tools/crunchstat-summary/README.rst
new file mode 100644 (file)
index 0000000..fa8e1bd
--- /dev/null
@@ -0,0 +1,5 @@
+.. Copyright (C) The Arvados Authors. All rights reserved.
+..
+.. SPDX-License-Identifier: Apache-2.0
+
+Arvados Crunchstat Summary.
diff --git a/tools/crunchstat-summary/fpm-info.sh b/tools/crunchstat-summary/fpm-info.sh
new file mode 100644 (file)
index 0000000..0abc6a0
--- /dev/null
@@ -0,0 +1,9 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+case "$TARGET" in
+    debian* | ubuntu*)
+        fpm_depends+=(libcurl3-gnutls)
+        ;;
+esac
index 04adba8494e20e5151b972ac7b7a3f547e09dd44..642428181c685858fa1b6ee4d4861c6836e3e211 100755 (executable)
@@ -23,7 +23,7 @@ if '--short-tests-only' in sys.argv:
 
 setup(name='crunchstat_summary',
       version=version,
-      description='read crunch log files and summarize resource usage',
+      description='Arvados crunchstat-summary reads crunch log files and summarizes resource usage',
       author='Arvados',
       author_email='info@arvados.org',
       url="https://arvados.org",
index ec5fa21e2a61b6e5d462246e898d7257a22f9d66..db69f9fa46832dc36aff1ef20bd176fb7d5c22e5 100644 (file)
                        "revision": "2bb1b664bcff821e02b2a0644cd29c7e824d54f8",
                        "revisionTime": "2015-08-17T12:26:01Z"
                },
+               {
+                       "checksumSHA1": "X7g98YfLr+zM7aN76AZvAfpZyfk=",
+                       "path": "github.com/julienschmidt/httprouter",
+                       "revision": "adbc77eec0d91467376ca515bc3a14b8434d0f18",
+                       "revisionTime": "2018-04-11T15:45:01Z"
+               },
                {
                        "checksumSHA1": "oX6jFQD74oOApvDIhOzW2dXpg5Q=",
                        "path": "github.com/kevinburke/ssh_config",
                        "revision": "b8bc1bf767474819792c23f32d8286a45736f1c6",
                        "revisionTime": "2016-12-03T19:45:07Z"
                },
-               {
-                       "checksumSHA1": "ewGq4nGalpCQOHcmBTdAEQx1wW0=",
-                       "path": "github.com/mitchellh/mapstructure",
-                       "revision": "bb74f1db0675b241733089d5a1faa5dd8b0ef57b",
-                       "revisionTime": "2018-05-11T14:21:26Z"
-               },
                {
                        "checksumSHA1": "OFNit1Qx2DdWhotfREKodDNUwCM=",
                        "path": "github.com/opencontainers/go-digest",