9945: Merge branch 'master' into 9945-make-python-package-dependency-free
author Ward Vandewege <wvandewege@veritasgenetics.com>
Fri, 1 Feb 2019 15:39:16 +0000 (10:39 -0500)
committer Ward Vandewege <wvandewege@veritasgenetics.com>
Fri, 1 Feb 2019 15:39:37 +0000 (10:39 -0500)
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <wvandewege@veritasgenetics.com>

103 files changed:
.licenseignore
README.md
apps/workbench/Gemfile.lock
apps/workbench/app/helpers/application_helper.rb
apps/workbench/app/views/users/_virtual_machines.html.erb
apps/workbench/package-build.version [new file with mode: 0644]
apps/workbench/test/the.patch [new file with mode: 0644]
build/run-build-packages.sh
build/run-tests.sh
cmd/arvados-server/cmd.go
cmd/arvados-server/crunch-dispatch-cloud.service [new file with mode: 0644]
doc/README.textile
doc/Rakefile
doc/_config.yml
doc/admin/upgrading.html.textile.liquid
doc/index.html.liquid
doc/install/install-controller.html.textile.liquid
doc/user/getting_started/community.html.textile.liquid
doc/user/getting_started/ssh-access-unix.html.textile.liquid
lib/cloud/interfaces.go [new file with mode: 0644]
lib/cmd/cmd.go
lib/dispatchcloud/cmd.go [new file with mode: 0644]
lib/dispatchcloud/container/queue.go [new file with mode: 0644]
lib/dispatchcloud/dispatcher.go [new file with mode: 0644]
lib/dispatchcloud/dispatcher_test.go [new file with mode: 0644]
lib/dispatchcloud/driver.go [new file with mode: 0644]
lib/dispatchcloud/instance_set_proxy.go [new file with mode: 0644]
lib/dispatchcloud/logger.go [new file with mode: 0644]
lib/dispatchcloud/node_size.go
lib/dispatchcloud/node_size_test.go
lib/dispatchcloud/readme.go [new file with mode: 0644]
lib/dispatchcloud/scheduler/fix_stale_locks.go [new file with mode: 0644]
lib/dispatchcloud/scheduler/gocheck_test.go [new file with mode: 0644]
lib/dispatchcloud/scheduler/interfaces.go [new file with mode: 0644]
lib/dispatchcloud/scheduler/run_queue.go [new file with mode: 0644]
lib/dispatchcloud/scheduler/run_queue_test.go [new file with mode: 0644]
lib/dispatchcloud/scheduler/scheduler.go [new file with mode: 0644]
lib/dispatchcloud/scheduler/sync.go [new file with mode: 0644]
lib/dispatchcloud/ssh_executor/executor.go [new file with mode: 0644]
lib/dispatchcloud/ssh_executor/executor_test.go [new file with mode: 0644]
lib/dispatchcloud/test/doc.go [new file with mode: 0644]
lib/dispatchcloud/test/fixtures.go [new file with mode: 0644]
lib/dispatchcloud/test/lame_instance_set.go [new file with mode: 0644]
lib/dispatchcloud/test/queue.go [new file with mode: 0644]
lib/dispatchcloud/test/ssh_service.go [new file with mode: 0644]
lib/dispatchcloud/test/sshkey_dispatch [new file with mode: 0644]
lib/dispatchcloud/test/sshkey_dispatch.pub [new file with mode: 0644]
lib/dispatchcloud/test/sshkey_vm [new file with mode: 0644]
lib/dispatchcloud/test/sshkey_vm.pub [new file with mode: 0644]
lib/dispatchcloud/test/stub_driver.go [new file with mode: 0644]
lib/dispatchcloud/worker/gocheck_test.go [new file with mode: 0644]
lib/dispatchcloud/worker/pool.go [new file with mode: 0644]
lib/dispatchcloud/worker/pool_test.go [new file with mode: 0644]
lib/dispatchcloud/worker/worker.go [new file with mode: 0644]
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvjob.py
sdk/cwl/arvados_cwl/arvtool.py
sdk/cwl/arvados_cwl/arvworkflow.py
sdk/cwl/arvados_cwl/crunch_script.py
sdk/cwl/arvados_cwl/executor.py
sdk/cwl/arvados_cwl/pathmapper.py
sdk/cwl/arvados_cwl/runner.py
sdk/cwl/setup.py
sdk/cwl/test_with_arvbox.sh
sdk/cwl/tests/submit_test_job_missing.json [new file with mode: 0644]
sdk/cwl/tests/test_pathmapper.py
sdk/cwl/tests/test_submit.py
sdk/go/arvados/config.go
sdk/go/arvados/container.go
sdk/go/arvados/contextgroup.go [new file with mode: 0644]
sdk/go/arvados/fs_collection.go
sdk/go/arvados/fs_collection_test.go
sdk/go/arvados/throttle.go [new file with mode: 0644]
sdk/go/health/aggregator_test.go
sdk/python/arvados/arvfile.py
sdk/python/arvados/collection.py
sdk/python/arvados/commands/put.py
sdk/python/arvados/commands/run.py
sdk/python/arvados/keep.py
sdk/python/tests/test_arv_put.py
sdk/python/tests/test_arvfile.py
sdk/python/tests/test_collections.py
services/api/Gemfile.lock
services/api/app/controllers/arvados/v1/links_controller.rb
services/api/app/models/arvados_model.rb
services/api/app/models/collection.rb
services/api/app/models/container.rb
services/api/db/migrate/20181213183234_add_expression_index_to_links.rb [new file with mode: 0644]
services/api/db/structure.sql
services/api/lib/arvados_model_updates.rb
services/api/lib/record_filters.rb
services/api/test/fixtures/links.yml
services/api/test/functional/arvados/v1/links_controller_test.rb
services/api/test/unit/container_test.rb
services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
services/crunch-dispatch-slurm/node_type.go [new file with mode: 0644]
services/crunch-run/background.go [new file with mode: 0644]
services/crunch-run/crunchrun.go
services/crunch-run/crunchrun_test.go
tools/arvbox/lib/arvbox/docker/service/controller/run
tools/sync-groups/sync-groups.go
vendor/vendor.json

index 83c81b2fc21ca1db86206d82c2f50cae297b9b9c..06519a98e8bc45afcebdad584198a6b6bb47bf71 100644 (file)
@@ -71,4 +71,5 @@ sdk/R/NAMESPACE
 sdk/R/.Rbuildignore
 sdk/R/ArvadosR.Rproj
 *.Rd
+lib/dispatchcloud/test/sshkey_*
 *.asc
index 12fdd219fc698226033e0283baec5a7ad087e920..5843bb84da97eb737c7d8ef319d76a35433cea6d 100644 (file)
--- a/README.md
+++ b/README.md
@@ -44,11 +44,11 @@ doc/README.textile for instructions.
 
 ## Community
 
-The [#arvados](irc://irc.oftc.net:6667/#arvados) IRC (Internet Relay Chat)
-channel at the
-[Open and Free Technology Community (irc.oftc.net)](http://www.oftc.net/oftc/)
-is available for live discussion and support.  You can use a traditional IRC
-client or [join OFTC over the web.](https://webchat.oftc.net/?channels=arvados)
+[![Join the chat at https://gitter.im/curoverse/arvados](https://badges.gitter.im/curoverse/arvados.svg)](https://gitter.im/curoverse/arvados?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
+
+The [curoverse/arvados](https://gitter.im/curoverse/arvados) channel
+at [gitter.im](https://gitter.im) is available for live
+discussion and support.
 
 The
 [Arvados user mailing list](http://lists.arvados.org/mailman/listinfo/arvados)
index e06e416bbd8294f6a65febec3409bffb12b07b8d..cc45ca66f2cb9bb9f09efa4269f7533fe572bdb0 100644 (file)
@@ -9,38 +9,38 @@ GEM
   remote: https://rubygems.org/
   specs:
     RedCloth (4.3.2)
-    actionmailer (4.2.10)
-      actionpack (= 4.2.10)
-      actionview (= 4.2.10)
-      activejob (= 4.2.10)
+    actionmailer (4.2.11)
+      actionpack (= 4.2.11)
+      actionview (= 4.2.11)
+      activejob (= 4.2.11)
       mail (~> 2.5, >= 2.5.4)
       rails-dom-testing (~> 1.0, >= 1.0.5)
-    actionpack (4.2.10)
-      actionview (= 4.2.10)
-      activesupport (= 4.2.10)
+    actionpack (4.2.11)
+      actionview (= 4.2.11)
+      activesupport (= 4.2.11)
       rack (~> 1.6)
       rack-test (~> 0.6.2)
       rails-dom-testing (~> 1.0, >= 1.0.5)
       rails-html-sanitizer (~> 1.0, >= 1.0.2)
-    actionview (4.2.10)
-      activesupport (= 4.2.10)
+    actionview (4.2.11)
+      activesupport (= 4.2.11)
       builder (~> 3.1)
       erubis (~> 2.7.0)
       rails-dom-testing (~> 1.0, >= 1.0.5)
       rails-html-sanitizer (~> 1.0, >= 1.0.3)
-    activejob (4.2.10)
-      activesupport (= 4.2.10)
+    activejob (4.2.11)
+      activesupport (= 4.2.11)
       globalid (>= 0.3.0)
-    activemodel (4.2.10)
-      activesupport (= 4.2.10)
+    activemodel (4.2.11)
+      activesupport (= 4.2.11)
       builder (~> 3.1)
-    activerecord (4.2.10)
-      activemodel (= 4.2.10)
-      activesupport (= 4.2.10)
+    activerecord (4.2.11)
+      activemodel (= 4.2.11)
+      activesupport (= 4.2.11)
       arel (~> 6.0)
     activerecord-nulldb-adapter (0.3.8)
       activerecord (>= 2.0.0)
-    activesupport (4.2.10)
+    activesupport (4.2.11)
       i18n (~> 0.7)
       minitest (~> 5.1)
       thread_safe (~> 0.3, >= 0.3.4)
@@ -92,7 +92,7 @@ GEM
       execjs
     coffee-script-source (1.12.2)
     commonjs (0.2.7)
-    concurrent-ruby (1.0.5)
+    concurrent-ruby (1.1.4)
     crass (1.0.4)
     deep_merge (1.2.1)
     docile (1.1.5)
@@ -159,15 +159,15 @@ GEM
     loofah (2.2.3)
       crass (~> 1.0.2)
       nokogiri (>= 1.5.9)
-    mail (2.7.0)
+    mail (2.7.1)
       mini_mime (>= 0.1.1)
     memoist (0.16.0)
     metaclass (0.0.4)
     mime-types (3.1)
       mime-types-data (~> 3.2015)
     mime-types-data (3.2016.0521)
-    mini_mime (1.0.0)
-    mini_portile2 (2.3.0)
+    mini_mime (1.0.1)
+    mini_portile2 (2.4.0)
     minitest (5.10.3)
     mocha (1.3.0)
       metaclass (~> 0.0.1)
@@ -182,8 +182,8 @@ GEM
     net-ssh (4.2.0)
     net-ssh-gateway (2.0.0)
       net-ssh (>= 4.0.0)
-    nokogiri (1.8.5)
-      mini_portile2 (~> 2.3.0)
+    nokogiri (1.9.1)
+      mini_portile2 (~> 2.4.0)
     npm-rails (0.2.1)
       rails (>= 3.2)
     oj (3.6.4)
@@ -206,16 +206,16 @@ GEM
       rack (>= 1.2.0)
     rack-test (0.6.3)
       rack (>= 1.0)
-    rails (4.2.10)
-      actionmailer (= 4.2.10)
-      actionpack (= 4.2.10)
-      actionview (= 4.2.10)
-      activejob (= 4.2.10)
-      activemodel (= 4.2.10)
-      activerecord (= 4.2.10)
-      activesupport (= 4.2.10)
+    rails (4.2.11)
+      actionmailer (= 4.2.11)
+      actionpack (= 4.2.11)
+      actionview (= 4.2.11)
+      activejob (= 4.2.11)
+      activemodel (= 4.2.11)
+      activerecord (= 4.2.11)
+      activesupport (= 4.2.11)
       bundler (>= 1.3.0, < 2.0)
-      railties (= 4.2.10)
+      railties (= 4.2.11)
       sprockets-rails
     rails-deprecated_sanitizer (1.0.3)
       activesupport (>= 4.2.0.alpha)
@@ -226,12 +226,12 @@ GEM
     rails-html-sanitizer (1.0.4)
       loofah (~> 2.2, >= 2.2.2)
     rails-perftest (0.0.7)
-    railties (4.2.10)
-      actionpack (= 4.2.10)
-      activesupport (= 4.2.10)
+    railties (4.2.11)
+      actionpack (= 4.2.11)
+      activesupport (= 4.2.11)
       rake (>= 0.8.7)
       thor (>= 0.18.1, < 2.0)
-    rake (12.3.1)
+    rake (12.3.2)
     raphael-rails (2.1.2)
     rb-fsevent (0.10.3)
     rb-inotify (0.9.10)
@@ -286,7 +286,7 @@ GEM
     therubyracer (0.12.3)
       libv8 (~> 3.16.14.15)
       ref
-    thor (0.20.0)
+    thor (0.20.3)
     thread_safe (0.3.6)
     tilt (2.0.8)
     tzinfo (1.2.5)
@@ -356,4 +356,4 @@ DEPENDENCIES
   wiselinks
 
 BUNDLED WITH
-   1.16.3
+   1.17.2
index c4a801d68b0a645fe7c10de9cdee91f642ed4ab7..15bf77fa094f188e5b3f8be980c5c57e3d73bcfe 100644 (file)
@@ -15,6 +15,10 @@ module ApplicationHelper
     Rails.configuration.arvados_v1_base.gsub /https?:\/\/|\/arvados\/v1/,''
   end
 
+  def current_uuid_prefix
+    current_api_host[0..4]
+  end
+
   def render_markup(markup)
     allowed_tags = Rails::Html::Sanitizer.white_list_sanitizer.allowed_tags + %w(table tbody th tr td col colgroup caption thead tfoot)
     sanitize(raw(RedCloth.new(markup.to_s).to_html(:refs_arvados, :textile)), tags: allowed_tags) if markup
index 928f50f0b6f50fb0fab0701b7e8f61e7940847fd..c891b0c594af329b9f0a7790217596b11b0109fc 100644 (file)
@@ -85,7 +85,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
             <td style="word-break:break-all;">
               <% if @my_vm_logins[vm[:uuid]] %>
                 <% @my_vm_logins[vm[:uuid]].each do |login| %>
-                  <code>ssh&nbsp;<%= login %>@<%= vm[:hostname] %>.arvados</code>
+                  <code>ssh&nbsp;<%= login %>@<%= vm[:hostname] %>.<%= current_uuid_prefix || 'xyzzy' %></code>
                 <% end %>
               <% end %>
             </td>
@@ -106,7 +106,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
 </div>
 </div>
   <p>In order to access virtual machines using SSH, <%= link_to ssh_keys_user_path(current_user) do%> add an SSH key to your account<%end%> and add a section like this to your SSH configuration file ( <i>~/.ssh/config</i>):</p>
-    <pre>Host *.arvados
+    <pre>Host *.<%= current_uuid_prefix || 'xyzzy' %>
       TCPKeepAlive yes
       ServerAliveInterval 60
       ProxyCommand ssh -p2222 turnout@switchyard.<%= current_api_host || 'xyzzy.arvadosapi.com' %> -x -a $SSH_PROXY_FLAGS %h
diff --git a/apps/workbench/package-build.version b/apps/workbench/package-build.version
new file mode 100644 (file)
index 0000000..41eb2c7
--- /dev/null
@@ -0,0 +1 @@
+1.2.1.20181126194329
diff --git a/apps/workbench/test/the.patch b/apps/workbench/test/the.patch
new file mode 100644 (file)
index 0000000..5a55679
--- /dev/null
@@ -0,0 +1,3 @@
++    echo -n 'geckodriver: '
++    which geckodriver || fatal "No geckodriver. Unable to find Mozilla geckodriver. Please download the server from https://github.com/mozilla/geckodriver/releases and place it somewhere on your PATH. More info at https://developer.mozilla.org/en-US/docs/Mozilla/QA/Marionette/WebDriver."
+
index f8ac4fcf2073cd4d2558bb121fd672686da8caee..0919faf37f3056572987c9782fabcdc78530658c 100755 (executable)
@@ -303,6 +303,9 @@ package_go_binary cmd/arvados-server arvados-server \
     "Arvados server daemons"
 package_go_binary cmd/arvados-server arvados-controller \
     "Arvados cluster controller daemon"
+# No package until #14325
+#package_go_binary cmd/arvados-server crunch-dispatch-cloud \
+#    "Arvados cluster cloud dispatch"
 package_go_binary sdk/go/crunchrunner crunchrunner \
     "Crunchrunner executes a command inside a container and uploads the output"
 package_go_binary services/arv-git-httpd arvados-git-httpd \
index cd44347cbabed54d1d19dc8373383160fde7dc1a..cb44372566f8eb36ec6163f9108c97e464d6fca8 100755 (executable)
@@ -77,6 +77,10 @@ lib/cmd
 lib/controller
 lib/crunchstat
 lib/dispatchcloud
+lib/dispatchcloud/container
+lib/dispatchcloud/scheduler
+lib/dispatchcloud/ssh_executor
+lib/dispatchcloud/worker
 services/api
 services/arv-git-httpd
 services/crunchstat
@@ -926,6 +930,10 @@ gostuff=(
     lib/controller
     lib/crunchstat
     lib/dispatchcloud
+    lib/dispatchcloud/container
+    lib/dispatchcloud/scheduler
+    lib/dispatchcloud/ssh_executor
+    lib/dispatchcloud/worker
     sdk/go/arvados
     sdk/go/arvadosclient
     sdk/go/auth
index 1af3745df0c3a4c54d296e848532c1d19c124e43..cd15d25dda760a41c427b8bfd4b621fb43e2130a 100644 (file)
@@ -9,6 +9,7 @@ import (
 
        "git.curoverse.com/arvados.git/lib/cmd"
        "git.curoverse.com/arvados.git/lib/controller"
+       "git.curoverse.com/arvados.git/lib/dispatchcloud"
 )
 
 var (
@@ -18,7 +19,8 @@ var (
                "-version":  cmd.Version(version),
                "--version": cmd.Version(version),
 
-               "controller": controller.Command,
+               "controller":     controller.Command,
+               "dispatch-cloud": dispatchcloud.Command,
        })
 )
 
diff --git a/cmd/arvados-server/crunch-dispatch-cloud.service b/cmd/arvados-server/crunch-dispatch-cloud.service
new file mode 100644 (file)
index 0000000..f8d71c9
--- /dev/null
@@ -0,0 +1,28 @@
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+[Unit]
+Description=Arvados cloud dispatch
+Documentation=https://doc.arvados.org/
+After=network.target
+AssertPathExists=/etc/arvados/config.yml
+
+# systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
+StartLimitInterval=0
+
+# systemd>=230 (debian:9) obeys StartLimitIntervalSec in the [Unit] section
+StartLimitIntervalSec=0
+
+[Service]
+Type=notify
+EnvironmentFile=-/etc/arvados/environment
+ExecStart=/usr/bin/crunch-dispatch-cloud
+Restart=always
+RestartSec=1
+
+# systemd<=219 (centos:7, debian:8, ubuntu:trusty) obeys StartLimitInterval in the [Service] section
+StartLimitInterval=0
+
+[Install]
+WantedBy=multi-user.target
index 5059a07be5bff4140e329e891f12b26776b3ebaf..75a30e9ef2ade8cfa04d8dec2e6275750a41047d 100644 (file)
@@ -55,7 +55,7 @@ h2. Publish HTML pages inside Workbench
 
 (or some other web site)
 
-You can set @baseurl@ (the URL prefix for all internal links), @arvados_api_host@ and @arvados_workbench_host@ without changing @_config.yml@:
+You can set @baseurl@ (the URL prefix for all internal links), @arvados_cluster_uuid@, @arvados_api_host@ and @arvados_workbench_host@ without changing @_config.yml@:
 
 <pre>
 arvados/doc$ rake generate baseurl=/doc arvados_api_host=xyzzy.arvadosapi.com
index 079f7da27f46b52721849ae9539d6bbe4921dac0..9deca3a28cf8cc8c6911097aee68f01426d86177 100644 (file)
@@ -7,7 +7,7 @@ require "rubygems"
 require "colorize"
 
 task :generate => [ :realclean, 'sdk/python/arvados/index.html', 'sdk/R/arvados/index.html' ] do
-  vars = ['baseurl', 'arvados_api_host', 'arvados_workbench_host']
+  vars = ['baseurl', 'arvados_cluster_uuid', 'arvados_api_host', 'arvados_workbench_host']
   vars.each do |v|
     if ENV[v]
       website.config.h[v] = ENV[v]
index 94c95399662057d9f40d6733d9504419e8e2ed7f..1e17d047062efd8fbf324edcb57979ef83b740df 100644 (file)
@@ -12,6 +12,7 @@
 
 baseurl:
 arvados_api_host: localhost
+arvados_cluster_uuid: local
 arvados_workbench_host: http://localhost
 
 exclude: ["Rakefile", "tmp", "vendor"]
index 74e0357537eb135911797ec95e3a8cf77e4e2033..fc4ccac5f1fa67fe097ff47d6d2f4611b4b0eab3 100644 (file)
@@ -56,6 +56,14 @@ Or alternatively, by updating the shebang line at the start of the script to:
 </pre>
 </notextile>
 
+h3. v1.3.0 (2018-12-05)
+
+This release includes several database migrations, which will be executed automatically as part of the API server upgrade. On large Arvados installations, these migrations will take a while. We've seen the upgrade take 30 minutes or more on installations with a lot of collections.
+
+The @arvados-controller@ component now requires the /etc/arvados/config.yml file to be present. See <a href="{{ site.baseurl }}/install/install-controller.html#configuration">the @arvados-controller@ installation instructions</a>.
+
+Support for the deprecated "jobs" API is broken in this release.  Users who rely on it should not upgrade.  This will be fixed in an upcoming 1.3.1 patch release; however, users are "encouraged to migrate":upgrade-crunch2.html as support for the "jobs" API will be dropped in an upcoming release.  Users who are already using the "containers" API are not affected.
+
 h3. v1.2.1 (2018-11-26)
 
 There are no special upgrade notes for this release.
index 0e4aa558452ab678a7762ef12312724525fd9b18..ee3538778683be5f48d37730958e7ba8dd5c54f2 100644 (file)
@@ -36,7 +36,7 @@ SPDX-License-Identifier: CC-BY-SA-3.0
 
 <p>The recommended place to ask a question about Arvados is on Biostars. After you have <a href="//www.biostars.org/t/arvados/">read previous questions and answers</a> you can <a href="https://www.biostars.org/p/new/post/?tag_val=arvados">post your question using the 'arvados' tag</a>.</p>
 
-      <p>There is a <a href="http://lists.arvados.org/mailman/listinfo/arvados">mailing list</a>, and chat on IRC: <a href="irc://irc.oftc.net:6667/#arvados">#arvados</a> @ OFTC (you can <a href="https://webchat.oftc.net/?channels=arvados">join in your browser</a>).
+      <p>There is a <a href="http://lists.arvados.org/mailman/listinfo/arvados">mailing list</a>. The <a href="https://gitter.im/curoverse/arvados">#arvados channel</a> at gitter.im is available for live discussion and community support.
       </p>
 
       <p>Curoverse, a Veritas Genetics company, provides managed Arvados installations as well as commercial support for Arvados. Please visit <a href="https://curoverse.com">curoverse.com</a> or contact <a href="mailto:researchsales@veritasgenetics.com">researchsales@veritasgenetics.com</a> for more information.</p>
index ccb8d980aebc1f3f658a5ed603459ca15878736d..3e94b290d54076e77a12a44097061f6ed935f79f 100644 (file)
@@ -85,7 +85,7 @@ Restart Nginx to apply the new configuration.
 </code></pre>
 </notextile>
 
-h3. Configure arvados-controller
+h3(#configuration). Configure arvados-controller
 
 Create the cluster configuration file @/etc/arvados/config.yml@ using the following template.
 
index a089983cf629883569b92285fc5bc86e4c83bf63..40c67ad11371bf500b31baa0b8e49123c9173d31 100644 (file)
@@ -17,9 +17,9 @@ h2. Mailing lists
 
 The "Arvados user mailing list":http://lists.arvados.org/mailman/listinfo/arvados is a forum for general discussion, questions, and news about Arvados development.  The "Arvados developer mailing list":http://lists.arvados.org/mailman/listinfo/arvados-dev is a forum for more technical discussion, intended for developers and contributers to Arvados.
 
-h2. IRC
+h2. Chat
 
-The "#arvados":irc://irc.oftc.net:6667/#arvados IRC (Internet Relay Chat) channel at the "Open and Free Technology Community (irc.oftc.net)":http://www.oftc.net/oftc/ is available for live discussion and support.  You can use a traditional IRC client or "join OFTC over the web.":https://webchat.oftc.net/?channels=arvados
+The "curoverse/arvados channel":https://gitter.im/curoverse/arvados channel at "gitter.im":https://gitter.im is available for live discussion and support.
 
 h2. Bug tracking
 
index aeeb37579bcf13d3ef79943baf39ca22f77b5cf9..284d0a1f04aca0117e54737cffba8586c6a57188 100644 (file)
@@ -84,18 +84,17 @@ h3. Connecting to the virtual machine
 
 Use the following command to connect to the _shell_ VM instance as _you_.  Replace *<code>you@shell</code>* at the end of the following command with your *login* and *hostname* from Workbench:
 
-notextile. <pre><code>$ <span class="userinput">ssh -o "ProxyCommand ssh -a -x -p2222 turnout@switchyard.{{ site.arvados_api_host }} <b>shell</b>" -A -x <b>you@shell</b></span></code></pre>
+notextile. <pre><code>$ <span class="userinput">ssh -o "ProxyCommand ssh -p2222 turnout@switchyard.{{ site.arvados_api_host }} -x -a <b>shell</b>" -x <b>you@shell</b></span></code></pre>
 
 This command does several things at once. You usually cannot log in directly to virtual machines over the public Internet.  Instead, you log into a "switchyard" server and then tell the switchyard which virtual machine you want to connect to.
 
 * @-o "ProxyCommand ..."@ configures SSH to run the specified command to create a proxy and route your connection through it.
-* @-a@ tells SSH not to forward your ssh-agent credentials to the switchyard.
-* @-x@ tells SSH not to forward your X session to the switchyard.
 * @-p2222@ specifies that the switchyard is running on non-standard port 2222.
 * <code>turnout@switchyard.{{ site.arvados_api_host }}</code> specifies the user (@turnout@) and hostname (@switchyard.{{ site.arvados_api_host }}@) of the switchyard server that will proxy our connection to the VM.
+* @-x@ tells SSH not to forward your X session to the switchyard.
+* @-a@ tells SSH not to forward your ssh-agent credentials to the switchyard.
 * *@shell@* is the name of the VM that we want to connect to.  This is sent to the switchyard server as if it were an SSH command, and the switchyard server connects to the VM on our behalf.
 * After the ProxyCommand section, we repeat @-x@ to disable X session forwarding to the virtual machine.
-* @-A@ specifies that we want to forward access to @ssh-agent@ to the VM.
 * Finally, *<code>you@shell</code>* specifies your login name and repeats the hostname of the VM.  The username can be found in the *logins* column in the VMs Workbench page, discussed in the previous section.
 
 You should now be able to log into the Arvados VM and "check your environment.":check-environment.html
@@ -105,16 +104,16 @@ h3. Configuration (recommended)
 The command line above is cumbersome, but you can configure SSH to remember many of these settings.  Add this text to the file @.ssh/config@ in your home directory (create a new file if @.ssh/config@ doesn't exist):
 
 <notextile>
-<pre><code class="userinput">Host *.arvados
-  ProxyCommand ssh -a -x -p2222 turnout@switchyard.{{ site.arvados_api_host }} $SSH_PROXY_FLAGS %h
+<pre><code class="userinput">Host *.{{ site.arvados_cluster_uuid }}
+  TCPKeepAlive yes
+  ServerAliveInterval 60
+  ProxyCommand ssh -p2222 turnout@switchyard.{{ site.arvados_api_host }} -x -a $SSH_PROXY_FLAGS %h
   User <b>you</b>
-  ForwardAgent yes
-  ForwardX11 no
 </code></pre>
 </notextile>
 
-This will recognize any host ending in ".arvados" and automatically apply the proxy, user and forwarding settings from the configuration file, allowing you to log in with a much simpler command:
+This will recognize any host ending in ".{{ site.arvados_cluster_uuid }}" and automatically apply the proxy, user and forwarding settings from the configuration file, allowing you to log in with a much simpler command:
 
-notextile. <pre><code>$ <span class="userinput">ssh <b>shell</b>.arvados</span></code></pre>
+notextile. <pre><code>$ <span class="userinput">ssh <b>shell</b>.{{ site.arvados_cluster_uuid }}</span></code></pre>
 
 You should now be able to log into the Arvados VM and "check your environment.":check-environment.html
diff --git a/lib/cloud/interfaces.go b/lib/cloud/interfaces.go
new file mode 100644 (file)
index 0000000..e3a0725
--- /dev/null
@@ -0,0 +1,179 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package cloud
+
+import (
+       "io"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "golang.org/x/crypto/ssh"
+)
+
+// A RateLimitError should be returned by an InstanceSet when the
+// cloud service indicates it is rejecting all API calls for some time
+// interval.
+type RateLimitError interface {
+       // Time before which the caller should expect requests to
+       // fail.
+       EarliestRetry() time.Time
+       error
+}
+
+// A QuotaError should be returned by an InstanceSet when the cloud
+// service indicates the account cannot create more VMs than already
+// exist.
+type QuotaError interface {
+       // If true, don't create more instances until some existing
+       // instances are destroyed. If false, don't handle the error
+       // as a quota error.
+       IsQuotaError() bool
+       error
+}
+
+type InstanceSetID string
+type InstanceTags map[string]string
+type InstanceID string
+type ImageID string
+
+// An Executor executes commands on an ExecutorTarget.
+type Executor interface {
+       // Update the set of private keys used to authenticate to
+       // targets.
+       SetSigners(...ssh.Signer)
+
+       // Set the target used for subsequent command executions.
+       SetTarget(ExecutorTarget)
+
+       // Return the current target.
+       Target() ExecutorTarget
+
+       // Execute a shell command and return the resulting stdout and
+       // stderr. stdin can be nil.
+       Execute(cmd string, stdin io.Reader) (stdout, stderr []byte, err error)
+}
+
+// An ExecutorTarget is a remote command execution service.
+type ExecutorTarget interface {
+       // SSH server hostname or IP address, or empty string if
+       // unknown while instance is booting.
+       Address() string
+
+       // Return nil if the given public key matches the instance's
+       // SSH server key. If the provided Dialer is not nil,
+       // VerifyHostKey can use it to make outgoing network
+       // connections from the instance -- e.g., to use the cloud's
+       // "this instance's metadata" API.
+       VerifyHostKey(ssh.PublicKey, *ssh.Client) error
+}
+
+// Instance is implemented by the provider-specific instance types.
+type Instance interface {
+       ExecutorTarget
+
+       // ID returns the provider's instance ID. It must be stable
+       // for the life of the instance.
+       ID() InstanceID
+
+       // String typically returns the cloud-provided instance ID.
+       String() string
+
+       // Cloud provider's "instance type" ID. Matches a ProviderType
+       // in the cluster's InstanceTypes configuration.
+       ProviderType() string
+
+       // Get current tags
+       Tags() InstanceTags
+
+       // Replace tags with the given tags
+       SetTags(InstanceTags) error
+
+       // Shut down the node
+       Destroy() error
+}
+
+// An InstanceSet manages a set of VM instances created by an elastic
+// cloud provider like AWS, GCE, or Azure.
+//
+// All public methods of an InstanceSet, and all public methods of the
+// instances it returns, are goroutine safe.
+type InstanceSet interface {
+       // Create a new instance. If supported by the driver, add the
+       // provided public key to /root/.ssh/authorized_keys.
+       //
+       // The returned error should implement RateLimitError and
+       // QuotaError where applicable.
+       Create(arvados.InstanceType, ImageID, InstanceTags, ssh.PublicKey) (Instance, error)
+
+       // Return all instances, including ones that are booting or
+       // shutting down. Optionally, filter out nodes that don't have
+       // all of the given InstanceTags (the caller will ignore these
+       // anyway).
+       //
+       // An instance returned by successive calls to Instances() may
+       // -- but does not need to -- be represented by the same
+       // Instance object each time. Thus, the caller is responsible
+       // for de-duplicating the returned instances by comparing the
+       // InstanceIDs returned by the instances' ID() methods.
+       Instances(InstanceTags) ([]Instance, error)
+
+       // Stop any background tasks and release other resources.
+       Stop()
+}
+
+// A Driver returns an InstanceSet that uses the given InstanceSetID
+// and driver-dependent configuration parameters.
+//
+// The supplied id will be of the form "zzzzz-zzzzz-zzzzzzzzzzzzzzz"
+// where each z can be any alphanum. The returned InstanceSet must use
+// this id to tag long-lived cloud resources that it creates, and must
+// assume control of any existing resources that are tagged with the
+// same id. Tagging can be accomplished by including the ID in
+// resource names, using the cloud provider's tagging feature, or any
+// other mechanism. The tags must be visible to another instance of
+// the same driver running on a different host.
+//
+// The returned InstanceSet must ignore existing resources that are
+// visible but not tagged with the given id, except that it should log
+// a summary of such resources -- only once -- when it starts
+// up. Thus, two identically configured InstanceSets running on
+// different hosts with different ids should log about the existence
+// of each other's resources at startup, but will not interfere with
+// each other.
+//
+// Example:
+//
+//     type exampleInstanceSet struct {
+//             ownID     string
+//             AccessKey string
+//     }
+//
+//     type exampleDriver struct {}
+//
+//     func (*exampleDriver) InstanceSet(config map[string]interface{}, id InstanceSetID) (InstanceSet, error) {
+//             var is exampleInstanceSet
+//             if err := mapstructure.Decode(config, &is); err != nil {
+//                     return nil, err
+//             }
+//             is.ownID = id
+//             return &is, nil
+//     }
+//
+//     var _ = registerCloudDriver("example", &exampleDriver{})
+type Driver interface {
+       InstanceSet(config map[string]interface{}, id InstanceSetID) (InstanceSet, error)
+}
+
+// DriverFunc makes a Driver using the provided function as its
+// InstanceSet method. This is similar to http.HandlerFunc.
+func DriverFunc(fn func(config map[string]interface{}, id InstanceSetID) (InstanceSet, error)) Driver {
+       return driverFunc(fn)
+}
+
+type driverFunc func(config map[string]interface{}, id InstanceSetID) (InstanceSet, error)
+
+func (df driverFunc) InstanceSet(config map[string]interface{}, id InstanceSetID) (InstanceSet, error) {
+       return df(config, id)
+}
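
Aside: the Create doc comment above says provider errors should implement RateLimitError and QuotaError where applicable. A minimal sketch of such an error type, using a hypothetical capacityError inside the cloud package (illustrative only, not part of this commit):

    package cloud

    import (
    	"fmt"
    	"time"
    )

    // capacityError is a hypothetical provider error that satisfies both
    // interfaces, letting the dispatcher back off (rate limit) or stop
    // creating instances (quota) as appropriate.
    type capacityError struct {
    	quota bool
    	retry time.Time
    }

    func (e capacityError) Error() string {
    	return fmt.Sprintf("provider capacity error (quota=%v, retry after %v)", e.quota, e.retry)
    }

    // IsQuotaError implements QuotaError.
    func (e capacityError) IsQuotaError() bool { return e.quota }

    // EarliestRetry implements RateLimitError.
    func (e capacityError) EarliestRetry() time.Time { return e.retry }

    // Compile-time checks against the interfaces defined above.
    var _ QuotaError = capacityError{}
    var _ RateLimitError = capacityError{}
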
index 8c65cf7acf1b6dd7bc02660464be06ea07cc3daa..9292ef7e5ff5b3afb6012833299d9f89a7ea346c 100644 (file)
@@ -36,8 +36,9 @@ func (v Version) RunCommand(prog string, args []string, stdin io.Reader, stdout,
        return 0
 }
 
-// Multi is a Handler that looks up its first argument in a map, and
-// invokes the resulting Handler with the remaining args.
+// Multi is a Handler that looks up its first argument in a map (after
+// stripping any "arvados-" or "crunch-" prefix), and invokes the
+// resulting Handler with the remaining args.
 //
 // Example:
 //
diff --git a/lib/dispatchcloud/cmd.go b/lib/dispatchcloud/cmd.go
new file mode 100644 (file)
index 0000000..92948fb
--- /dev/null
@@ -0,0 +1,19 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package dispatchcloud
+
+import (
+       "git.curoverse.com/arvados.git/lib/cmd"
+       "git.curoverse.com/arvados.git/lib/service"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+var Command cmd.Handler = service.Command(arvados.ServiceNameDispatchCloud, newHandler)
+
+func newHandler(cluster *arvados.Cluster, _ *arvados.NodeProfile) service.Handler {
+       d := &dispatcher{Cluster: cluster}
+       go d.Start()
+       return d
+}
diff --git a/lib/dispatchcloud/container/queue.go b/lib/dispatchcloud/container/queue.go
new file mode 100644 (file)
index 0000000..432f4d4
--- /dev/null
@@ -0,0 +1,378 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package container
+
+import (
+       "io"
+       "sync"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/Sirupsen/logrus"
+       "github.com/prometheus/client_golang/prometheus"
+)
+
+type typeChooser func(*arvados.Container) (arvados.InstanceType, error)
+
+// An APIClient performs Arvados API requests. It is typically an
+// *arvados.Client.
+type APIClient interface {
+       RequestAndDecode(dst interface{}, method, path string, body io.Reader, params interface{}) error
+}
+
+// A QueueEnt is an entry in the queue, consisting of a container
+// record and the instance type that should be used to run it.
+type QueueEnt struct {
+       // The container to run. Only the UUID, State, Priority, and
+       // RuntimeConstraints fields are populated.
+       Container    arvados.Container
+       InstanceType arvados.InstanceType
+}
+
+// String implements fmt.Stringer by returning the queued container's
+// UUID.
+func (c *QueueEnt) String() string {
+       return c.Container.UUID
+}
+
+// A Queue is an interface to an Arvados cluster's container
+// database. It presents only the containers that are eligible to be
+// run by, are already being run by, or have recently been run by the
+// present dispatcher.
+//
+// The Entries, Get, and Forget methods do not block: they return
+// immediately, using cached data.
+//
+// The updating methods (Cancel, Lock, Unlock, Update) do block: they
+// return only after the operation has completed.
+//
+// A Queue's Update method should be called periodically to keep the
+// cache up to date.
+type Queue struct {
+       logger     logrus.FieldLogger
+       reg        *prometheus.Registry
+       chooseType typeChooser
+       client     APIClient
+
+       auth    *arvados.APIClientAuthorization
+       current map[string]QueueEnt
+       updated time.Time
+       mtx     sync.Mutex
+
+       // Methods that modify the Queue (like Lock) add the affected
+       // container UUIDs to dontupdate. When applying a batch of
+       // updates received from the network, anything appearing in
+       // dontupdate is skipped, in case the received update has
+       // already been superseded by the locally initiated change.
+       // When no network update is in progress, this protection is
+       // not needed, and dontupdate is nil.
+       dontupdate map[string]struct{}
+
+       // active notification subscribers (see Subscribe)
+       subscribers map[<-chan struct{}]chan struct{}
+}
+
+// NewQueue returns a new Queue. When a new container appears in the
+// Arvados cluster's queue during Update, chooseType will be called to
+// assign an appropriate arvados.InstanceType for the queue entry.
+func NewQueue(logger logrus.FieldLogger, reg *prometheus.Registry, chooseType typeChooser, client APIClient) *Queue {
+       return &Queue{
+               logger:      logger,
+               reg:         reg,
+               chooseType:  chooseType,
+               client:      client,
+               current:     map[string]QueueEnt{},
+               subscribers: map[<-chan struct{}]chan struct{}{},
+       }
+}
+
+// Subscribe returns a channel that becomes ready to receive when an
+// entry in the Queue is updated.
+//
+//     ch := q.Subscribe()
+//     defer q.Unsubscribe(ch)
+//     for range ch {
+//             // ...
+//     }
+func (cq *Queue) Subscribe() <-chan struct{} {
+       cq.mtx.Lock()
+       defer cq.mtx.Unlock()
+       ch := make(chan struct{}, 1)
+       cq.subscribers[ch] = ch
+       return ch
+}
+
+// Unsubscribe stops sending updates to the given channel. See
+// Subscribe.
+func (cq *Queue) Unsubscribe(ch <-chan struct{}) {
+       cq.mtx.Lock()
+       defer cq.mtx.Unlock()
+       delete(cq.subscribers, ch)
+}
+
+// Caller must have lock.
+func (cq *Queue) notify() {
+       for _, ch := range cq.subscribers {
+               select {
+               case ch <- struct{}{}:
+               default:
+               }
+       }
+}
+
+// Forget drops the specified container from the cache. It should be
+// called on finalized containers to avoid leaking memory over
+// time. It is a no-op if the indicated container is not in a
+// finalized state.
+func (cq *Queue) Forget(uuid string) {
+       cq.mtx.Lock()
+       defer cq.mtx.Unlock()
+       ctr := cq.current[uuid].Container
+       if ctr.State == arvados.ContainerStateComplete || ctr.State == arvados.ContainerStateCancelled {
+               delete(cq.current, uuid)
+       }
+}
+
+// Get returns the (partial) Container record for the specified
+// container. Like a map lookup, its second return value is false if
+// the specified container is not in the Queue.
+func (cq *Queue) Get(uuid string) (arvados.Container, bool) {
+       cq.mtx.Lock()
+       defer cq.mtx.Unlock()
+       if ctr, ok := cq.current[uuid]; !ok {
+               return arvados.Container{}, false
+       } else {
+               return ctr.Container, true
+       }
+}
+
+// Entries returns all cache entries, keyed by container UUID.
+//
+// The returned threshold indicates the maximum age of any cached data
+// returned in the map. This makes it possible for a scheduler to
+// determine correctly the outcome of a remote process that updates
+// container state. It must first wait for the remote process to exit,
+// then wait for the Queue to start and finish its next Update --
+// i.e., it must wait until threshold > timeProcessExited.
+func (cq *Queue) Entries() (entries map[string]QueueEnt, threshold time.Time) {
+       cq.mtx.Lock()
+       defer cq.mtx.Unlock()
+       entries = make(map[string]QueueEnt, len(cq.current))
+       for uuid, ctr := range cq.current {
+               entries[uuid] = ctr
+       }
+       threshold = cq.updated
+       return
+}
+
+// Update refreshes the cache from the Arvados API. It adds newly
+// queued containers, and updates the state of previously queued
+// containers.
+func (cq *Queue) Update() error {
+       cq.mtx.Lock()
+       cq.dontupdate = map[string]struct{}{}
+       updateStarted := time.Now()
+       cq.mtx.Unlock()
+
+       next, err := cq.poll()
+       if err != nil {
+               return err
+       }
+
+       cq.mtx.Lock()
+       defer cq.mtx.Unlock()
+       for uuid, ctr := range next {
+               if _, keep := cq.dontupdate[uuid]; keep {
+                       continue
+               }
+               if cur, ok := cq.current[uuid]; !ok {
+                       cq.addEnt(uuid, *ctr)
+               } else {
+                       cur.Container = *ctr
+                       cq.current[uuid] = cur
+               }
+       }
+       for uuid := range cq.current {
+               if _, keep := cq.dontupdate[uuid]; keep {
+                       continue
+               } else if _, keep = next[uuid]; keep {
+                       continue
+               } else {
+                       delete(cq.current, uuid)
+               }
+       }
+       cq.dontupdate = nil
+       cq.updated = updateStarted
+       cq.notify()
+       return nil
+}
+
+func (cq *Queue) addEnt(uuid string, ctr arvados.Container) {
+       it, err := cq.chooseType(&ctr)
+       if err != nil {
+               // FIXME: throttle warnings, cancel after timeout
+               cq.logger.Warnf("cannot run %s", &ctr)
+               return
+       }
+       cq.current[uuid] = QueueEnt{Container: ctr, InstanceType: it}
+}
+
+// Lock acquires the dispatch lock for the given container.
+func (cq *Queue) Lock(uuid string) error {
+       return cq.apiUpdate(uuid, "lock")
+}
+
+// Unlock releases the dispatch lock for the given container.
+func (cq *Queue) Unlock(uuid string) error {
+       return cq.apiUpdate(uuid, "unlock")
+}
+
+// Cancel cancels the given container.
+func (cq *Queue) Cancel(uuid string) error {
+       err := cq.client.RequestAndDecode(nil, "PUT", "arvados/v1/containers/"+uuid, nil, map[string]map[string]interface{}{
+               "container": {"state": arvados.ContainerStateCancelled},
+       })
+       if err != nil {
+               return err
+       }
+       cq.mtx.Lock()
+       defer cq.mtx.Unlock()
+       cq.notify()
+       return nil
+}
+
+func (cq *Queue) apiUpdate(uuid, action string) error {
+       var resp arvados.Container
+       err := cq.client.RequestAndDecode(&resp, "POST", "arvados/v1/containers/"+uuid+"/"+action, nil, nil)
+       if err != nil {
+               return err
+       }
+
+       cq.mtx.Lock()
+       defer cq.mtx.Unlock()
+       if cq.dontupdate != nil {
+               cq.dontupdate[uuid] = struct{}{}
+       }
+       if ent, ok := cq.current[uuid]; !ok {
+               cq.addEnt(uuid, resp)
+       } else {
+               ent.Container.State, ent.Container.Priority, ent.Container.LockedByUUID = resp.State, resp.Priority, resp.LockedByUUID
+               cq.current[uuid] = ent
+       }
+       cq.notify()
+       return nil
+}
+
+func (cq *Queue) poll() (map[string]*arvados.Container, error) {
+       cq.mtx.Lock()
+       size := len(cq.current)
+       auth := cq.auth
+       cq.mtx.Unlock()
+
+       if auth == nil {
+               auth = &arvados.APIClientAuthorization{}
+               err := cq.client.RequestAndDecode(auth, "GET", "arvados/v1/api_client_authorizations/current", nil, nil)
+               if err != nil {
+                       return nil, err
+               }
+               cq.mtx.Lock()
+               cq.auth = auth
+               cq.mtx.Unlock()
+       }
+
+       next := make(map[string]*arvados.Container, size)
+       apply := func(updates []arvados.Container) {
+               for _, upd := range updates {
+                       if next[upd.UUID] == nil {
+                               next[upd.UUID] = &arvados.Container{}
+                       }
+                       *next[upd.UUID] = upd
+               }
+       }
+       selectParam := []string{"uuid", "state", "priority", "runtime_constraints"}
+       limitParam := 1000
+
+       mine, err := cq.fetchAll(arvados.ResourceListParams{
+               Select:  selectParam,
+               Order:   "uuid",
+               Limit:   &limitParam,
+               Count:   "none",
+               Filters: []arvados.Filter{{"locked_by_uuid", "=", auth.UUID}},
+       })
+       if err != nil {
+               return nil, err
+       }
+       apply(mine)
+
+       avail, err := cq.fetchAll(arvados.ResourceListParams{
+               Select:  selectParam,
+               Order:   "uuid",
+               Limit:   &limitParam,
+               Count:   "none",
+               Filters: []arvados.Filter{{"state", "=", arvados.ContainerStateQueued}, {"priority", ">", "0"}},
+       })
+       if err != nil {
+               return nil, err
+       }
+       apply(avail)
+
+       var missing []string
+       cq.mtx.Lock()
+       for uuid, ent := range cq.current {
+               if next[uuid] == nil &&
+                       ent.Container.State != arvados.ContainerStateCancelled &&
+                       ent.Container.State != arvados.ContainerStateComplete {
+                       missing = append(missing, uuid)
+               }
+       }
+       cq.mtx.Unlock()
+
+       for i, page := 0, 20; i < len(missing); i += page {
+               batch := missing[i:]
+               if len(batch) > page {
+                       batch = batch[:page]
+               }
+               ended, err := cq.fetchAll(arvados.ResourceListParams{
+                       Select:  selectParam,
+                       Order:   "uuid",
+                       Count:   "none",
+                       Filters: []arvados.Filter{{"uuid", "in", batch}},
+               })
+               if err != nil {
+                       return nil, err
+               }
+               apply(ended)
+       }
+       return next, nil
+}
+
+func (cq *Queue) fetchAll(initialParams arvados.ResourceListParams) ([]arvados.Container, error) {
+       var results []arvados.Container
+       params := initialParams
+       params.Offset = 0
+       for {
+               // This list variable must be a new one declared
+               // inside the loop: otherwise, items in the API
+               // response would get deep-merged into the items
+               // loaded in previous iterations.
+               var list arvados.ContainerList
+
+               err := cq.client.RequestAndDecode(&list, "GET", "arvados/v1/containers", nil, params)
+               if err != nil {
+                       return nil, err
+               }
+               if len(list.Items) == 0 {
+                       break
+               }
+
+               results = append(results, list.Items...)
+               if len(params.Order) == 1 && params.Order == "uuid" {
+                       params.Filters = append(initialParams.Filters, arvados.Filter{"uuid", ">", list.Items[len(list.Items)-1].UUID})
+               } else {
+                       params.Offset += len(list.Items)
+               }
+       }
+       return results, nil
+}
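
Aside: a sketch of how a caller might combine Update, Subscribe, and Entries as described in the doc comments above. The package name, ticker interval, and logger wiring are illustrative assumptions, not part of this commit:

    package example

    import (
    	"time"

    	"git.curoverse.com/arvados.git/lib/dispatchcloud/container"
    	"github.com/Sirupsen/logrus"
    )

    // watchQueue refreshes the queue cache periodically and logs a summary
    // whenever any cached entry changes.
    func watchQueue(cq *container.Queue, logger logrus.FieldLogger) {
    	go func() {
    		// Keep the cache up to date (see the Update doc comment above).
    		for range time.NewTicker(time.Second).C {
    			if err := cq.Update(); err != nil {
    				logger.Warnf("queue update failed: %s", err)
    			}
    		}
    	}()
    	ch := cq.Subscribe()
    	defer cq.Unsubscribe(ch)
    	for range ch {
    		entries, threshold := cq.Entries()
    		logger.Infof("%d containers cached as of %v", len(entries), threshold)
    	}
    }
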
diff --git a/lib/dispatchcloud/dispatcher.go b/lib/dispatchcloud/dispatcher.go
new file mode 100644 (file)
index 0000000..81ad0ed
--- /dev/null
@@ -0,0 +1,197 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package dispatchcloud
+
+import (
+       "crypto/md5"
+       "encoding/json"
+       "fmt"
+       "net/http"
+       "strings"
+       "sync"
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/cloud"
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/container"
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/scheduler"
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/ssh_executor"
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "git.curoverse.com/arvados.git/sdk/go/auth"
+       "git.curoverse.com/arvados.git/sdk/go/httpserver"
+       "github.com/Sirupsen/logrus"
+       "github.com/prometheus/client_golang/prometheus"
+       "github.com/prometheus/client_golang/prometheus/promhttp"
+       "golang.org/x/crypto/ssh"
+)
+
+const (
+       defaultPollInterval     = time.Second
+       defaultStaleLockTimeout = time.Minute
+)
+
+type pool interface {
+       scheduler.WorkerPool
+       Instances() []worker.InstanceView
+       Stop()
+}
+
+type dispatcher struct {
+       Cluster       *arvados.Cluster
+       InstanceSetID cloud.InstanceSetID
+
+       logger      logrus.FieldLogger
+       reg         *prometheus.Registry
+       instanceSet cloud.InstanceSet
+       pool        pool
+       queue       scheduler.ContainerQueue
+       httpHandler http.Handler
+       sshKey      ssh.Signer
+
+       setupOnce sync.Once
+       stop      chan struct{}
+       stopped   chan struct{}
+}
+
+// Start starts the dispatcher. Start can be called multiple times
+// with no ill effect.
+func (disp *dispatcher) Start() {
+       disp.setupOnce.Do(disp.setup)
+}
+
+// ServeHTTP implements service.Handler.
+func (disp *dispatcher) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+       disp.Start()
+       disp.httpHandler.ServeHTTP(w, r)
+}
+
+// CheckHealth implements service.Handler.
+func (disp *dispatcher) CheckHealth() error {
+       disp.Start()
+       return nil
+}
+
+// Stop dispatching containers and release resources. Typically used
+// in tests.
+func (disp *dispatcher) Close() {
+       disp.Start()
+       select {
+       case disp.stop <- struct{}{}:
+       default:
+       }
+       <-disp.stopped
+}
+
+// Make a worker.Executor for the given instance.
+func (disp *dispatcher) newExecutor(inst cloud.Instance) worker.Executor {
+       exr := ssh_executor.New(inst)
+       exr.SetSigners(disp.sshKey)
+       return exr
+}
+
+func (disp *dispatcher) typeChooser(ctr *arvados.Container) (arvados.InstanceType, error) {
+       return ChooseInstanceType(disp.Cluster, ctr)
+}
+
+func (disp *dispatcher) setup() {
+       disp.initialize()
+       go disp.run()
+}
+
+func (disp *dispatcher) initialize() {
+       arvClient := arvados.NewClientFromEnv()
+       if disp.InstanceSetID == "" {
+               if strings.HasPrefix(arvClient.AuthToken, "v2/") {
+                       disp.InstanceSetID = cloud.InstanceSetID(strings.Split(arvClient.AuthToken, "/")[1])
+               } else {
+                       // Use some other string unique to this token
+                       // that doesn't reveal the token itself.
+                       disp.InstanceSetID = cloud.InstanceSetID(fmt.Sprintf("%x", md5.Sum([]byte(arvClient.AuthToken))))
+               }
+       }
+       disp.stop = make(chan struct{}, 1)
+       disp.stopped = make(chan struct{})
+       disp.logger = logrus.StandardLogger()
+
+       if key, err := ssh.ParsePrivateKey(disp.Cluster.Dispatch.PrivateKey); err != nil {
+               disp.logger.Fatalf("error parsing configured Dispatch.PrivateKey: %s", err)
+       } else {
+               disp.sshKey = key
+       }
+
+       instanceSet, err := newInstanceSet(disp.Cluster, disp.InstanceSetID)
+       if err != nil {
+               disp.logger.Fatalf("error initializing driver: %s", err)
+       }
+       disp.instanceSet = &instanceSetProxy{instanceSet}
+       disp.reg = prometheus.NewRegistry()
+       disp.pool = worker.NewPool(disp.logger, disp.reg, disp.instanceSet, disp.newExecutor, disp.Cluster)
+       disp.queue = container.NewQueue(disp.logger, disp.reg, disp.typeChooser, arvClient)
+
+       if disp.Cluster.ManagementToken == "" {
+               disp.httpHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+                       http.Error(w, "Management API authentication is not configured", http.StatusForbidden)
+               })
+       } else {
+               mux := http.NewServeMux()
+               mux.HandleFunc("/arvados/v1/dispatch/containers", disp.apiContainers)
+               mux.HandleFunc("/arvados/v1/dispatch/instances", disp.apiInstances)
+               metricsH := promhttp.HandlerFor(disp.reg, promhttp.HandlerOpts{
+                       ErrorLog: disp.logger,
+               })
+               mux.Handle("/metrics", metricsH)
+               mux.Handle("/metrics.json", metricsH)
+               disp.httpHandler = auth.RequireLiteralToken(disp.Cluster.ManagementToken, mux)
+       }
+}
+
+func (disp *dispatcher) run() {
+       defer close(disp.stopped)
+       defer disp.instanceSet.Stop()
+       defer disp.pool.Stop()
+
+       staleLockTimeout := time.Duration(disp.Cluster.Dispatch.StaleLockTimeout)
+       if staleLockTimeout == 0 {
+               staleLockTimeout = defaultStaleLockTimeout
+       }
+       pollInterval := time.Duration(disp.Cluster.Dispatch.PollInterval)
+       if pollInterval <= 0 {
+               pollInterval = defaultPollInterval
+       }
+       sched := scheduler.New(disp.logger, disp.queue, disp.pool, staleLockTimeout, pollInterval)
+       sched.Start()
+       defer sched.Stop()
+
+       <-disp.stop
+}
+
+// Management API: all active and queued containers.
+func (disp *dispatcher) apiContainers(w http.ResponseWriter, r *http.Request) {
+       if r.Method != "GET" {
+               httpserver.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+               return
+       }
+       var resp struct {
+               Items []container.QueueEnt
+       }
+       qEntries, _ := disp.queue.Entries()
+       for _, ent := range qEntries {
+               resp.Items = append(resp.Items, ent)
+       }
+       json.NewEncoder(w).Encode(resp)
+}
+
+// Management API: all active instances (cloud VMs).
+func (disp *dispatcher) apiInstances(w http.ResponseWriter, r *http.Request) {
+       if r.Method != "GET" {
+               httpserver.Error(w, "method not allowed", http.StatusMethodNotAllowed)
+               return
+       }
+       var resp struct {
+               Items []worker.InstanceView
+       }
+       resp.Items = disp.pool.Instances()
+       json.NewEncoder(w).Encode(resp)
+}
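
Aside: a hypothetical client for the management endpoints registered above, assuming the cluster's ManagementToken is accepted as a Bearer credential by auth.RequireLiteralToken; the base URL and token are placeholders:

    package example

    import (
    	"encoding/json"
    	"fmt"
    	"net/http"
    )

    // listDispatchInstances fetches the dispatcher's instance list. The
    // response shape mirrors the anonymous struct encoded by apiInstances
    // above (an "Items" array).
    func listDispatchInstances(baseURL, managementToken string) error {
    	req, err := http.NewRequest("GET", baseURL+"/arvados/v1/dispatch/instances", nil)
    	if err != nil {
    		return err
    	}
    	// Assumption: the management token is sent as a Bearer token.
    	req.Header.Set("Authorization", "Bearer "+managementToken)
    	resp, err := http.DefaultClient.Do(req)
    	if err != nil {
    		return err
    	}
    	defer resp.Body.Close()
    	if resp.StatusCode != http.StatusOK {
    		return fmt.Errorf("unexpected status: %s", resp.Status)
    	}
    	var body struct {
    		Items []map[string]interface{}
    	}
    	if err := json.NewDecoder(resp.Body).Decode(&body); err != nil {
    		return err
    	}
    	fmt.Printf("%d instances\n", len(body.Items))
    	return nil
    }
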
diff --git a/lib/dispatchcloud/dispatcher_test.go b/lib/dispatchcloud/dispatcher_test.go
new file mode 100644 (file)
index 0000000..33823a8
--- /dev/null
@@ -0,0 +1,269 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package dispatchcloud
+
+import (
+       "encoding/json"
+       "io/ioutil"
+       "math/rand"
+       "net/http"
+       "net/http/httptest"
+       "os"
+       "sync"
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/test"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/Sirupsen/logrus"
+       "golang.org/x/crypto/ssh"
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&DispatcherSuite{})
+
+type DispatcherSuite struct {
+       cluster     *arvados.Cluster
+       instanceSet *test.LameInstanceSet
+       stubDriver  *test.StubDriver
+       disp        *dispatcher
+}
+
+func (s *DispatcherSuite) SetUpSuite(c *check.C) {
+       if os.Getenv("ARVADOS_DEBUG") != "" {
+               logrus.StandardLogger().SetLevel(logrus.DebugLevel)
+       }
+}
+
+func (s *DispatcherSuite) SetUpTest(c *check.C) {
+       dispatchpub, _ := test.LoadTestKey(c, "test/sshkey_dispatch")
+       dispatchprivraw, err := ioutil.ReadFile("test/sshkey_dispatch")
+       c.Assert(err, check.IsNil)
+
+       _, hostpriv := test.LoadTestKey(c, "test/sshkey_vm")
+       s.stubDriver = &test.StubDriver{
+               HostKey:          hostpriv,
+               AuthorizedKeys:   []ssh.PublicKey{dispatchpub},
+               ErrorRateDestroy: 0.1,
+       }
+
+       s.cluster = &arvados.Cluster{
+               CloudVMs: arvados.CloudVMs{
+                       Driver:          "test",
+                       SyncInterval:    arvados.Duration(10 * time.Millisecond),
+                       TimeoutIdle:     arvados.Duration(30 * time.Millisecond),
+                       TimeoutBooting:  arvados.Duration(30 * time.Millisecond),
+                       TimeoutProbe:    arvados.Duration(15 * time.Millisecond),
+                       TimeoutShutdown: arvados.Duration(5 * time.Millisecond),
+               },
+               Dispatch: arvados.Dispatch{
+                       PrivateKey:         dispatchprivraw,
+                       PollInterval:       arvados.Duration(5 * time.Millisecond),
+                       ProbeInterval:      arvados.Duration(5 * time.Millisecond),
+                       StaleLockTimeout:   arvados.Duration(5 * time.Millisecond),
+                       MaxProbesPerSecond: 1000,
+               },
+               InstanceTypes: arvados.InstanceTypeMap{
+                       test.InstanceType(1).Name:  test.InstanceType(1),
+                       test.InstanceType(2).Name:  test.InstanceType(2),
+                       test.InstanceType(3).Name:  test.InstanceType(3),
+                       test.InstanceType(4).Name:  test.InstanceType(4),
+                       test.InstanceType(6).Name:  test.InstanceType(6),
+                       test.InstanceType(8).Name:  test.InstanceType(8),
+                       test.InstanceType(16).Name: test.InstanceType(16),
+               },
+               NodeProfiles: map[string]arvados.NodeProfile{
+                       "*": {
+                               Controller:    arvados.SystemServiceInstance{Listen: os.Getenv("ARVADOS_API_HOST")},
+                               DispatchCloud: arvados.SystemServiceInstance{Listen: ":"},
+                       },
+               },
+       }
+       s.disp = &dispatcher{Cluster: s.cluster}
+       // Test cases can modify s.cluster before calling
+       // initialize(), and then modify private state before calling
+       // go run().
+}
+
+func (s *DispatcherSuite) TearDownTest(c *check.C) {
+       s.disp.Close()
+}
+
+// DispatchToStubDriver checks that the dispatcher wires everything
+// together effectively. It uses a real scheduler and worker pool with
+// a fake queue and cloud driver. The fake cloud driver injects
+// artificial errors in order to exercise a variety of code paths.
+func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
+       drivers["test"] = s.stubDriver
+       s.disp.setupOnce.Do(s.disp.initialize)
+       queue := &test.Queue{
+               ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
+                       return ChooseInstanceType(s.cluster, ctr)
+               },
+       }
+       for i := 0; i < 200; i++ {
+               queue.Containers = append(queue.Containers, arvados.Container{
+                       UUID:     test.ContainerUUID(i + 1),
+                       State:    arvados.ContainerStateQueued,
+                       Priority: int64(i%20 + 1),
+                       RuntimeConstraints: arvados.RuntimeConstraints{
+                               RAM:   int64(i%3+1) << 30,
+                               VCPUs: i%8 + 1,
+                       },
+               })
+       }
+       s.disp.queue = queue
+
+       var mtx sync.Mutex
+       done := make(chan struct{})
+       waiting := map[string]struct{}{}
+       for _, ctr := range queue.Containers {
+               waiting[ctr.UUID] = struct{}{}
+       }
+       executeContainer := func(ctr arvados.Container) int {
+               mtx.Lock()
+               defer mtx.Unlock()
+               if _, ok := waiting[ctr.UUID]; !ok {
+                       c.Logf("container completed twice: %s -- perhaps completed after stub instance was killed?", ctr.UUID)
+                       return 1
+               }
+               delete(waiting, ctr.UUID)
+               if len(waiting) == 0 {
+                       close(done)
+               }
+               return int(rand.Uint32() & 0x3)
+       }
+       n := 0
+       s.stubDriver.Queue = queue
+       s.stubDriver.SetupVM = func(stubvm *test.StubVM) {
+               n++
+               stubvm.Boot = time.Now().Add(time.Duration(rand.Int63n(int64(5 * time.Millisecond))))
+               stubvm.CrunchRunDetachDelay = time.Duration(rand.Int63n(int64(10 * time.Millisecond)))
+               stubvm.ExecuteContainer = executeContainer
+               switch n % 7 {
+               case 0:
+                       stubvm.Broken = time.Now().Add(time.Duration(rand.Int63n(90)) * time.Millisecond)
+               case 1:
+                       stubvm.CrunchRunMissing = true
+               default:
+                       stubvm.CrunchRunCrashRate = 0.1
+               }
+       }
+
+       start := time.Now()
+       go s.disp.run()
+       err := s.disp.CheckHealth()
+       c.Check(err, check.IsNil)
+
+       select {
+       case <-done:
+               c.Logf("containers finished (%s), waiting for instances to shutdown and queue to clear", time.Since(start))
+       case <-time.After(10 * time.Second):
+               c.Fatalf("timed out; still waiting for %d containers: %q", len(waiting), waiting)
+       }
+
+       deadline := time.Now().Add(time.Second)
+       for range time.NewTicker(10 * time.Millisecond).C {
+               insts, err := s.stubDriver.InstanceSets()[0].Instances(nil)
+               c.Check(err, check.IsNil)
+               queue.Update()
+               ents, _ := queue.Entries()
+               if len(ents) == 0 && len(insts) == 0 {
+                       break
+               }
+               if time.Now().After(deadline) {
+                       c.Fatalf("timed out with %d containers (%v), %d instances (%+v)", len(ents), ents, len(insts), insts)
+               }
+       }
+}
+
+func (s *DispatcherSuite) TestAPIPermissions(c *check.C) {
+       s.cluster.ManagementToken = "abcdefgh"
+       drivers["test"] = s.stubDriver
+       s.disp.setupOnce.Do(s.disp.initialize)
+       s.disp.queue = &test.Queue{}
+       go s.disp.run()
+
+       for _, token := range []string{"abc", ""} {
+               req := httptest.NewRequest("GET", "/arvados/v1/dispatch/instances", nil)
+               if token != "" {
+                       req.Header.Set("Authorization", "Bearer "+token)
+               }
+               resp := httptest.NewRecorder()
+               s.disp.ServeHTTP(resp, req)
+               if token == "" {
+                       c.Check(resp.Code, check.Equals, http.StatusUnauthorized)
+               } else {
+                       c.Check(resp.Code, check.Equals, http.StatusForbidden)
+               }
+       }
+}
+
+func (s *DispatcherSuite) TestAPIDisabled(c *check.C) {
+       s.cluster.ManagementToken = ""
+       drivers["test"] = s.stubDriver
+       s.disp.setupOnce.Do(s.disp.initialize)
+       s.disp.queue = &test.Queue{}
+       go s.disp.run()
+
+       for _, token := range []string{"abc", ""} {
+               req := httptest.NewRequest("GET", "/arvados/v1/dispatch/instances", nil)
+               if token != "" {
+                       req.Header.Set("Authorization", "Bearer "+token)
+               }
+               resp := httptest.NewRecorder()
+               s.disp.ServeHTTP(resp, req)
+               c.Check(resp.Code, check.Equals, http.StatusForbidden)
+       }
+}
+
+func (s *DispatcherSuite) TestInstancesAPI(c *check.C) {
+       s.cluster.ManagementToken = "abcdefgh"
+       s.cluster.CloudVMs.TimeoutBooting = arvados.Duration(time.Second)
+       drivers["test"] = s.stubDriver
+       s.disp.setupOnce.Do(s.disp.initialize)
+       s.disp.queue = &test.Queue{}
+       go s.disp.run()
+
+       type instance struct {
+               Instance             string
+               WorkerState          string
+               Price                float64
+               LastContainerUUID    string
+               ArvadosInstanceType  string
+               ProviderInstanceType string
+       }
+       type instancesResponse struct {
+               Items []instance
+       }
+       getInstances := func() instancesResponse {
+               req := httptest.NewRequest("GET", "/arvados/v1/dispatch/instances", nil)
+               req.Header.Set("Authorization", "Bearer abcdefgh")
+               resp := httptest.NewRecorder()
+               s.disp.ServeHTTP(resp, req)
+               var sr instancesResponse
+               c.Check(resp.Code, check.Equals, http.StatusOK)
+               err := json.Unmarshal(resp.Body.Bytes(), &sr)
+               c.Check(err, check.IsNil)
+               return sr
+       }
+
+       sr := getInstances()
+       c.Check(len(sr.Items), check.Equals, 0)
+
+       ch := s.disp.pool.Subscribe()
+       defer s.disp.pool.Unsubscribe(ch)
+       err := s.disp.pool.Create(test.InstanceType(1))
+       c.Check(err, check.IsNil)
+       <-ch
+
+       sr = getInstances()
+       c.Assert(len(sr.Items), check.Equals, 1)
+       c.Check(sr.Items[0].Instance, check.Matches, "stub.*")
+       c.Check(sr.Items[0].WorkerState, check.Equals, "booting")
+       c.Check(sr.Items[0].Price, check.Equals, 0.123)
+       c.Check(sr.Items[0].LastContainerUUID, check.Equals, "")
+       c.Check(sr.Items[0].ProviderInstanceType, check.Equals, test.InstanceType(1).ProviderType)
+       c.Check(sr.Items[0].ArvadosInstanceType, check.Equals, test.InstanceType(1).Name)
+}
diff --git a/lib/dispatchcloud/driver.go b/lib/dispatchcloud/driver.go
new file mode 100644 (file)
index 0000000..295fd61
--- /dev/null
@@ -0,0 +1,22 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package dispatchcloud
+
+import (
+       "fmt"
+
+       "git.curoverse.com/arvados.git/lib/cloud"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+var drivers = map[string]cloud.Driver{}
+
+func newInstanceSet(cluster *arvados.Cluster, setID cloud.InstanceSetID) (cloud.InstanceSet, error) {
+       driver, ok := drivers[cluster.CloudVMs.Driver]
+       if !ok {
+               return nil, fmt.Errorf("unsupported cloud driver %q", cluster.CloudVMs.Driver)
+       }
+       return driver.InstanceSet(cluster.CloudVMs.DriverParameters, setID)
+}
diff --git a/lib/dispatchcloud/instance_set_proxy.go b/lib/dispatchcloud/instance_set_proxy.go
new file mode 100644 (file)
index 0000000..e728b67
--- /dev/null
@@ -0,0 +1,25 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package dispatchcloud
+
+import (
+       "git.curoverse.com/arvados.git/lib/cloud"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "golang.org/x/crypto/ssh"
+)
+
+type instanceSetProxy struct {
+       cloud.InstanceSet
+}
+
+func (is *instanceSetProxy) Create(it arvados.InstanceType, id cloud.ImageID, tags cloud.InstanceTags, pk ssh.PublicKey) (cloud.Instance, error) {
+       // TODO: return if Create failed recently with a RateLimitError or QuotaError
+       return is.InstanceSet.Create(it, id, tags, pk)
+}
+
+func (is *instanceSetProxy) Instances(tags cloud.InstanceTags) ([]cloud.Instance, error) {
+       // TODO: return if Instances failed recently with a RateLimitError
+       return is.InstanceSet.Instances(tags)
+}
diff --git a/lib/dispatchcloud/logger.go b/lib/dispatchcloud/logger.go
new file mode 100644 (file)
index 0000000..90bb6ca
--- /dev/null
@@ -0,0 +1,29 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package dispatchcloud
+
+import (
+       "sync"
+       "time"
+)
+
+type logger interface {
+       Printf(string, ...interface{})
+       Warnf(string, ...interface{})
+       Debugf(string, ...interface{})
+}
+
+var nextSpam = map[string]time.Time{}
+var nextSpamMtx sync.Mutex
+
+func unspam(msg string) bool {
+       nextSpamMtx.Lock()
+       defer nextSpamMtx.Unlock()
+       if nextSpam[msg].Before(time.Now()) {
+               nextSpam[msg] = time.Now().Add(time.Minute)
+               return true
+       }
+       return false
+}
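
The unspam helper above rate-limits repeated log messages to at most one per minute per message string. A minimal usage sketch, assuming a logger value satisfying the logger interface; the message text and err variable are hypothetical, not taken from the dispatcher code:

	if unspam("sync failed") {
		logger.Warnf("sync failed: %s", err)
	}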
index 1c36d6cf5bb770cb447b6f7f177d39c5ff7ef469..d7f4585619417904a1125bc05d54d58499199179 100644 (file)
@@ -6,19 +6,16 @@ package dispatchcloud
 
 import (
        "errors"
-       "log"
-       "os/exec"
+       "regexp"
        "sort"
-       "strings"
-       "time"
+       "strconv"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
 )
 
-var (
-       ErrInstanceTypesNotConfigured = errors.New("site configuration does not list any instance types")
-       discountConfiguredRAMPercent  = 5
-)
+var ErrInstanceTypesNotConfigured = errors.New("site configuration does not list any instance types")
+
+var discountConfiguredRAMPercent = 5
 
 // ConstraintsNotSatisfiableError includes a list of available instance types
 // to be reported back to the user.
@@ -27,6 +24,65 @@ type ConstraintsNotSatisfiableError struct {
        AvailableTypes []arvados.InstanceType
 }
 
+var pdhRegexp = regexp.MustCompile(`^[0-9a-f]{32}\+(\d+)$`)
+
+// estimateDockerImageSize estimates how much disk space will be used
+// by a Docker image, given the PDH of a collection containing a
+// Docker image that was created by "arv-keepdocker".  Returns
+// estimated number of bytes of disk space that should be reserved.
+func estimateDockerImageSize(collectionPDH string) int64 {
+       m := pdhRegexp.FindStringSubmatch(collectionPDH)
+       if m == nil {
+               return 0
+       }
+       n, err := strconv.ParseInt(m[1], 10, 64)
+       if err != nil || n < 122 {
+               return 0
+       }
+       // To avoid having to fetch the collection, take advantage of
+       // the fact that the manifest storing a container image
+       // uploaded by arv-keepdocker has a predictable format, which
+       // allows us to estimate the size of the image based on just
+       // the size of the manifest.
+       //
+       // Use the following heuristic:
+       // - Start with the length of the manifest (n)
+       // - Subtract 80 characters for the filename and file segment
+       // - Divide by 42 to get the number of block identifiers ('hash\+size\ ' is 32+1+8+1)
+       // - Assume each block is full, multiply by 64 MiB
+       return ((n - 80) / 42) * (64 * 1024 * 1024)
+}
+
+// EstimateScratchSpace estimates how much available disk space (in
+// bytes) is needed to run the container by summing the capacity
+// requested by 'tmp' mounts plus disk space required to load the
+// Docker image.
+func EstimateScratchSpace(ctr *arvados.Container) (needScratch int64) {
+       for _, m := range ctr.Mounts {
+               if m.Kind == "tmp" {
+                       needScratch += m.Capacity
+               }
+       }
+
+       // Account for disk space used by Docker, assuming the following behavior:
+       // - Layer tarballs are buffered to disk during "docker load".
+       // - Individual layer tarballs are extracted from the buffered
+       // copy to the filesystem.
+       dockerImageSize := estimateDockerImageSize(ctr.ContainerImage)
+
+       // The buffer is only needed during image load, so make sure
+       // the baseline scratch space at least covers dockerImageSize,
+       // and assume it will be released to the job afterwards.
+       if needScratch < dockerImageSize {
+               needScratch = dockerImageSize
+       }
+
+       // Now reserve space for the extracted image on disk.
+       needScratch += dockerImageSize
+
+       return
+}
+
 // ChooseInstanceType returns the cheapest available
 // arvados.InstanceType big enough to run ctr.
 func ChooseInstanceType(cc *arvados.Cluster, ctr *arvados.Container) (best arvados.InstanceType, err error) {
@@ -35,12 +91,7 @@ func ChooseInstanceType(cc *arvados.Cluster, ctr *arvados.Container) (best arvad
                return
        }
 
-       needScratch := int64(0)
-       for _, m := range ctr.Mounts {
-               if m.Kind == "tmp" {
-                       needScratch += m.Capacity
-               }
-       }
+       needScratch := EstimateScratchSpace(ctr)
 
        needVCPUs := ctr.RuntimeConstraints.VCPUs
 
@@ -79,61 +130,3 @@ func ChooseInstanceType(cc *arvados.Cluster, ctr *arvados.Container) (best arvad
        }
        return
 }
-
-// SlurmNodeTypeFeatureKludge ensures SLURM accepts every instance
-// type name as a valid feature name, even if no instances of that
-// type have appeared yet.
-//
-// It takes advantage of some SLURM peculiarities:
-//
-// (1) A feature is valid after it has been offered by a node, even if
-// it is no longer offered by any node. So, to make a feature name
-// valid, we can add it to a dummy node ("compute0"), then remove it.
-//
-// (2) To test whether a set of feature names are valid without
-// actually submitting a job, we can call srun --test-only with the
-// desired features.
-//
-// SlurmNodeTypeFeatureKludge does a test-and-fix operation
-// immediately, and then periodically, in case slurm restarts and
-// forgets the list of valid features. It never returns (unless there
-// are no node types configured, in which case it returns
-// immediately), so it should generally be invoked with "go".
-func SlurmNodeTypeFeatureKludge(cc *arvados.Cluster) {
-       if len(cc.InstanceTypes) == 0 {
-               return
-       }
-       var features []string
-       for _, it := range cc.InstanceTypes {
-               features = append(features, "instancetype="+it.Name)
-       }
-       for {
-               slurmKludge(features)
-               time.Sleep(2 * time.Second)
-       }
-}
-
-const slurmDummyNode = "compute0"
-
-func slurmKludge(features []string) {
-       allFeatures := strings.Join(features, ",")
-
-       cmd := exec.Command("sinfo", "--nodes="+slurmDummyNode, "--format=%f", "--noheader")
-       out, err := cmd.CombinedOutput()
-       if err != nil {
-               log.Printf("running %q %q: %s (output was %q)", cmd.Path, cmd.Args, err, out)
-               return
-       }
-       if string(out) == allFeatures+"\n" {
-               // Already configured correctly, nothing to do.
-               return
-       }
-
-       log.Printf("configuring node %q with all node type features", slurmDummyNode)
-       cmd = exec.Command("scontrol", "update", "NodeName="+slurmDummyNode, "Features="+allFeatures)
-       log.Printf("running: %q %q", cmd.Path, cmd.Args)
-       out, err = cmd.CombinedOutput()
-       if err != nil {
-               log.Printf("error: scontrol: %s (output was %q)", err, out)
-       }
-}
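
As a worked example of the heuristic above (it matches the test case that follows): for a Docker image collection with PDH d5025c0f29f6eef304a7358afa82a822+342, the manifest length n is 342, so the image estimate is ((342 - 80) / 42) * 64 MiB = 6 * 64 MiB = 402653184 bytes (integer division). EstimateScratchSpace then reserves twice that amount when the container has no "tmp" mounts: 402653184 bytes for the load buffer plus 402653184 bytes for the extracted image, 805306368 bytes in total.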
index 91c6bb1049fb381d9070e747b1f076eec2f95dbc..eef86f74775134b4c6b0848d0b2897c5d47bef29 100644 (file)
@@ -119,3 +119,25 @@ func (*NodeSizeSuite) TestChoosePreemptable(c *check.C) {
        c.Check(best.Scratch >= 2*GiB, check.Equals, true)
        c.Check(best.Preemptible, check.Equals, true)
 }
+
+func (*NodeSizeSuite) TestScratchForDockerImage(c *check.C) {
+       n := EstimateScratchSpace(&arvados.Container{
+               ContainerImage: "d5025c0f29f6eef304a7358afa82a822+342",
+       })
+       // Actual image is 371.1 MiB (according to workbench)
+       // Estimated size is 384 MiB (402653184 bytes)
+       // Want to reserve 2x the estimated size, so 805306368 bytes
+       c.Check(n, check.Equals, int64(805306368))
+
+       n = EstimateScratchSpace(&arvados.Container{
+               ContainerImage: "d5025c0f29f6eef304a7358afa82a822+-342",
+       })
+       // Parse error will return 0
+       c.Check(n, check.Equals, int64(0))
+
+       n = EstimateScratchSpace(&arvados.Container{
+               ContainerImage: "d5025c0f29f6eef304a7358afa82a822+34",
+       })
+       // Short manifest will return 0
+       c.Check(n, check.Equals, int64(0))
+}
diff --git a/lib/dispatchcloud/readme.go b/lib/dispatchcloud/readme.go
new file mode 100644 (file)
index 0000000..c8491fb
--- /dev/null
@@ -0,0 +1,70 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package dispatchcloud
+
+// A dispatcher comprises a container queue, a scheduler, a worker
+// pool, a remote command executor, and a cloud driver.
+// 1. Choose a provider.
+// 2. Start a worker pool.
+// 3. Start a container queue.
+// 4. Run the scheduler's stale-lock fixer.
+// 5. Run the scheduler's mapper.
+// 6. Run the scheduler's syncer.
+// 7. Wait for updates to the container queue or worker pool.
+// 8. Repeat from 5.
+//
+//
+// A cloud driver creates new cloud VM instances and gets the latest
+// list of instances. The returned instances are caches/proxies for
+// the provider's metadata and control interfaces (get IP address,
+// update tags, shutdown).
+//
+//
+// A worker pool tracks workers' instance types and readiness states
+// (available to do work now, booting, suffering a temporary network
+// outage, shutting down). It loads internal state from the cloud
+// provider's list of instances at startup, and syncs periodically
+// after that.
+//
+//
+// An executor maintains a multiplexed SSH connection to a cloud
+// instance, retrying/reconnecting as needed, so the worker pool can
+// execute commands. It asks the cloud driver's instance to verify its
+// SSH public key once when first connecting, and again later if the
+// key changes.
+//
+//
+// A container queue tracks the known state (according to
+// arvados-controller) of each container of interest -- i.e., queued,
+// or locked/running using our own dispatch token. It also proxies the
+// dispatcher's lock/unlock/cancel requests to the controller. It
+// handles concurrent refresh and update operations without exposing
+// out-of-order updates to its callers. (It drops any new information
+// that might have originated before its own most recent
+// lock/unlock/cancel operation.)
+//
+//
+// The scheduler's stale-lock fixer waits for any already-locked
+// containers (i.e., locked by a prior dispatcher process) to appear
+// on workers as the worker pool recovers its state. It
+// unlocks/requeues any that still remain when all workers are
+// recovered or shut down, or its timer expires.
+//
+//
+// The scheduler's mapper chooses which containers to assign to which
+// idle workers, and decides what to do when there are not enough idle
+// workers (including shutting down some idle nodes).
+//
+//
+// The scheduler's syncer updates state to Cancelled when a running
+// container process dies without finalizing its entry in the
+// controller database. It also calls the worker pool to kill
+// containers that have priority=0 while locked or running.
+//
+//
+// An instance set proxy wraps a driver's instance set with
+// rate-limiting logic. After the wrapped instance set receives a
+// cloud.RateLimitError, the proxy starts returning errors to callers
+// immediately without calling through to the wrapped instance set.
diff --git a/lib/dispatchcloud/scheduler/fix_stale_locks.go b/lib/dispatchcloud/scheduler/fix_stale_locks.go
new file mode 100644 (file)
index 0000000..264f9e4
--- /dev/null
@@ -0,0 +1,57 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package scheduler
+
+import (
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+// fixStaleLocks waits for any already-locked containers (i.e., locked
+// by a prior dispatcher process) to appear on workers as the worker
+// pool recovers its state. It unlocks any that still remain when all
+// workers are recovered or shut down, or its timer
+// (sch.staleLockTimeout) expires.
+func (sch *Scheduler) fixStaleLocks() {
+       wp := sch.pool.Subscribe()
+       defer sch.pool.Unsubscribe(wp)
+       timeout := time.NewTimer(sch.staleLockTimeout)
+waiting:
+       for {
+               unlock := false
+               select {
+               case <-wp:
+                       // If all workers have been contacted, unlock
+                       // containers that aren't claimed by any
+                       // worker.
+                       unlock = sch.pool.CountWorkers()[worker.StateUnknown] == 0
+               case <-timeout.C:
+                       // Give up and unlock the containers, even
+                       // though they might be working.
+                       unlock = true
+               }
+
+               running := sch.pool.Running()
+               qEntries, _ := sch.queue.Entries()
+               for uuid, ent := range qEntries {
+                       if ent.Container.State != arvados.ContainerStateLocked {
+                               continue
+                       }
+                       if _, running := running[uuid]; running {
+                               continue
+                       }
+                       if !unlock {
+                               continue waiting
+                       }
+                       err := sch.queue.Unlock(uuid)
+                       if err != nil {
+                               sch.logger.Warnf("Unlock %s: %s", uuid, err)
+                       }
+               }
+               return
+       }
+}
diff --git a/lib/dispatchcloud/scheduler/gocheck_test.go b/lib/dispatchcloud/scheduler/gocheck_test.go
new file mode 100644 (file)
index 0000000..558c60f
--- /dev/null
@@ -0,0 +1,16 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package scheduler
+
+import (
+       "testing"
+
+       check "gopkg.in/check.v1"
+)
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       check.TestingT(t)
+}
diff --git a/lib/dispatchcloud/scheduler/interfaces.go b/lib/dispatchcloud/scheduler/interfaces.go
new file mode 100644 (file)
index 0000000..59700c3
--- /dev/null
@@ -0,0 +1,43 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package scheduler
+
+import (
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/container"
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+// A ContainerQueue is a set of containers that need to be started or
+// stopped. Implemented by container.Queue and test stubs.
+type ContainerQueue interface {
+       Entries() (entries map[string]container.QueueEnt, updated time.Time)
+       Lock(uuid string) error
+       Unlock(uuid string) error
+       Cancel(uuid string) error
+       Forget(uuid string)
+       Get(uuid string) (arvados.Container, bool)
+       Subscribe() <-chan struct{}
+       Unsubscribe(<-chan struct{})
+       Update() error
+}
+
+// A WorkerPool asynchronously starts and stops worker VMs, and starts
+// and stops containers on them. Implemented by worker.Pool and test
+// stubs.
+type WorkerPool interface {
+       Running() map[string]time.Time
+       Unallocated() map[arvados.InstanceType]int
+       CountWorkers() map[worker.State]int
+       AtQuota() bool
+       Create(arvados.InstanceType) error
+       Shutdown(arvados.InstanceType) bool
+       StartContainer(arvados.InstanceType, arvados.Container) bool
+       KillContainer(uuid string)
+       Subscribe() <-chan struct{}
+       Unsubscribe(<-chan struct{})
+}
diff --git a/lib/dispatchcloud/scheduler/run_queue.go b/lib/dispatchcloud/scheduler/run_queue.go
new file mode 100644 (file)
index 0000000..ece8e3d
--- /dev/null
@@ -0,0 +1,165 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package scheduler
+
+import (
+       "sort"
+
+       "git.curoverse.com/arvados.git/lib/cloud"
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/container"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/Sirupsen/logrus"
+)
+
+func (sch *Scheduler) runQueue() {
+       unsorted, _ := sch.queue.Entries()
+       sorted := make([]container.QueueEnt, 0, len(unsorted))
+       for _, ent := range unsorted {
+               sorted = append(sorted, ent)
+       }
+       sort.Slice(sorted, func(i, j int) bool {
+               return sorted[i].Container.Priority > sorted[j].Container.Priority
+       })
+
+       running := sch.pool.Running()
+       unalloc := sch.pool.Unallocated()
+
+       sch.logger.WithFields(logrus.Fields{
+               "Containers": len(sorted),
+               "Processes":  len(running),
+       }).Debug("runQueue")
+
+       dontstart := map[arvados.InstanceType]bool{}
+       var overquota []container.QueueEnt // entries that are unmappable because of worker pool quota
+
+tryrun:
+       for i, ctr := range sorted {
+               ctr, it := ctr.Container, ctr.InstanceType
+               logger := sch.logger.WithFields(logrus.Fields{
+                       "ContainerUUID": ctr.UUID,
+                       "InstanceType":  it.Name,
+               })
+               if _, running := running[ctr.UUID]; running || ctr.Priority < 1 {
+                       continue
+               }
+               switch ctr.State {
+               case arvados.ContainerStateQueued:
+                       if unalloc[it] < 1 && sch.pool.AtQuota() {
+                               logger.Debug("not locking: AtQuota and no unalloc workers")
+                               overquota = sorted[i:]
+                               break tryrun
+                       }
+                       sch.bgLock(logger, ctr.UUID)
+                       unalloc[it]--
+               case arvados.ContainerStateLocked:
+                       if unalloc[it] > 0 {
+                               unalloc[it]--
+                       } else if sch.pool.AtQuota() {
+                               logger.Debug("not starting: AtQuota and no unalloc workers")
+                               overquota = sorted[i:]
+                               break tryrun
+                       } else {
+                               logger.Info("creating new instance")
+                               err := sch.pool.Create(it)
+                               if err != nil {
+                                       if _, ok := err.(cloud.QuotaError); !ok {
+                                               logger.WithError(err).Warn("error creating worker")
+                                       }
+                                       sch.queue.Unlock(ctr.UUID)
+                                       // Don't let lower-priority
+                                       // containers starve this one
+                                       // by keeping idle workers
+                                       // alive on different
+                                       // instance types.  TODO:
+                                       // avoid getting starved here
+                                       // if instances of a specific
+                                       // type always fail.
+                                       overquota = sorted[i:]
+                                       break tryrun
+                               }
+                       }
+
+                       if dontstart[it] {
+                               // We already tried & failed to start
+                               // a higher-priority container on the
+                               // same instance type. Don't let this
+                               // one sneak in ahead of it.
+                       } else if sch.pool.StartContainer(it, ctr) {
+                               // Success.
+                       } else {
+                               dontstart[it] = true
+                       }
+               }
+       }
+
+       if len(overquota) > 0 {
+               // Unlock any containers that are unmappable while
+               // we're at quota.
+               for _, ctr := range overquota {
+                       ctr := ctr.Container
+                       if ctr.State == arvados.ContainerStateLocked {
+                               logger := sch.logger.WithField("ContainerUUID", ctr.UUID)
+                               logger.Debug("unlock because pool capacity is used by higher priority containers")
+                               err := sch.queue.Unlock(ctr.UUID)
+                               if err != nil {
+                                       logger.WithError(err).Warn("error unlocking")
+                               }
+                       }
+               }
+               // Shut down idle workers that didn't get any
+               // containers mapped onto them before we hit quota.
+               for it, n := range unalloc {
+                       if n < 1 {
+                               continue
+                       }
+                       sch.pool.Shutdown(it)
+               }
+       }
+}
+
+// Start an API call to lock the given container, and return
+// immediately while waiting for the response in a new goroutine. Do
+// nothing if a lock request is already in progress for this
+// container.
+func (sch *Scheduler) bgLock(logger logrus.FieldLogger, uuid string) {
+       logger.Debug("locking")
+       sch.mtx.Lock()
+       defer sch.mtx.Unlock()
+       if sch.locking[uuid] {
+               logger.Debug("locking in progress, doing nothing")
+               return
+       }
+       if ctr, ok := sch.queue.Get(uuid); !ok || ctr.State != arvados.ContainerStateQueued {
+               // This happens if the container has been cancelled or
+               // locked since runQueue called sch.queue.Entries(),
+               // possibly by a bgLock() call from a previous
+               // runQueue iteration. In any case, we will respond
+               // appropriately on the next runQueue iteration, which
+               // will have already been triggered by the queue
+               // update.
+               logger.WithField("State", ctr.State).Debug("container no longer queued by the time we decided to lock it, doing nothing")
+               return
+       }
+       sch.locking[uuid] = true
+       go func() {
+               defer func() {
+                       sch.mtx.Lock()
+                       defer sch.mtx.Unlock()
+                       delete(sch.locking, uuid)
+               }()
+               err := sch.queue.Lock(uuid)
+               if err != nil {
+                       logger.WithError(err).Warn("error locking container")
+                       return
+               }
+               logger.Debug("lock succeeded")
+               ctr, ok := sch.queue.Get(uuid)
+               if !ok {
+                       logger.Error("(BUG?) container disappeared from queue after Lock succeeded")
+               } else if ctr.State != arvados.ContainerStateLocked {
+                       logger.Warnf("(race?) container has state=%q after Lock succeeded", ctr.State)
+               }
+       }()
+}
diff --git a/lib/dispatchcloud/scheduler/run_queue_test.go b/lib/dispatchcloud/scheduler/run_queue_test.go
new file mode 100644 (file)
index 0000000..be13e1c
--- /dev/null
@@ -0,0 +1,318 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package scheduler
+
+import (
+       "errors"
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/test"
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/Sirupsen/logrus"
+       check "gopkg.in/check.v1"
+)
+
+var (
+       logger = logrus.StandardLogger()
+
+       // arbitrary example container UUIDs
+       uuids = func() (r []string) {
+               for i := 0; i < 16; i++ {
+                       r = append(r, test.ContainerUUID(i))
+               }
+               return
+       }()
+)
+
+type stubQuotaError struct {
+       error
+}
+
+func (stubQuotaError) IsQuotaError() bool { return true }
+
+type stubPool struct {
+       notify    <-chan struct{}
+       unalloc   map[arvados.InstanceType]int // idle+booting+unknown
+       idle      map[arvados.InstanceType]int
+       running   map[string]time.Time
+       atQuota   bool
+       canCreate int
+       creates   []arvados.InstanceType
+       starts    []string
+       shutdowns int
+}
+
+func (p *stubPool) AtQuota() bool                 { return p.atQuota }
+func (p *stubPool) Subscribe() <-chan struct{}    { return p.notify }
+func (p *stubPool) Unsubscribe(<-chan struct{})   {}
+func (p *stubPool) Running() map[string]time.Time { return p.running }
+func (p *stubPool) Unallocated() map[arvados.InstanceType]int {
+       r := map[arvados.InstanceType]int{}
+       for it, n := range p.unalloc {
+               r[it] = n
+       }
+       return r
+}
+func (p *stubPool) Create(it arvados.InstanceType) error {
+       p.creates = append(p.creates, it)
+       if p.canCreate < 1 {
+               return stubQuotaError{errors.New("quota")}
+       }
+       p.canCreate--
+       p.unalloc[it]++
+       return nil
+}
+func (p *stubPool) KillContainer(uuid string) {
+       p.running[uuid] = time.Now()
+}
+func (p *stubPool) Shutdown(arvados.InstanceType) bool {
+       p.shutdowns++
+       return false
+}
+func (p *stubPool) CountWorkers() map[worker.State]int {
+       return map[worker.State]int{
+               worker.StateBooting: len(p.unalloc) - len(p.idle),
+               worker.StateIdle:    len(p.idle),
+               worker.StateRunning: len(p.running),
+       }
+}
+func (p *stubPool) StartContainer(it arvados.InstanceType, ctr arvados.Container) bool {
+       p.starts = append(p.starts, ctr.UUID)
+       if p.idle[it] == 0 {
+               return false
+       }
+       p.idle[it]--
+       p.unalloc[it]--
+       p.running[ctr.UUID] = time.Time{}
+       return true
+}
+
+var _ = check.Suite(&SchedulerSuite{})
+
+type SchedulerSuite struct{}
+
+// Assign priority=4 container to idle node. Create a new instance for
+// the priority=3 container. Don't try to start any priority<3
+// containers because priority=3 container didn't start
+// immediately. Don't try to create any other nodes after the failed
+// create.
+func (*SchedulerSuite) TestUseIdleWorkers(c *check.C) {
+       queue := test.Queue{
+               ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
+                       return test.InstanceType(ctr.RuntimeConstraints.VCPUs), nil
+               },
+               Containers: []arvados.Container{
+                       {
+                               UUID:     test.ContainerUUID(1),
+                               Priority: 1,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 1,
+                                       RAM:   1 << 30,
+                               },
+                       },
+                       {
+                               UUID:     test.ContainerUUID(2),
+                               Priority: 2,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 1,
+                                       RAM:   1 << 30,
+                               },
+                       },
+                       {
+                               UUID:     test.ContainerUUID(3),
+                               Priority: 3,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 1,
+                                       RAM:   1 << 30,
+                               },
+                       },
+                       {
+                               UUID:     test.ContainerUUID(4),
+                               Priority: 4,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 1,
+                                       RAM:   1 << 30,
+                               },
+                       },
+               },
+       }
+       queue.Update()
+       pool := stubPool{
+               unalloc: map[arvados.InstanceType]int{
+                       test.InstanceType(1): 1,
+                       test.InstanceType(2): 2,
+               },
+               idle: map[arvados.InstanceType]int{
+                       test.InstanceType(1): 1,
+                       test.InstanceType(2): 2,
+               },
+               running:   map[string]time.Time{},
+               canCreate: 0,
+       }
+       New(logger, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
+       c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(1)})
+       c.Check(pool.starts, check.DeepEquals, []string{test.ContainerUUID(4)})
+       c.Check(pool.running, check.HasLen, 1)
+       for uuid := range pool.running {
+               c.Check(uuid, check.Equals, uuids[4])
+       }
+}
+
+// If Create() fails, shut down some nodes, and don't call Create()
+// again.  Don't call Create() at all if AtQuota() is true.
+func (*SchedulerSuite) TestShutdownAtQuota(c *check.C) {
+       for quota := 0; quota < 2; quota++ {
+               c.Logf("quota=%d", quota)
+               shouldCreate := []arvados.InstanceType{}
+               for i := 0; i < quota; i++ {
+                       shouldCreate = append(shouldCreate, test.InstanceType(3))
+               }
+               queue := test.Queue{
+                       ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
+                               return test.InstanceType(ctr.RuntimeConstraints.VCPUs), nil
+                       },
+                       Containers: []arvados.Container{
+                               {
+                                       UUID:     test.ContainerUUID(2),
+                                       Priority: 2,
+                                       State:    arvados.ContainerStateLocked,
+                                       RuntimeConstraints: arvados.RuntimeConstraints{
+                                               VCPUs: 2,
+                                               RAM:   2 << 30,
+                                       },
+                               },
+                               {
+                                       UUID:     test.ContainerUUID(3),
+                                       Priority: 3,
+                                       State:    arvados.ContainerStateLocked,
+                                       RuntimeConstraints: arvados.RuntimeConstraints{
+                                               VCPUs: 3,
+                                               RAM:   3 << 30,
+                                       },
+                               },
+                       },
+               }
+               queue.Update()
+               pool := stubPool{
+                       atQuota: quota == 0,
+                       unalloc: map[arvados.InstanceType]int{
+                               test.InstanceType(2): 2,
+                       },
+                       idle: map[arvados.InstanceType]int{
+                               test.InstanceType(2): 2,
+                       },
+                       running:   map[string]time.Time{},
+                       creates:   []arvados.InstanceType{},
+                       starts:    []string{},
+                       canCreate: 0,
+               }
+               New(logger, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
+               c.Check(pool.creates, check.DeepEquals, shouldCreate)
+               c.Check(pool.starts, check.DeepEquals, []string{})
+               c.Check(pool.shutdowns, check.Not(check.Equals), 0)
+       }
+}
+
+// Start lower-priority containers while waiting for new/existing
+// workers to come up for higher-priority containers.
+func (*SchedulerSuite) TestStartWhileCreating(c *check.C) {
+       pool := stubPool{
+               unalloc: map[arvados.InstanceType]int{
+                       test.InstanceType(1): 2,
+                       test.InstanceType(2): 2,
+               },
+               idle: map[arvados.InstanceType]int{
+                       test.InstanceType(1): 1,
+                       test.InstanceType(2): 1,
+               },
+               running:   map[string]time.Time{},
+               canCreate: 4,
+       }
+       queue := test.Queue{
+               ChooseType: func(ctr *arvados.Container) (arvados.InstanceType, error) {
+                       return test.InstanceType(ctr.RuntimeConstraints.VCPUs), nil
+               },
+               Containers: []arvados.Container{
+                       {
+                               // create a new worker
+                               UUID:     test.ContainerUUID(1),
+                               Priority: 1,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 1,
+                                       RAM:   1 << 30,
+                               },
+                       },
+                       {
+                               // tentatively map to unalloc worker
+                               UUID:     test.ContainerUUID(2),
+                               Priority: 2,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 1,
+                                       RAM:   1 << 30,
+                               },
+                       },
+                       {
+                               // start now on idle worker
+                               UUID:     test.ContainerUUID(3),
+                               Priority: 3,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 1,
+                                       RAM:   1 << 30,
+                               },
+                       },
+                       {
+                               // create a new worker
+                               UUID:     test.ContainerUUID(4),
+                               Priority: 4,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 2,
+                                       RAM:   2 << 30,
+                               },
+                       },
+                       {
+                               // tentatively map to unalloc worker
+                               UUID:     test.ContainerUUID(5),
+                               Priority: 5,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 2,
+                                       RAM:   2 << 30,
+                               },
+                       },
+                       {
+                               // start now on idle worker
+                               UUID:     test.ContainerUUID(6),
+                               Priority: 6,
+                               State:    arvados.ContainerStateLocked,
+                               RuntimeConstraints: arvados.RuntimeConstraints{
+                                       VCPUs: 2,
+                                       RAM:   2 << 30,
+                               },
+                       },
+               },
+       }
+       queue.Update()
+       New(logger, &queue, &pool, time.Millisecond, time.Millisecond).runQueue()
+       c.Check(pool.creates, check.DeepEquals, []arvados.InstanceType{test.InstanceType(2), test.InstanceType(1)})
+       c.Check(pool.starts, check.DeepEquals, []string{uuids[6], uuids[5], uuids[3], uuids[2]})
+       running := map[string]bool{}
+       for uuid, t := range pool.running {
+               if t.IsZero() {
+                       running[uuid] = false
+               } else {
+                       running[uuid] = true
+               }
+       }
+       c.Check(running, check.DeepEquals, map[string]bool{uuids[3]: false, uuids[6]: false})
+}
diff --git a/lib/dispatchcloud/scheduler/scheduler.go b/lib/dispatchcloud/scheduler/scheduler.go
new file mode 100644 (file)
index 0000000..3971a53
--- /dev/null
@@ -0,0 +1,116 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+// Package scheduler uses a resizable worker pool to execute
+// containers in priority order.
+package scheduler
+
+import (
+       "sync"
+       "time"
+
+       "github.com/Sirupsen/logrus"
+)
+
+// A Scheduler maps queued containers onto unallocated workers in
+// priority order, creating new workers if needed. It locks containers
+// that can be mapped onto existing/pending workers, and starts them
+// if possible.
+//
+// A Scheduler unlocks any containers that are locked but can't be
+// mapped. (For example, this happens when the cloud provider reaches
+// quota/capacity and a previously mappable container's priority is
+// surpassed by a newer container.)
+//
+// If it encounters errors while creating new workers, a Scheduler
+// shuts down idle workers, in case they are consuming quota.
+type Scheduler struct {
+       logger              logrus.FieldLogger
+       queue               ContainerQueue
+       pool                WorkerPool
+       staleLockTimeout    time.Duration
+       queueUpdateInterval time.Duration
+
+       locking map[string]bool
+       mtx     sync.Mutex
+
+       runOnce sync.Once
+       stop    chan struct{}
+       stopped chan struct{}
+}
+
+// New returns a new unstarted Scheduler.
+//
+// Any given queue and pool should not be used by more than one
+// scheduler at a time.
+func New(logger logrus.FieldLogger, queue ContainerQueue, pool WorkerPool, staleLockTimeout, queueUpdateInterval time.Duration) *Scheduler {
+       return &Scheduler{
+               logger:              logger,
+               queue:               queue,
+               pool:                pool,
+               staleLockTimeout:    staleLockTimeout,
+               queueUpdateInterval: queueUpdateInterval,
+               stop:                make(chan struct{}),
+               stopped:             make(chan struct{}),
+               locking:             map[string]bool{},
+       }
+}
+
+// Start starts the scheduler.
+func (sch *Scheduler) Start() {
+       go sch.runOnce.Do(sch.run)
+}
+
+// Stop stops the scheduler. No other method should be called after
+// Stop.
+func (sch *Scheduler) Stop() {
+       close(sch.stop)
+       <-sch.stopped
+}
+
+func (sch *Scheduler) run() {
+       defer close(sch.stopped)
+
+       // Ensure the queue is fetched once before attempting anything.
+       for err := sch.queue.Update(); err != nil; err = sch.queue.Update() {
+               sch.logger.Errorf("error updating queue: %s", err)
+               d := sch.queueUpdateInterval / 60
+               sch.logger.Infof("waiting %s before retry", d)
+               time.Sleep(d)
+       }
+
+       // Keep the queue up to date.
+       poll := time.NewTicker(sch.queueUpdateInterval)
+       defer poll.Stop()
+       go func() {
+               for range poll.C {
+                       err := sch.queue.Update()
+                       if err != nil {
+                               sch.logger.Errorf("error updating queue: %s", err)
+                       }
+               }
+       }()
+
+       t0 := time.Now()
+       sch.logger.Infof("FixStaleLocks starting.")
+       sch.fixStaleLocks()
+       sch.logger.Infof("FixStaleLocks finished (%s), starting scheduling.", time.Since(t0))
+
+       poolNotify := sch.pool.Subscribe()
+       defer sch.pool.Unsubscribe(poolNotify)
+
+       queueNotify := sch.queue.Subscribe()
+       defer sch.queue.Unsubscribe(queueNotify)
+
+       for {
+               sch.runQueue()
+               sch.sync()
+               select {
+               case <-sch.stop:
+                       return
+               case <-queueNotify:
+               case <-poolNotify:
+               }
+       }
+}
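
A minimal sketch of how the Scheduler defined above could be wired up, assuming queue and pool values that satisfy the ContainerQueue and WorkerPool interfaces; the helper name and the timeout values are illustrative, not taken from dispatcher.go:

	import (
		"time"

		"git.curoverse.com/arvados.git/lib/dispatchcloud/scheduler"
		"github.com/Sirupsen/logrus"
	)

	func startScheduler(queue scheduler.ContainerQueue, pool scheduler.WorkerPool) *scheduler.Scheduler {
		sch := scheduler.New(logrus.StandardLogger(), queue, pool,
			time.Minute,    // staleLockTimeout (example value)
			10*time.Second) // queueUpdateInterval (example value)
		sch.Start() // runs fixStaleLocks, then the runQueue/sync loop
		return sch  // caller should call sch.Stop() during shutdown
	}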
diff --git a/lib/dispatchcloud/scheduler/sync.go b/lib/dispatchcloud/scheduler/sync.go
new file mode 100644 (file)
index 0000000..4c55b3c
--- /dev/null
@@ -0,0 +1,97 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package scheduler
+
+import (
+       "fmt"
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/container"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/Sirupsen/logrus"
+)
+
+// sync resolves discrepancies between the queue and the pool:
+//
+// Lingering crunch-run processes for finalized and unlocked/requeued
+// containers are killed.
+//
+// Locked containers whose crunch-run processes have exited are
+// requeued.
+//
+// Running containers whose crunch-run processes have exited are
+// cancelled.
+func (sch *Scheduler) sync() {
+       running := sch.pool.Running()
+       cancel := func(ent container.QueueEnt, reason string) {
+               uuid := ent.Container.UUID
+               logger := sch.logger.WithField("ContainerUUID", uuid)
+               logger.Infof("cancelling container because %s", reason)
+               err := sch.queue.Cancel(uuid)
+               if err != nil {
+                       logger.WithError(err).Print("error cancelling container")
+               }
+       }
+       kill := func(ent container.QueueEnt, reason string) {
+               uuid := ent.Container.UUID
+               logger := sch.logger.WithField("ContainerUUID", uuid)
+               logger.Debugf("killing crunch-run process because %s", reason)
+               sch.pool.KillContainer(uuid)
+       }
+       qEntries, qUpdated := sch.queue.Entries()
+       for uuid, ent := range qEntries {
+               exited, running := running[uuid]
+               switch ent.Container.State {
+               case arvados.ContainerStateRunning:
+                       if !running {
+                               go cancel(ent, "not running on any worker")
+                       } else if !exited.IsZero() && qUpdated.After(exited) {
+                               go cancel(ent, "state=\"Running\" after crunch-run exited")
+                       } else if ent.Container.Priority == 0 {
+                               go kill(ent, fmt.Sprintf("priority=%d", ent.Container.Priority))
+                       }
+               case arvados.ContainerStateComplete, arvados.ContainerStateCancelled:
+                       if running {
+                               // Kill crunch-run in case it's stuck;
+                               // nothing it does now will matter
+                               // anyway. If crunch-run has already
+                               // exited and we just haven't found
+                               // out about it yet, the only effect
+                               // of kill() will be to make the
+                               // worker available for the next
+                               // container.
+                               go kill(ent, fmt.Sprintf("state=%q", ent.Container.State))
+                       } else {
+                               sch.logger.WithFields(logrus.Fields{
+                                       "ContainerUUID": uuid,
+                                       "State":         ent.Container.State,
+                               }).Info("container finished")
+                               sch.queue.Forget(uuid)
+                       }
+               case arvados.ContainerStateQueued:
+                       if running {
+                               // Can happen if a worker returns from
+                               // a network outage and is still
+                               // preparing to run a container that
+                               // has already been unlocked/requeued.
+                               go kill(ent, fmt.Sprintf("state=%q", ent.Container.State))
+                       }
+               case arvados.ContainerStateLocked:
+                       if running && !exited.IsZero() && qUpdated.After(exited) {
+                               logger := sch.logger.WithFields(logrus.Fields{
+                                       "ContainerUUID": uuid,
+                                       "Exited":        time.Since(exited).Seconds(),
+                               })
+                               logger.Infof("requeueing container because state=%q after crunch-run exited", ent.Container.State)
+                               err := sch.queue.Unlock(uuid)
+                               if err != nil {
+                                       logger.WithError(err).Info("error requeueing container")
+                               }
+                       }
+               default:
+                       sch.logger.WithField("ContainerUUID", uuid).Errorf("BUG: unexpected state %q", ent.Container.State)
+               }
+       }
+}
diff --git a/lib/dispatchcloud/ssh_executor/executor.go b/lib/dispatchcloud/ssh_executor/executor.go
new file mode 100644 (file)
index 0000000..b5dba98
--- /dev/null
@@ -0,0 +1,190 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+// Package ssh_executor provides an implementation of pool.Executor
+// using a long-lived multiplexed SSH session.
+package ssh_executor
+
+import (
+       "bytes"
+       "errors"
+       "io"
+       "net"
+       "sync"
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/cloud"
+       "golang.org/x/crypto/ssh"
+)
+
+// New returns a new Executor, using the given target.
+func New(t cloud.ExecutorTarget) *Executor {
+       return &Executor{target: t}
+}
+
+// An Executor uses a multiplexed SSH connection to execute shell
+// commands on a remote target. It reconnects automatically after
+// errors.
+//
+// When setting up a connection, the Executor accepts whatever host
+// key is provided by the remote server, then passes the received key
+// and the SSH connection to the target's VerifyHostKey method before
+// executing commands on the connection.
+//
+// A zero Executor must not be used before calling SetTarget.
+//
+// An Executor must not be copied.
+type Executor struct {
+       target  cloud.ExecutorTarget
+       signers []ssh.Signer
+       mtx     sync.RWMutex // controls access to instance after creation
+
+       client      *ssh.Client
+       clientErr   error
+       clientOnce  sync.Once     // initializes private state
+       clientSetup chan bool     // len>0 while client setup is in progress
+       hostKey     ssh.PublicKey // most recent host key that passed verification, if any
+}
+
+// SetSigners updates the set of private keys that will be offered to
+// the target next time the Executor sets up a new connection.
+func (exr *Executor) SetSigners(signers ...ssh.Signer) {
+       exr.mtx.Lock()
+       defer exr.mtx.Unlock()
+       exr.signers = signers
+}
+
+// SetTarget sets the current target. The new target will be used next
+// time a new connection is set up; until then, the Executor will
+// continue to use the existing target.
+//
+// The new target is assumed to represent the same host as the
+// previous target, although its address and host key might differ.
+func (exr *Executor) SetTarget(t cloud.ExecutorTarget) {
+       exr.mtx.Lock()
+       defer exr.mtx.Unlock()
+       exr.target = t
+}
+
+// Target returns the current target.
+func (exr *Executor) Target() cloud.ExecutorTarget {
+       exr.mtx.RLock()
+       defer exr.mtx.RUnlock()
+       return exr.target
+}
+
+// Execute runs cmd on the target. If an existing connection is not
+// usable, it sets up a new connection to the current target.
+func (exr *Executor) Execute(cmd string, stdin io.Reader) ([]byte, []byte, error) {
+       session, err := exr.newSession()
+       if err != nil {
+               return nil, nil, err
+       }
+       defer session.Close()
+       var stdout, stderr bytes.Buffer
+       session.Stdin = stdin
+       session.Stdout = &stdout
+       session.Stderr = &stderr
+       err = session.Run(cmd)
+       return stdout.Bytes(), stderr.Bytes(), err
+}
+
+// Close shuts down any active connections.
+func (exr *Executor) Close() {
+       // Ensure exr is initialized
+       exr.sshClient(false)
+
+       exr.clientSetup <- true
+       if exr.client != nil {
+               defer exr.client.Close()
+       }
+       exr.client, exr.clientErr = nil, errors.New("closed")
+       <-exr.clientSetup
+}
+
+// Create a new SSH session. If session setup fails or the SSH client
+// hasn't been set up yet, set up a new SSH client and try again.
+func (exr *Executor) newSession() (*ssh.Session, error) {
+       try := func(create bool) (*ssh.Session, error) {
+               client, err := exr.sshClient(create)
+               if err != nil {
+                       return nil, err
+               }
+               return client.NewSession()
+       }
+       session, err := try(false)
+       if err != nil {
+               session, err = try(true)
+       }
+       return session, err
+}
+
+// Get the latest SSH client. If another goroutine is in the process
+// of setting one up, wait for it to finish and return its result (or
+// the last client that was set up successfully, if the new one fails).
+func (exr *Executor) sshClient(create bool) (*ssh.Client, error) {
+       exr.clientOnce.Do(func() {
+               exr.clientSetup = make(chan bool, 1)
+               exr.clientErr = errors.New("client not yet created")
+       })
+       defer func() { <-exr.clientSetup }()
+       select {
+       case exr.clientSetup <- true:
+               if create {
+                       client, err := exr.setupSSHClient()
+                       if err == nil || exr.client == nil {
+                               if exr.client != nil {
+                                       // Hang up the previous
+                                       // (non-working) client
+                                       go exr.client.Close()
+                               }
+                               exr.client, exr.clientErr = client, err
+                       }
+                       if err != nil {
+                               return nil, err
+                       }
+               }
+       default:
+               // Another goroutine is doing the above case.  Wait
+               // for it to finish and return whatever it leaves in
+               // exr.client.
+               exr.clientSetup <- true
+       }
+       return exr.client, exr.clientErr
+}
+
+// Create a new SSH client.
+func (exr *Executor) setupSSHClient() (*ssh.Client, error) {
+       target := exr.Target()
+       addr := target.Address()
+       if addr == "" {
+               return nil, errors.New("instance has no address")
+       }
+       var receivedKey ssh.PublicKey
+       client, err := ssh.Dial("tcp", addr, &ssh.ClientConfig{
+               User: "root",
+               Auth: []ssh.AuthMethod{
+                       ssh.PublicKeys(exr.signers...),
+               },
+               HostKeyCallback: func(hostname string, remote net.Addr, key ssh.PublicKey) error {
+                       receivedKey = key
+                       return nil
+               },
+               Timeout: time.Minute,
+       })
+       if err != nil {
+               return nil, err
+       } else if receivedKey == nil {
+               return nil, errors.New("BUG: key was never provided to HostKeyCallback")
+       }
+
+       if exr.hostKey == nil || !bytes.Equal(exr.hostKey.Marshal(), receivedKey.Marshal()) {
+               err = target.VerifyHostKey(receivedKey, client)
+               if err != nil {
+                       return nil, err
+               }
+               exr.hostKey = receivedKey
+       }
+       return client, nil
+}
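
The subtlest part of executor.go is sshClient's use of the one-element clientSetup channel as a try-lock that also signals "setup in progress", layered on sync.Once for first-time initialization. The following standalone sketch reproduces that pattern under illustrative names (lazyClient, get, build are not part of the patch); it is a sketch of the idea, not the shipped implementation.

```go
package main

import (
	"errors"
	"fmt"
	"sync"
)

// lazyClient caches an expensively-built value. At most one goroutine
// rebuilds it at a time; concurrent callers wait and reuse whatever
// the builder leaves behind -- the same one-slot-channel pattern used
// by Executor.sshClient.
type lazyClient struct {
	initOnce sync.Once
	setup    chan bool // len>0 while a rebuild is in progress
	value    string
	err      error
}

func (lc *lazyClient) get(rebuild bool, build func() (string, error)) (string, error) {
	lc.initOnce.Do(func() {
		lc.setup = make(chan bool, 1)
		lc.err = errors.New("not yet created")
	})
	defer func() { <-lc.setup }()
	select {
	case lc.setup <- true:
		// We hold the setup slot; rebuild if asked to.
		if rebuild {
			v, err := build()
			if err == nil || lc.value == "" {
				lc.value, lc.err = v, err
			}
			if err != nil {
				return "", err
			}
		}
	default:
		// Another goroutine holds the slot. Block until it is
		// done, then return whatever it left behind.
		lc.setup <- true
	}
	return lc.value, lc.err
}

func main() {
	var lc lazyClient
	fmt.Println(lc.get(false, nil))                                              // "" / "not yet created"
	fmt.Println(lc.get(true, func() (string, error) { return "client-1", nil })) // "client-1" / <nil>
}
```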
diff --git a/lib/dispatchcloud/ssh_executor/executor_test.go b/lib/dispatchcloud/ssh_executor/executor_test.go
new file mode 100644 (file)
index 0000000..8dabfec
--- /dev/null
@@ -0,0 +1,102 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package ssh_executor
+
+import (
+       "bytes"
+       "io"
+       "io/ioutil"
+       "sync"
+       "testing"
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/test"
+       "golang.org/x/crypto/ssh"
+       check "gopkg.in/check.v1"
+)
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       check.TestingT(t)
+}
+
+var _ = check.Suite(&ExecutorSuite{})
+
+type testTarget struct {
+       test.SSHService
+}
+
+func (*testTarget) VerifyHostKey(ssh.PublicKey, *ssh.Client) error {
+       return nil
+}
+
+type ExecutorSuite struct{}
+
+func (s *ExecutorSuite) TestExecute(c *check.C) {
+       command := `foo 'bar' "baz"`
+       stdinData := "foobar\nbaz\n"
+       _, hostpriv := test.LoadTestKey(c, "../test/sshkey_vm")
+       clientpub, clientpriv := test.LoadTestKey(c, "../test/sshkey_dispatch")
+       for _, exitcode := range []int{0, 1, 2} {
+               srv := &testTarget{
+                       SSHService: test.SSHService{
+                               Exec: func(cmd string, stdin io.Reader, stdout, stderr io.Writer) uint32 {
+                                       c.Check(cmd, check.Equals, command)
+                                       var wg sync.WaitGroup
+                                       wg.Add(2)
+                                       go func() {
+                                               io.WriteString(stdout, "stdout\n")
+                                               wg.Done()
+                                       }()
+                                       go func() {
+                                               io.WriteString(stderr, "stderr\n")
+                                               wg.Done()
+                                       }()
+                                       buf, err := ioutil.ReadAll(stdin)
+                                       wg.Wait()
+                                       c.Check(err, check.IsNil)
+                                       if err != nil {
+                                               return 99
+                                       }
+                                       _, err = stdout.Write(buf)
+                                       c.Check(err, check.IsNil)
+                                       return uint32(exitcode)
+                               },
+                               HostKey:        hostpriv,
+                               AuthorizedKeys: []ssh.PublicKey{clientpub},
+                       },
+               }
+               err := srv.Start()
+               c.Check(err, check.IsNil)
+               c.Logf("srv address %q", srv.Address())
+               defer srv.Close()
+
+               exr := New(srv)
+               exr.SetSigners(clientpriv)
+
+               done := make(chan bool)
+               go func() {
+                       stdout, stderr, err := exr.Execute(command, bytes.NewBufferString(stdinData))
+                       if exitcode == 0 {
+                               c.Check(err, check.IsNil)
+                       } else {
+                               c.Check(err, check.NotNil)
+                               err, ok := err.(*ssh.ExitError)
+                               c.Assert(ok, check.Equals, true)
+                               c.Check(err.ExitStatus(), check.Equals, exitcode)
+                       }
+                       c.Check(stdout, check.DeepEquals, []byte("stdout\n"+stdinData))
+                       c.Check(stderr, check.DeepEquals, []byte("stderr\n"))
+                       close(done)
+               }()
+
+               timeout := time.NewTimer(time.Second)
+               select {
+               case <-done:
+               case <-timeout.C:
+                       c.Fatal("timed out")
+               }
+       }
+}
diff --git a/lib/dispatchcloud/test/doc.go b/lib/dispatchcloud/test/doc.go
new file mode 100644 (file)
index 0000000..12f3b16
--- /dev/null
@@ -0,0 +1,7 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+// Package test provides fakes and other tools for testing cloud
+// drivers and other dispatcher modules.
+package test
diff --git a/lib/dispatchcloud/test/fixtures.go b/lib/dispatchcloud/test/fixtures.go
new file mode 100644 (file)
index 0000000..68bdb3d
--- /dev/null
@@ -0,0 +1,28 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package test
+
+import (
+       "fmt"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+// ContainerUUID returns a fake container UUID.
+func ContainerUUID(i int) string {
+       return fmt.Sprintf("zzzzz-dz642-%015d", i)
+}
+
+// InstanceType returns a fake arvados.InstanceType called "type{i}"
+// with i CPUs and i GiB of memory.
+func InstanceType(i int) arvados.InstanceType {
+       return arvados.InstanceType{
+               Name:         fmt.Sprintf("type%d", i),
+               ProviderType: fmt.Sprintf("providertype%d", i),
+               VCPUs:        i,
+               RAM:          arvados.ByteSize(i) << 30,
+               Price:        float64(i) * 0.123,
+       }
+}
diff --git a/lib/dispatchcloud/test/lame_instance_set.go b/lib/dispatchcloud/test/lame_instance_set.go
new file mode 100644 (file)
index 0000000..baab407
--- /dev/null
@@ -0,0 +1,118 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package test
+
+import (
+       "fmt"
+       "math/rand"
+       "sync"
+
+       "git.curoverse.com/arvados.git/lib/cloud"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "golang.org/x/crypto/ssh"
+)
+
+// LameInstanceSet creates instances that boot but can't run
+// containers.
+type LameInstanceSet struct {
+       Hold chan bool // set to make(chan bool) to hold operations until Release is called
+
+       mtx       sync.Mutex
+       instances map[*lameInstance]bool
+}
+
+// Create returns a new instance.
+func (p *LameInstanceSet) Create(instType arvados.InstanceType, imageID cloud.ImageID, tags cloud.InstanceTags, pubkey ssh.PublicKey) (cloud.Instance, error) {
+       inst := &lameInstance{
+               p:            p,
+               id:           cloud.InstanceID(fmt.Sprintf("lame-%x", rand.Uint64())),
+               providerType: instType.ProviderType,
+       }
+       inst.SetTags(tags)
+       if p.Hold != nil {
+               p.Hold <- true
+       }
+       p.mtx.Lock()
+       defer p.mtx.Unlock()
+       if p.instances == nil {
+               p.instances = map[*lameInstance]bool{}
+       }
+       p.instances[inst] = true
+       return inst, nil
+}
+
+// Instances returns the instances that haven't been destroyed.
+func (p *LameInstanceSet) Instances(cloud.InstanceTags) ([]cloud.Instance, error) {
+       p.mtx.Lock()
+       defer p.mtx.Unlock()
+       var instances []cloud.Instance
+       for i := range p.instances {
+               instances = append(instances, i)
+       }
+       return instances, nil
+}
+
+// Stop is a no-op, but exists to satisfy cloud.InstanceSet.
+func (p *LameInstanceSet) Stop() {
+}
+
+// Release n held calls. Blocks if n calls aren't already
+// waiting. Blocks forever if Hold is nil.
+func (p *LameInstanceSet) Release(n int) {
+       for i := 0; i < n; i++ {
+               <-p.Hold
+       }
+}
+
+type lameInstance struct {
+       p            *LameInstanceSet
+       id           cloud.InstanceID
+       providerType string
+       tags         cloud.InstanceTags
+}
+
+func (inst *lameInstance) ID() cloud.InstanceID {
+       return inst.id
+}
+
+func (inst *lameInstance) String() string {
+       return fmt.Sprint(inst.id)
+}
+
+func (inst *lameInstance) ProviderType() string {
+       return inst.providerType
+}
+
+func (inst *lameInstance) Address() string {
+       return "0.0.0.0:1234"
+}
+
+func (inst *lameInstance) SetTags(tags cloud.InstanceTags) error {
+       inst.p.mtx.Lock()
+       defer inst.p.mtx.Unlock()
+       inst.tags = cloud.InstanceTags{}
+       for k, v := range tags {
+               inst.tags[k] = v
+       }
+       return nil
+}
+
+func (inst *lameInstance) Destroy() error {
+       if inst.p.Hold != nil {
+               inst.p.Hold <- true
+       }
+       inst.p.mtx.Lock()
+       defer inst.p.mtx.Unlock()
+       delete(inst.p.instances, inst)
+       return nil
+}
+
+func (inst *lameInstance) Tags() cloud.InstanceTags {
+       return inst.tags
+}
+
+func (inst *lameInstance) VerifyHostKey(ssh.PublicKey, *ssh.Client) error {
+       return nil
+}
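
The Hold/Release pair above lets a test suspend Create and Destroy at a known point and resume them deterministically. Below is a minimal sketch of the intended usage; the package name and exampleHold function are illustrative, not part of the patch.

```go
package example

import (
	"git.curoverse.com/arvados.git/lib/cloud"
	"git.curoverse.com/arvados.git/lib/dispatchcloud/test"
)

// exampleHold pauses instance creation at a known point using the Hold
// channel, then lets it proceed with Release.
func exampleHold() ([]cloud.Instance, error) {
	lis := &test.LameInstanceSet{Hold: make(chan bool)}
	done := make(chan struct{})
	go func() {
		// Blocks inside Create until Release(1) is called.
		lis.Create(test.InstanceType(1), cloud.ImageID("fake-image"), nil, nil)
		close(done)
	}()
	lis.Release(1) // allow the held Create call to finish
	<-done
	return lis.Instances(nil) // one lameInstance
}
```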
diff --git a/lib/dispatchcloud/test/queue.go b/lib/dispatchcloud/test/queue.go
new file mode 100644 (file)
index 0000000..e18a2b5
--- /dev/null
@@ -0,0 +1,171 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package test
+
+import (
+       "fmt"
+       "sync"
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/container"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+// Queue is a test stub for container.Queue. The caller specifies the
+// initial queue state.
+type Queue struct {
+       // Containers represent the API server database contents.
+       Containers []arvados.Container
+
+       // ChooseType will be called for each entry in Containers. It
+       // must not be nil.
+       ChooseType func(*arvados.Container) (arvados.InstanceType, error)
+
+       entries     map[string]container.QueueEnt
+       updTime     time.Time
+       subscribers map[<-chan struct{}]chan struct{}
+
+       mtx sync.Mutex
+}
+
+// Entries returns the containers that were queued when Update was
+// last called.
+func (q *Queue) Entries() (map[string]container.QueueEnt, time.Time) {
+       q.mtx.Lock()
+       defer q.mtx.Unlock()
+       updTime := q.updTime
+       r := map[string]container.QueueEnt{}
+       for uuid, ent := range q.entries {
+               r[uuid] = ent
+       }
+       return r, updTime
+}
+
+// Get returns the container from the cached queue, i.e., as it was
+// when Update was last called -- just like a container.Queue does. If
+// the state has been changed (via Lock, Unlock, or Cancel) since the
+// last Update, the updated state is returned.
+func (q *Queue) Get(uuid string) (arvados.Container, bool) {
+       q.mtx.Lock()
+       defer q.mtx.Unlock()
+       ent, ok := q.entries[uuid]
+       return ent.Container, ok
+}
+
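+// Forget removes the container from the cached queue.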
+func (q *Queue) Forget(uuid string) {
+       q.mtx.Lock()
+       defer q.mtx.Unlock()
+       delete(q.entries, uuid)
+}
+
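+// Lock moves the container from Queued to Locked, returning an error
+// if its current state is not Queued.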
+func (q *Queue) Lock(uuid string) error {
+       q.mtx.Lock()
+       defer q.mtx.Unlock()
+       return q.changeState(uuid, arvados.ContainerStateQueued, arvados.ContainerStateLocked)
+}
+
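+// Unlock moves the container from Locked back to Queued, returning an
+// error if its current state is not Locked.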
+func (q *Queue) Unlock(uuid string) error {
+       q.mtx.Lock()
+       defer q.mtx.Unlock()
+       return q.changeState(uuid, arvados.ContainerStateLocked, arvados.ContainerStateQueued)
+}
+
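+// Cancel sets the container's state to Cancelled.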
+func (q *Queue) Cancel(uuid string) error {
+       q.mtx.Lock()
+       defer q.mtx.Unlock()
+       return q.changeState(uuid, q.entries[uuid].Container.State, arvados.ContainerStateCancelled)
+}
+
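+// Subscribe returns a channel that receives a notification whenever
+// the cached queue changes.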
+func (q *Queue) Subscribe() <-chan struct{} {
+       q.mtx.Lock()
+       defer q.mtx.Unlock()
+       if q.subscribers == nil {
+               q.subscribers = map[<-chan struct{}]chan struct{}{}
+       }
+       ch := make(chan struct{}, 1)
+       q.subscribers[ch] = ch
+       return ch
+}
+
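+// Unsubscribe stops sending notifications to the given channel.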
+func (q *Queue) Unsubscribe(ch <-chan struct{}) {
+       q.mtx.Lock()
+       defer q.mtx.Unlock()
+       delete(q.subscribers, ch)
+}
+
+// caller must have lock.
+func (q *Queue) notify() {
+       for _, ch := range q.subscribers {
+               select {
+               case ch <- struct{}{}:
+               default:
+               }
+       }
+}
+
+// caller must have lock.
+func (q *Queue) changeState(uuid string, from, to arvados.ContainerState) error {
+       ent := q.entries[uuid]
+       if ent.Container.State != from {
+               return fmt.Errorf("changeState failed: state=%q", ent.Container.State)
+       }
+       ent.Container.State = to
+       q.entries[uuid] = ent
+       for i, ctr := range q.Containers {
+               if ctr.UUID == uuid {
+                       q.Containers[i].State = to
+                       break
+               }
+       }
+       q.notify()
+       return nil
+}
+
+// Update rebuilds the current entries from the Containers slice.
+func (q *Queue) Update() error {
+       q.mtx.Lock()
+       defer q.mtx.Unlock()
+       updTime := time.Now()
+       upd := map[string]container.QueueEnt{}
+       for _, ctr := range q.Containers {
+               _, exists := q.entries[ctr.UUID]
+               if !exists && (ctr.State == arvados.ContainerStateComplete || ctr.State == arvados.ContainerStateCancelled) {
+                       continue
+               }
+               if ent, ok := upd[ctr.UUID]; ok {
+                       ent.Container = ctr
+                       upd[ctr.UUID] = ent
+               } else {
+                       it, _ := q.ChooseType(&ctr)
+                       upd[ctr.UUID] = container.QueueEnt{
+                               Container:    ctr,
+                               InstanceType: it,
+                       }
+               }
+       }
+       q.entries = upd
+       q.updTime = updTime
+       q.notify()
+       return nil
+}
+
+// Notify adds/updates an entry in the Containers slice.  This
+// simulates the effect of an API update from someone other than the
+// dispatcher -- e.g., crunch-run updating state to "Complete" when a
+// container exits.
+//
+// The resulting changes are not exposed through Get() or Entries()
+// until the next call to Update().
+func (q *Queue) Notify(upd arvados.Container) {
+       q.mtx.Lock()
+       defer q.mtx.Unlock()
+       for i, ctr := range q.Containers {
+               if ctr.UUID == upd.UUID {
+                       q.Containers[i] = upd
+                       return
+               }
+       }
+       q.Containers = append(q.Containers, upd)
+}
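
The distinction between Notify (an out-of-band state change) and Update (refreshing the cached snapshot) mirrors how the real container.Queue sees the API server. A short sketch of the intended call pattern follows; the package name and exampleQueue function are illustrative, not part of the patch.

```go
package example

import (
	"git.curoverse.com/arvados.git/lib/dispatchcloud/test"
	"git.curoverse.com/arvados.git/sdk/go/arvados"
)

// exampleQueue shows the intended call pattern: Notify simulates an
// out-of-band API update, which only becomes visible after Update.
func exampleQueue() {
	q := &test.Queue{
		ChooseType: func(*arvados.Container) (arvados.InstanceType, error) {
			return test.InstanceType(1), nil
		},
		Containers: []arvados.Container{
			{UUID: test.ContainerUUID(1), State: arvados.ContainerStateQueued},
		},
	}
	q.Update() // take the initial snapshot: one Queued entry

	ch := q.Subscribe()
	defer q.Unsubscribe(ch)

	// Simulate crunch-run reporting completion. Get()/Entries() still
	// show the old state until the next Update().
	q.Notify(arvados.Container{UUID: test.ContainerUUID(1), State: arvados.ContainerStateComplete})
	q.Update()
	<-ch // Update changed the cached queue, so subscribers are notified
}
```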
diff --git a/lib/dispatchcloud/test/ssh_service.go b/lib/dispatchcloud/test/ssh_service.go
new file mode 100644 (file)
index 0000000..b1e4e03
--- /dev/null
@@ -0,0 +1,169 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package test
+
+import (
+       "bytes"
+       "fmt"
+       "io"
+       "io/ioutil"
+       "log"
+       "net"
+       "strings"
+       "sync"
+
+       "golang.org/x/crypto/ssh"
+       check "gopkg.in/check.v1"
+)
+
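+// LoadTestKey returns the public key stored in fnm+".pub" and the
+// private key stored in fnm, failing the test if either cannot be
+// read or parsed.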
+func LoadTestKey(c *check.C, fnm string) (ssh.PublicKey, ssh.Signer) {
+       rawpubkey, err := ioutil.ReadFile(fnm + ".pub")
+       c.Assert(err, check.IsNil)
+       pubkey, _, _, _, err := ssh.ParseAuthorizedKey(rawpubkey)
+       c.Assert(err, check.IsNil)
+       rawprivkey, err := ioutil.ReadFile(fnm)
+       c.Assert(err, check.IsNil)
+       privkey, err := ssh.ParsePrivateKey(rawprivkey)
+       c.Assert(err, check.IsNil)
+       return pubkey, privkey
+}
+
+// An SSHExecFunc handles an "exec" session on a multiplexed SSH
+// connection.
+type SSHExecFunc func(command string, stdin io.Reader, stdout, stderr io.Writer) uint32
+
+// An SSHService accepts SSH connections on an available TCP port and
+// passes clients' "exec" sessions to the provided SSHExecFunc.
+type SSHService struct {
+       Exec           SSHExecFunc
+       HostKey        ssh.Signer
+       AuthorizedKeys []ssh.PublicKey
+
+       listener net.Listener
+       conn     *ssh.ServerConn
+       setup    sync.Once
+       mtx      sync.Mutex
+       started  chan bool
+       closed   bool
+       err      error
+}
+
+// Address returns the host:port where the SSH server is listening. It
+// returns "" if called before the server is ready to accept
+// connections.
+func (ss *SSHService) Address() string {
+       ss.setup.Do(ss.start)
+       ss.mtx.Lock()
+       ln := ss.listener
+       ss.mtx.Unlock()
+       if ln == nil {
+               return ""
+       }
+       return ln.Addr().String()
+}
+
+// Close shuts down the server and releases resources. Established
+// connections are unaffected.
+func (ss *SSHService) Close() {
+       ss.Start()
+       ss.mtx.Lock()
+       ln := ss.listener
+       ss.closed = true
+       ss.mtx.Unlock()
+       if ln != nil {
+               ln.Close()
+       }
+}
+
+// Start returns when the server is ready to accept connections.
+func (ss *SSHService) Start() error {
+       ss.setup.Do(ss.start)
+       <-ss.started
+       return ss.err
+}
+
+func (ss *SSHService) start() {
+       ss.started = make(chan bool)
+       go ss.run()
+}
+
+func (ss *SSHService) run() {
+       defer close(ss.started)
+       config := &ssh.ServerConfig{
+               PublicKeyCallback: func(c ssh.ConnMetadata, pubKey ssh.PublicKey) (*ssh.Permissions, error) {
+                       for _, ak := range ss.AuthorizedKeys {
+                               if bytes.Equal(ak.Marshal(), pubKey.Marshal()) {
+                                       return &ssh.Permissions{}, nil
+                               }
+                       }
+                       return nil, fmt.Errorf("unknown public key for %q", c.User())
+               },
+       }
+       config.AddHostKey(ss.HostKey)
+
+       listener, err := net.Listen("tcp", ":")
+       if err != nil {
+               ss.err = err
+               return
+       }
+
+       ss.mtx.Lock()
+       ss.listener = listener
+       ss.mtx.Unlock()
+
+       go func() {
+               for {
+                       nConn, err := listener.Accept()
+                       if err != nil && strings.Contains(err.Error(), "use of closed network connection") && ss.closed {
+                               return
+                       } else if err != nil {
+                               log.Printf("accept: %s", err)
+                               return
+                       }
+                       go ss.serveConn(nConn, config)
+               }
+       }()
+}
+
+func (ss *SSHService) serveConn(nConn net.Conn, config *ssh.ServerConfig) {
+       defer nConn.Close()
+       conn, newchans, reqs, err := ssh.NewServerConn(nConn, config)
+       if err != nil {
+               log.Printf("ssh.NewServerConn: %s", err)
+               return
+       }
+       defer conn.Close()
+       go ssh.DiscardRequests(reqs)
+       for newch := range newchans {
+               if newch.ChannelType() != "session" {
+                       newch.Reject(ssh.UnknownChannelType, "unknown channel type")
+                       continue
+               }
+               ch, reqs, err := newch.Accept()
+               if err != nil {
+                       log.Printf("accept channel: %s", err)
+                       return
+               }
+               var execReq struct {
+                       Command string
+               }
+               go func() {
+                       for req := range reqs {
+                               if req.Type == "exec" && execReq.Command == "" {
+                                       req.Reply(true, nil)
+                                       ssh.Unmarshal(req.Payload, &execReq)
+                                       go func() {
+                                               var resp struct {
+                                                       Status uint32
+                                               }
+                                               resp.Status = ss.Exec(execReq.Command, ch, ch, ch.Stderr())
+                                               ch.SendRequest("exit-status", false, ssh.Marshal(&resp))
+                                               ch.Close()
+                                       }()
+                               }
+                       }
+               }()
+       }
+}
diff --git a/lib/dispatchcloud/test/sshkey_dispatch b/lib/dispatchcloud/test/sshkey_dispatch
new file mode 100644 (file)
index 0000000..5584519
--- /dev/null
@@ -0,0 +1,27 @@
+-----BEGIN RSA PRIVATE KEY-----
+MIIEowIBAAKCAQEAqYm4XsQHm8sBSZFwUX5VeW1OkGsfoNzcGPG2nzzYRhNhClYZ
+0ABHhUk82HkaC/8l6d/jpYTf42HrK42nNQ0r0Yzs7qw8yZMQioK4Yk+kFyVLF78E
+GRG4pGAWXFs6pUchs/lm8fo9zcda4R3XeqgI+NO+nEERXmdRJa1FhI+Za3/S/+CV
+mg+6O00wZz2+vKmDPptGN4MCKmQOCKsMJts7wSZGyVcTtdNv7jjfr6yPAIOIL8X7
+LtarBCFaK/pD7uWll/Uj7h7D8K48nIZUrvBJJjXL8Sm4LxCNoz3Z83k8J5ZzuDRD
+gRiQe/C085mhO6VL+2fypDLwcKt1tOL8fI81MwIDAQABAoIBACR3tEnmHsDbNOav
+Oxq8cwRQh9K2yDHg8BMJgz/TZa4FIx2HEbxVIw0/iLADtJ+Z/XzGJQCIiWQuvtg6
+exoFQESt7JUWRWkSkj9JCQJUoTY9Vl7APtBpqG7rIEQzd3TvzQcagZNRQZQO6rR7
+p8sBdBSZ72lK8cJ9tM3G7Kor/VNK7KgRZFNhEWnmvEa3qMd4hzDcQ4faOn7C9NZK
+dwJAuJVVfwOLlOORYcyEkvksLaDOK2DsB/p0AaCpfSmThRbBKN5fPXYaKgUdfp3w
+70Hpp27WWymb1cgjyqSH3DY+V/kvid+5QxgxCBRq865jPLn3FFT9bWEVS/0wvJRj
+iMIRrjECgYEA4Ffv9rBJXqVXonNQbbstd2PaprJDXMUy9/UmfHL6pkq1xdBeuM7v
+yf2ocXheA8AahHtIOhtgKqwv/aRhVK0ErYtiSvIk+tXG+dAtj/1ZAKbKiFyxjkZV
+X72BH7cTlR6As5SRRfWM/HaBGEgED391gKsI5PyMdqWWdczT5KfxAksCgYEAwXYE
+ewPmV1GaR5fbh2RupoPnUJPMj36gJCnwls7sGaXDQIpdlq56zfKgrLocGXGgj+8f
+QH7FHTJQO15YCYebtsXWwB3++iG43gVlJlecPAydsap2CCshqNWC5JU5pan0QzsP
+exzNzWqfUPSbTkR2SRaN+MenZo2Y/WqScOAth7kCgYBgVoLujW9EXH5QfXJpXLq+
+jTvE38I7oVcs0bJwOLPYGzcJtlwmwn6IYAwohgbhV2pLv+EZSs42JPEK278MLKxY
+lgVkp60npgunFTWroqDIvdc1TZDVxvA8h9VeODEJlSqxczgbMcIUXBM9yRctTI+5
+7DiKlMUA4kTFW2sWwuOlFwKBgGXvrYS0FVbFJKm8lmvMu5D5x5RpjEu/yNnFT4Pn
+G/iXoz4Kqi2PWh3STl804UF24cd1k94D7hDoReZCW9kJnz67F+C67XMW+bXi2d1O
+JIBvlVfcHb1IHMA9YG7ZQjrMRmx2Xj3ce4RVPgUGHh8ra7gvLjd72/Tpf0doNClN
+ti/hAoGBAMW5D3LhU05LXWmOqpeT4VDgqk4MrTBcstVe7KdVjwzHrVHCAmI927vI
+pjpphWzpC9m3x4OsTNf8m+g6H7f3IiQS0aiFNtduXYlcuT5FHS2fSATTzg5PBon9
+1E6BudOve+WyFyBs7hFWAqWFBdWujAl4Qk5Ek09U2ilFEPE7RTgJ
+-----END RSA PRIVATE KEY-----
diff --git a/lib/dispatchcloud/test/sshkey_dispatch.pub b/lib/dispatchcloud/test/sshkey_dispatch.pub
new file mode 100644 (file)
index 0000000..1d5c1ea
--- /dev/null
@@ -0,0 +1 @@
+ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCpibhexAebywFJkXBRflV5bU6Qax+g3NwY8bafPNhGE2EKVhnQAEeFSTzYeRoL/yXp3+OlhN/jYesrjac1DSvRjOzurDzJkxCKgrhiT6QXJUsXvwQZEbikYBZcWzqlRyGz+Wbx+j3Nx1rhHdd6qAj4076cQRFeZ1ElrUWEj5lrf9L/4JWaD7o7TTBnPb68qYM+m0Y3gwIqZA4Iqwwm2zvBJkbJVxO102/uON+vrI8Ag4gvxfsu1qsEIVor+kPu5aWX9SPuHsPwrjychlSu8EkmNcvxKbgvEI2jPdnzeTwnlnO4NEOBGJB78LTzmaE7pUv7Z/KkMvBwq3W04vx8jzUz tom@curve
diff --git a/lib/dispatchcloud/test/sshkey_vm b/lib/dispatchcloud/test/sshkey_vm
new file mode 100644 (file)
index 0000000..10b7ed1
--- /dev/null
@@ -0,0 +1,27 @@
+-----BEGIN RSA PRIVATE KEY-----
+MIIEpQIBAAKCAQEApIfWk2StZGDtmunumIeXLJ46AQrbHHvuxrSAkQf6+zUwjB2I
+rse7ezBRHWcge9U5EsigixmhUM4ozFLnUQNwC862jbmsjbyA97arG/REECNlUrEB
+HQPYHhai5yyJ89AfjWVxKyINfW0K2HX1R8nl4kdVraAgpohPLh0dGjfwzm/BcXDG
++TxW9zRz0KCs9ZRI6s2MNdv08ahKQ0azk8gRTqMADJmYNWIo3zPQ+fhlwyr6EZJ/
+HFbRtjpajEPMJPwoVPO+Wj6wztfHDYKkPIrIWbhMl6w+tEKdsmygd3Iq94ktLS3X
+AbRCfn4njS2QSlkKFEepkUJWCSSWZgFn6DLm2wIDAQABAoIBAQCb137LxcTnG1h0
+L7isCWKMBKN0cU/xvwIAfOB6f1CfuVXuodrhkpZmrPFoJFKEeQbCX/6RQwmlfGDw
+iGZKOjNbO8V2oLRs3GxcNk4FAG2ny58hoD8puIZwmYhb57gTlMMOL1PuQyb78tkf
+Bzv5b6ermV3yQ4Ypt1solrMGLo6NOZD0oDX9p0Zt9kueIhjzgP0v5//T1F4PGHZK
++sLSsMiu9u6F+PB+Oc6uv0Zee9Lnts/QiWH5f18oEculjwKWFx+JwJWiLffGg2Bl
+vbpmvHFRoRWkHTpgSiLwSUqs0ZUWU9R5h11ROg5L39MLsxQoBvHsPEnP5ssN8jGt
+aH86EZjBAoGBAM+A5B/UjhIn9m05EhDTDRzI92hGhM8f7uAwobbnjvIQyZbWlBwj
+2TmgbJdpTGVbD+iTBIwKQdcFBbWobTCZsNMpghqA/ir4YIAnZ5OX9VQ1Bc+bWE7V
+dPmMVpCgyg+ERAe+79FrYWcI3vhnBpHCsY/9p9pGQIKDzlGTWNF1HJGjAoGBAMr7
+2CTVnFImTgD3E+rH4AAAfkz+cyqfK6BUhli/NifFYZhWCs16r9QCGSORnp4gPhMY
+3mf7VBs9rk123zOMo89eJt3adTgbZ+QIxXeXilGXpbT3w1+CJMaZRrIy80E1tB5/
+KvDZcrZ78o8XWMNUa+9k55ukvgyC24ICAmOIWNlpAoGBALEFvphBF2r52MtZUsYz
+pw4VjKvS7V5eWcW891k4tsRf+frK2NQg6SK2b63EUT5ur2W0dr6ZyY2MZVCSfYRm
+uWmMEchWn389IeZyt3Q8wTize1+foXivtflm9jqwUXFnXzpUc/du6kuiT8YO7pXP
+SPgUZ+xY3pP5qjwBvlYC2PqNAoGAZ1CKMi1bdGC0wT8BLzXuqHGX136HhcEgRmnf
+O5qPaOzJAO2CcBWrGuC6hOUgc+F7VuMIiKpeo8LgTeNcNfO2iNymMbN4iEdCuMlS
+IM3MBD2IhTS6h4lJSKBJYHgYYi+AbylQ5Of4wDMUQYqjjkAQ8/dK/2h5pwqPyXtW
+VezXNEkCgYEAq4S0++y9tjlLn+w9BIkmx3bAVRDQZIzIEwxTh+jpqaUp1J0iyseJ
+71pwqQojGNF6x8GglVXa6bMrETae21WhEeHnWmzlpCWIODsYPUQ+erjDuAWi9eGk
+HLklqSEoLB8pzC6zDqjxDw+CnGERIDSaoaeoWiNKZ95IH1WiEwYjuxU=
+-----END RSA PRIVATE KEY-----
diff --git a/lib/dispatchcloud/test/sshkey_vm.pub b/lib/dispatchcloud/test/sshkey_vm.pub
new file mode 100644 (file)
index 0000000..b9d44c9
--- /dev/null
@@ -0,0 +1 @@
+ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCkh9aTZK1kYO2a6e6Yh5csnjoBCtsce+7GtICRB/r7NTCMHYiux7t7MFEdZyB71TkSyKCLGaFQzijMUudRA3ALzraNuayNvID3tqsb9EQQI2VSsQEdA9geFqLnLInz0B+NZXErIg19bQrYdfVHyeXiR1WtoCCmiE8uHR0aN/DOb8FxcMb5PFb3NHPQoKz1lEjqzYw12/TxqEpDRrOTyBFOowAMmZg1YijfM9D5+GXDKvoRkn8cVtG2OlqMQ8wk/ChU875aPrDO18cNgqQ8ishZuEyXrD60Qp2ybKB3cir3iS0tLdcBtEJ+fieNLZBKWQoUR6mRQlYJJJZmAWfoMubb tom@curve
diff --git a/lib/dispatchcloud/test/stub_driver.go b/lib/dispatchcloud/test/stub_driver.go
new file mode 100644 (file)
index 0000000..8bdfaa9
--- /dev/null
@@ -0,0 +1,318 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package test
+
+import (
+       "crypto/rand"
+       "errors"
+       "fmt"
+       "io"
+       math_rand "math/rand"
+       "regexp"
+       "strings"
+       "sync"
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/cloud"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/Sirupsen/logrus"
+       "github.com/mitchellh/mapstructure"
+       "golang.org/x/crypto/ssh"
+)
+
+// A StubDriver implements cloud.Driver by setting up local SSH
+// servers that do fake command executions.
+type StubDriver struct {
+       HostKey        ssh.Signer
+       AuthorizedKeys []ssh.PublicKey
+
+       // SetupVM, if set, is called upon creation of each new
+       // StubVM. This is the caller's opportunity to customize the
+       // VM's error rate and other behaviors.
+       SetupVM func(*StubVM)
+
+       // StubVM's fake crunch-run uses this Queue to read and update
+       // container state.
+       Queue *Queue
+
+       // Frequency of artificially introduced errors on calls to
+       // Destroy. 0=always succeed, 1=always fail.
+       ErrorRateDestroy float64
+
+       instanceSets []*StubInstanceSet
+}
+
+// InstanceSet returns a new *StubInstanceSet.
+func (sd *StubDriver) InstanceSet(params map[string]interface{}, id cloud.InstanceSetID) (cloud.InstanceSet, error) {
+       sis := StubInstanceSet{
+               driver:  sd,
+               servers: map[cloud.InstanceID]*StubVM{},
+       }
+       sd.instanceSets = append(sd.instanceSets, &sis)
+       return &sis, mapstructure.Decode(params, &sis)
+}
+
+// InstanceSets returns all instance sets that have been created by
+// the driver. This can be used to test a component that uses the
+// driver but doesn't expose the InstanceSets it has created.
+func (sd *StubDriver) InstanceSets() []*StubInstanceSet {
+       return sd.instanceSets
+}
+
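+// A StubInstanceSet is an InstanceSet implementation backed by StubVMs
+// rather than real cloud VMs.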
+type StubInstanceSet struct {
+       driver  *StubDriver
+       servers map[cloud.InstanceID]*StubVM
+       mtx     sync.RWMutex
+       stopped bool
+}
+
+func (sis *StubInstanceSet) Create(it arvados.InstanceType, image cloud.ImageID, tags cloud.InstanceTags, authKey ssh.PublicKey) (cloud.Instance, error) {
+       sis.mtx.Lock()
+       defer sis.mtx.Unlock()
+       if sis.stopped {
+               return nil, errors.New("StubInstanceSet: Create called after Stop")
+       }
+       ak := sis.driver.AuthorizedKeys
+       if authKey != nil {
+               ak = append([]ssh.PublicKey{authKey}, ak...)
+       }
+       svm := &StubVM{
+               sis:          sis,
+               id:           cloud.InstanceID(fmt.Sprintf("stub-%s-%x", it.ProviderType, math_rand.Int63())),
+               tags:         copyTags(tags),
+               providerType: it.ProviderType,
+       }
+       svm.SSHService = SSHService{
+               HostKey:        sis.driver.HostKey,
+               AuthorizedKeys: ak,
+               Exec:           svm.Exec,
+       }
+       if setup := sis.driver.SetupVM; setup != nil {
+               setup(svm)
+       }
+       sis.servers[svm.id] = svm
+       return svm.Instance(), nil
+}
+
+func (sis *StubInstanceSet) Instances(cloud.InstanceTags) ([]cloud.Instance, error) {
+       sis.mtx.RLock()
+       defer sis.mtx.RUnlock()
+       var r []cloud.Instance
+       for _, ss := range sis.servers {
+               r = append(r, ss.Instance())
+       }
+       return r, nil
+}
+
+func (sis *StubInstanceSet) Stop() {
+       sis.mtx.Lock()
+       defer sis.mtx.Unlock()
+       if sis.stopped {
+               panic("Stop called twice")
+       }
+       sis.stopped = true
+}
+
+// StubVM is a fake server that runs an SSH service. It represents a
+// VM running in a fake cloud.
+//
+// Note this is distinct from a stubInstance, which is a snapshot of
+// the VM's metadata. Like a VM in a real cloud, a StubVM keeps
+// running (and might change IP addresses, shut down, etc.)  without
+// updating any stubInstances that have been returned to callers.
+type StubVM struct {
+       Boot                 time.Time
+       Broken               time.Time
+       CrunchRunMissing     bool
+       CrunchRunCrashRate   float64
+       CrunchRunDetachDelay time.Duration
+       ExecuteContainer     func(arvados.Container) int
+
+       sis          *StubInstanceSet
+       id           cloud.InstanceID
+       tags         cloud.InstanceTags
+       providerType string
+       SSHService   SSHService
+       running      map[string]bool
+       sync.Mutex
+}
+
+func (svm *StubVM) Instance() stubInstance {
+       svm.Lock()
+       defer svm.Unlock()
+       return stubInstance{
+               svm:  svm,
+               addr: svm.SSHService.Address(),
+               // We deliberately return a cached/stale copy of the
+               // real tags here, so that (Instance)Tags() sometimes
+               // returns old data after a call to
+               // (Instance)SetTags().  This is permitted by the
+               // driver interface, and this might help remind
+               // callers that they need to tolerate it.
+               tags: copyTags(svm.tags),
+       }
+}
+
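+// Exec handles the stub VM's fake command executions, impersonating
+// crunch-run's --detach, --list, and --kill modes.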
+func (svm *StubVM) Exec(command string, stdin io.Reader, stdout, stderr io.Writer) uint32 {
+       queue := svm.sis.driver.Queue
+       uuid := regexp.MustCompile(`.{5}-dz642-.{15}`).FindString(command)
+       if eta := svm.Boot.Sub(time.Now()); eta > 0 {
+               fmt.Fprintf(stderr, "stub is booting, ETA %s\n", eta)
+               return 1
+       }
+       if !svm.Broken.IsZero() && svm.Broken.Before(time.Now()) {
+               fmt.Fprintf(stderr, "cannot fork\n")
+               return 2
+       }
+       if svm.CrunchRunMissing && strings.Contains(command, "crunch-run") {
+               fmt.Fprint(stderr, "crunch-run: command not found\n")
+               return 1
+       }
+       if strings.HasPrefix(command, "crunch-run --detach ") {
+               svm.Lock()
+               if svm.running == nil {
+                       svm.running = map[string]bool{}
+               }
+               svm.running[uuid] = true
+               svm.Unlock()
+               time.Sleep(svm.CrunchRunDetachDelay)
+               fmt.Fprintf(stderr, "starting %s\n", uuid)
+               logger := logrus.WithField("ContainerUUID", uuid)
+               logger.Printf("[test] starting crunch-run stub")
+               go func() {
+                       crashluck := math_rand.Float64()
+                       ctr, ok := queue.Get(uuid)
+                       if !ok {
+                               logger.Print("[test] container not in queue")
+                               return
+                       }
+                       if crashluck > svm.CrunchRunCrashRate/2 {
+                               time.Sleep(time.Duration(math_rand.Float64()*20) * time.Millisecond)
+                               ctr.State = arvados.ContainerStateRunning
+                               queue.Notify(ctr)
+                       }
+
+                       time.Sleep(time.Duration(math_rand.Float64()*20) * time.Millisecond)
+                       svm.Lock()
+                       _, running := svm.running[uuid]
+                       svm.Unlock()
+                       if !running {
+                               logger.Print("[test] container was killed")
+                               return
+                       }
+                       if svm.ExecuteContainer != nil {
+                               ctr.ExitCode = svm.ExecuteContainer(ctr)
+                       }
+                       // TODO: Check whether the stub instance has
+                       // been destroyed, and if so, don't call
+                       // queue.Notify. Then "container finished
+                       // twice" can be classified as a bug.
+                       if crashluck < svm.CrunchRunCrashRate {
+                               logger.Print("[test] crashing crunch-run stub")
+                       } else {
+                               ctr.State = arvados.ContainerStateComplete
+                               queue.Notify(ctr)
+                       }
+                       logger.Print("[test] exiting crunch-run stub")
+                       svm.Lock()
+                       defer svm.Unlock()
+                       delete(svm.running, uuid)
+               }()
+               return 0
+       }
+       if command == "crunch-run --list" {
+               svm.Lock()
+               defer svm.Unlock()
+               for uuid := range svm.running {
+                       fmt.Fprintf(stdout, "%s\n", uuid)
+               }
+               return 0
+       }
+       if strings.HasPrefix(command, "crunch-run --kill ") {
+               svm.Lock()
+               defer svm.Unlock()
+               if svm.running[uuid] {
+                       delete(svm.running, uuid)
+               } else {
+                       fmt.Fprintf(stderr, "%s: container is not running\n", uuid)
+               }
+               return 0
+       }
+       if command == "true" {
+               return 0
+       }
+       fmt.Fprintf(stderr, "%q: command not found", command)
+       return 1
+}
+
+type stubInstance struct {
+       svm  *StubVM
+       addr string
+       tags cloud.InstanceTags
+}
+
+func (si stubInstance) ID() cloud.InstanceID {
+       return si.svm.id
+}
+
+func (si stubInstance) Address() string {
+       return si.addr
+}
+
+func (si stubInstance) Destroy() error {
+       if math_rand.Float64() < si.svm.sis.driver.ErrorRateDestroy {
+               return errors.New("instance could not be destroyed")
+       }
+       si.svm.SSHService.Close()
+       sis := si.svm.sis
+       sis.mtx.Lock()
+       defer sis.mtx.Unlock()
+       delete(sis.servers, si.svm.id)
+       return nil
+}
+
+func (si stubInstance) ProviderType() string {
+       return si.svm.providerType
+}
+
+func (si stubInstance) SetTags(tags cloud.InstanceTags) error {
+       tags = copyTags(tags)
+       svm := si.svm
+       go func() {
+               svm.Lock()
+               defer svm.Unlock()
+               svm.tags = tags
+       }()
+       return nil
+}
+
+func (si stubInstance) Tags() cloud.InstanceTags {
+       return si.tags
+}
+
+func (si stubInstance) String() string {
+       return string(si.svm.id)
+}
+
+func (si stubInstance) VerifyHostKey(key ssh.PublicKey, client *ssh.Client) error {
+       buf := make([]byte, 512)
+       _, err := io.ReadFull(rand.Reader, buf)
+       if err != nil {
+               return err
+       }
+       sig, err := si.svm.sis.driver.HostKey.Sign(rand.Reader, buf)
+       if err != nil {
+               return err
+       }
+       return key.Verify(buf, sig)
+}
+
+func copyTags(src cloud.InstanceTags) cloud.InstanceTags {
+       dst := cloud.InstanceTags{}
+       for k, v := range src {
+               dst[k] = v
+       }
+       return dst
+}
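
SetupVM, ErrorRateDestroy, and the StubVM knobs (Boot, CrunchRunCrashRate, CrunchRunDetachDelay) are how a test dials in failure modes. Below is a hedged sketch of a typical configuration; the function name and the specific rates and delays are illustrative, and a real test would also set HostKey and AuthorizedKeys (e.g., via LoadTestKey) so the executor can connect.

```go
package example

import (
	"time"

	"git.curoverse.com/arvados.git/lib/dispatchcloud/test"
)

// newStubDriver returns a driver whose fake VMs boot immediately but
// occasionally fail, which is useful for exercising the dispatcher's
// retry paths.
func newStubDriver(queue *test.Queue) *test.StubDriver {
	return &test.StubDriver{
		Queue:            queue,
		ErrorRateDestroy: 0.1, // ~10% of Destroy calls fail
		SetupVM: func(svm *test.StubVM) {
			svm.Boot = time.Now() // past Boot time: the VM is "booted" as soon as it exists
			svm.CrunchRunDetachDelay = 10 * time.Millisecond
			svm.CrunchRunCrashRate = 0.1 // some fake crunch-run invocations crash
		},
	}
}
```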
diff --git a/lib/dispatchcloud/worker/gocheck_test.go b/lib/dispatchcloud/worker/gocheck_test.go
new file mode 100644 (file)
index 0000000..b4ca66c
--- /dev/null
@@ -0,0 +1,16 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package worker
+
+import (
+       "testing"
+
+       check "gopkg.in/check.v1"
+)
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       check.TestingT(t)
+}
diff --git a/lib/dispatchcloud/worker/pool.go b/lib/dispatchcloud/worker/pool.go
new file mode 100644 (file)
index 0000000..ff5f762
--- /dev/null
@@ -0,0 +1,684 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package worker
+
+import (
+       "io"
+       "sort"
+       "strings"
+       "sync"
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/cloud"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/Sirupsen/logrus"
+       "github.com/prometheus/client_golang/prometheus"
+)
+
+const (
+       tagKeyInstanceType = "InstanceType"
+       tagKeyHold         = "Hold"
+)
+
+// An InstanceView shows a worker's current state and recent activity.
+type InstanceView struct {
+       Instance             string
+       Price                float64
+       ArvadosInstanceType  string
+       ProviderInstanceType string
+       LastContainerUUID    string
+       LastBusy             time.Time
+       WorkerState          string
+}
+
+// An Executor executes shell commands on a remote host.
+type Executor interface {
+       // Run cmd on the current target.
+       Execute(cmd string, stdin io.Reader) (stdout, stderr []byte, err error)
+
+       // Use the given target for subsequent operations. The new
+       // target is the same host as the previous target, but it
+       // might return a different address and verify a different
+       // host key.
+       //
+       // SetTarget is called frequently, and in most cases the new
+       // target will behave exactly the same as the old one. An
+       // implementation should optimize accordingly.
+       //
+       // SetTarget must not block on concurrent Execute calls.
+       SetTarget(cloud.ExecutorTarget)
+
+       Close()
+}
+
+const (
+       defaultSyncInterval       = time.Minute
+       defaultProbeInterval      = time.Second * 10
+       defaultMaxProbesPerSecond = 10
+       defaultTimeoutIdle        = time.Minute
+       defaultTimeoutBooting     = time.Minute * 10
+       defaultTimeoutProbe       = time.Minute * 10
+       defaultTimeoutShutdown    = time.Second * 10
+)
+
+func duration(conf arvados.Duration, def time.Duration) time.Duration {
+       if conf > 0 {
+               return time.Duration(conf)
+       } else {
+               return def
+       }
+}
+
+// NewPool creates a Pool of workers backed by instanceSet.
+//
+// New instances are configured and set up according to the given
+// cluster configuration.
+func NewPool(logger logrus.FieldLogger, reg *prometheus.Registry, instanceSet cloud.InstanceSet, newExecutor func(cloud.Instance) Executor, cluster *arvados.Cluster) *Pool {
+       wp := &Pool{
+               logger:             logger,
+               instanceSet:        instanceSet,
+               newExecutor:        newExecutor,
+               bootProbeCommand:   cluster.CloudVMs.BootProbeCommand,
+               imageID:            cloud.ImageID(cluster.CloudVMs.ImageID),
+               instanceTypes:      cluster.InstanceTypes,
+               maxProbesPerSecond: cluster.Dispatch.MaxProbesPerSecond,
+               probeInterval:      duration(cluster.Dispatch.ProbeInterval, defaultProbeInterval),
+               syncInterval:       duration(cluster.CloudVMs.SyncInterval, defaultSyncInterval),
+               timeoutIdle:        duration(cluster.CloudVMs.TimeoutIdle, defaultTimeoutIdle),
+               timeoutBooting:     duration(cluster.CloudVMs.TimeoutBooting, defaultTimeoutBooting),
+               timeoutProbe:       duration(cluster.CloudVMs.TimeoutProbe, defaultTimeoutProbe),
+               timeoutShutdown:    duration(cluster.CloudVMs.TimeoutShutdown, defaultTimeoutShutdown),
+               stop:               make(chan bool),
+       }
+       wp.registerMetrics(reg)
+       go func() {
+               wp.setupOnce.Do(wp.setup)
+               go wp.runMetrics()
+               go wp.runProbes()
+               go wp.runSync()
+       }()
+       return wp
+}
+
+// Pool is a resizable worker pool backed by a cloud.InstanceSet. A
+// zero Pool should not be used. Call NewPool to create a new Pool.
+type Pool struct {
+       // configuration
+       logger             logrus.FieldLogger
+       instanceSet        cloud.InstanceSet
+       newExecutor        func(cloud.Instance) Executor
+       bootProbeCommand   string
+       imageID            cloud.ImageID
+       instanceTypes      map[string]arvados.InstanceType
+       syncInterval       time.Duration
+       probeInterval      time.Duration
+       maxProbesPerSecond int
+       timeoutIdle        time.Duration
+       timeoutBooting     time.Duration
+       timeoutProbe       time.Duration
+       timeoutShutdown    time.Duration
+
+       // private state
+       subscribers  map[<-chan struct{}]chan<- struct{}
+       creating     map[arvados.InstanceType][]time.Time // start times of unfinished (InstanceSet)Create calls
+       workers      map[cloud.InstanceID]*worker
+       loaded       bool                 // loaded list of instances from InstanceSet at least once
+       exited       map[string]time.Time // containers whose crunch-run proc has exited, but KillContainer has not been called
+       atQuotaUntil time.Time
+       atQuotaErr   cloud.QuotaError
+       stop         chan bool
+       mtx          sync.RWMutex
+       setupOnce    sync.Once
+
+       mInstances         prometheus.Gauge
+       mContainersRunning prometheus.Gauge
+       mVCPUs             prometheus.Gauge
+       mVCPUsInuse        prometheus.Gauge
+       mMemory            prometheus.Gauge
+       mMemoryInuse       prometheus.Gauge
+}
+
+// Subscribe returns a channel that becomes ready whenever a worker's
+// state changes.
+//
+// Example:
+//
+//     ch := wp.Subscribe()
+//     defer wp.Unsubscribe(ch)
+//     for range ch {
+//             // ...try scheduling some work...
+//             if done {
+//                     break
+//             }
+//     }
+func (wp *Pool) Subscribe() <-chan struct{} {
+       wp.setupOnce.Do(wp.setup)
+       wp.mtx.Lock()
+       defer wp.mtx.Unlock()
+       ch := make(chan struct{}, 1)
+       wp.subscribers[ch] = ch
+       return ch
+}
+
+// Unsubscribe stops sending updates to the given channel.
+func (wp *Pool) Unsubscribe(ch <-chan struct{}) {
+       wp.setupOnce.Do(wp.setup)
+       wp.mtx.Lock()
+       defer wp.mtx.Unlock()
+       delete(wp.subscribers, ch)
+}
+
+// Unallocated returns the number of unallocated (creating + booting +
+// idle + unknown) workers for each instance type.
+func (wp *Pool) Unallocated() map[arvados.InstanceType]int {
+       wp.setupOnce.Do(wp.setup)
+       wp.mtx.RLock()
+       defer wp.mtx.RUnlock()
+       unalloc := map[arvados.InstanceType]int{}
+       creating := map[arvados.InstanceType]int{}
+       for it, times := range wp.creating {
+               creating[it] = len(times)
+       }
+       for _, wkr := range wp.workers {
+               if !(wkr.state == StateIdle || wkr.state == StateBooting || wkr.state == StateUnknown) {
+                       continue
+               }
+               it := wkr.instType
+               unalloc[it]++
+               if wkr.state == StateUnknown && creating[it] > 0 && wkr.appeared.After(wp.creating[it][0]) {
+                       // If up to N new workers appear in
+                       // Instances() while we are waiting for N
+                       // Create() calls to complete, we assume we're
+                       // just seeing a race between Instances() and
+                       // Create() responses.
+                       //
+                       // The other common reason why nodes have
+                       // state==Unknown is that they appeared at
+                       // startup, before any Create calls. They
+                       // don't match the above timing condition, so
+                       // we never mistakenly attribute them to
+                       // pending Create calls.
+                       creating[it]--
+               }
+       }
+       for it, c := range creating {
+               unalloc[it] += c
+       }
+       return unalloc
+}
+
+// Create a new instance with the given type, and add it to the worker
+// pool. The worker is added immediately; instance creation runs in
+// the background.
+func (wp *Pool) Create(it arvados.InstanceType) error {
+       logger := wp.logger.WithField("InstanceType", it.Name)
+       wp.setupOnce.Do(wp.setup)
+       wp.mtx.Lock()
+       defer wp.mtx.Unlock()
+       if time.Now().Before(wp.atQuotaUntil) {
+               return wp.atQuotaErr
+       }
+       tags := cloud.InstanceTags{tagKeyInstanceType: it.Name}
+       now := time.Now()
+       wp.creating[it] = append(wp.creating[it], now)
+       go func() {
+               defer wp.notify()
+               inst, err := wp.instanceSet.Create(it, wp.imageID, tags, nil)
+               wp.mtx.Lock()
+               defer wp.mtx.Unlock()
+               // Remove our timestamp marker from wp.creating
+               for i, t := range wp.creating[it] {
+                       if t == now {
+                               copy(wp.creating[it][i:], wp.creating[it][i+1:])
+                               wp.creating[it] = wp.creating[it][:len(wp.creating[it])-1]
+                               break
+                       }
+               }
+               if err, ok := err.(cloud.QuotaError); ok && err.IsQuotaError() {
+                       wp.atQuotaErr = err
+                       wp.atQuotaUntil = time.Now().Add(time.Minute)
+               }
+               if err != nil {
+                       logger.WithError(err).Error("create failed")
+                       return
+               }
+               wp.updateWorker(inst, it, StateBooting)
+       }()
+       return nil
+}
+
+// AtQuota returns true if Create is not expected to work at the
+// moment.
+func (wp *Pool) AtQuota() bool {
+       wp.mtx.Lock()
+       defer wp.mtx.Unlock()
+       return time.Now().Before(wp.atQuotaUntil)
+}
+
+// Add or update worker attached to the given instance. Use
+// initialState if a new worker is created.
+//
+// The second return value is true if a new worker is created.
+//
+// Caller must have lock.
+func (wp *Pool) updateWorker(inst cloud.Instance, it arvados.InstanceType, initialState State) (*worker, bool) {
+       id := inst.ID()
+       if wkr := wp.workers[id]; wkr != nil {
+               wkr.executor.SetTarget(inst)
+               wkr.instance = inst
+               wkr.updated = time.Now()
+               if initialState == StateBooting && wkr.state == StateUnknown {
+                       wkr.state = StateBooting
+               }
+               return wkr, false
+       }
+       if initialState == StateUnknown && inst.Tags()[tagKeyHold] != "" {
+               initialState = StateHold
+       }
+       logger := wp.logger.WithFields(logrus.Fields{
+               "InstanceType": it.Name,
+               "Instance":     inst,
+       })
+       logger.WithField("State", initialState).Infof("instance appeared in cloud")
+       now := time.Now()
+       wkr := &worker{
+               mtx:      &wp.mtx,
+               wp:       wp,
+               logger:   logger,
+               executor: wp.newExecutor(inst),
+               state:    initialState,
+               instance: inst,
+               instType: it,
+               appeared: now,
+               probed:   now,
+               busy:     now,
+               updated:  now,
+               running:  make(map[string]struct{}),
+               starting: make(map[string]struct{}),
+               probing:  make(chan struct{}, 1),
+       }
+       wp.workers[id] = wkr
+       return wkr, true
+}
+
+// caller must have lock.
+func (wp *Pool) notifyExited(uuid string, t time.Time) {
+       wp.exited[uuid] = t
+}
+
+// Shutdown shuts down a worker with the given type, or returns false
+// if all workers with the given type are busy.
+func (wp *Pool) Shutdown(it arvados.InstanceType) bool {
+       wp.setupOnce.Do(wp.setup)
+       wp.mtx.Lock()
+       defer wp.mtx.Unlock()
+       logger := wp.logger.WithField("InstanceType", it.Name)
+       logger.Info("shutdown requested")
+       for _, tryState := range []State{StateBooting, StateIdle} {
+               // TODO: shutdown the worker with the longest idle
+               // time (Idle) or the earliest create time (Booting)
+               for _, wkr := range wp.workers {
+                       if wkr.state == tryState && wkr.instType == it {
+                               logger.WithField("Instance", wkr.instance).Info("shutting down")
+                               wkr.shutdown()
+                               return true
+                       }
+               }
+       }
+       return false
+}
+
+// CountWorkers returns the current number of workers in each state.
+func (wp *Pool) CountWorkers() map[State]int {
+       wp.setupOnce.Do(wp.setup)
+       wp.mtx.Lock()
+       defer wp.mtx.Unlock()
+       r := map[State]int{}
+       for _, w := range wp.workers {
+               r[w.state]++
+       }
+       return r
+}
+
+// Running returns the container UUIDs being prepared/run on workers.
+func (wp *Pool) Running() map[string]time.Time {
+       wp.setupOnce.Do(wp.setup)
+       wp.mtx.Lock()
+       defer wp.mtx.Unlock()
+       r := map[string]time.Time{}
+       for _, wkr := range wp.workers {
+               for uuid := range wkr.running {
+                       r[uuid] = time.Time{}
+               }
+               for uuid := range wkr.starting {
+                       r[uuid] = time.Time{}
+               }
+       }
+       for uuid, exited := range wp.exited {
+               r[uuid] = exited
+       }
+       return r
+}
+
+// StartContainer starts a container on an idle worker immediately if
+// possible, otherwise returns false.
+func (wp *Pool) StartContainer(it arvados.InstanceType, ctr arvados.Container) bool {
+       wp.setupOnce.Do(wp.setup)
+       wp.mtx.Lock()
+       defer wp.mtx.Unlock()
+       var wkr *worker
+       for _, w := range wp.workers {
+               if w.instType == it && w.state == StateIdle {
+                       if wkr == nil || w.busy.After(wkr.busy) {
+                               wkr = w
+                       }
+               }
+       }
+       if wkr == nil {
+               return false
+       }
+       wkr.startContainer(ctr)
+       return true
+}
+
+// KillContainer kills the crunch-run process for the given container
+// UUID, if it's running on any worker.
+//
+// KillContainer returns immediately; the act of killing the container
+// takes some time, and runs in the background.
+func (wp *Pool) KillContainer(uuid string) {
+       wp.mtx.Lock()
+       defer wp.mtx.Unlock()
+       if _, ok := wp.exited[uuid]; ok {
+               wp.logger.WithField("ContainerUUID", uuid).Debug("clearing placeholder for exited crunch-run process")
+               delete(wp.exited, uuid)
+               return
+       }
+       for _, wkr := range wp.workers {
+               if _, ok := wkr.running[uuid]; ok {
+                       go wp.kill(wkr, uuid)
+                       return
+               }
+       }
+       wp.logger.WithField("ContainerUUID", uuid).Debug("cannot kill: already disappeared")
+}
+
+func (wp *Pool) kill(wkr *worker, uuid string) {
+       logger := wp.logger.WithFields(logrus.Fields{
+               "ContainerUUID": uuid,
+               "Instance":      wkr.instance,
+       })
+       logger.Debug("killing process")
+       stdout, stderr, err := wkr.executor.Execute("crunch-run --kill 15 "+uuid, nil)
+       if err != nil {
+               logger.WithFields(logrus.Fields{
+                       "stderr": string(stderr),
+                       "stdout": string(stdout),
+                       "error":  err,
+               }).Warn("kill failed")
+               return
+       }
+       logger.Debug("killing process succeeded")
+       wp.mtx.Lock()
+       defer wp.mtx.Unlock()
+       if _, ok := wkr.running[uuid]; ok {
+               delete(wkr.running, uuid)
+               if wkr.state == StateRunning && len(wkr.running)+len(wkr.starting) == 0 {
+                       wkr.state = StateIdle
+               }
+               wkr.updated = time.Now()
+               go wp.notify()
+       }
+}
+
+func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
+       if reg == nil {
+               reg = prometheus.NewRegistry()
+       }
+       wp.mInstances = prometheus.NewGauge(prometheus.GaugeOpts{
+               Namespace: "arvados",
+               Subsystem: "dispatchcloud",
+               Name:      "instances_total",
+               Help:      "Number of cloud VMs including pending, booting, running, held, and shutting down.",
+       })
+       reg.MustRegister(wp.mInstances)
+       wp.mContainersRunning = prometheus.NewGauge(prometheus.GaugeOpts{
+               Namespace: "arvados",
+               Subsystem: "dispatchcloud",
+               Name:      "containers_running",
+               Help:      "Number of containers reported running by cloud VMs.",
+       })
+       reg.MustRegister(wp.mContainersRunning)
+
+       wp.mVCPUs = prometheus.NewGauge(prometheus.GaugeOpts{
+               Namespace: "arvados",
+               Subsystem: "dispatchcloud",
+               Name:      "vcpus_total",
+               Help:      "Total VCPUs on all cloud VMs.",
+       })
+       reg.MustRegister(wp.mVCPUs)
+       wp.mVCPUsInuse = prometheus.NewGauge(prometheus.GaugeOpts{
+               Namespace: "arvados",
+               Subsystem: "dispatchcloud",
+               Name:      "vcpus_inuse",
+               Help:      "VCPUs on cloud VMs that are running containers.",
+       })
+       reg.MustRegister(wp.mVCPUsInuse)
+       wp.mMemory = prometheus.NewGauge(prometheus.GaugeOpts{
+               Namespace: "arvados",
+               Subsystem: "dispatchcloud",
+               Name:      "memory_bytes_total",
+               Help:      "Total memory on all cloud VMs.",
+       })
+       reg.MustRegister(wp.mMemory)
+       wp.mMemoryInuse = prometheus.NewGauge(prometheus.GaugeOpts{
+               Namespace: "arvados",
+               Subsystem: "dispatchcloud",
+               Name:      "memory_bytes_inuse",
+               Help:      "Memory on cloud VMs that are running containers.",
+       })
+       reg.MustRegister(wp.mMemoryInuse)
+}
+
+func (wp *Pool) runMetrics() {
+       ch := wp.Subscribe()
+       defer wp.Unsubscribe(ch)
+       for range ch {
+               wp.updateMetrics()
+       }
+}
+
+func (wp *Pool) updateMetrics() {
+       wp.mtx.RLock()
+       defer wp.mtx.RUnlock()
+
+       var alloc, cpu, cpuInuse, mem, memInuse int64
+       for _, wkr := range wp.workers {
+               cpu += int64(wkr.instType.VCPUs)
+               mem += int64(wkr.instType.RAM)
+               if len(wkr.running)+len(wkr.starting) == 0 {
+                       continue
+               }
+               alloc += int64(len(wkr.running) + len(wkr.starting))
+               cpuInuse += int64(wkr.instType.VCPUs)
+               memInuse += int64(wkr.instType.RAM)
+       }
+       wp.mInstances.Set(float64(len(wp.workers)))
+       wp.mContainersRunning.Set(float64(alloc))
+       wp.mVCPUs.Set(float64(cpu))
+       wp.mMemory.Set(float64(mem))
+       wp.mVCPUsInuse.Set(float64(cpuInuse))
+       wp.mMemoryInuse.Set(float64(memInuse))
+}
+
+func (wp *Pool) runProbes() {
+       maxPPS := wp.maxProbesPerSecond
+       if maxPPS < 1 {
+               maxPPS = defaultMaxProbesPerSecond
+       }
+       limitticker := time.NewTicker(time.Second / time.Duration(maxPPS))
+       defer limitticker.Stop()
+
+       probeticker := time.NewTicker(wp.probeInterval)
+       defer probeticker.Stop()
+
+       workers := []cloud.InstanceID{}
+       for range probeticker.C {
+               workers = workers[:0]
+               wp.mtx.Lock()
+               for id, wkr := range wp.workers {
+                       if wkr.state == StateShutdown || wkr.shutdownIfIdle() {
+                               continue
+                       }
+                       workers = append(workers, id)
+               }
+               wp.mtx.Unlock()
+
+               for _, id := range workers {
+                       wp.mtx.Lock()
+                       wkr, ok := wp.workers[id]
+                       wp.mtx.Unlock()
+                       if !ok {
+                               // Deleted while we were probing
+                               // others
+                               continue
+                       }
+                       go wkr.ProbeAndUpdate()
+                       select {
+                       case <-wp.stop:
+                               return
+                       case <-limitticker.C:
+                       }
+               }
+       }
+}
+
+func (wp *Pool) runSync() {
+       // sync once immediately, then wait syncInterval, sync again,
+       // etc.
+       timer := time.NewTimer(1)
+       for {
+               select {
+               case <-timer.C:
+                       err := wp.getInstancesAndSync()
+                       if err != nil {
+                               wp.logger.WithError(err).Warn("sync failed")
+                       }
+                       timer.Reset(wp.syncInterval)
+               case <-wp.stop:
+                       wp.logger.Debug("worker.Pool stopped")
+                       return
+               }
+       }
+}
+
+// Stop synchronizing with the InstanceSet.
+func (wp *Pool) Stop() {
+       wp.setupOnce.Do(wp.setup)
+       close(wp.stop)
+}
+
+// Instances returns an InstanceView for each worker in the pool,
+// summarizing its current state and recent activity.
+func (wp *Pool) Instances() []InstanceView {
+       var r []InstanceView
+       wp.setupOnce.Do(wp.setup)
+       wp.mtx.Lock()
+       for _, w := range wp.workers {
+               r = append(r, InstanceView{
+                       Instance:             w.instance.String(),
+                       Price:                w.instType.Price,
+                       ArvadosInstanceType:  w.instType.Name,
+                       ProviderInstanceType: w.instType.ProviderType,
+                       LastContainerUUID:    w.lastUUID,
+                       LastBusy:             w.busy,
+                       WorkerState:          w.state.String(),
+               })
+       }
+       wp.mtx.Unlock()
+       sort.Slice(r, func(i, j int) bool {
+               return strings.Compare(r[i].Instance, r[j].Instance) < 0
+       })
+       return r
+}
+
+func (wp *Pool) setup() {
+       wp.creating = map[arvados.InstanceType][]time.Time{}
+       wp.exited = map[string]time.Time{}
+       wp.workers = map[cloud.InstanceID]*worker{}
+       wp.subscribers = map[<-chan struct{}]chan<- struct{}{}
+}
+
+func (wp *Pool) notify() {
+       wp.mtx.RLock()
+       defer wp.mtx.RUnlock()
+       for _, send := range wp.subscribers {
+               select {
+               case send <- struct{}{}:
+               default:
+               }
+       }
+}
+
+func (wp *Pool) getInstancesAndSync() error {
+       wp.setupOnce.Do(wp.setup)
+       wp.logger.Debug("getting instance list")
+       threshold := time.Now()
+       instances, err := wp.instanceSet.Instances(cloud.InstanceTags{})
+       if err != nil {
+               return err
+       }
+       wp.sync(threshold, instances)
+       wp.logger.Debug("sync done")
+       return nil
+}
+
+// Add/remove/update workers based on instances, which were obtained
+// from the instanceSet. However, don't clobber any other updates that
+// already happened after threshold.
+func (wp *Pool) sync(threshold time.Time, instances []cloud.Instance) {
+       wp.mtx.Lock()
+       defer wp.mtx.Unlock()
+       wp.logger.WithField("Instances", len(instances)).Debug("sync instances")
+       notify := false
+
+       for _, inst := range instances {
+               itTag := inst.Tags()[tagKeyInstanceType]
+               it, ok := wp.instanceTypes[itTag]
+               if !ok {
+                       wp.logger.WithField("Instance", inst).Errorf("unknown InstanceType tag %q --- ignoring", itTag)
+                       continue
+               }
+               if wkr, isNew := wp.updateWorker(inst, it, StateUnknown); isNew {
+                       notify = true
+               } else if wkr.state == StateShutdown && time.Since(wkr.destroyed) > wp.timeoutShutdown {
+                       wp.logger.WithField("Instance", inst).Info("worker still listed after shutdown; retrying")
+                       wkr.shutdown()
+               }
+       }
+
+       for id, wkr := range wp.workers {
+               if wkr.updated.After(threshold) {
+                       continue
+               }
+               logger := wp.logger.WithFields(logrus.Fields{
+                       "Instance":    wkr.instance,
+                       "WorkerState": wkr.state,
+               })
+               logger.Info("instance disappeared in cloud")
+               delete(wp.workers, id)
+               go wkr.executor.Close()
+               notify = true
+       }
+
+       if !wp.loaded {
+               wp.loaded = true
+               wp.logger.WithField("N", len(wp.workers)).Info("loaded initial instance list")
+       }
+
+       if notify {
+               go wp.notify()
+       }
+}
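The Subscribe/Unsubscribe/notify machinery above is the only way callers learn about Pool state changes; runMetrics and runSync both depend on it. Below is a minimal sketch of an external consumer loop, not part of this commit: drainIdle and maxIdle are hypothetical names, while Subscribe, Unsubscribe, CountWorkers, StateIdle, and Shutdown are the identifiers defined in pool.go above.

    package example

    import (
        "git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
        "git.curoverse.com/arvados.git/sdk/go/arvados"
    )

    // drainIdle (hypothetical) wakes on every pool notification and shuts
    // down one booting/idle worker of the given type whenever the pool
    // reports more than maxIdle idle workers.
    func drainIdle(pool *worker.Pool, it arvados.InstanceType, maxIdle int) {
        ch := pool.Subscribe()
        defer pool.Unsubscribe(ch)
        for range ch {
            if pool.CountWorkers()[worker.StateIdle] > maxIdle {
                // Shutdown returns false when no booting/idle worker
                // of this type is available; the result is ignored here.
                pool.Shutdown(it)
            }
        }
    }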
diff --git a/lib/dispatchcloud/worker/pool_test.go b/lib/dispatchcloud/worker/pool_test.go
new file mode 100644 (file)
index 0000000..3867e2c
--- /dev/null
@@ -0,0 +1,135 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package worker
+
+import (
+       "io"
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/cloud"
+       "git.curoverse.com/arvados.git/lib/dispatchcloud/test"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/Sirupsen/logrus"
+       check "gopkg.in/check.v1"
+)
+
+const GiB arvados.ByteSize = 1 << 30
+
+var _ = check.Suite(&PoolSuite{})
+
+type lessChecker struct {
+       *check.CheckerInfo
+}
+
+func (*lessChecker) Check(params []interface{}, names []string) (result bool, error string) {
+       return params[0].(int) < params[1].(int), ""
+}
+
+var less = &lessChecker{&check.CheckerInfo{Name: "less", Params: []string{"obtained", "expected"}}}
+
+type PoolSuite struct{}
+
+func (suite *PoolSuite) SetUpSuite(c *check.C) {
+       logrus.StandardLogger().SetLevel(logrus.DebugLevel)
+}
+
+func (suite *PoolSuite) TestStartContainer(c *check.C) {
+       // TODO: use an instanceSet stub with an SSH server
+}
+
+func (suite *PoolSuite) TestVerifyHostKey(c *check.C) {
+       // TODO: use an instanceSet stub with an SSH server
+}
+
+func (suite *PoolSuite) TestCreateUnallocShutdown(c *check.C) {
+       lameInstanceSet := &test.LameInstanceSet{Hold: make(chan bool)}
+       type1 := arvados.InstanceType{Name: "a1s", ProviderType: "a1.small", VCPUs: 1, RAM: 1 * GiB, Price: .01}
+       type2 := arvados.InstanceType{Name: "a2m", ProviderType: "a2.medium", VCPUs: 2, RAM: 2 * GiB, Price: .02}
+       pool := &Pool{
+               logger:      logrus.StandardLogger(),
+               newExecutor: func(cloud.Instance) Executor { return &stubExecutor{} },
+               instanceSet: lameInstanceSet,
+               instanceTypes: arvados.InstanceTypeMap{
+                       type1.Name: type1,
+                       type2.Name: type2,
+               },
+       }
+       notify := pool.Subscribe()
+       defer pool.Unsubscribe(notify)
+       notify2 := pool.Subscribe()
+       defer pool.Unsubscribe(notify2)
+
+       c.Check(pool.Unallocated()[type1], check.Equals, 0)
+       c.Check(pool.Unallocated()[type2], check.Equals, 0)
+       pool.Create(type2)
+       pool.Create(type1)
+       pool.Create(type2)
+       c.Check(pool.Unallocated()[type1], check.Equals, 1)
+       c.Check(pool.Unallocated()[type2], check.Equals, 2)
+
+       // Unblock the pending Create calls.
+       go lameInstanceSet.Release(3)
+
+       // Wait for each instance to either return from its Create
+       // call, or show up in a poll.
+       suite.wait(c, pool, notify, func() bool {
+               pool.mtx.RLock()
+               defer pool.mtx.RUnlock()
+               return len(pool.workers) == 3
+       })
+
+       c.Check(pool.Shutdown(type2), check.Equals, true)
+       suite.wait(c, pool, notify, func() bool {
+               return pool.Unallocated()[type1] == 1 && pool.Unallocated()[type2] == 1
+       })
+       c.Check(pool.Shutdown(type2), check.Equals, true)
+       suite.wait(c, pool, notify, func() bool {
+               return pool.Unallocated()[type1] == 1 && pool.Unallocated()[type2] == 0
+       })
+       c.Check(pool.Shutdown(type2), check.Equals, false)
+       for {
+               // Consume any waiting notifications to ensure the
+               // next one we get is from Shutdown.
+               select {
+               case <-notify:
+                       continue
+               default:
+               }
+               break
+       }
+       c.Check(pool.Shutdown(type1), check.Equals, true)
+       suite.wait(c, pool, notify, func() bool {
+               return pool.Unallocated()[type1] == 0 && pool.Unallocated()[type2] == 0
+       })
+       select {
+       case <-notify2:
+       case <-time.After(time.Second):
+               c.Error("notify did not receive")
+       }
+       go lameInstanceSet.Release(3) // unblock Destroy calls
+}
+
+func (suite *PoolSuite) wait(c *check.C, pool *Pool, notify <-chan struct{}, ready func() bool) {
+       timeout := time.NewTimer(time.Second).C
+       for !ready() {
+               select {
+               case <-notify:
+                       continue
+               case <-timeout:
+               }
+               break
+       }
+       c.Check(ready(), check.Equals, true)
+}
+
+type stubExecutor struct{}
+
+func (*stubExecutor) SetTarget(cloud.ExecutorTarget) {}
+
+func (*stubExecutor) Execute(cmd string, stdin io.Reader) ([]byte, []byte, error) {
+       return nil, nil, nil
+}
+
+func (*stubExecutor) Close() {}
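The stubExecutor above discards every command, which is all TestCreateUnallocShutdown needs; the TODO probe/start-container tests would need a richer test double. A possible sketch, not part of this commit: listingExecutor is a hypothetical name, the three-method signature mirrors stubExecutor, and the "crunch-run --list" string matches the probe command used by worker.probeRunning below.

    package worker

    import (
        "io"
        "strings"
        "sync"

        "git.curoverse.com/arvados.git/lib/cloud"
    )

    // listingExecutor (hypothetical) pretends the UUIDs in running are
    // live crunch-run processes: a "crunch-run --list" probe reports
    // them, and every other command succeeds silently.
    type listingExecutor struct {
        mtx     sync.Mutex
        running []string
    }

    func (e *listingExecutor) SetTarget(cloud.ExecutorTarget) {}
    func (e *listingExecutor) Close()                         {}

    func (e *listingExecutor) Execute(cmd string, stdin io.Reader) ([]byte, []byte, error) {
        e.mtx.Lock()
        defer e.mtx.Unlock()
        if strings.HasPrefix(cmd, "crunch-run --list") {
            return []byte(strings.Join(e.running, "\n")), nil, nil
        }
        return nil, nil, nil
    }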
diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go
new file mode 100644 (file)
index 0000000..c261863
--- /dev/null
@@ -0,0 +1,320 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package worker
+
+import (
+       "bytes"
+       "strings"
+       "sync"
+       "time"
+
+       "git.curoverse.com/arvados.git/lib/cloud"
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+       "github.com/Sirupsen/logrus"
+)
+
+// State indicates whether a worker is available to do work, and (if
+// not) whether/when it is expected to become ready.
+type State int
+
+const (
+       StateUnknown  State = iota // might be running a container already
+       StateBooting               // instance is booting
+       StateIdle                  // instance booted, no containers are running
+       StateRunning               // instance is running one or more containers
+       StateShutdown              // worker has stopped monitoring the instance
+       StateHold                  // running, but not available to run new containers
+)
+
+const (
+       // TODO: configurable
+       maxPingFailTime = 10 * time.Minute
+)
+
+var stateString = map[State]string{
+       StateUnknown:  "unknown",
+       StateBooting:  "booting",
+       StateIdle:     "idle",
+       StateRunning:  "running",
+       StateShutdown: "shutdown",
+       StateHold:     "hold",
+}
+
+// String implements fmt.Stringer.
+func (s State) String() string {
+       return stateString[s]
+}
+
+// MarshalText implements encoding.TextMarshaler so a JSON encoding of
+// map[State]anything uses the state's string representation.
+func (s State) MarshalText() ([]byte, error) {
+       return []byte(stateString[s]), nil
+}
+
+type worker struct {
+       logger   logrus.FieldLogger
+       executor Executor
+       wp       *Pool
+
+       mtx       sync.Locker // must be wp's Locker.
+       state     State
+       instance  cloud.Instance
+       instType  arvados.InstanceType
+       vcpus     int64
+       memory    int64
+       appeared  time.Time
+       probed    time.Time
+       updated   time.Time
+       busy      time.Time
+       destroyed time.Time
+       lastUUID  string
+       running   map[string]struct{} // remember to update state idle<->running when this changes
+       starting  map[string]struct{} // remember to update state idle<->running when this changes
+       probing   chan struct{}
+}
+
+// caller must have lock.
+func (wkr *worker) startContainer(ctr arvados.Container) {
+       logger := wkr.logger.WithFields(logrus.Fields{
+               "ContainerUUID": ctr.UUID,
+               "Priority":      ctr.Priority,
+       })
+       logger = logger.WithField("Instance", wkr.instance)
+       logger.Debug("starting container")
+       wkr.starting[ctr.UUID] = struct{}{}
+       wkr.state = StateRunning
+       go func() {
+               stdout, stderr, err := wkr.executor.Execute("crunch-run --detach '"+ctr.UUID+"'", nil)
+               wkr.mtx.Lock()
+               defer wkr.mtx.Unlock()
+               now := time.Now()
+               wkr.updated = now
+               wkr.busy = now
+               delete(wkr.starting, ctr.UUID)
+               wkr.running[ctr.UUID] = struct{}{}
+               wkr.lastUUID = ctr.UUID
+               if err != nil {
+                       logger.WithField("stdout", string(stdout)).
+                               WithField("stderr", string(stderr)).
+                               WithError(err).
+                               Error("error starting crunch-run process")
+                       // Leave uuid in wkr.running, though: it's
+                       // possible the error was just a communication
+                       // failure and the process was in fact
+                       // started.  Wait for next probe to find out.
+                       return
+               }
+               logger.Info("crunch-run process started")
+               wkr.lastUUID = ctr.UUID
+       }()
+}
+
+// ProbeAndUpdate conducts appropriate boot/running probes (if any)
+// for the worker's current state. If a previous probe is still
+// running, it does nothing.
+//
+// It should be called in a new goroutine.
+func (wkr *worker) ProbeAndUpdate() {
+       select {
+       case wkr.probing <- struct{}{}:
+               wkr.probeAndUpdate()
+               <-wkr.probing
+       default:
+               wkr.logger.Debug("still waiting for last probe to finish")
+       }
+}
+
+// should be called in a new goroutine
+func (wkr *worker) probeAndUpdate() {
+       wkr.mtx.Lock()
+       updated := wkr.updated
+       needProbeRunning := wkr.state == StateRunning || wkr.state == StateIdle
+       needProbeBooted := wkr.state == StateUnknown || wkr.state == StateBooting
+       wkr.mtx.Unlock()
+       if !needProbeBooted && !needProbeRunning {
+               return
+       }
+
+       var (
+               ctrUUIDs []string
+               ok       bool
+               stderr   []byte
+       )
+       if needProbeBooted {
+               ok, stderr = wkr.probeBooted()
+               wkr.mtx.Lock()
+               if ok || wkr.state == StateRunning || wkr.state == StateIdle {
+                       wkr.logger.Info("instance booted; will try probeRunning")
+                       needProbeRunning = true
+               }
+               wkr.mtx.Unlock()
+       }
+       if needProbeRunning {
+               ctrUUIDs, ok, stderr = wkr.probeRunning()
+       }
+       logger := wkr.logger.WithField("stderr", string(stderr))
+       wkr.mtx.Lock()
+       defer wkr.mtx.Unlock()
+       if !ok {
+               if wkr.state == StateShutdown && wkr.updated.After(updated) {
+                       // Skip the logging noise if shutdown was
+                       // initiated during probe.
+                       return
+               }
+               dur := time.Since(wkr.probed)
+               logger := logger.WithFields(logrus.Fields{
+                       "Duration": dur,
+                       "State":    wkr.state,
+               })
+               if wkr.state == StateBooting && !needProbeRunning {
+                       // If we know the instance has never passed a
+                       // boot probe, it's not noteworthy that it
+                       // hasn't passed this probe.
+                       logger.Debug("new instance not responding")
+               } else {
+                       logger.Info("instance not responding")
+               }
+               wkr.shutdownIfBroken(dur)
+               return
+       }
+
+       updateTime := time.Now()
+       wkr.probed = updateTime
+
+       if updated != wkr.updated {
+               // Worker was updated after the probe began, so
+               // wkr.running might have a container UUID that was
+               // not yet running when ctrUUIDs was generated. Leave
+               // wkr.running alone and wait for the next probe to
+               // catch up on any changes.
+               return
+       }
+
+       if len(ctrUUIDs) > 0 {
+               wkr.busy = updateTime
+               wkr.lastUUID = ctrUUIDs[0]
+       } else if len(wkr.running) > 0 {
+               // Actual last-busy time was sometime between wkr.busy
+               // and now. Now is the earliest opportunity to take
+               // advantage of the non-busy state, though.
+               wkr.busy = updateTime
+       }
+       running := map[string]struct{}{}
+       changed := false
+       for _, uuid := range ctrUUIDs {
+               running[uuid] = struct{}{}
+               if _, ok := wkr.running[uuid]; !ok {
+                       changed = true
+               }
+       }
+       for uuid := range wkr.running {
+               if _, ok := running[uuid]; !ok {
+                       logger.WithField("ContainerUUID", uuid).Info("crunch-run process ended")
+                       wkr.wp.notifyExited(uuid, updateTime)
+                       changed = true
+               }
+       }
+       if wkr.state == StateUnknown || wkr.state == StateBooting {
+               wkr.state = StateIdle
+               changed = true
+       }
+       if changed {
+               wkr.running = running
+               if wkr.state == StateIdle && len(wkr.starting)+len(wkr.running) > 0 {
+                       wkr.state = StateRunning
+               } else if wkr.state == StateRunning && len(wkr.starting)+len(wkr.running) == 0 {
+                       wkr.state = StateIdle
+               }
+               wkr.updated = updateTime
+               go wkr.wp.notify()
+       }
+}
+
+func (wkr *worker) probeRunning() (running []string, ok bool, stderr []byte) {
+       cmd := "crunch-run --list"
+       stdout, stderr, err := wkr.executor.Execute(cmd, nil)
+       if err != nil {
+               wkr.logger.WithFields(logrus.Fields{
+                       "Command": cmd,
+                       "stdout":  string(stdout),
+                       "stderr":  string(stderr),
+               }).WithError(err).Warn("probe failed")
+               return nil, false, stderr
+       }
+       stdout = bytes.TrimRight(stdout, "\n")
+       if len(stdout) == 0 {
+               return nil, true, stderr
+       }
+       return strings.Split(string(stdout), "\n"), true, stderr
+}
+
+func (wkr *worker) probeBooted() (ok bool, stderr []byte) {
+       cmd := wkr.wp.bootProbeCommand
+       if cmd == "" {
+               cmd = "true"
+       }
+       stdout, stderr, err := wkr.executor.Execute(cmd, nil)
+       logger := wkr.logger.WithFields(logrus.Fields{
+               "Command": cmd,
+               "stdout":  string(stdout),
+               "stderr":  string(stderr),
+       })
+       if err != nil {
+               logger.WithError(err).Debug("boot probe failed")
+               return false, stderr
+       }
+       logger.Info("boot probe succeeded")
+       return true, stderr
+}
+
+// caller must have lock.
+func (wkr *worker) shutdownIfBroken(dur time.Duration) {
+       if wkr.state == StateHold {
+               return
+       }
+       label, threshold := "", wkr.wp.timeoutProbe
+       if wkr.state == StateBooting {
+               label, threshold = "new ", wkr.wp.timeoutBooting
+       }
+       if dur < threshold {
+               return
+       }
+       wkr.logger.WithFields(logrus.Fields{
+               "Duration": dur,
+               "Since":    wkr.probed,
+               "State":    wkr.state,
+       }).Warnf("%sinstance unresponsive, shutting down", label)
+       wkr.shutdown()
+}
+
+// caller must have lock.
+func (wkr *worker) shutdownIfIdle() bool {
+       if wkr.state != StateIdle {
+               return false
+       }
+       age := time.Since(wkr.busy)
+       if age < wkr.wp.timeoutIdle {
+               return false
+       }
+       wkr.logger.WithField("Age", age).Info("shutdown idle worker")
+       wkr.shutdown()
+       return true
+}
+
+// caller must have lock
+func (wkr *worker) shutdown() {
+       now := time.Now()
+       wkr.updated = now
+       wkr.destroyed = now
+       wkr.state = StateShutdown
+       go wkr.wp.notify()
+       go func() {
+               err := wkr.instance.Destroy()
+               if err != nil {
+                       wkr.logger.WithError(err).Warn("shutdown failed")
+                       return
+               }
+       }()
+}
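Because State implements encoding.TextMarshaler above, any map keyed by State (such as the one returned by Pool.CountWorkers) serializes to readable JSON. A small illustrative sketch, assuming the worker package lives at the import path implied by this commit:

    package main

    import (
        "encoding/json"
        "fmt"

        "git.curoverse.com/arvados.git/lib/dispatchcloud/worker"
    )

    func main() {
        counts := map[worker.State]int{
            worker.StateBooting: 1,
            worker.StateIdle:    2,
            worker.StateRunning: 3,
        }
        buf, err := json.Marshal(counts)
        if err != nil {
            panic(err)
        }
        // Object keys use the states' string forms, e.g.
        // {"booting":1,"idle":2,"running":3}
        fmt.Println(string(buf))
    }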
index 4c49a449b2a68fdf1eaaa5cd674129ae257dfc6e..6a91d6ff3c5076a28e06a685ed8c73bf45a84218 100644 (file)
@@ -133,7 +133,7 @@ class ArvadosContainer(JobBase):
                 vwd = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                     keep_client=self.arvrunner.keep_client,
                                                     num_retries=self.arvrunner.num_retries)
-                generatemapper = NoFollowPathMapper([self.generatefiles], "", "",
+                generatemapper = NoFollowPathMapper(self.generatefiles["listing"], "", "",
                                                     separateDirs=False)
 
                 sorteditems = sorted(generatemapper.items(), None, key=lambda n: n[1].target)
@@ -414,8 +414,8 @@ class RunnerContainer(Runner):
             "properties": {}
         }
 
-        if self.tool.tool.get("id", "").startswith("keep:"):
-            sp = self.tool.tool["id"].split('/')
+        if self.embedded_tool.tool.get("id", "").startswith("keep:"):
+            sp = self.embedded_tool.tool["id"].split('/')
             workflowcollection = sp[0][5:]
             workflowname = "/".join(sp[1:])
             workflowpath = "/var/lib/cwl/workflow/%s" % workflowname
@@ -424,14 +424,14 @@ class RunnerContainer(Runner):
                 "portable_data_hash": "%s" % workflowcollection
             }
         else:
-            packed = packed_workflow(self.arvrunner, self.tool, self.merged_map)
+            packed = packed_workflow(self.arvrunner, self.embedded_tool, self.merged_map)
             workflowpath = "/var/lib/cwl/workflow.json#main"
             container_req["mounts"]["/var/lib/cwl/workflow.json"] = {
                 "kind": "json",
                 "content": packed
             }
-            if self.tool.tool.get("id", "").startswith("arvwf:"):
-                container_req["properties"]["template_uuid"] = self.tool.tool["id"][6:33]
+            if self.embedded_tool.tool.get("id", "").startswith("arvwf:"):
+                container_req["properties"]["template_uuid"] = self.embedded_tool.tool["id"][6:33]
 
 
         # --local means execute the workflow instead of submitting a container request
index 9a03372d32de9375e9401fe4fc4099dce61f1181..87d6d1049a08d349d8d45b41b47a5fccb99f237c 100644 (file)
@@ -67,7 +67,7 @@ class ArvadosJob(JobBase):
                                                     keep_client=self.arvrunner.keep_client,
                                                     num_retries=self.arvrunner.num_retries)
                 script_parameters["task.vwd"] = {}
-                generatemapper = VwdPathMapper([self.generatefiles], "", "",
+                generatemapper = VwdPathMapper(self.generatefiles["listing"], "", "",
                                                separateDirs=False)
 
                 with Perf(metrics, "createfiles %s" % self.name):
@@ -296,10 +296,10 @@ class RunnerJob(Runner):
         a pipeline template or pipeline instance.
         """
 
-        if self.tool.tool["id"].startswith("keep:"):
-            self.job_order["cwl:tool"] = self.tool.tool["id"][5:]
+        if self.embedded_tool.tool["id"].startswith("keep:"):
+            self.job_order["cwl:tool"] = self.embedded_tool.tool["id"][5:]
         else:
-            packed = packed_workflow(self.arvrunner, self.tool, self.merged_map)
+            packed = packed_workflow(self.arvrunner, self.embedded_tool, self.merged_map)
             wf_pdh = upload_workflow_collection(self.arvrunner, self.name, packed)
             self.job_order["cwl:tool"] = "%s/workflow.cwl#main" % wf_pdh
 
@@ -386,19 +386,21 @@ class RunnerTemplate(object):
     }
 
     def __init__(self, runner, tool, job_order, enable_reuse, uuid,
-                 submit_runner_ram=0, name=None, merged_map=None):
+                 submit_runner_ram=0, name=None, merged_map=None,
+                 loadingContext=None):
         self.runner = runner
-        self.tool = tool
+        self.embedded_tool = tool
         self.job = RunnerJob(
             runner=runner,
             tool=tool,
-            job_order=job_order,
             enable_reuse=enable_reuse,
             output_name=None,
             output_tags=None,
             submit_runner_ram=submit_runner_ram,
             name=name,
-            merged_map=merged_map)
+            merged_map=merged_map,
+            loadingContext=loadingContext)
+        self.job.job_order = job_order
         self.uuid = uuid
 
     def pipeline_component_spec(self):
@@ -420,7 +422,7 @@ class RunnerTemplate(object):
         job_params = spec['script_parameters']
         spec['script_parameters'] = {}
 
-        for param in self.tool.tool['inputs']:
+        for param in self.embedded_tool.tool['inputs']:
             param = copy.deepcopy(param)
 
             # Data type and "required" flag...
index cd319e55b12137db6170b37b763a0dccb36d497b..c4e9f44abb0b20ecb66a7bdc13c5240beaaeeccb 100644 (file)
@@ -2,7 +2,7 @@
 #
 # SPDX-License-Identifier: Apache-2.0
 
-from cwltool.command_line_tool import CommandLineTool
+from cwltool.command_line_tool import CommandLineTool, ExpressionTool
 from cwltool.builder import Builder
 from .arvjob import ArvadosJob
 from .arvcontainer import ArvadosContainer
@@ -105,3 +105,15 @@ class ArvadosCommandTool(CommandLineTool):
             runtimeContext.tmpdir = "$(task.tmpdir)"
             runtimeContext.docker_tmpdir = "$(task.tmpdir)"
         return super(ArvadosCommandTool, self).job(joborder, output_callback, runtimeContext)
+
+class ArvadosExpressionTool(ExpressionTool):
+    def __init__(self, arvrunner, toolpath_object, loadingContext):
+        super(ArvadosExpressionTool, self).__init__(toolpath_object, loadingContext)
+        self.arvrunner = arvrunner
+
+    def job(self,
+            job_order,         # type: Mapping[Text, Text]
+            output_callback,  # type: Callable[[Any, Any], Any]
+            runtimeContext     # type: RuntimeContext
+           ):
+        return super(ArvadosExpressionTool, self).job(job_order, self.arvrunner.get_wrapped_callback(output_callback), runtimeContext)
index eb78a25fedbd4754752ff8598d7e1faa6b1585db..ea167d4044d76fa91953eb401962107afd6b878e 100644 (file)
@@ -205,6 +205,9 @@ class ArvadosWorkflow(Workflow):
                                                     raise WorkflowException("Non-top-level ResourceRequirement in single container cannot have expressions")
                                 if not dyn:
                                     self.static_resource_req.append(req)
+                            if req["class"] == "DockerRequirement":
+                                if "http://arvados.org/cwl#dockerCollectionPDH" in req:
+                                    del req["http://arvados.org/cwl#dockerCollectionPDH"]
 
                 visit_class(packed["$graph"], ("Workflow", "CommandLineTool"), visit)
 
index 7512d5bef27f28014f650d897d24e3d59cb7b3c4..61f9cbbe0dc80a7ce7c4894ccb2697c0b0310652 100644 (file)
@@ -103,6 +103,7 @@ def run():
         arvargs.output_name = output_name
         arvargs.output_tags = output_tags
         arvargs.thread_count = 1
+        arvargs.collection_cache_size = None
 
         runner = arvados_cwl.ArvCwlExecutor(api_client=arvados.safeapi.ThreadSafeApiCache(
             api_params={"model": OrderedJsonModel()}, keep_params={"num_retries": 4}),
index 9595b55915477b6cb8beb858e8cc8e4b89f3d603..27774b2f7cf6bd1fbb9bd8474f5dde4e7e4d6d51 100644 (file)
@@ -27,7 +27,7 @@ import arvados_cwl.util
 from .arvcontainer import RunnerContainer
 from .arvjob import RunnerJob, RunnerTemplate
 from .runner import Runner, upload_docker, upload_job_order, upload_workflow_deps
-from .arvtool import ArvadosCommandTool, validate_cluster_target
+from .arvtool import ArvadosCommandTool, validate_cluster_target, ArvadosExpressionTool
 from .arvworkflow import ArvadosWorkflow, upload_workflow
 from .fsaccess import CollectionFsAccess, CollectionFetcher, collectionResolver, CollectionCache, pdh_size
 from .perf import Perf
@@ -195,8 +195,10 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
             return ArvadosCommandTool(self, toolpath_object, loadingContext)
         elif "class" in toolpath_object and toolpath_object["class"] == "Workflow":
             return ArvadosWorkflow(self, toolpath_object, loadingContext)
+        elif "class" in toolpath_object and toolpath_object["class"] == "ExpressionTool":
+            return ArvadosExpressionTool(self, toolpath_object, loadingContext)
         else:
-            return cwltool.workflow.default_make_tool(toolpath_object, loadingContext)
+            raise Exception("Unknown tool %s" % toolpath_object.get("class"))
 
     def output_callback(self, out, processStatus):
         with self.workflow_eval_lock:
@@ -557,7 +559,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                                       uuid=existing_uuid,
                                       submit_runner_ram=runtimeContext.submit_runner_ram,
                                       name=runtimeContext.name,
-                                      merged_map=merged_map)
+                                      merged_map=merged_map,
+                                      loadingContext=loadingContext)
                 tmpl.save()
                 # cwltool.main will write our return value to stdout.
                 return (tmpl.uuid, "success")
@@ -616,11 +619,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
             if self.work_api == "containers":
                 if tool.tool["class"] == "CommandLineTool" and runtimeContext.wait and (not runtimeContext.always_submit_runner):
                     runtimeContext.runnerjob = tool.tool["id"]
-                    runnerjob = tool.job(job_order,
-                                         self.output_callback,
-                                         runtimeContext).next()
                 else:
-                    runnerjob = RunnerContainer(self, tool, job_order, runtimeContext.enable_reuse,
+                    tool = RunnerContainer(self, tool, loadingContext, runtimeContext.enable_reuse,
                                                 self.output_name,
                                                 self.output_tags,
                                                 submit_runner_ram=runtimeContext.submit_runner_ram,
@@ -634,7 +634,7 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                                                 collection_cache_size=runtimeContext.collection_cache_size,
                                                 collection_cache_is_default=self.should_estimate_cache_size)
             elif self.work_api == "jobs":
-                runnerjob = RunnerJob(self, tool, job_order, runtimeContext.enable_reuse,
+                tool = RunnerJob(self, tool, loadingContext, runtimeContext.enable_reuse,
                                       self.output_name,
                                       self.output_tags,
                                       submit_runner_ram=runtimeContext.submit_runner_ram,
@@ -652,10 +652,16 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
                     "state": "RunningOnClient"}).execute(num_retries=self.num_retries)
             logger.info("Pipeline instance %s", self.pipeline["uuid"])
 
-        if runnerjob and not runtimeContext.wait:
-            submitargs = runtimeContext.copy()
-            submitargs.submit = False
-            runnerjob.run(submitargs)
+        if runtimeContext.cwl_runner_job is not None:
+            self.uuid = runtimeContext.cwl_runner_job.get('uuid')
+
+        jobiter = tool.job(job_order,
+                           self.output_callback,
+                           runtimeContext)
+
+        if runtimeContext.submit and not runtimeContext.wait:
+            runnerjob = jobiter.next()
+            runnerjob.run(runtimeContext)
             return (runnerjob.uuid, "success")
 
         current_container = arvados_cwl.util.get_current_container(self.api, self.num_retries, logger)
@@ -670,14 +676,6 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
 
         try:
             self.workflow_eval_lock.acquire()
-            if runnerjob:
-                jobiter = iter((runnerjob,))
-            else:
-                if runtimeContext.cwl_runner_job is not None:
-                    self.uuid = runtimeContext.cwl_runner_job.get('uuid')
-                jobiter = tool.job(job_order,
-                                   self.output_callback,
-                                   runtimeContext)
 
             # Holds the lock while this code runs and releases it when
             # it is safe to do so in self.workflow_eval_lock.wait(),
@@ -726,8 +724,10 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
             if self.pipeline:
                 self.api.pipeline_instances().update(uuid=self.pipeline["uuid"],
                                                      body={"state": "Failed"}).execute(num_retries=self.num_retries)
-            if runnerjob and runnerjob.uuid and self.work_api == "containers":
-                self.api.container_requests().update(uuid=runnerjob.uuid,
+            if runtimeContext.submit and isinstance(tool, Runner):
+                runnerjob = tool
+                if runnerjob.uuid and self.work_api == "containers":
+                    self.api.container_requests().update(uuid=runnerjob.uuid,
                                                      body={"priority": "0"}).execute(num_retries=self.num_retries)
         finally:
             self.workflow_eval_lock.release()
@@ -742,8 +742,8 @@ http://doc.arvados.org/install/install-api-server.html#disable_api_methods
         if self.final_output is None:
             raise WorkflowException("Workflow did not return a result.")
 
-        if runtimeContext.submit and isinstance(runnerjob, Runner):
-            logger.info("Final output collection %s", runnerjob.final_output)
+        if runtimeContext.submit and isinstance(tool, Runner):
+            logger.info("Final output collection %s", tool.final_output)
         else:
             if self.output_name is None:
                 self.output_name = "Output of %s" % (shortname(tool.tool["id"]))
index 26c85d300ddcb17c8038d31c4d0f8cd1d39aabc9..0b2a22788e6f98537b0f5a3437a2d540a57d47ee 100644 (file)
@@ -119,6 +119,39 @@ class ArvPathMapper(PathMapper):
         else:
             raise SourceLine(obj, "location", WorkflowException).makeError("Don't know what to do with '%s'" % obj["location"])
 
+    def needs_new_collection(self, srcobj, prefix=""):
+        """Check if files need to be staged into a new collection.
+
+        If all the files are in the same collection and in the same
+        paths they would be staged to, return False.  Otherwise, a new
+        collection is needed with files copied/created in the
+        appropriate places.
+        """
+
+        loc = srcobj["location"]
+        if loc.startswith("_:"):
+            return True
+        if prefix:
+            if loc != prefix+srcobj["basename"]:
+                return True
+        else:
+            i = loc.rfind("/")
+            if i > -1:
+                prefix = loc[:i+1]
+            else:
+                prefix = loc+"/"
+        if srcobj["class"] == "File" and loc not in self._pathmap:
+            return True
+        for s in srcobj.get("secondaryFiles", []):
+            if self.needs_new_collection(s, prefix):
+                return True
+        if srcobj.get("listing"):
+            prefix = "%s%s/" % (prefix, srcobj["basename"])
+            for l in srcobj["listing"]:
+                if self.needs_new_collection(l, prefix):
+                    return True
+        return False
+
     def setup(self, referenced_files, basedir):
         # type: (List[Any], unicode) -> None
         uploadfiles = set()
@@ -169,6 +202,13 @@ class ArvPathMapper(PathMapper):
             elif srcobj["class"] == "File" and (srcobj.get("secondaryFiles") or
                 (srcobj["location"].startswith("_:") and "contents" in srcobj)):
 
+                # If all secondary files/directories are located in
+                # the same collection as the primary file, and their
+                # paths and names are consistent with staging, don't
+                # create a new collection.
+                if not self.needs_new_collection(srcobj):
+                    continue
+
                 c = arvados.collection.Collection(api_client=self.arvrunner.api,
                                                   keep_client=self.arvrunner.keep_client,
                                                   num_retries=self.arvrunner.num_retries                                                  )
index 6094cfe245872b1b58976901668bd80a8b5b91b0..4b3275fa36b4ee497ec83c3de2b3e8a0c938fb62 100644 (file)
@@ -16,7 +16,7 @@ from schema_salad.sourceline import SourceLine, cmap
 
 from cwltool.command_line_tool import CommandLineTool
 import cwltool.workflow
-from cwltool.process import scandeps, UnsupportedRequirement, normalizeFilesDirs, shortname
+from cwltool.process import scandeps, UnsupportedRequirement, normalizeFilesDirs, shortname, Process
 from cwltool.load_tool import fetch_document
 from cwltool.pathmapper import adjustFileObjs, adjustDirObjs, visit_class
 from cwltool.utils import aslist
@@ -356,25 +356,28 @@ def upload_workflow_collection(arvrunner, name, packed):
     return collection.portable_data_hash()
 
 
-class Runner(object):
+class Runner(Process):
     """Base class for runner processes, which submit an instance of
     arvados-cwl-runner and wait for the final result."""
 
-    def __init__(self, runner, tool, job_order, enable_reuse,
+    def __init__(self, runner, tool, loadingContext, enable_reuse,
                  output_name, output_tags, submit_runner_ram=0,
                  name=None, on_error=None, submit_runner_image=None,
                  intermediate_output_ttl=0, merged_map=None,
                  priority=None, secret_store=None,
                  collection_cache_size=256,
                  collection_cache_is_default=True):
+
+        super(Runner, self).__init__(tool.tool, loadingContext)
+
         self.arvrunner = runner
-        self.tool = tool
-        self.job_order = job_order
+        self.embedded_tool = tool
+        self.job_order = None
         self.running = False
         if enable_reuse:
             # If reuse is permitted by command line arguments but
             # disabled by the workflow itself, disable it.
-            reuse_req, _ = self.tool.get_requirement("http://arvados.org/cwl#ReuseRequirement")
+            reuse_req, _ = self.embedded_tool.get_requirement("http://arvados.org/cwl#ReuseRequirement")
             if reuse_req:
                 enable_reuse = reuse_req["enableReuse"]
         self.enable_reuse = enable_reuse
@@ -393,7 +396,7 @@ class Runner(object):
         self.submit_runner_ram = 1024  # defaut 1 GiB
         self.collection_cache_size = collection_cache_size
 
-        runner_resource_req, _ = self.tool.get_requirement("http://arvados.org/cwl#WorkflowRunnerResources")
+        runner_resource_req, _ = self.embedded_tool.get_requirement("http://arvados.org/cwl#WorkflowRunnerResources")
         if runner_resource_req:
             if runner_resource_req.get("coresMin"):
                 self.submit_runner_cores = runner_resource_req["coresMin"]
@@ -414,6 +417,15 @@ class Runner(object):
 
         self.merged_map = merged_map or {}
 
+    def job(self,
+            job_order,         # type: Mapping[Text, Text]
+            output_callbacks,  # type: Callable[[Any, Any], Any]
+            runtimeContext     # type: RuntimeContext
+           ):  # type: (...) -> Generator[Any, None, None]
+        self.job_order = job_order
+        self._init_job(job_order, runtimeContext)
+        yield self
+
     def update_pipeline_component(self, record):
         pass
 
@@ -449,7 +461,7 @@ class Runner(object):
                                                        keep_client=self.arvrunner.keep_client,
                                                        num_retries=self.arvrunner.num_retries)
             if "cwl.output.json" in outc:
-                with outc.open("cwl.output.json") as f:
+                with outc.open("cwl.output.json", "rb") as f:
                     if f.size() > 0:
                         outputs = json.load(f)
             def keepify(fileobj):
index 9d25a562ab32d09dcdfba627fc2089260879cce1..3bd62532a8b80b3d3513107470b827ba6ca4d02c 100644 (file)
@@ -33,8 +33,8 @@ setup(name='arvados-cwl-runner',
       # Note that arvados/build/run-build-packages.sh looks at this
       # file to determine what version of cwltool and schema-salad to build.
       install_requires=[
-          'cwltool==1.0.20181116032456',
-          'schema-salad==2.7.20181116024232',
+          'cwltool==1.0.20181217162649',
+          'schema-salad==3.0.20181129082112',
           'typing >= 3.6.4',
           'ruamel.yaml >=0.15.54, <=0.15.77',
           'arvados-python-client>=1.2.1.20181130020805',
index f924adbbd615e11f84f4f773058d5fb2c27e8ff8..a6f4022cc3559adb40f7a5891495de2fbfd78c2a 100755 (executable)
@@ -83,7 +83,7 @@ export ARVADOS_API_TOKEN=\$(cat /var/lib/arvados/superuser_token)
 if test "$tag" = "latest" ; then
   arv-keepdocker --pull arvados/jobs $tag
 else
-  jobsimg=\$(curl http://versions.arvados.org/v1/commit/$tag | python -c "import json; import sys; sys.stdout.write(json.load(sys.stdin)['Versions']['Docker']['arvados/jobs'])")
+  jobsimg=\$(curl https://versions.arvados.org/v1/commit/$tag | python -c "import json; import sys; sys.stdout.write(json.load(sys.stdin)['Versions']['Docker']['arvados/jobs'])")
   arv-keepdocker --pull arvados/jobs \$jobsimg
   docker tag arvados/jobs:\$jobsimg arvados/jobs:latest
   arv-keepdocker arvados/jobs latest
diff --git a/sdk/cwl/tests/submit_test_job_missing.json b/sdk/cwl/tests/submit_test_job_missing.json
new file mode 100644 (file)
index 0000000..02d61fa
--- /dev/null
@@ -0,0 +1,14 @@
+{
+    "x": {
+        "class": "File",
+        "path": "input/blorp.txt"
+    },
+    "y": {
+        "class": "Directory",
+        "location": "keep:99999999999999999999999999999998+99",
+        "listing": [{
+            "class": "File",
+            "location": "keep:99999999999999999999999999999998+99/file1.txt"
+        }]
+    }
+}
index fb3c257d93e1be9cac211defc97d3282100ccdbc..b78e89012ad62c5f952476da0553b2d26dac5fd3 100644 (file)
@@ -102,3 +102,132 @@ class TestPathmap(unittest.TestCase):
                 "class": "File",
                 "location": "file:tests/hw.py"
             }], "", "/test/%s", "/test/%s/%s")
+
+    def test_needs_new_collection(self):
+        arvrunner = arvados_cwl.executor.ArvCwlExecutor(self.api)
+
+        # Plain file.  Don't need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py"
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        self.assertFalse(p.needs_new_collection(a))
+
+        # A file that isn't in the pathmap (for some reason).  Need a new collection.
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        self.assertTrue(p.needs_new_collection(a))
+
+        # A file with a secondary file in the same collection.  Don't need
+        # a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "keep:99999999999999999999999999999991+99/hw.pyc",
+                "basename": "hw.pyc"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
+        self.assertFalse(p.needs_new_collection(a))
+
+        # Secondary file is in a different collection from the
+        # primary.  Need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "keep:99999999999999999999999999999992+99/hw.pyc",
+                "basename": "hw.pyc"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999992+99/hw.pyc"] = True
+        self.assertTrue(p.needs_new_collection(a))
+
+        # Secondary file should be staged to a different name than
+        # path in location.  Need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "keep:99999999999999999999999999999991+99/hw.pyc",
+                "basename": "hw.other"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.pyc"] = True
+        self.assertTrue(p.needs_new_collection(a))
+
+        # Secondary file is a directory.  Don't need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "Directory",
+                "location": "keep:99999999999999999999999999999991+99/hw",
+                "basename": "hw",
+                "listing": [{
+                    "class": "File",
+                    "location": "keep:99999999999999999999999999999991+99/hw/h2",
+                    "basename": "h2"
+                }]
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
+        self.assertFalse(p.needs_new_collection(a))
+
+        # Secondary file is a renamed directory.  Need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "Directory",
+                "location": "keep:99999999999999999999999999999991+99/hw",
+                "basename": "wh",
+                "listing": [{
+                    "class": "File",
+                    "location": "keep:99999999999999999999999999999991+99/hw/h2",
+                    "basename": "h2"
+                }]
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw"] = True
+        p._pathmap["keep:99999999999999999999999999999991+99/hw/h2"] = True
+        self.assertTrue(p.needs_new_collection(a))
+
+        # Secondary file is a file literal.  Need a new collection.
+        a = {
+            "class": "File",
+            "location": "keep:99999999999999999999999999999991+99/hw.py",
+            "basename": "hw.py",
+            "secondaryFiles": [{
+                "class": "File",
+                "location": "_:123",
+                "basename": "hw.pyc",
+                "contents": "123"
+            }]
+        }
+        p = ArvPathMapper(arvrunner, [], "", "%s", "%s/%s")
+        p._pathmap["keep:99999999999999999999999999999991+99/hw.py"] = True
+        p._pathmap["_:123"] = True
+        self.assertTrue(p.needs_new_collection(a))
index 55164446bdc54a5b81f3ca8d27284fc351e338c3..90dab01471ef61ab380955e6301a73306648edef 100644 (file)
@@ -1045,6 +1045,23 @@ class TestSubmit(unittest.TestCase):
                          stubs.expect_container_request_uuid + '\n')
 
 
+    @stubs
+    def test_submit_missing_input(self, stubs):
+        capture_stdout = cStringIO.StringIO()
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug",
+             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+        self.assertEqual(exited, 0)
+
+        capture_stdout = cStringIO.StringIO()
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--api=containers", "--debug",
+             "tests/wf/submit_wf.cwl", "tests/submit_test_job_missing.json"],
+            capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+        self.assertEqual(exited, 1)
+
+
     @stubs
     def test_submit_container_project(self, stubs):
         project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
index e2e9907d5d115ebb61a53ae873249511b3732a50..bfa86abf6a48a1fcf30eda2618bdfaf28b0a2efb 100644 (file)
@@ -60,6 +60,8 @@ type Cluster struct {
        ManagementToken    string
        NodeProfiles       map[string]NodeProfile
        InstanceTypes      InstanceTypeMap
+       CloudVMs           CloudVMs
+       Dispatch           Dispatch
        HTTPRequestTimeout Duration
        RemoteClusters     map[string]RemoteCluster
        PostgreSQL         PostgreSQL
@@ -95,6 +97,50 @@ type InstanceType struct {
        Preemptible  bool
 }
 
+type Dispatch struct {
+       // PEM-encoded SSH private key (RSA, DSA, or ECDSA) that can
+       // log in to cloud VMs.
+       PrivateKey []byte
+
+       // Maximum time to wait for workers to come up before
+       // abandoning stale locks from the previous run.
+       StaleLockTimeout Duration
+
+       // Interval between queue polls
+       PollInterval Duration
+
+       // Interval between probes to each worker
+       ProbeInterval Duration
+
+       // Maximum total worker probes per second
+       MaxProbesPerSecond int
+}
+
+type CloudVMs struct {
+       // Shell command that exits zero if and only if the VM is
+       // fully booted and ready to run containers, e.g.,
+       // "mount | grep /encrypted-tmp"
+       BootProbeCommand string
+       SyncInterval     Duration
+
+       // Maximum idle time before automatic shutdown
+       TimeoutIdle Duration
+
+       // Maximum booting time before automatic shutdown
+       TimeoutBooting Duration
+
+       // Maximum time with no successful probes before automatic shutdown
+       TimeoutProbe Duration
+
+       // Time after shutdown to retry shutdown
+       TimeoutShutdown Duration
+
+       ImageID string
+
+       Driver           string
+       DriverParameters map[string]interface{}
+}
+
 type InstanceTypeMap map[string]InstanceType
 
 var errDuplicateInstanceTypeName = errors.New("duplicate instance type name")
@@ -159,45 +205,48 @@ func (cc *Cluster) GetNodeProfile(node string) (*NodeProfile, error) {
 }
 
 type NodeProfile struct {
-       Controller  SystemServiceInstance `json:"arvados-controller"`
-       Health      SystemServiceInstance `json:"arvados-health"`
-       Keepbalance SystemServiceInstance `json:"keep-balance"`
-       Keepproxy   SystemServiceInstance `json:"keepproxy"`
-       Keepstore   SystemServiceInstance `json:"keepstore"`
-       Keepweb     SystemServiceInstance `json:"keep-web"`
-       Nodemanager SystemServiceInstance `json:"arvados-node-manager"`
-       RailsAPI    SystemServiceInstance `json:"arvados-api-server"`
-       Websocket   SystemServiceInstance `json:"arvados-ws"`
-       Workbench   SystemServiceInstance `json:"arvados-workbench"`
+       Controller    SystemServiceInstance `json:"arvados-controller"`
+       Health        SystemServiceInstance `json:"arvados-health"`
+       Keepbalance   SystemServiceInstance `json:"keep-balance"`
+       Keepproxy     SystemServiceInstance `json:"keepproxy"`
+       Keepstore     SystemServiceInstance `json:"keepstore"`
+       Keepweb       SystemServiceInstance `json:"keep-web"`
+       Nodemanager   SystemServiceInstance `json:"arvados-node-manager"`
+       DispatchCloud SystemServiceInstance `json:"arvados-dispatch-cloud"`
+       RailsAPI      SystemServiceInstance `json:"arvados-api-server"`
+       Websocket     SystemServiceInstance `json:"arvados-ws"`
+       Workbench     SystemServiceInstance `json:"arvados-workbench"`
 }
 
 type ServiceName string
 
 const (
-       ServiceNameRailsAPI    ServiceName = "arvados-api-server"
-       ServiceNameController  ServiceName = "arvados-controller"
-       ServiceNameNodemanager ServiceName = "arvados-node-manager"
-       ServiceNameWorkbench   ServiceName = "arvados-workbench"
-       ServiceNameWebsocket   ServiceName = "arvados-ws"
-       ServiceNameKeepbalance ServiceName = "keep-balance"
-       ServiceNameKeepweb     ServiceName = "keep-web"
-       ServiceNameKeepproxy   ServiceName = "keepproxy"
-       ServiceNameKeepstore   ServiceName = "keepstore"
+       ServiceNameRailsAPI      ServiceName = "arvados-api-server"
+       ServiceNameController    ServiceName = "arvados-controller"
+       ServiceNameDispatchCloud ServiceName = "arvados-dispatch-cloud"
+       ServiceNameNodemanager   ServiceName = "arvados-node-manager"
+       ServiceNameWorkbench     ServiceName = "arvados-workbench"
+       ServiceNameWebsocket     ServiceName = "arvados-ws"
+       ServiceNameKeepbalance   ServiceName = "keep-balance"
+       ServiceNameKeepweb       ServiceName = "keep-web"
+       ServiceNameKeepproxy     ServiceName = "keepproxy"
+       ServiceNameKeepstore     ServiceName = "keepstore"
 )
 
 // ServicePorts returns the configured listening address (or "" if
 // disabled) for each service on the node.
 func (np *NodeProfile) ServicePorts() map[ServiceName]string {
        return map[ServiceName]string{
-               ServiceNameRailsAPI:    np.RailsAPI.Listen,
-               ServiceNameController:  np.Controller.Listen,
-               ServiceNameNodemanager: np.Nodemanager.Listen,
-               ServiceNameWorkbench:   np.Workbench.Listen,
-               ServiceNameWebsocket:   np.Websocket.Listen,
-               ServiceNameKeepbalance: np.Keepbalance.Listen,
-               ServiceNameKeepweb:     np.Keepweb.Listen,
-               ServiceNameKeepproxy:   np.Keepproxy.Listen,
-               ServiceNameKeepstore:   np.Keepstore.Listen,
+               ServiceNameRailsAPI:      np.RailsAPI.Listen,
+               ServiceNameController:    np.Controller.Listen,
+               ServiceNameDispatchCloud: np.DispatchCloud.Listen,
+               ServiceNameNodemanager:   np.Nodemanager.Listen,
+               ServiceNameWorkbench:     np.Workbench.Listen,
+               ServiceNameWebsocket:     np.Websocket.Listen,
+               ServiceNameKeepbalance:   np.Keepbalance.Listen,
+               ServiceNameKeepweb:       np.Keepweb.Listen,
+               ServiceNameKeepproxy:     np.Keepproxy.Listen,
+               ServiceNameKeepstore:     np.Keepstore.Listen,
        }
 }
 
index b70b4ac917672f363096a810cd35e3689f5132f9..02a0d76decbad272baee737282b5087a72a33c60 100644 (file)
@@ -18,10 +18,11 @@ type Container struct {
        Mounts               map[string]Mount     `json:"mounts"`
        Output               string               `json:"output"`
        OutputPath           string               `json:"output_path"`
-       Priority             int                  `json:"priority"`
+       Priority             int64                `json:"priority"`
        RuntimeConstraints   RuntimeConstraints   `json:"runtime_constraints"`
        State                ContainerState       `json:"state"`
        SchedulingParameters SchedulingParameters `json:"scheduling_parameters"`
+       ExitCode             int                  `json:"exit_code"`
 }
 
 // Container is an arvados#container resource.
diff --git a/sdk/go/arvados/contextgroup.go b/sdk/go/arvados/contextgroup.go
new file mode 100644 (file)
index 0000000..fa0de24
--- /dev/null
@@ -0,0 +1,95 @@
+package arvados
+
+import (
+       "context"
+       "sync"
+)
+
+// A contextGroup is a context-aware variation on sync.WaitGroup. It
+// provides a child context for the added funcs to use, so they can
+// exit early if another added func returns an error. Its Wait()
+// method returns the first error returned by any added func.
+//
+// Example:
+//
+//     err := errors.New("oops")
+//     cg := newContextGroup()
+//     defer cg.Cancel()
+//     cg.Go(func() error {
+//             someFuncWithContext(cg.Context())
+//             return nil
+//     })
+//     cg.Go(func() error {
+//             return err // this cancels cg.Context()
+//     })
+//     return cg.Wait() // returns err after both goroutines have ended
+type contextGroup struct {
+       ctx    context.Context
+       cancel context.CancelFunc
+       wg     sync.WaitGroup
+       err    error
+       mtx    sync.Mutex
+}
+
+// newContextGroup returns a new contextGroup. The caller must
+// eventually call the Cancel() method of the returned contextGroup.
+func newContextGroup(ctx context.Context) *contextGroup {
+       ctx, cancel := context.WithCancel(ctx)
+       return &contextGroup{
+               ctx:    ctx,
+               cancel: cancel,
+       }
+}
+
+// Cancel cancels the context group.
+func (cg *contextGroup) Cancel() {
+       cg.cancel()
+}
+
+// Context returns a context.Context which will be canceled when all
+// funcs have succeeded or one has failed.
+func (cg *contextGroup) Context() context.Context {
+       return cg.ctx
+}
+
+// Go calls f in a new goroutine. If f returns an error, the
+// contextGroup is canceled.
+//
+// If f notices cg.Context() is done, it should abandon further work
+// and return. In this case, f's return value may be ignored.
+func (cg *contextGroup) Go(f func() error) {
+       cg.mtx.Lock()
+       defer cg.mtx.Unlock()
+       if cg.err != nil {
+               return
+       }
+       cg.wg.Add(1)
+       go func() {
+               defer cg.wg.Done()
+               err := f()
+               cg.mtx.Lock()
+               defer cg.mtx.Unlock()
+               if err != nil && cg.err == nil {
+                       cg.err = err
+                       cg.cancel()
+               }
+       }()
+}
+
+// Wait waits for all added funcs to return, and returns the first
+// non-nil error.
+//
+// If the parent context is canceled before a func returns an error,
+// Wait returns the parent context's Err().
+//
+// Wait returns nil if all funcs return nil before the parent context
+// is canceled.
+func (cg *contextGroup) Wait() error {
+       cg.wg.Wait()
+       cg.mtx.Lock()
+       defer cg.mtx.Unlock()
+       if cg.err != nil {
+               return cg.err
+       }
+       return cg.ctx.Err()
+}
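
contextGroup is unexported, so code outside this package can't call it directly; its behavior closely mirrors golang.org/x/sync/errgroup. A small sketch of the same pattern with errgroup, offered as an analogy rather than the API introduced here:

    package main

    import (
        "context"
        "errors"
        "fmt"

        "golang.org/x/sync/errgroup"
    )

    func main() {
        g, ctx := errgroup.WithContext(context.Background())
        g.Go(func() error {
            <-ctx.Done() // unblocks as soon as the other func fails
            return nil
        })
        g.Go(func() error {
            return errors.New("oops") // cancels ctx for the whole group
        })
        fmt.Println(g.Wait()) // prints "oops" after both funcs return
    }
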
index b996542abd52cf7be04549962fdb31dfb7a366a0..6644f4cfb8e93ef7d601e667cee21a9dbce5d39b 100644 (file)
@@ -5,6 +5,7 @@
 package arvados
 
 import (
+       "context"
        "encoding/json"
        "fmt"
        "io"
@@ -18,7 +19,11 @@ import (
        "time"
 )
 
-var maxBlockSize = 1 << 26
+var (
+       maxBlockSize      = 1 << 26
+       concurrentWriters = 4 // max goroutines writing to Keep during sync()
+       writeAheadBlocks  = 1 // max background jobs flushing to Keep before blocking writes
+)
 
 // A CollectionFileSystem is a FileSystem that can be serialized as a
 // manifest and stored as a collection.
@@ -136,7 +141,7 @@ func (fs *collectionFileSystem) Sync() error {
 func (fs *collectionFileSystem) MarshalManifest(prefix string) (string, error) {
        fs.fileSystem.root.Lock()
        defer fs.fileSystem.root.Unlock()
-       return fs.fileSystem.root.(*dirnode).marshalManifest(prefix)
+       return fs.fileSystem.root.(*dirnode).marshalManifest(context.TODO(), prefix, newThrottle(concurrentWriters))
 }
 
 func (fs *collectionFileSystem) Size() int64 {
@@ -228,6 +233,7 @@ type filenode struct {
        memsize  int64 // bytes in memSegments
        sync.RWMutex
        nullnode
+       throttle *throttle
 }
 
 // caller must have lock
@@ -490,30 +496,75 @@ func (fn *filenode) Write(p []byte, startPtr filenodePtr) (n int, ptr filenodePt
 // Write some data out to disk to reduce memory use. Caller must have
 // write lock.
 func (fn *filenode) pruneMemSegments() {
-       // TODO: async (don't hold Lock() while waiting for Keep)
        // TODO: share code with (*dirnode)sync()
        // TODO: pack/flush small blocks too, when fragmented
+       if fn.throttle == nil {
+               // TODO: share a throttle with filesystem
+               fn.throttle = newThrottle(writeAheadBlocks)
+       }
        for idx, seg := range fn.segments {
                seg, ok := seg.(*memSegment)
-               if !ok || seg.Len() < maxBlockSize {
-                       continue
-               }
-               locator, _, err := fn.FS().PutB(seg.buf)
-               if err != nil {
-                       // TODO: stall (or return errors from)
-                       // subsequent writes until flushing
-                       // starts to succeed
+               if !ok || seg.Len() < maxBlockSize || seg.flushing != nil {
                        continue
                }
-               fn.memsize -= int64(seg.Len())
-               fn.segments[idx] = storedSegment{
-                       kc:      fn.FS(),
-                       locator: locator,
-                       size:    seg.Len(),
-                       offset:  0,
-                       length:  seg.Len(),
+               // Setting seg.flushing guarantees seg.buf will not be
+               // modified in place: WriteAt and Truncate will
+               // allocate a new buf instead, if necessary.
+               idx, buf := idx, seg.buf
+               done := make(chan struct{})
+               seg.flushing = done
+               // If lots of background writes are already in
+               // progress, block here until one finishes, rather
+               // than piling up an unlimited number of buffered writes
+               // and network flush operations.
+               fn.throttle.Acquire()
+               go func() {
+                       defer close(done)
+                       locator, _, err := fn.FS().PutB(buf)
+                       fn.throttle.Release()
+                       fn.Lock()
+                       defer fn.Unlock()
+                       if curbuf := seg.buf[:1]; &curbuf[0] != &buf[0] {
+                               // A new seg.buf has been allocated.
+                               return
+                       }
+                       seg.flushing = nil
+                       if err != nil {
+                               // TODO: stall (or return errors from)
+                               // subsequent writes until flushing
+                               // starts to succeed.
+                               return
+                       }
+                       if len(fn.segments) <= idx || fn.segments[idx] != seg || len(seg.buf) != len(buf) {
+                               // Segment has been dropped/moved/resized.
+                               return
+                       }
+                       fn.memsize -= int64(len(buf))
+                       fn.segments[idx] = storedSegment{
+                               kc:      fn.FS(),
+                               locator: locator,
+                               size:    len(buf),
+                               offset:  0,
+                               length:  len(buf),
+                       }
+               }()
+       }
+}
+
+// waitPrune blocks until all pending pruneMemSegments work is
+// finished. Caller must NOT have lock.
+func (fn *filenode) waitPrune() {
+       var pending []<-chan struct{}
+       fn.Lock()
+       for _, seg := range fn.segments {
+               if seg, ok := seg.(*memSegment); ok && seg.flushing != nil {
+                       pending = append(pending, seg.flushing)
                }
        }
+       fn.Unlock()
+       for _, p := range pending {
+               <-p
+       }
 }
 
 type dirnode struct {
@@ -546,46 +597,67 @@ func (dn *dirnode) Child(name string, replace func(inode) (inode, error)) (inode
        return dn.treenode.Child(name, replace)
 }
 
+type fnSegmentRef struct {
+       fn  *filenode
+       idx int
+}
+
+// commitBlock concatenates the data from the given filenode segments
+// (which must be *memSegments), writes the data out to Keep as a
+// single block, and replaces the filenodes' *memSegments with
+// storedSegments that reference the relevant portions of the new
+// block.
+//
+// Caller must have write lock.
+func (dn *dirnode) commitBlock(ctx context.Context, throttle *throttle, refs []fnSegmentRef) error {
+       if len(refs) == 0 {
+               return nil
+       }
+       throttle.Acquire()
+       defer throttle.Release()
+       if err := ctx.Err(); err != nil {
+               return err
+       }
+       block := make([]byte, 0, maxBlockSize)
+       for _, ref := range refs {
+               block = append(block, ref.fn.segments[ref.idx].(*memSegment).buf...)
+       }
+       locator, _, err := dn.fs.PutB(block)
+       if err != nil {
+               return err
+       }
+       off := 0
+       for _, ref := range refs {
+               data := ref.fn.segments[ref.idx].(*memSegment).buf
+               ref.fn.segments[ref.idx] = storedSegment{
+                       kc:      dn.fs,
+                       locator: locator,
+                       size:    len(block),
+                       offset:  off,
+                       length:  len(data),
+               }
+               off += len(data)
+               ref.fn.memsize -= int64(len(data))
+       }
+       return nil
+}
+
 // sync flushes in-memory data and remote block references (for the
 // children with the given names, which must be children of dn) to
 // local persistent storage. Caller must have write lock on dn and the
 // named children.
-func (dn *dirnode) sync(names []string) error {
-       type shortBlock struct {
-               fn  *filenode
-               idx int
-       }
-       var pending []shortBlock
-       var pendingLen int
+func (dn *dirnode) sync(ctx context.Context, throttle *throttle, names []string) error {
+       cg := newContextGroup(ctx)
+       defer cg.Cancel()
 
-       flush := func(sbs []shortBlock) error {
-               if len(sbs) == 0 {
-                       return nil
-               }
-               block := make([]byte, 0, maxBlockSize)
-               for _, sb := range sbs {
-                       block = append(block, sb.fn.segments[sb.idx].(*memSegment).buf...)
-               }
-               locator, _, err := dn.fs.PutB(block)
-               if err != nil {
-                       return err
-               }
-               off := 0
-               for _, sb := range sbs {
-                       data := sb.fn.segments[sb.idx].(*memSegment).buf
-                       sb.fn.segments[sb.idx] = storedSegment{
-                               kc:      dn.fs,
-                               locator: locator,
-                               size:    len(block),
-                               offset:  off,
-                               length:  len(data),
-                       }
-                       off += len(data)
-                       sb.fn.memsize -= int64(len(data))
-               }
-               return nil
+       goCommit := func(refs []fnSegmentRef) {
+               cg.Go(func() error {
+                       return dn.commitBlock(cg.Context(), throttle, refs)
+               })
        }
 
+       var pending []fnSegmentRef
+       var pendingLen int
        localLocator := map[string]string{}
        for _, name := range names {
                fn, ok := dn.inodes[name].(*filenode)
@@ -608,39 +680,29 @@ func (dn *dirnode) sync(names []string) error {
                                fn.segments[idx] = seg
                        case *memSegment:
                                if seg.Len() > maxBlockSize/2 {
-                                       if err := flush([]shortBlock{{fn, idx}}); err != nil {
-                                               return err
-                                       }
+                                       goCommit([]fnSegmentRef{{fn, idx}})
                                        continue
                                }
                                if pendingLen+seg.Len() > maxBlockSize {
-                                       if err := flush(pending); err != nil {
-                                               return err
-                                       }
+                                       goCommit(pending)
                                        pending = nil
                                        pendingLen = 0
                                }
-                               pending = append(pending, shortBlock{fn, idx})
+                               pending = append(pending, fnSegmentRef{fn, idx})
                                pendingLen += seg.Len()
                        default:
                                panic(fmt.Sprintf("can't sync segment type %T", seg))
                        }
                }
        }
-       return flush(pending)
+       goCommit(pending)
+       return cg.Wait()
 }
 
 // caller must have write lock.
-func (dn *dirnode) marshalManifest(prefix string) (string, error) {
-       var streamLen int64
-       type filepart struct {
-               name   string
-               offset int64
-               length int64
-       }
-       var fileparts []filepart
-       var subdirs string
-       var blocks []string
+func (dn *dirnode) marshalManifest(ctx context.Context, prefix string, throttle *throttle) (string, error) {
+       cg := newContextGroup(ctx)
+       defer cg.Cancel()
 
        if len(dn.inodes) == 0 {
                if prefix == "." {
@@ -658,26 +720,61 @@ func (dn *dirnode) marshalManifest(prefix string) (string, error) {
                names = append(names, name)
        }
        sort.Strings(names)
+
+       // Wait for children to finish any pending write operations
+       // before locking them.
        for _, name := range names {
                node := dn.inodes[name]
-               node.Lock()
-               defer node.Unlock()
-       }
-       if err := dn.sync(names); err != nil {
-               return "", err
+               if fn, ok := node.(*filenode); ok {
+                       fn.waitPrune()
+               }
        }
+
+       var dirnames []string
+       var filenames []string
        for _, name := range names {
-               switch node := dn.inodes[name].(type) {
+               node := dn.inodes[name]
+               node.Lock()
+               defer node.Unlock()
+               switch node := node.(type) {
                case *dirnode:
-                       subdir, err := node.marshalManifest(prefix + "/" + name)
-                       if err != nil {
-                               return "", err
-                       }
-                       subdirs = subdirs + subdir
+                       dirnames = append(dirnames, name)
                case *filenode:
+                       filenames = append(filenames, name)
+               default:
+                       panic(fmt.Sprintf("can't marshal inode type %T", node))
+               }
+       }
+
+       subdirs := make([]string, len(dirnames))
+       rootdir := ""
+       for i, name := range dirnames {
+               i, name := i, name
+               cg.Go(func() error {
+                       txt, err := dn.inodes[name].(*dirnode).marshalManifest(cg.Context(), prefix+"/"+name, throttle)
+                       subdirs[i] = txt
+                       return err
+               })
+       }
+
+       cg.Go(func() error {
+               var streamLen int64
+               type filepart struct {
+                       name   string
+                       offset int64
+                       length int64
+               }
+
+               var fileparts []filepart
+               var blocks []string
+               if err := dn.sync(cg.Context(), throttle, names); err != nil {
+                       return err
+               }
+               for _, name := range filenames {
+                       node := dn.inodes[name].(*filenode)
                        if len(node.segments) == 0 {
                                fileparts = append(fileparts, filepart{name: name})
-                               break
+                               continue
                        }
                        for _, seg := range node.segments {
                                switch seg := seg.(type) {
@@ -707,20 +804,21 @@ func (dn *dirnode) marshalManifest(prefix string) (string, error) {
                                        panic(fmt.Sprintf("can't marshal segment type %T", seg))
                                }
                        }
-               default:
-                       panic(fmt.Sprintf("can't marshal inode type %T", node))
                }
-       }
-       var filetokens []string
-       for _, s := range fileparts {
-               filetokens = append(filetokens, fmt.Sprintf("%d:%d:%s", s.offset, s.length, manifestEscape(s.name)))
-       }
-       if len(filetokens) == 0 {
-               return subdirs, nil
-       } else if len(blocks) == 0 {
-               blocks = []string{"d41d8cd98f00b204e9800998ecf8427e+0"}
-       }
-       return manifestEscape(prefix) + " " + strings.Join(blocks, " ") + " " + strings.Join(filetokens, " ") + "\n" + subdirs, nil
+               var filetokens []string
+               for _, s := range fileparts {
+                       filetokens = append(filetokens, fmt.Sprintf("%d:%d:%s", s.offset, s.length, manifestEscape(s.name)))
+               }
+               if len(filetokens) == 0 {
+                       return nil
+               } else if len(blocks) == 0 {
+                       blocks = []string{"d41d8cd98f00b204e9800998ecf8427e+0"}
+               }
+               rootdir = manifestEscape(prefix) + " " + strings.Join(blocks, " ") + " " + strings.Join(filetokens, " ") + "\n"
+               return nil
+       })
+       err := cg.Wait()
+       return rootdir + strings.Join(subdirs, ""), err
 }
 
 func (dn *dirnode) loadManifest(txt string) error {
@@ -936,6 +1034,11 @@ type segment interface {
 
 type memSegment struct {
        buf []byte
+       // If flushing is not nil, then a) buf is being shared by a
+       // pruneMemSegments goroutine, and must be copied on write;
+       // and b) the flushing channel will close when the goroutine
+       // finishes, whether it succeeds or not.
+       flushing <-chan struct{}
 }
 
 func (me *memSegment) Len() int {
@@ -952,28 +1055,31 @@ func (me *memSegment) Slice(off, length int) segment {
 }
 
 func (me *memSegment) Truncate(n int) {
-       if n > cap(me.buf) {
+       if n > cap(me.buf) || (me.flushing != nil && n > len(me.buf)) {
                newsize := 1024
                for newsize < n {
                        newsize = newsize << 2
                }
                newbuf := make([]byte, n, newsize)
                copy(newbuf, me.buf)
-               me.buf = newbuf
+               me.buf, me.flushing = newbuf, nil
        } else {
-               // Zero unused part when shrinking, in case we grow
-               // and start using it again later.
-               for i := n; i < len(me.buf); i++ {
+               // Reuse existing capacity; zero any newly exposed
+               // bytes so they do not carry stale data.
+               oldlen := len(me.buf)
+               me.buf = me.buf[:n]
+               for i := oldlen; i < n; i++ {
                        me.buf[i] = 0
                }
        }
-       me.buf = me.buf[:n]
 }
 
 func (me *memSegment) WriteAt(p []byte, off int) {
        if off+len(p) > len(me.buf) {
                panic("overflowed segment")
        }
+       if me.flushing != nil {
+               me.buf, me.flushing = append([]byte(nil), me.buf...), nil
+       }
        copy(me.buf[off:], p)
 }
 
index a6d4ab1e5b71baccabafdbdf810db0ee264420a5..2ae2bd8924e23b583a267091cc6b9985e52d3422 100644 (file)
@@ -19,6 +19,7 @@ import (
        "runtime"
        "strings"
        "sync"
+       "sync/atomic"
        "testing"
        "time"
 
@@ -31,6 +32,7 @@ var _ = check.Suite(&CollectionFSSuite{})
 type keepClientStub struct {
        blocks      map[string][]byte
        refreshable map[string]bool
+       onPut       func(bufcopy []byte) // called from PutB, before acquiring lock
        sync.RWMutex
 }
 
@@ -50,6 +52,9 @@ func (kcs *keepClientStub) PutB(p []byte) (string, int, error) {
        locator := fmt.Sprintf("%x+%d+A12345@abcde", md5.Sum(p), len(p))
        buf := make([]byte, len(p))
        copy(buf, p)
+       if kcs.onPut != nil {
+               kcs.onPut(buf)
+       }
        kcs.Lock()
        defer kcs.Unlock()
        kcs.blocks[locator[:32]] = buf
@@ -583,7 +588,7 @@ func (s *CollectionFSSuite) TestRandomWrites(c *check.C) {
        const ngoroutines = 256
 
        var wg sync.WaitGroup
-       for n := 0; n < nfiles; n++ {
+       for n := 0; n < ngoroutines; n++ {
                wg.Add(1)
                go func(n int) {
                        defer wg.Done()
@@ -592,7 +597,7 @@ func (s *CollectionFSSuite) TestRandomWrites(c *check.C) {
                        f, err := s.fs.OpenFile(fmt.Sprintf("random-%d", n), os.O_RDWR|os.O_CREATE|os.O_EXCL, 0)
                        c.Assert(err, check.IsNil)
                        defer f.Close()
-                       for i := 0; i < ngoroutines; i++ {
+                       for i := 0; i < nfiles; i++ {
                                trunc := rand.Intn(65)
                                woff := rand.Intn(trunc + 1)
                                wbytes = wbytes[:rand.Intn(64-woff+1)]
@@ -618,11 +623,18 @@ func (s *CollectionFSSuite) TestRandomWrites(c *check.C) {
                                c.Check(string(buf), check.Equals, string(expect))
                                c.Check(err, check.IsNil)
                        }
-                       s.checkMemSize(c, f)
                }(n)
        }
        wg.Wait()
 
+       for n := 0; n < ngoroutines; n++ {
+               f, err := s.fs.OpenFile(fmt.Sprintf("random-%d", n), os.O_RDONLY, 0)
+               c.Assert(err, check.IsNil)
+               f.(*filehandle).inode.(*filenode).waitPrune()
+               s.checkMemSize(c, f)
+               defer f.Close()
+       }
+
        root, err := s.fs.Open("/")
        c.Assert(err, check.IsNil)
        defer root.Close()
@@ -1029,8 +1041,37 @@ func (s *CollectionFSSuite) TestOpenFileFlags(c *check.C) {
 }
 
 func (s *CollectionFSSuite) TestFlushFullBlocks(c *check.C) {
+       defer func(wab, mbs int) {
+               writeAheadBlocks = wab
+               maxBlockSize = mbs
+       }(writeAheadBlocks, maxBlockSize)
+       writeAheadBlocks = 2
        maxBlockSize = 1024
-       defer func() { maxBlockSize = 2 << 26 }()
+
+       proceed := make(chan struct{})
+       var started, concurrent int32
+       blk2done := false
+       s.kc.onPut = func([]byte) {
+               atomic.AddInt32(&concurrent, 1)
+               switch atomic.AddInt32(&started, 1) {
+               case 1:
+                       // Wait until block 2 starts and finishes, and block 3 starts
+                       select {
+                       case <-proceed:
+                               c.Check(blk2done, check.Equals, true)
+                       case <-time.After(time.Second):
+                               c.Error("timed out")
+                       }
+               case 2:
+                       time.Sleep(time.Millisecond)
+                       blk2done = true
+               case 3:
+                       close(proceed)
+               default:
+                       time.Sleep(time.Millisecond)
+               }
+               c.Check(atomic.AddInt32(&concurrent, -1) < int32(writeAheadBlocks), check.Equals, true)
+       }
 
        fs, err := (&Collection{}).FileSystem(s.client, s.kc)
        c.Assert(err, check.IsNil)
@@ -1056,6 +1097,7 @@ func (s *CollectionFSSuite) TestFlushFullBlocks(c *check.C) {
                }
                return
        }
+       f.(*filehandle).inode.(*filenode).waitPrune()
        c.Check(currentMemExtents(), check.HasLen, 1)
 
        m, err := fs.MarshalManifest(".")
diff --git a/sdk/go/arvados/throttle.go b/sdk/go/arvados/throttle.go
new file mode 100644 (file)
index 0000000..464b73b
--- /dev/null
@@ -0,0 +1,17 @@
+package arvados
+
+type throttle struct {
+       c chan struct{}
+}
+
+func newThrottle(n int) *throttle {
+       return &throttle{c: make(chan struct{}, n)}
+}
+
+func (t *throttle) Acquire() {
+       t.c <- struct{}{}
+}
+
+func (t *throttle) Release() {
+       <-t.c
+}
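
throttle is a counting semaphore over a buffered channel: at most n Acquire() calls can be outstanding before the next one blocks. A standalone sketch of how writeAheadBlocks-style limiting uses it — the type is copied into the sketch only because it is unexported:

    package main

    import (
        "fmt"
        "sync"
    )

    // Same semaphore pattern as above, copied here for a runnable example.
    type throttle struct{ c chan struct{} }

    func newThrottle(n int) *throttle { return &throttle{c: make(chan struct{}, n)} }
    func (t *throttle) Acquire()      { t.c <- struct{}{} }
    func (t *throttle) Release()      { <-t.c }

    func main() {
        flushes := newThrottle(2) // cf. writeAheadBlocks = 2
        var wg sync.WaitGroup
        for i := 0; i < 5; i++ {
            flushes.Acquire() // blocks while two flushes are already in flight
            wg.Add(1)
            go func(i int) {
                defer wg.Done()
                defer flushes.Release()
                fmt.Println("flushing block", i)
            }(i)
        }
        wg.Wait()
    }
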
index cb47c9e6705ea096087199813050e3c3095f4974..122355be987755b161d38a2e46e0bc2cc4f52208 100644 (file)
@@ -107,15 +107,16 @@ func (s *AggregatorSuite) TestHealthy(c *check.C) {
        srv, listen := s.stubServer(&healthyHandler{})
        defer srv.Close()
        s.handler.Config.Clusters["zzzzz"].NodeProfiles["localhost"] = arvados.NodeProfile{
-               Controller:  arvados.SystemServiceInstance{Listen: listen},
-               Keepbalance: arvados.SystemServiceInstance{Listen: listen},
-               Keepproxy:   arvados.SystemServiceInstance{Listen: listen},
-               Keepstore:   arvados.SystemServiceInstance{Listen: listen},
-               Keepweb:     arvados.SystemServiceInstance{Listen: listen},
-               Nodemanager: arvados.SystemServiceInstance{Listen: listen},
-               RailsAPI:    arvados.SystemServiceInstance{Listen: listen},
-               Websocket:   arvados.SystemServiceInstance{Listen: listen},
-               Workbench:   arvados.SystemServiceInstance{Listen: listen},
+               Controller:    arvados.SystemServiceInstance{Listen: listen},
+               DispatchCloud: arvados.SystemServiceInstance{Listen: listen},
+               Keepbalance:   arvados.SystemServiceInstance{Listen: listen},
+               Keepproxy:     arvados.SystemServiceInstance{Listen: listen},
+               Keepstore:     arvados.SystemServiceInstance{Listen: listen},
+               Keepweb:       arvados.SystemServiceInstance{Listen: listen},
+               Nodemanager:   arvados.SystemServiceInstance{Listen: listen},
+               RailsAPI:      arvados.SystemServiceInstance{Listen: listen},
+               Websocket:     arvados.SystemServiceInstance{Listen: listen},
+               Workbench:     arvados.SystemServiceInstance{Listen: listen},
        }
        s.handler.ServeHTTP(s.resp, s.req)
        resp := s.checkOK(c)
@@ -132,15 +133,16 @@ func (s *AggregatorSuite) TestHealthyAndUnhealthy(c *check.C) {
        srvU, listenU := s.stubServer(&unhealthyHandler{})
        defer srvU.Close()
        s.handler.Config.Clusters["zzzzz"].NodeProfiles["localhost"] = arvados.NodeProfile{
-               Controller:  arvados.SystemServiceInstance{Listen: listenH},
-               Keepbalance: arvados.SystemServiceInstance{Listen: listenH},
-               Keepproxy:   arvados.SystemServiceInstance{Listen: listenH},
-               Keepstore:   arvados.SystemServiceInstance{Listen: listenH},
-               Keepweb:     arvados.SystemServiceInstance{Listen: listenH},
-               Nodemanager: arvados.SystemServiceInstance{Listen: listenH},
-               RailsAPI:    arvados.SystemServiceInstance{Listen: listenH},
-               Websocket:   arvados.SystemServiceInstance{Listen: listenH},
-               Workbench:   arvados.SystemServiceInstance{Listen: listenH},
+               Controller:    arvados.SystemServiceInstance{Listen: listenH},
+               DispatchCloud: arvados.SystemServiceInstance{Listen: listenH},
+               Keepbalance:   arvados.SystemServiceInstance{Listen: listenH},
+               Keepproxy:     arvados.SystemServiceInstance{Listen: listenH},
+               Keepstore:     arvados.SystemServiceInstance{Listen: listenH},
+               Keepweb:       arvados.SystemServiceInstance{Listen: listenH},
+               Nodemanager:   arvados.SystemServiceInstance{Listen: listenH},
+               RailsAPI:      arvados.SystemServiceInstance{Listen: listenH},
+               Websocket:     arvados.SystemServiceInstance{Listen: listenH},
+               Workbench:     arvados.SystemServiceInstance{Listen: listenH},
        }
        s.handler.Config.Clusters["zzzzz"].NodeProfiles["127.0.0.1"] = arvados.NodeProfile{
                Keepstore: arvados.SystemServiceInstance{Listen: listenU},
index 3281d78e209db3a0e69726d285c59b456ea93035..37666eb8e8b8f7e2d8f4cbbdf76ff7bda56b003b 100644 (file)
@@ -88,9 +88,6 @@ class _FileLikeObjectBase(object):
 class ArvadosFileReaderBase(_FileLikeObjectBase):
     def __init__(self, name, mode, num_retries=None):
         super(ArvadosFileReaderBase, self).__init__(name, mode)
-        self._binary = 'b' in mode
-        if sys.version_info >= (3, 0) and not self._binary:
-            raise NotImplementedError("text mode {!r} is not implemented".format(mode))
         self._filepos = 0
         self.num_retries = num_retries
         self._readline_cache = (None, None)
@@ -1278,6 +1275,11 @@ class ArvadosFileReader(ArvadosFileReaderBase):
     def stream_name(self):
         return self.arvadosfile.parent.stream_name()
 
+    def readinto(self, b):
+        data = self.read(len(b))
+        b[:len(data)] = data
+        return len(data)
+
     @_FileLikeObjectBase._before_close
     @retry_method
     def read(self, size=None, num_retries=None):
@@ -1356,3 +1358,33 @@ class ArvadosFileWriter(ArvadosFileReader):
         if not self.closed:
             self.arvadosfile.remove_writer(self, flush)
             super(ArvadosFileWriter, self).close()
+
+
+class WrappableFile(object):
+    """An interface to an Arvados file that's compatible with io wrappers.
+
+    """
+    def __init__(self, f):
+        self.f = f
+        self.closed = False
+    def close(self):
+        self.closed = True
+        return self.f.close()
+    def flush(self):
+        return self.f.flush()
+    def read(self, *args, **kwargs):
+        return self.f.read(*args, **kwargs)
+    def readable(self):
+        return self.f.readable()
+    def readinto(self, *args, **kwargs):
+        return self.f.readinto(*args, **kwargs)
+    def seek(self, *args, **kwargs):
+        return self.f.seek(*args, **kwargs)
+    def seekable(self):
+        return self.f.seekable()
+    def tell(self):
+        return self.f.tell()
+    def writable(self):
+        return self.f.writable()
+    def write(self, *args, **kwargs):
+        return self.f.write(*args, **kwargs)
index 48fdaf03ecd685f3e420437cebb3d46fd1741085..627f0346db2c6760710db3edaf356f4cb724bf91 100644 (file)
@@ -7,21 +7,23 @@ from future.utils import listitems, listvalues, viewkeys
 from builtins import str
 from past.builtins import basestring
 from builtins import object
+import ciso8601
+import datetime
+import errno
 import functools
+import hashlib
+import io
 import logging
 import os
 import re
-import errno
-import hashlib
-import datetime
-import ciso8601
-import time
+import sys
 import threading
+import time
 
 from collections import deque
 from stat import *
 
-from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, _BlockManager, synchronized, must_be_writable, NoopLock
+from .arvfile import split, _FileLikeObjectBase, ArvadosFile, ArvadosFileWriter, ArvadosFileReader, WrappableFile, _BlockManager, synchronized, must_be_writable, NoopLock
 from .keep import KeepLocator, KeepClient
 from .stream import StreamReader
 from ._normalize_stream import normalize_stream
@@ -35,6 +37,21 @@ from arvados.retry import retry_method
 
 _logger = logging.getLogger('arvados.collection')
 
+
+if sys.version_info >= (3, 0):
+    TextIOWrapper = io.TextIOWrapper
+else:
+    class TextIOWrapper(io.TextIOWrapper):
+        """To maintain backward compatibility, cast str to unicode in
+        write('foo').
+
+        """
+        def write(self, data):
+            if isinstance(data, basestring):
+                data = unicode(data)
+            return super(TextIOWrapper, self).write(data)
+
+
 class CollectionBase(object):
     """Abstract base class for Collection classes."""
 
@@ -654,7 +671,7 @@ class RichCollectionBase(CollectionBase):
 
         return self.find_or_create(path, COLLECTION)
 
-    def open(self, path, mode="r"):
+    def open(self, path, mode="r", encoding=None):
         """Open a file-like object for access.
 
         :path:
@@ -676,6 +693,7 @@ class RichCollectionBase(CollectionBase):
             opens for reading and writing.  All writes are appended to
             the end of the file.  Writing does not affect the file pointer for
             reading.
+
         """
 
         if not re.search(r'^[rwa][bt]?\+?$', mode):
@@ -698,7 +716,12 @@ class RichCollectionBase(CollectionBase):
         if mode[0] == 'w':
             arvfile.truncate(0)
 
-        return fclass(arvfile, mode=mode, num_retries=self.num_retries)
+        binmode = mode[0] + 'b' + re.sub('[bt]', '', mode[1:])
+        f = fclass(arvfile, mode=binmode, num_retries=self.num_retries)
+        if 'b' not in mode:
+            bufferclass = io.BufferedRandom if f.writable() else io.BufferedReader
+            f = TextIOWrapper(bufferclass(WrappableFile(f)), encoding=encoding)
+        return f
 
     def modified(self):
         """Determine if the collection has been modified since last commited."""
index cba00c3c8cf153039de990d27867558d0dbc699a..61258632bdd94f7acc684eaab50c442046f29f2e 100644 (file)
@@ -31,6 +31,7 @@ import traceback
 
 from apiclient import errors as apiclient_errors
 from arvados._version import __version__
+from arvados.util import keep_locator_pattern
 
 import arvados.commands._util as arv_cmd
 
@@ -289,6 +290,9 @@ class ResumeCacheConflict(Exception):
     pass
 
 
+class ResumeCacheInvalidError(Exception):
+    pass
+
 class ArvPutArgumentConflict(Exception):
     pass
 
@@ -387,7 +391,7 @@ class ResumeCache(object):
             new_cache = os.fdopen(new_cache_fd, 'r+')
             json.dump(data, new_cache)
             os.rename(new_cache_name, self.filename)
-        except (IOError, OSError, ResumeCacheConflict) as error:
+        except (IOError, OSError, ResumeCacheConflict):
             try:
                 os.unlink(new_cache_name)
             except NameError:  # mkstemp failed.
@@ -482,8 +486,8 @@ class ArvPutUploadJob(object):
 
     def _build_upload_list(self):
         """
-        Scan the requested paths to count file sizes, excluding files & dirs if requested
-        and building the upload file list.
+        Scan the requested paths to count file sizes, excluding requested files
+        and dirs and building the upload file list.
         """
         # If there aren't special files to be read, reset total bytes count to zero
         # to start counting.
@@ -795,6 +799,20 @@ class ArvPutUploadJob(object):
     def _my_collection(self):
         return self._remote_collection if self.update else self._local_collection
 
+    def _get_cache_filepath(self):
+        # Set up cache file name from input paths.
+        md5 = hashlib.md5()
+        md5.update(arvados.config.get('ARVADOS_API_HOST', '!nohost').encode())
+        realpaths = sorted(os.path.realpath(path) for path in self.paths)
+        md5.update(b'\0'.join([p.encode() for p in realpaths]))
+        if self.filename:
+            md5.update(self.filename.encode())
+        cache_filename = md5.hexdigest()
+        cache_filepath = os.path.join(
+            arv_cmd.make_home_conf_dir(self.CACHE_DIR, 0o700, 'raise'),
+            cache_filename)
+        return cache_filepath
+
     def _setup_state(self, update_collection):
         """
         Create a new cache file or load a previously existing one.
@@ -814,17 +832,7 @@ class ArvPutUploadJob(object):
             raise CollectionUpdateError("Collection locator unknown: '{}'".format(update_collection))
 
         if self.use_cache:
-            # Set up cache file name from input paths.
-            md5 = hashlib.md5()
-            md5.update(arvados.config.get('ARVADOS_API_HOST', '!nohost').encode())
-            realpaths = sorted(os.path.realpath(path) for path in self.paths)
-            md5.update(b'\0'.join([p.encode() for p in realpaths]))
-            if self.filename:
-                md5.update(self.filename.encode())
-            cache_filename = md5.hexdigest()
-            cache_filepath = os.path.join(
-                arv_cmd.make_home_conf_dir(self.CACHE_DIR, 0o700, 'raise'),
-                cache_filename)
+            cache_filepath = self._get_cache_filepath()
             if self.resume and os.path.exists(cache_filepath):
                 self.logger.info("Resuming upload from cache file {}".format(cache_filepath))
                 self._cache_file = open(cache_filepath, 'a+')
@@ -850,6 +858,8 @@ class ArvPutUploadJob(object):
                 self.logger.info("No cache usage requested for this run.")
                 # No cache file, set empty state
                 self._state = copy.deepcopy(self.EMPTY_STATE)
+            if not self._cached_manifest_valid():
+                raise ResumeCacheInvalidError()
             # Load the previous manifest so we can check if files were modified remotely.
             self._local_collection = arvados.collection.Collection(
                 self._state['manifest'],
@@ -857,6 +867,48 @@ class ArvPutUploadJob(object):
                 put_threads=self.put_threads,
                 api_client=self._api_client)
 
+    def _cached_manifest_valid(self):
+        """
+        Validate the oldest non-expired block signature to check whether the
+        cached manifest is usable, i.e., that it was not created with a
+        different Arvados account's credentials.
+        """
+        if self._state.get('manifest', None) is None:
+            # No cached manifest yet, all good.
+            return True
+        now = datetime.datetime.utcnow()
+        oldest_exp = None
+        oldest_loc = None
+        block_found = False
+        for m in keep_locator_pattern.finditer(self._state['manifest']):
+            loc = m.group(0)
+            try:
+                exp = datetime.datetime.utcfromtimestamp(int(loc.split('@')[1], 16))
+            except IndexError:
+                # Locator without signature
+                continue
+            block_found = True
+            if exp > now and (oldest_exp is None or exp < oldest_exp):
+                oldest_exp = exp
+                oldest_loc = loc
+        if not block_found:
+            # No block signatures found => no invalid block signatures.
+            return True
+        if oldest_loc is None:
+            # Locator signatures found, but all have expired.
+            # Reset the cache and move on.
+            self.logger.info('Cache expired, starting from scratch.')
+            self._state['manifest'] = ''
+            return True
+        kc = arvados.KeepClient(api_client=self._api_client,
+                                num_retries=self.num_retries)
+        try:
+            kc.head(oldest_loc)
+        except arvados.errors.KeepRequestError:
+            # Something is wrong, cached manifest is not valid.
+            return False
+        return True
+
     def collection_file_paths(self, col, path_prefix='.'):
         """Return a list of file paths by recursively go through the entire collection `col`"""
         file_paths = []
@@ -1131,6 +1183,14 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr,
             "arv-put: Another process is already uploading this data.",
             "         Use --no-cache if this is really what you want."]))
         sys.exit(1)
+    except ResumeCacheInvalidError:
+        logger.error("\n".join([
+            "arv-put: Resume cache contains invalid signature: it may have expired",
+            "         or been created with another Arvados user's credentials.",
+            "         Switch user or use one of the following options to restart upload:",
+            "         --no-resume to start a new resume cache.",
+            "         --no-cache to disable resume cache."]))
+        sys.exit(1)
     except CollectionUpdateError as error:
         logger.error("\n".join([
             "arv-put: %s" % str(error)]))
index 96f5bdd44a12ae42c25fbe64f68b342cb0356fcf..b17ed291807ab88de5948cfcdfaf6562bea5d009 100644 (file)
@@ -151,8 +151,8 @@ def statfile(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)",
     return prefix+fn
 
 def write_file(collection, pathprefix, fn, flush=False):
-    with open(os.path.join(pathprefix, fn)) as src:
-        dst = collection.open(fn, "w")
+    with open(os.path.join(pathprefix, fn), "rb") as src:
+        dst = collection.open(fn, "wb")
         r = src.read(1024*128)
         while r:
             dst.write(r)
index 1b6376e9be1035dac69bb34da974c2c486477627..4354ced67d3b4a3b678be3cacaa575f58f4f6d3f 100644 (file)
@@ -791,6 +791,7 @@ class KeepClient(object):
 
         if local_store:
             self.local_store = local_store
+            self.head = self.local_store_head
             self.get = self.local_store_get
             self.put = self.local_store_put
         else:
@@ -1230,5 +1231,17 @@ class KeepClient(object):
         with open(os.path.join(self.local_store, locator.md5sum), 'rb') as f:
             return f.read()
 
+    def local_store_head(self, loc_s, num_retries=None):
+        """Companion to local_store_put()."""
+        try:
+            locator = KeepLocator(loc_s)
+        except ValueError:
+            raise arvados.errors.NotFoundError(
+                "Invalid data locator: '%s'" % loc_s)
+        if locator.md5sum == config.EMPTY_BLOCK_LOCATOR.split('+')[0]:
+            return True
+        if os.path.exists(os.path.join(self.local_store, locator.md5sum)):
+            return True
+
     def is_cached(self, locator):
         return self.block_cache.reserve_cache(expect_hash)
index 93cfdc2a36c26389a3259222304e7ba1d5de7dff..a41184d10fb4fe7daadeb0892cf60ce36f47e8df 100644 (file)
@@ -8,6 +8,7 @@ from future import standard_library
 standard_library.install_aliases()
 from builtins import str
 from builtins import range
+from functools import partial
 import apiclient
 import datetime
 import hashlib
@@ -528,6 +529,85 @@ class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
                                     resume=False)
         del(self.writer)
 
+class CachedManifestValidationTest(ArvadosBaseTestCase):
+    class MockedPut(arv_put.ArvPutUploadJob):
+        def __init__(self, cached_manifest=None):
+            self._state = arv_put.ArvPutUploadJob.EMPTY_STATE
+            self._state['manifest'] = cached_manifest
+            self._api_client = mock.MagicMock()
+            self.logger = mock.MagicMock()
+            self.num_retries = 1
+
+    def datetime_to_hex(self, dt):
+        return hex(int(time.mktime(dt.timetuple())))[2:]
+
+    def setUp(self):
+        super(CachedManifestValidationTest, self).setUp()
+        self.block1 = "fdba98970961edb29f88241b9d99d890" # foo
+        self.block2 = "37b51d194a7513e45b56f6524f2d51f2" # bar
+        self.template = ". "+self.block1+"+3+Asignature@%s "+self.block2+"+3+Anothersignature@%s 0:3:foofile.txt 3:6:barfile.txt\n"
+
+    def test_empty_cached_manifest_is_valid(self):
+        put_mock = self.MockedPut()
+        self.assertEqual(None, put_mock._state.get('manifest'))
+        self.assertTrue(put_mock._cached_manifest_valid())
+        put_mock._state['manifest'] = ''
+        self.assertTrue(put_mock._cached_manifest_valid())
+
+    def test_signature_cases(self):
+        now = datetime.datetime.utcnow()
+        yesterday = now - datetime.timedelta(days=1)
+        lastweek = now - datetime.timedelta(days=7)
+        tomorrow = now + datetime.timedelta(days=1)
+        nextweek = now + datetime.timedelta(days=7)
+
+        def mocked_head(blocks={}, loc=None):
+            blk = loc.split('+', 1)[0]
+            if blocks.get(blk):
+                return True
+            raise arvados.errors.KeepRequestError("mocked error - block invalid")
+
+        # Block1_expiration, Block2_expiration, Block1_HEAD, Block2_HEAD, Expectation
+        cases = [
+            # All expired, reset cache - OK
+            (yesterday, lastweek, False, False, True),
+            (lastweek, yesterday, False, False, True),
+            # All non-expired valid blocks - OK
+            (tomorrow, nextweek, True, True, True),
+            (nextweek, tomorrow, True, True, True),
+            # All non-expired invalid blocks - Not OK
+            (tomorrow, nextweek, False, False, False),
+            (nextweek, tomorrow, False, False, False),
+            # One non-expired valid block - OK
+            (tomorrow, yesterday, True, False, True),
+            (yesterday, tomorrow, False, True, True),
+            # One non-expired invalid block - Not OK
+            (tomorrow, yesterday, False, False, False),
+            (yesterday, tomorrow, False, False, False),
+        ]
+        for case in cases:
+            b1_expiration, b2_expiration, b1_valid, b2_valid, outcome = case
+            head_responses = {
+                self.block1: b1_valid,
+                self.block2: b2_valid,
+            }
+            cached_manifest = self.template % (
+                self.datetime_to_hex(b1_expiration),
+                self.datetime_to_hex(b2_expiration),
+            )
+            arvput = self.MockedPut(cached_manifest)
+            with mock.patch('arvados.collection.KeepClient.head') as head_mock:
+                head_mock.side_effect = partial(mocked_head, head_responses)
+                self.assertEqual(outcome, arvput._cached_manifest_valid(),
+                    "Case '%s' should have produced outcome '%s'" % (case, outcome)
+                )
+                if b1_expiration > now or b2_expiration > now:
+                    # A HEAD request should have been done
+                    head_mock.assert_called_once()
+                else:
+                    head_mock.assert_not_called()
+
+
 class ArvadosExpectedBytesTest(ArvadosBaseTestCase):
     TEST_SIZE = os.path.getsize(__file__)
 
@@ -549,7 +629,7 @@ class ArvadosExpectedBytesTest(ArvadosBaseTestCase):
                          writer.bytes_expected)
 
     def test_expected_bytes_for_device(self):
-        writer = arv_put.ArvPutUploadJob(['/dev/null'])
+        writer = arv_put.ArvPutUploadJob(['/dev/null'], use_cache=False, resume=False)
         self.assertIsNone(writer.bytes_expected)
         writer = arv_put.ArvPutUploadJob([__file__, '/dev/null'])
         self.assertIsNone(writer.bytes_expected)
@@ -938,7 +1018,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
         self.assertEqual(1, len(collection_list))
         return collection_list[0]
 
-    def test_expired_token_invalidates_cache(self):
+    def test_all_expired_signatures_invalidates_cache(self):
         self.authorize_with('active')
         tmpdir = self.make_tmpdir()
         with open(os.path.join(tmpdir, 'somefile.txt'), 'w') as f:
@@ -974,7 +1054,89 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
         (out, err) = p.communicate()
         self.assertRegex(
             err.decode(),
-            r'WARNING: Uploaded file .* access token expired, will re-upload it from scratch')
+            r'INFO: Cache expired, starting from scratch.*')
+        self.assertEqual(p.returncode, 0)
+
+    def test_invalid_signature_invalidates_cache(self):
+        self.authorize_with('active')
+        tmpdir = self.make_tmpdir()
+        with open(os.path.join(tmpdir, 'somefile.txt'), 'w') as f:
+            f.write('foo')
+        # Upload a directory and get the cache file name
+        p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir],
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE,
+                             env=self.ENVIRON)
+        (out, err) = p.communicate()
+        self.assertRegex(err.decode(), r'INFO: Creating new cache file at ')
+        self.assertEqual(p.returncode, 0)
+        cache_filepath = re.search(r'INFO: Creating new cache file at (.*)',
+                                   err.decode()).groups()[0]
+        self.assertTrue(os.path.isfile(cache_filepath))
+        # Load the cache file contents and modify the manifest to simulate
+        # an invalid access token
+        with open(cache_filepath, 'r') as c:
+            cache = json.load(c)
+        self.assertRegex(cache['manifest'], r'\+A\S+\@')
+        cache['manifest'] = re.sub(
+            r'\+A.*\@',
+            "+Aabcdef0123456789abcdef0123456789abcdef01@",
+            cache['manifest'])
+        with open(cache_filepath, 'w') as c:
+            c.write(json.dumps(cache))
+        # Re-run the upload and expect to get an invalid cache message
+        p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir],
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE,
+                             env=self.ENVIRON)
+        (out, err) = p.communicate()
+        self.assertRegex(
+            err.decode(),
+            r'ERROR: arv-put: Resume cache contains invalid signature.*')
+        self.assertEqual(p.returncode, 1)
+
+    def test_single_expired_signature_reuploads_file(self):
+        self.authorize_with('active')
+        tmpdir = self.make_tmpdir()
+        with open(os.path.join(tmpdir, 'foofile.txt'), 'w') as f:
+            f.write('foo')
+        # Write a second file in its own subdir to force a new stream
+        os.mkdir(os.path.join(tmpdir, 'bar'))
+        with open(os.path.join(tmpdir, 'bar', 'barfile.txt'), 'w') as f:
+            f.write('bar')
+        # Upload a directory and get the cache file name
+        p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir],
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE,
+                             env=self.ENVIRON)
+        (out, err) = p.communicate()
+        self.assertRegex(err.decode(), r'INFO: Creating new cache file at ')
+        self.assertEqual(p.returncode, 0)
+        cache_filepath = re.search(r'INFO: Creating new cache file at (.*)',
+                                   err.decode()).groups()[0]
+        self.assertTrue(os.path.isfile(cache_filepath))
+        # Load the cache file contents and modify the manifest to simulate
+        # an expired access token
+        with open(cache_filepath, 'r') as c:
+            cache = json.load(c)
+        self.assertRegex(cache['manifest'], r'\+A\S+\@')
+        a_month_ago = datetime.datetime.now() - datetime.timedelta(days=30)
+        # Make one of the signatures appear to have expired
+        cache['manifest'] = re.sub(
+            r'\@.*? 3:3:barfile.txt',
+            "@{} 3:3:barfile.txt".format(self.datetime_to_hex(a_month_ago)),
+            cache['manifest'])
+        with open(cache_filepath, 'w') as c:
+            c.write(json.dumps(cache))
+        # Re-run the upload and expect the file with the expired signature
+        # to be re-uploaded from scratch
+        p = subprocess.Popen([sys.executable, arv_put.__file__, tmpdir],
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE,
+                             env=self.ENVIRON)
+        (out, err) = p.communicate()
+        self.assertRegex(
+            err.decode(),
+            r'WARNING: Uploaded file \'.*barfile.txt\' access token expired, will re-upload it from scratch')
         self.assertEqual(p.returncode, 0)
         # Confirm that the resulting cache is different from the last run.
         with open(cache_filepath, 'r') as c2:
index faad29872de541621ae258ac5dfb635c35ca9bf6..a760255dd6da8e01470dacafa6bcfafeddc50058 100644 (file)
@@ -217,26 +217,41 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
         keep = ArvadosFileWriterTestCase.MockKeep({
             "781e5e245d69b566979b86e28d23f2c7+10": b"0123456789",
         })
-        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', keep_client=keep)
-        writer = c.open("count.txt", "ab+")
-        self.assertEqual(writer.read(20), b"0123456789")
-
-        writer.seek(0, os.SEEK_SET)
-        writer.write("hello")
-        self.assertEqual(writer.read(), b"")
-        writer.seek(-5, os.SEEK_CUR)
-        self.assertEqual(writer.read(3), b"hel")
-        self.assertEqual(writer.read(), b"lo")
-        writer.seek(0, os.SEEK_SET)
-        self.assertEqual(writer.read(), b"0123456789hello")
-
-        writer.seek(0)
-        writer.write("world")
-        self.assertEqual(writer.read(), b"")
-        writer.seek(0)
-        self.assertEqual(writer.read(), b"0123456789helloworld")
-
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.portable_manifest_text())
+        for (mode, convert) in (
+                ('a+', lambda data: data.decode(encoding='utf-8')),
+                ('at+', lambda data: data.decode(encoding='utf-8')),
+                ('ab+', lambda data: data)):
+            c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', keep_client=keep)
+            writer = c.open("count.txt", mode)
+            self.assertEqual(writer.read(20), convert(b"0123456789"))
+
+            writer.seek(0, os.SEEK_SET)
+            writer.write(convert(b"hello"))
+            self.assertEqual(writer.read(), convert(b""))
+            if 'b' in mode:
+                writer.seek(-5, os.SEEK_CUR)
+                self.assertEqual(writer.read(3), convert(b"hel"))
+                self.assertEqual(writer.read(), convert(b"lo"))
+            else:
+                with self.assertRaises(IOError):
+                    writer.seek(-5, os.SEEK_CUR)
+                with self.assertRaises(IOError):
+                    writer.seek(-3, os.SEEK_END)
+            writer.seek(0, os.SEEK_SET)
+            writer.read(7)
+            self.assertEqual(7, writer.tell())
+            self.assertEqual(7, writer.seek(7, os.SEEK_SET))
+
+            writer.seek(0, os.SEEK_SET)
+            self.assertEqual(writer.read(), convert(b"0123456789hello"))
+
+            writer.seek(0)
+            writer.write(convert(b"world"))
+            self.assertEqual(writer.read(), convert(b""))
+            writer.seek(0)
+            self.assertEqual(writer.read(), convert(b"0123456789helloworld"))
+
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.portable_manifest_text())
 
     def test_write_at_beginning(self):
         keep = ArvadosFileWriterTestCase.MockKeep({
index ac18c44c6844c2f54fbc54d91acf094778834b3c..de01006741e91b12047f70d6b82dfff04f80bfdc 100644 (file)
@@ -836,17 +836,58 @@ class CollectionOpenModes(run_test_server.TestCaseWithServers):
         with c.open('foo', 'wb') as f:
             f.write('foo')
         for mode in ['r', 'rt', 'r+', 'rt+', 'w', 'wt', 'a', 'at']:
-            if sys.version_info >= (3, 0):
-                with self.assertRaises(NotImplementedError):
-                    c.open('foo', mode)
-            else:
-                with c.open('foo', mode) as f:
-                    if mode[0] == 'r' and '+' not in mode:
-                        self.assertEqual('foo', f.read(3))
-                    else:
-                        f.write('bar')
-                        f.seek(-3, os.SEEK_CUR)
-                        self.assertEqual('bar', f.read(3))
+            with c.open('foo', mode) as f:
+                if mode[0] == 'r' and '+' not in mode:
+                    self.assertEqual('foo', f.read(3))
+                else:
+                    f.write('bar')
+                    f.seek(0, os.SEEK_SET)
+                    self.assertEqual('bar', f.read(3))
+
+
+class TextModes(run_test_server.TestCaseWithServers):
+
+    def setUp(self):
+        arvados.config.KEEP_BLOCK_SIZE = 4
+        if sys.version_info < (3, 0):
+            import unicodedata
+            self.sailboat = unicodedata.lookup('SAILBOAT')
+            self.snowman = unicodedata.lookup('SNOWMAN')
+        else:
+            self.sailboat = '\N{SAILBOAT}'
+            self.snowman = '\N{SNOWMAN}'
+
+    def tearDown(self):
+        arvados.config.KEEP_BLOCK_SIZE = 2 ** 26
+
+    def test_read_sailboat_across_block_boundary(self):
+        c = Collection()
+        f = c.open('sailboats', 'wb')
+        data = self.sailboat.encode('utf-8')
+        f.write(data)
+        f.write(data[:1])
+        f.write(data[1:])
+        f.write(b'\n')
+        f.close()
+        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+3 ')
+
+        f = c.open('sailboats', 'r')
+        string = f.readline()
+        self.assertEqual(string, self.sailboat+self.sailboat+'\n')
+        f.close()
+
+    def test_write_snowman_across_block_boundary(self):
+        c = Collection()
+        f = c.open('snowmany', 'w')
+        data = self.snowman
+        f.write(data+data+'\n'+data+'\n')
+        f.close()
+        self.assertRegex(c.portable_manifest_text(), r'\+4 .*\+4 .*\+3 ')
+
+        f = c.open('snowmany', 'r')
+        self.assertEqual(f.readline(), self.snowman+self.snowman+'\n')
+        self.assertEqual(f.readline(), self.snowman+'\n')
+        f.close()
 
 
 class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
index e6e67d63135e5965157b633400f41980af32ae58..aecf748d10cbe93dc3a41b6030269fc3c02478ad 100644 (file)
@@ -8,37 +8,37 @@ GIT
 GEM
   remote: https://rubygems.org/
   specs:
-    actionmailer (4.2.10)
-      actionpack (= 4.2.10)
-      actionview (= 4.2.10)
-      activejob (= 4.2.10)
+    actionmailer (4.2.11)
+      actionpack (= 4.2.11)
+      actionview (= 4.2.11)
+      activejob (= 4.2.11)
       mail (~> 2.5, >= 2.5.4)
       rails-dom-testing (~> 1.0, >= 1.0.5)
-    actionpack (4.2.10)
-      actionview (= 4.2.10)
-      activesupport (= 4.2.10)
+    actionpack (4.2.11)
+      actionview (= 4.2.11)
+      activesupport (= 4.2.11)
       rack (~> 1.6)
       rack-test (~> 0.6.2)
       rails-dom-testing (~> 1.0, >= 1.0.5)
       rails-html-sanitizer (~> 1.0, >= 1.0.2)
-    actionview (4.2.10)
-      activesupport (= 4.2.10)
+    actionview (4.2.11)
+      activesupport (= 4.2.11)
       builder (~> 3.1)
       erubis (~> 2.7.0)
       rails-dom-testing (~> 1.0, >= 1.0.5)
       rails-html-sanitizer (~> 1.0, >= 1.0.3)
-    activejob (4.2.10)
-      activesupport (= 4.2.10)
+    activejob (4.2.11)
+      activesupport (= 4.2.11)
       globalid (>= 0.3.0)
-    activemodel (4.2.10)
-      activesupport (= 4.2.10)
+    activemodel (4.2.11)
+      activesupport (= 4.2.11)
       builder (~> 3.1)
-    activerecord (4.2.10)
-      activemodel (= 4.2.10)
-      activesupport (= 4.2.10)
+    activerecord (4.2.11)
+      activemodel (= 4.2.11)
+      activesupport (= 4.2.11)
       arel (~> 6.0)
     activerecord-deprecated_finders (1.0.4)
-    activesupport (4.2.10)
+    activesupport (4.2.11)
       i18n (~> 0.7)
       minitest (~> 5.1)
       thread_safe (~> 0.3, >= 0.3.4)
@@ -85,7 +85,7 @@ GEM
       coffee-script-source
       execjs
     coffee-script-source (1.12.2)
-    concurrent-ruby (1.0.5)
+    concurrent-ruby (1.1.4)
     crass (1.0.4)
     curb (0.9.6)
     database_cleaner (1.7.0)
@@ -152,12 +152,12 @@ GEM
     loofah (2.2.3)
       crass (~> 1.0.2)
       nokogiri (>= 1.5.9)
-    mail (2.7.0)
+    mail (2.7.1)
       mini_mime (>= 0.1.1)
     memoist (0.16.0)
     metaclass (0.0.4)
-    mini_mime (1.0.0)
-    mini_portile2 (2.3.0)
+    mini_mime (1.0.1)
+    mini_portile2 (2.4.0)
     minitest (5.11.3)
     mocha (1.5.0)
       metaclass (~> 0.0.1)
@@ -171,8 +171,8 @@ GEM
     net-ssh (4.2.0)
     net-ssh-gateway (2.0.0)
       net-ssh (>= 4.0.0)
-    nokogiri (1.8.5)
-      mini_portile2 (~> 2.3.0)
+    nokogiri (1.9.1)
+      mini_portile2 (~> 2.4.0)
     oauth2 (1.4.0)
       faraday (>= 0.8, < 0.13)
       jwt (~> 1.0)
@@ -198,16 +198,16 @@ GEM
     rack (1.6.11)
     rack-test (0.6.3)
       rack (>= 1.0)
-    rails (4.2.10)
-      actionmailer (= 4.2.10)
-      actionpack (= 4.2.10)
-      actionview (= 4.2.10)
-      activejob (= 4.2.10)
-      activemodel (= 4.2.10)
-      activerecord (= 4.2.10)
-      activesupport (= 4.2.10)
+    rails (4.2.11)
+      actionmailer (= 4.2.11)
+      actionpack (= 4.2.11)
+      actionview (= 4.2.11)
+      activejob (= 4.2.11)
+      activemodel (= 4.2.11)
+      activerecord (= 4.2.11)
+      activesupport (= 4.2.11)
       bundler (>= 1.3.0, < 2.0)
-      railties (= 4.2.10)
+      railties (= 4.2.11)
       sprockets-rails
     rails-deprecated_sanitizer (1.0.3)
       activesupport (>= 4.2.0.alpha)
@@ -219,12 +219,12 @@ GEM
       loofah (~> 2.2, >= 2.2.2)
     rails-observers (0.1.5)
       activemodel (>= 4.0)
-    railties (4.2.10)
-      actionpack (= 4.2.10)
-      activesupport (= 4.2.10)
+    railties (4.2.11)
+      actionpack (= 4.2.11)
+      activesupport (= 4.2.11)
       rake (>= 0.8.7)
       thor (>= 0.18.1, < 2.0)
-    rake (12.3.1)
+    rake (12.3.2)
     ref (2.0.0)
     request_store (1.4.1)
       rack (>= 1.4)
@@ -270,7 +270,7 @@ GEM
     therubyracer (0.12.3)
       libv8 (~> 3.16.14.15)
       ref
-    thor (0.20.0)
+    thor (0.20.3)
     thread_safe (0.3.6)
     tilt (1.4.1)
     trollop (2.1.2)
@@ -326,4 +326,4 @@ DEPENDENCIES
   uglifier (~> 2.0)
 
 BUNDLED WITH
-   1.16.3
+   1.17.2
index 862582aa9ce65fcbcf4a73d8b309b19cf3749556..f54c4a9a519c563ca8fc08e9bb480b254e616608 100644 (file)
@@ -66,7 +66,7 @@ class Arvados::V1::LinksController < ApplicationController
     super
 
     # head_kind and tail_kind columns are now virtual,
-    # equivilent functionality is now provided by
+    # equivalent functionality is now provided by
     # 'is_a', so fix up any old-style 'where' clauses.
     if @where
       @filters ||= []
@@ -86,7 +86,7 @@ class Arvados::V1::LinksController < ApplicationController
     super
 
     # head_kind and tail_kind columns are now virtual,
-    # equivilent functionality is now provided by
+    # equivalent functionality is now provided by
     # 'is_a', so fix up any old-style 'filter' clauses.
     @filters = @filters.map do |k|
       if k[0] == 'head_kind' and k[1] == '='
index 93d5b9a0239753a8820d86b883abcbdf1a06b776..eea95e2be1aad7c7535f48e430d5662d40149e04 100644 (file)
@@ -557,6 +557,8 @@ class ArvadosModel < ActiveRecord::Base
     self.owner_uuid ||= current_default_owner if self.respond_to? :owner_uuid=
     if !anonymous_updater
       self.modified_by_user_uuid = current_user ? current_user.uuid : nil
+    end
+    if !timeless_updater
       self.modified_at = current_time
     end
     self.modified_by_client_uuid = current_api_client ? current_api_client.uuid : nil
index 487043ee3549d8afe915f9abeeaeab2c8f252707..33cc686d4f5a14f3a432bc3df077ab258797a4a8 100644 (file)
@@ -287,7 +287,9 @@ class Collection < ArvadosModel
       # Use a different validation context to skip the 'old_versions_cannot_be_updated'
       # validator, as on this case it is legal to update some fields.
       leave_modified_by_user_alone do
-        c.save(context: :update_old_versions)
+        leave_modified_at_alone do
+          c.save(context: :update_old_versions)
+        end
       end
     end
   end
index ac67040edf799465c1dda671e0a4d0eb80cf9483..bd586907ee2eaf205616251be126bc7cf9c94b09 100644 (file)
@@ -279,14 +279,6 @@ class Container < ArvadosModel
     candidates = candidates.where_serialized(:runtime_constraints, resolve_runtime_constraints(attrs[:runtime_constraints]), md5: true)
     log_reuse_info(candidates) { "after filtering on runtime_constraints #{attrs[:runtime_constraints].inspect}" }
 
-    candidates = candidates.where('runtime_user_uuid = ? or (runtime_user_uuid is NULL and runtime_auth_scopes is NULL)',
-                                  attrs[:runtime_user_uuid])
-    log_reuse_info(candidates) { "after filtering on runtime_user_uuid #{attrs[:runtime_user_uuid].inspect}" }
-
-    candidates = candidates.where('runtime_auth_scopes = ? or (runtime_user_uuid is NULL and runtime_auth_scopes is NULL)',
-                                  SafeJSON.dump(attrs[:runtime_auth_scopes].sort))
-    log_reuse_info(candidates) { "after filtering on runtime_auth_scopes #{attrs[:runtime_auth_scopes].inspect}" }
-
     log_reuse_info { "checking for state=Complete with readable output and log..." }
 
     select_readable_pdh = Collection.
diff --git a/services/api/db/migrate/20181213183234_add_expression_index_to_links.rb b/services/api/db/migrate/20181213183234_add_expression_index_to_links.rb
new file mode 100644 (file)
index 0000000..2fdf830
--- /dev/null
@@ -0,0 +1,11 @@
+class AddExpressionIndexToLinks < ActiveRecord::Migration
+  def up
+    ActiveRecord::Base.connection.execute 'CREATE INDEX index_links_on_substring_head_uuid on links (substring(head_uuid, 7, 5))'
+    ActiveRecord::Base.connection.execute 'CREATE INDEX index_links_on_substring_tail_uuid on links (substring(tail_uuid, 7, 5))'
+  end
+
+  def down
+    ActiveRecord::Base.connection.execute 'DROP INDEX index_links_on_substring_head_uuid'
+    ActiveRecord::Base.connection.execute 'DROP INDEX index_links_on_substring_tail_uuid'
+  end
+end
index aa29a1cbb409d59542d0d037cbdf703f9c407ea5..211fa5043fda2aedc33646f8c98dff863bec8d7a 100644 (file)
@@ -2278,6 +2278,20 @@ CREATE INDEX index_links_on_modified_at_uuid ON public.links USING btree (modifi
 CREATE INDEX index_links_on_owner_uuid ON public.links USING btree (owner_uuid);
 
 
+--
+-- Name: index_links_on_substring_head_uuid; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX index_links_on_substring_head_uuid ON public.links USING btree ("substring"((head_uuid)::text, 7, 5));
+
+
+--
+-- Name: index_links_on_substring_tail_uuid; Type: INDEX; Schema: public; Owner: -
+--
+
+CREATE INDEX index_links_on_substring_tail_uuid ON public.links USING btree ("substring"((tail_uuid)::text, 7, 5));
+
+
 --
 -- Name: index_links_on_tail_uuid; Type: INDEX; Schema: public; Owner: -
 --
@@ -3201,3 +3215,5 @@ INSERT INTO schema_migrations (version) VALUES ('20181005192222');
 
 INSERT INTO schema_migrations (version) VALUES ('20181011184200');
 
+INSERT INTO schema_migrations (version) VALUES ('20181213183234');
+
index b456bd39562adc4ff87a5c73a460c055344269b3..7f0d7c29092f51c1ac4d76b01aa6d4f62181de7f 100644 (file)
@@ -18,4 +18,21 @@ module ArvadosModelUpdates
       Thread.current[:anonymous_updater] = anonymous_updater_was
     end
   end
+
+  # ArvadosModel checks this to decide whether it should update the
+  # 'modified_at' field.
+  def timeless_updater
+    Thread.current[:timeless_updater] || false
+  end
+
+  def leave_modified_at_alone
+    timeless_updater_was = timeless_updater
+    begin
+      Thread.current[:timeless_updater] = true
+      yield
+    ensure
+      Thread.current[:timeless_updater] = timeless_updater_was
+    end
+  end
+
 end
index dc427c12c1f82cfc76d8b53a13ad1d7b8a88c032..831e357b4235b6b11217de47ee2c5960a4ef3563 100644 (file)
@@ -74,7 +74,7 @@ module RecordFilters
             subproperty[1] = subproperty[1][1..-2]
           end
 
-        # jsonb search
+          # jsonb search
           case operator.downcase
           when '=', '!='
             not_in = if operator.downcase == "!=" then "NOT " else "" end
@@ -109,14 +109,14 @@ module RecordFilters
                                       "for '#{operator}' operator in filters")
             end
           when 'exists'
-          if operand == true
-            cond_out << "jsonb_exists(#{ar_table_name}.#{subproperty[0]}, ?)"
-          elsif operand == false
-            cond_out << "(NOT jsonb_exists(#{ar_table_name}.#{subproperty[0]}, ?)) OR #{ar_table_name}.#{subproperty[0]} is NULL"
-          else
-            raise ArgumentError.new("Invalid operand '#{operand}' for '#{operator}' must be true or false")
-          end
-          param_out << subproperty[1]
+            if operand == true
+              cond_out << "jsonb_exists(#{ar_table_name}.#{subproperty[0]}, ?)"
+            elsif operand == false
+              cond_out << "(NOT jsonb_exists(#{ar_table_name}.#{subproperty[0]}, ?)) OR #{ar_table_name}.#{subproperty[0]} is NULL"
+            else
+              raise ArgumentError.new("Invalid operand '#{operand}' for '#{operator}' must be true or false")
+            end
+            param_out << subproperty[1]
           else
             raise ArgumentError.new("Invalid operator for subproperty search '#{operator}'")
           end
@@ -197,8 +197,17 @@ module RecordFilters
             operand.each do |op|
               cl = ArvadosModel::kind_class op
               if cl
-                cond << "#{ar_table_name}.#{attr} like ?"
-                param_out << cl.uuid_like_pattern
+                if attr == 'uuid'
+                  if model_class.uuid_prefix == cl.uuid_prefix
+                    cond << "1=1"
+                  else
+                    cond << "1=0"
+                  end
+                else
+                  # Use a substring query to support remote uuids
+                  cond << "substring(#{ar_table_name}.#{attr}, 7, 5) = ?"
+                  param_out << cl.uuid_prefix
+                end
               else
                 cond << "1=0"
               end
index 2b247a960d989e962b373b726878828bc008d105..e66baceb28d0a28b3efb5361ca2a3a06b9401d75 100644 (file)
@@ -156,6 +156,20 @@ foo_file_readable_by_active:
   head_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
   properties: {}
 
+foo_file_readable_by_federated_active:
+  uuid: zzzzz-o0j2j-dp1d8395ldqw23r
+  owner_uuid: zzzzz-tpzed-000000000000000
+  created_at: 2014-01-24 20:42:26 -0800
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-000000000000000
+  modified_at: 2014-01-24 20:42:26 -0800
+  updated_at: 2014-01-24 20:42:26 -0800
+  tail_uuid: zbbbb-tpzed-xurymjxw79nv3jz
+  link_class: permission
+  name: can_read
+  head_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
+  properties: {}
+
 foo_file_readable_by_active_duplicate_permission:
   uuid: zzzzz-o0j2j-2qlmhgothiur55r
   owner_uuid: zzzzz-tpzed-000000000000000
index 4ae37455503ca79326b3c92986e7ed36be9ab1b2..47e46fe8378410f0804b21b9ca0d079788928e6c 100644 (file)
@@ -144,6 +144,23 @@ class Arvados::V1::LinksControllerTest < ActionController::TestCase
     assert_equal found.count, (found.select { |f| f.tail_uuid.match User.uuid_regex }).count
   end
 
+  test "filter links with 'is_a' operator includes remote objects" do
+    authorize_with :admin
+    get :index, {
+      filters: [
+        ['tail_uuid', 'is_a', 'arvados#user'],
+        ['link_class', '=', 'permission'],
+        ['name', '=', 'can_read'],
+        ['head_uuid', '=', collections(:foo_file).uuid],
+      ]
+    }
+    assert_response :success
+    found = assigns(:objects)
+    assert_not_equal 0, found.count
+    assert_includes(found.map(&:tail_uuid),
+                    users(:federated_active).uuid)
+  end
+
   test "filter links with 'is_a' operator with more than one" do
     authorize_with :admin
     get :index, {
index 90b4f13bf597b5b9ea306dec04b698e75fb98ae3..2a9ff5bf4cc6985a413f62a03d7b9555e9c0f938 100644 (file)
@@ -558,7 +558,8 @@ class ContainerTest < ActiveSupport::TestCase
     c1, _ = minimal_new(common_attrs.merge({runtime_token: api_client_authorizations(:active).token}))
     assert_equal Container::Queued, c1.state
     reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token)))
-    assert_nil reused
+    # See #14584
+    assert_equal c1.uuid, reused.uuid
   end
 
   test "find_reusable method with nil runtime_token, then runtime_token with different user" do
@@ -567,7 +568,8 @@ class ContainerTest < ActiveSupport::TestCase
     c1, _ = minimal_new(common_attrs.merge({runtime_token: nil}))
     assert_equal Container::Queued, c1.state
     reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token)))
-    assert_nil reused
+    # See #14584
+    assert_equal c1.uuid, reused.uuid
   end
 
   test "find_reusable method with different runtime_token, different scope, same user" do
@@ -576,7 +578,8 @@ class ContainerTest < ActiveSupport::TestCase
     c1, _ = minimal_new(common_attrs.merge({runtime_token: api_client_authorizations(:runtime_token_limited_scope).token}))
     assert_equal Container::Queued, c1.state
     reused = Container.find_reusable(common_attrs.merge(runtime_token_attr(:container_runtime_token)))
-    assert_nil reused
+    # See #14584
+    assert_equal c1.uuid, reused.uuid
   end
 
   test "Container running" do
index 084700d39bfad76b109078f29e81ecf82c40c5be..092524d8063b7ea818f99f1fe20f37957c8a2c15 100644 (file)
@@ -197,7 +197,7 @@ func (disp *Dispatcher) run() error {
        defer disp.sqCheck.Stop()
 
        if disp.cluster != nil && len(disp.cluster.InstanceTypes) > 0 {
-               go dispatchcloud.SlurmNodeTypeFeatureKludge(disp.cluster)
+               go SlurmNodeTypeFeatureKludge(disp.cluster)
        }
 
        if _, err := daemon.SdNotify(false, "READY=1"); err != nil {
@@ -229,12 +229,7 @@ func (disp *Dispatcher) checkSqueueForOrphans() {
 func (disp *Dispatcher) slurmConstraintArgs(container arvados.Container) []string {
        mem := int64(math.Ceil(float64(container.RuntimeConstraints.RAM+container.RuntimeConstraints.KeepCacheRAM+disp.ReserveExtraRAM) / float64(1048576)))
 
-       var disk int64
-       for _, m := range container.Mounts {
-               if m.Kind == "tmp" {
-                       disk += m.Capacity
-               }
-       }
+       disk := dispatchcloud.EstimateScratchSpace(&container)
        disk = int64(math.Ceil(float64(disk) / float64(1048576)))
        return []string{
                fmt.Sprintf("--mem=%d", mem),
@@ -246,7 +241,7 @@ func (disp *Dispatcher) slurmConstraintArgs(container arvados.Container) []strin
 func (disp *Dispatcher) sbatchArgs(container arvados.Container) ([]string, error) {
        var args []string
        args = append(args, disp.SbatchArguments...)
-       args = append(args, "--job-name="+container.UUID, fmt.Sprintf("--nice=%d", initialNiceValue))
+       args = append(args, "--job-name="+container.UUID, fmt.Sprintf("--nice=%d", initialNiceValue), "--no-requeue")
 
        if disp.cluster == nil {
                // no instance types configured
index b76ece314d47806afcfb328ba12970b9171b58d5..10fdb07124cc817cc857f35384ded22ca0193d63 100644 (file)
@@ -202,6 +202,7 @@ func (s *IntegrationSuite) TestMissingFromSqueue(c *C) {
                [][]string{{
                        fmt.Sprintf("--job-name=%s", "zzzzz-dz642-queuedcontainer"),
                        fmt.Sprintf("--nice=%d", 10000),
+                       "--no-requeue",
                        fmt.Sprintf("--mem=%d", 11445),
                        fmt.Sprintf("--cpus-per-task=%d", 4),
                        fmt.Sprintf("--tmp=%d", 45777),
@@ -217,7 +218,7 @@ func (s *IntegrationSuite) TestMissingFromSqueue(c *C) {
 func (s *IntegrationSuite) TestSbatchFail(c *C) {
        s.slurm = slurmFake{errBatch: errors.New("something terrible happened")}
        container := s.integrationTest(c,
-               [][]string{{"--job-name=zzzzz-dz642-queuedcontainer", "--nice=10000", "--mem=11445", "--cpus-per-task=4", "--tmp=45777"}},
+               [][]string{{"--job-name=zzzzz-dz642-queuedcontainer", "--nice=10000", "--no-requeue", "--mem=11445", "--cpus-per-task=4", "--tmp=45777"}},
                func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
                        dispatcher.UpdateState(container.UUID, dispatch.Running)
                        dispatcher.UpdateState(container.UUID, dispatch.Complete)
@@ -362,7 +363,7 @@ func (s *StubbedSuite) TestSbatchArgs(c *C) {
                s.disp.SbatchArguments = defaults
 
                args, err := s.disp.sbatchArgs(container)
-               c.Check(args, DeepEquals, append(defaults, "--job-name=123", "--nice=10000", "--mem=239", "--cpus-per-task=2", "--tmp=0"))
+               c.Check(args, DeepEquals, append(defaults, "--job-name=123", "--nice=10000", "--no-requeue", "--mem=239", "--cpus-per-task=2", "--tmp=0"))
                c.Check(err, IsNil)
        }
 }
@@ -408,7 +409,7 @@ func (s *StubbedSuite) TestSbatchInstanceTypeConstraint(c *C) {
                args, err := s.disp.sbatchArgs(container)
                c.Check(err == nil, Equals, trial.err == nil)
                if trial.err == nil {
-                       c.Check(args, DeepEquals, append([]string{"--job-name=123", "--nice=10000"}, trial.sbatchArgs...))
+                       c.Check(args, DeepEquals, append([]string{"--job-name=123", "--nice=10000", "--no-requeue"}, trial.sbatchArgs...))
                } else {
                        c.Check(len(err.(dispatchcloud.ConstraintsNotSatisfiableError).AvailableTypes), Equals, len(trial.types))
                }
@@ -425,7 +426,7 @@ func (s *StubbedSuite) TestSbatchPartition(c *C) {
 
        args, err := s.disp.sbatchArgs(container)
        c.Check(args, DeepEquals, []string{
-               "--job-name=123", "--nice=10000",
+               "--job-name=123", "--nice=10000", "--no-requeue",
                "--mem=239", "--cpus-per-task=1", "--tmp=0",
                "--partition=blurb,b2",
        })
diff --git a/services/crunch-dispatch-slurm/node_type.go b/services/crunch-dispatch-slurm/node_type.go
new file mode 100644 (file)
index 0000000..62a9693
--- /dev/null
@@ -0,0 +1,72 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+       "log"
+       "os/exec"
+       "strings"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+// SlurmNodeTypeFeatureKludge ensures SLURM accepts every instance
+// type name as a valid feature name, even if no instances of that
+// type have appeared yet.
+//
+// It takes advantage of some SLURM peculiarities:
+//
+// (1) A feature is valid after it has been offered by a node, even if
+// it is no longer offered by any node. So, to make a feature name
+// valid, we can add it to a dummy node ("compute0"), then remove it.
+//
+// (2) To test whether a set of feature names are valid without
+// actually submitting a job, we can call srun --test-only with the
+// desired features.
+//
+// SlurmNodeTypeFeatureKludge does a test-and-fix operation
+// immediately, and then periodically, in case slurm restarts and
+// forgets the list of valid features. It never returns (unless there
+// are no node types configured, in which case it returns
+// immediately), so it should generally be invoked with "go".
+func SlurmNodeTypeFeatureKludge(cc *arvados.Cluster) {
+       if len(cc.InstanceTypes) == 0 {
+               return
+       }
+       var features []string
+       for _, it := range cc.InstanceTypes {
+               features = append(features, "instancetype="+it.Name)
+       }
+       for {
+               slurmKludge(features)
+               time.Sleep(2 * time.Second)
+       }
+}
+
+const slurmDummyNode = "compute0"
+
+func slurmKludge(features []string) {
+       allFeatures := strings.Join(features, ",")
+
+       cmd := exec.Command("sinfo", "--nodes="+slurmDummyNode, "--format=%f", "--noheader")
+       out, err := cmd.CombinedOutput()
+       if err != nil {
+               log.Printf("running %q %q: %s (output was %q)", cmd.Path, cmd.Args, err, out)
+               return
+       }
+       if string(out) == allFeatures+"\n" {
+               // Already configured correctly, nothing to do.
+               return
+       }
+
+       log.Printf("configuring node %q with all node type features", slurmDummyNode)
+       cmd = exec.Command("scontrol", "update", "NodeName="+slurmDummyNode, "Features="+allFeatures)
+       log.Printf("running: %q %q", cmd.Path, cmd.Args)
+       out, err = cmd.CombinedOutput()
+       if err != nil {
+               log.Printf("error: scontrol: %s (output was %q)", err, out)
+       }
+}
diff --git a/services/crunch-run/background.go b/services/crunch-run/background.go
new file mode 100644 (file)
index 0000000..a508538
--- /dev/null
@@ -0,0 +1,237 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package main
+
+import (
+       "encoding/json"
+       "fmt"
+       "io"
+       "io/ioutil"
+       "os"
+       "os/exec"
+       "path/filepath"
+       "strings"
+       "syscall"
+       "time"
+)
+
+var (
+       lockdir    = "/var/lock"
+       lockprefix = "crunch-run-"
+       locksuffix = ".lock"
+)
+
+// procinfo is saved in each process's lockfile.
+type procinfo struct {
+       UUID   string
+       PID    int
+       Stdout string
+       Stderr string
+}
+
+// Detach acquires a lock for the given uuid, and starts the current
+// program as a child process (with -no-detach prepended to the given
+// arguments so the child knows not to detach again). The lock is
+// passed along to the child process.
+func Detach(uuid string, args []string, stdout, stderr io.Writer) int {
+       return exitcode(stderr, detach(uuid, args, stdout, stderr))
+}
+func detach(uuid string, args []string, stdout, stderr io.Writer) error {
+       lockfile, err := func() (*os.File, error) {
+               // We must hold the dir-level lock between
+               // opening/creating the lockfile and acquiring LOCK_EX
+               // on it, to avoid racing with the alive-checking and
+               // garbage collection done by ListProcesses.
+               dirlock, err := lockall()
+               if err != nil {
+                       return nil, err
+               }
+               defer dirlock.Close()
+               lockfile, err := os.OpenFile(filepath.Join(lockdir, lockprefix+uuid+locksuffix), os.O_CREATE|os.O_RDWR, 0700)
+               if err != nil {
+                       return nil, err
+               }
+               err = syscall.Flock(int(lockfile.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
+               if err != nil {
+                       lockfile.Close()
+                       return nil, err
+               }
+               return lockfile, nil
+       }()
+       if err != nil {
+               return err
+       }
+       defer lockfile.Close()
+       lockfile.Truncate(0)
+
+       outfile, err := ioutil.TempFile("", "crunch-run-"+uuid+"-stdout-")
+       if err != nil {
+               return err
+       }
+       defer outfile.Close()
+       errfile, err := ioutil.TempFile("", "crunch-run-"+uuid+"-stderr-")
+       if err != nil {
+               os.Remove(outfile.Name())
+               return err
+       }
+       defer errfile.Close()
+
+       cmd := exec.Command(args[0], append([]string{"-no-detach"}, args[1:]...)...)
+       cmd.Stdout = outfile
+       cmd.Stderr = errfile
+       // Child inherits lockfile.
+       cmd.ExtraFiles = []*os.File{lockfile}
+       // Ensure child isn't interrupted even if we receive signals
+       // from parent (sshd) while sending lockfile content to
+       // caller.
+       cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
+       err = cmd.Start()
+       if err != nil {
+               os.Remove(outfile.Name())
+               os.Remove(errfile.Name())
+               return err
+       }
+
+       w := io.MultiWriter(stdout, lockfile)
+       err = json.NewEncoder(w).Encode(procinfo{
+               UUID:   uuid,
+               PID:    cmd.Process.Pid,
+               Stdout: outfile.Name(),
+               Stderr: errfile.Name(),
+       })
+       if err != nil {
+               os.Remove(outfile.Name())
+               os.Remove(errfile.Name())
+               return err
+       }
+       return nil
+}
+
+// KillProcess finds the crunch-run process corresponding to the given
+// uuid, and sends the given signal to it. It then waits up to 1
+// second for the process to die. It returns 0 if the process is
+// successfully killed or didn't exist in the first place.
+func KillProcess(uuid string, signal syscall.Signal, stdout, stderr io.Writer) int {
+       return exitcode(stderr, kill(uuid, signal, stdout, stderr))
+}
+
+func kill(uuid string, signal syscall.Signal, stdout, stderr io.Writer) error {
+       path := filepath.Join(lockdir, lockprefix+uuid+locksuffix)
+       f, err := os.Open(path)
+       if os.IsNotExist(err) {
+               return nil
+       } else if err != nil {
+               return err
+       }
+       defer f.Close()
+
+       var pi procinfo
+       err = json.NewDecoder(f).Decode(&pi)
+       if err != nil {
+               return fmt.Errorf("%s: %s", path, err)
+       }
+
+       if pi.UUID != uuid || pi.PID == 0 {
+               return fmt.Errorf("%s: bogus procinfo: %+v", path, pi)
+       }
+
+       proc, err := os.FindProcess(pi.PID)
+       if err != nil {
+               return err
+       }
+
+       err = proc.Signal(signal)
+       for deadline := time.Now().Add(time.Second); err == nil && time.Now().Before(deadline); time.Sleep(time.Second / 100) {
+               err = proc.Signal(syscall.Signal(0))
+       }
+       if err == nil {
+               return fmt.Errorf("pid %d: sent signal %d (%s) but process is still alive", pi.PID, signal, signal)
+       }
+       fmt.Fprintf(stderr, "pid %d: %s\n", pi.PID, err)
+       return nil
+}
+
+// ListProcesses lists the UUIDs of active crunch-run processes.
+func ListProcesses(stdout, stderr io.Writer) int {
+       return exitcode(stderr, filepath.Walk(lockdir, func(path string, info os.FileInfo, err error) error {
+               // Walk visits lockdir itself first; skip only
+               // subdirectories so the walk of lockdir continues.
+               if info.IsDir() && path != lockdir {
+                       return filepath.SkipDir
+               }
+               if name := info.Name(); !strings.HasPrefix(name, lockprefix) || !strings.HasSuffix(name, locksuffix) {
+                       return nil
+               }
+               if info.Size() == 0 {
+                       // race: process has opened/locked but hasn't yet written pid/uuid
+                       return nil
+               }
+
+               f, err := os.Open(path)
+               if err != nil {
+                       return nil
+               }
+               defer f.Close()
+
+               // Ensure other processes don't acquire this lockfile
+               // after we have decided it is abandoned but before we
+               // have deleted it.
+               dirlock, err := lockall()
+               if err != nil {
+                       return err
+               }
+               err = syscall.Flock(int(f.Fd()), syscall.LOCK_SH|syscall.LOCK_NB)
+               if err == nil {
+                       // lockfile is stale
+                       err := os.Remove(path)
+                       dirlock.Close()
+                       if err != nil {
+                               fmt.Fprintln(stderr, err)
+                       }
+                       return nil
+               }
+               dirlock.Close()
+
+               var pi procinfo
+               err = json.NewDecoder(f).Decode(&pi)
+               if err != nil {
+                       fmt.Fprintf(stderr, "%s: %s\n", path, err)
+                       return nil
+               }
+               if pi.UUID == "" || pi.PID == 0 {
+                       fmt.Fprintf(stderr, "%s: bogus procinfo: %+v\n", path, pi)
+                       return nil
+               }
+
+               fmt.Fprintln(stdout, pi.UUID)
+               return nil
+       }))
+}
+
+// If err is nil, return 0 ("success"); otherwise, print err to stderr
+// and return 1.
+func exitcode(stderr io.Writer, err error) int {
+       if err != nil {
+               fmt.Fprintln(stderr, err)
+               return 1
+       }
+       return 0
+}
+
+// Acquire a dir-level lock. Must be held while creating or deleting
+// container-specific lockfiles, to avoid races during the intervals
+// when those container-specific lockfiles are open but not locked.
+//
+// Caller releases the lock by closing the returned file.
+func lockall() (*os.File, error) {
+       f, err := os.OpenFile(filepath.Join(lockdir, lockprefix+"all"+locksuffix), os.O_CREATE|os.O_RDWR, 0700)
+       if err != nil {
+               return nil, err
+       }
+       err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
+       if err != nil {
+               f.Close()
+               return nil, err
+       }
+       return f, nil
+}
index e5a1b94706c781cdc62e805e972c5f46ac0bba24..2b9a119581dfd7c4f3245b1e57317ae95155f5b9 100644 (file)
@@ -1518,6 +1518,14 @@ func (runner *ContainerRunner) Run() (err error) {
                runner.CrunchLog.Close()
        }()
 
+       err = runner.fetchContainerRecord()
+       if err != nil {
+               return
+       }
+       if runner.Container.State != "Locked" {
+               return fmt.Errorf("dispatch error detected: container %q has state %q", runner.Container.UUID, runner.Container.State)
+       }
+
        defer func() {
                // checkErr prints e (unless it's nil) and sets err to
                // e (unless err is already non-nil). Thus, if err
@@ -1558,10 +1566,6 @@ func (runner *ContainerRunner) Run() (err error) {
                checkErr("UpdateContainerFinal", runner.UpdateContainerFinal())
        }()
 
-       err = runner.fetchContainerRecord()
-       if err != nil {
-               return
-       }
        runner.setupSignals()
        err = runner.startHoststat()
        if err != nil {
@@ -1732,6 +1736,10 @@ func main() {
        cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup (ignored if -cgroup-parent-subsystem is used)")
        cgroupParentSubsystem := flag.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container")
        caCertsPath := flag.String("ca-certs", "", "Path to TLS root certificates")
+       detach := flag.Bool("detach", false, "Detach from parent process and run in the background")
+       sleep := flag.Duration("sleep", 0, "Delay before starting (testing use only)")
+       kill := flag.Int("kill", -1, "Send signal to an existing crunch-run process for given UUID")
+       list := flag.Bool("list", false, "List UUIDs of existing crunch-run processes")
        enableNetwork := flag.String("container-enable-networking", "default",
                `Specify if networking should be enabled for container.  One of 'default', 'always':
        default: only enable networking if container requests it.
@@ -1743,8 +1751,30 @@ func main() {
        memprofile := flag.String("memprofile", "", "write memory profile to `file` after running container")
        getVersion := flag.Bool("version", false, "Print version information and exit.")
        flag.Duration("check-containerd", 0, "Ignored. Exists for compatibility with older versions.")
+
+       ignoreDetachFlag := false
+       if len(os.Args) > 1 && os.Args[1] == "-no-detach" {
+               // This process was invoked by a parent process, which
+               // has passed along its own arguments, including
+               // -detach, after the leading -no-detach flag.  Strip
+               // the leading -no-detach flag (it's not recognized by
+               // flag.Parse()) and ignore the -detach flag that
+               // comes later.
+               os.Args = append([]string{os.Args[0]}, os.Args[2:]...)
+               ignoreDetachFlag = true
+       }
+
        flag.Parse()
 
+       switch {
+       case *detach && !ignoreDetachFlag:
+               os.Exit(Detach(flag.Arg(0), os.Args, os.Stdout, os.Stderr))
+       case *kill >= 0:
+               os.Exit(KillProcess(flag.Arg(0), syscall.Signal(*kill), os.Stdout, os.Stderr))
+       case *list:
+               os.Exit(ListProcesses(os.Stdout, os.Stderr))
+       }
+
        // Print version information if requested
        if *getVersion {
                fmt.Printf("crunch-run %s\n", version)
@@ -1752,6 +1782,7 @@ func main() {
        }
 
        log.Printf("crunch-run %s started", version)
+       time.Sleep(*sleep)
 
        containerId := flag.Arg(0)
 
index 3fdd440e3e9168e2ed3b4c1e7234b3e93eb2f50c..17e5e145811aba3e587a66d07fb642ec07bef2d8 100644 (file)
@@ -855,7 +855,8 @@ func (s *TestSuite) TestFullRunHello(c *C) {
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-       "runtime_constraints": {}
+    "runtime_constraints": {},
+    "state": "Locked"
 }`, nil, 0, func(t *TestDockerClient) {
                t.logWriter.Write(dockerLog(1, "hello world\n"))
                t.logWriter.Close()
@@ -867,6 +868,28 @@ func (s *TestSuite) TestFullRunHello(c *C) {
 
 }
 
+func (s *TestSuite) TestRunAlreadyRunning(c *C) {
+       var ran bool
+       api, _, _ := s.fullRunHelper(c, `{
+    "command": ["sleep", "3"],
+    "container_image": "d4ab34d3d4f8a72f5c4973051ae69fab+122",
+    "cwd": ".",
+    "environment": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
+    "output_path": "/tmp",
+    "priority": 1,
+    "runtime_constraints": {},
+    "scheduling_parameters":{"max_run_time": 1},
+    "state": "Running"
+}`, nil, 2, func(t *TestDockerClient) {
+               ran = true
+       })
+
+       c.Check(api.CalledWith("container.state", "Cancelled"), IsNil)
+       c.Check(api.CalledWith("container.state", "Complete"), IsNil)
+       c.Check(ran, Equals, false)
+}
+
 func (s *TestSuite) TestRunTimeExceeded(c *C) {
        api, _, _ := s.fullRunHelper(c, `{
     "command": ["sleep", "3"],
@@ -876,8 +899,9 @@ func (s *TestSuite) TestRunTimeExceeded(c *C) {
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-       "runtime_constraints": {},
-       "scheduling_parameters":{"max_run_time": 1}
+    "runtime_constraints": {},
+    "scheduling_parameters":{"max_run_time": 1},
+    "state": "Locked"
 }`, nil, 0, func(t *TestDockerClient) {
                time.Sleep(3 * time.Second)
                t.logWriter.Close()
@@ -894,7 +918,8 @@ func (s *TestSuite) TestContainerWaitFails(c *C) {
     "cwd": ".",
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
-    "priority": 1
+    "priority": 1,
+    "state": "Locked"
 }`, nil, 0, func(t *TestDockerClient) {
                t.ctrExited = true
                time.Sleep(10 * time.Second)
@@ -914,7 +939,8 @@ func (s *TestSuite) TestCrunchstat(c *C) {
                "mounts": {"/tmp": {"kind": "tmp"} },
                "output_path": "/tmp",
                "priority": 1,
-               "runtime_constraints": {}
+               "runtime_constraints": {},
+               "state": "Locked"
        }`, nil, 0, func(t *TestDockerClient) {
                time.Sleep(time.Second)
                t.logWriter.Close()
@@ -947,7 +973,8 @@ func (s *TestSuite) TestNodeInfoLog(c *C) {
                "mounts": {"/tmp": {"kind": "tmp"} },
                "output_path": "/tmp",
                "priority": 1,
-               "runtime_constraints": {}
+               "runtime_constraints": {},
+               "state": "Locked"
        }`, nil, 0,
                func(t *TestDockerClient) {
                        time.Sleep(time.Second)
@@ -981,7 +1008,8 @@ func (s *TestSuite) TestContainerRecordLog(c *C) {
                "mounts": {"/tmp": {"kind": "tmp"} },
                "output_path": "/tmp",
                "priority": 1,
-               "runtime_constraints": {}
+               "runtime_constraints": {},
+               "state": "Locked"
        }`, nil, 0,
                func(t *TestDockerClient) {
                        time.Sleep(time.Second)
@@ -1004,7 +1032,8 @@ func (s *TestSuite) TestFullRunStderr(c *C) {
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-    "runtime_constraints": {}
+    "runtime_constraints": {},
+    "state": "Locked"
 }`, nil, 1, func(t *TestDockerClient) {
                t.logWriter.Write(dockerLog(1, "hello\n"))
                t.logWriter.Write(dockerLog(2, "world\n"))
@@ -1029,7 +1058,8 @@ func (s *TestSuite) TestFullRunDefaultCwd(c *C) {
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-    "runtime_constraints": {}
+    "runtime_constraints": {},
+    "state": "Locked"
 }`, nil, 0, func(t *TestDockerClient) {
                t.logWriter.Write(dockerLog(1, t.cwd+"\n"))
                t.logWriter.Close()
@@ -1050,7 +1080,8 @@ func (s *TestSuite) TestFullRunSetCwd(c *C) {
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-    "runtime_constraints": {}
+    "runtime_constraints": {},
+    "state": "Locked"
 }`, nil, 0, func(t *TestDockerClient) {
                t.logWriter.Write(dockerLog(1, t.cwd+"\n"))
                t.logWriter.Close()
@@ -1091,7 +1122,8 @@ func (s *TestSuite) testStopContainer(c *C, setup func(cr *ContainerRunner)) {
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-    "runtime_constraints": {}
+    "runtime_constraints": {},
+    "state": "Locked"
 }`
 
        rec := arvados.Container{}
@@ -1146,7 +1178,8 @@ func (s *TestSuite) TestFullRunSetEnv(c *C) {
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-    "runtime_constraints": {}
+    "runtime_constraints": {},
+    "state": "Locked"
 }`, nil, 0, func(t *TestDockerClient) {
                t.logWriter.Write(dockerLog(1, t.env[0][7:]+"\n"))
                t.logWriter.Close()
@@ -1568,7 +1601,8 @@ func (s *TestSuite) TestStdout(c *C) {
                "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path": "/tmp/a/b/c.out"} },
                "output_path": "/tmp",
                "priority": 1,
-               "runtime_constraints": {}
+               "runtime_constraints": {},
+               "state": "Locked"
        }`
 
        api, cr, _ := s.fullRunHelper(c, helperRecord, nil, 0, func(t *TestDockerClient) {
@@ -1608,7 +1642,8 @@ func (s *TestSuite) stdoutErrorRunHelper(c *C, record string, fn func(t *TestDoc
 func (s *TestSuite) TestStdoutWithWrongPath(c *C) {
        _, _, err := s.stdoutErrorRunHelper(c, `{
     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "file", "path":"/tmpa.out"} },
-    "output_path": "/tmp"
+    "output_path": "/tmp",
+    "state": "Locked"
 }`, func(t *TestDockerClient) {})
 
        c.Check(err, NotNil)
@@ -1618,7 +1653,8 @@ func (s *TestSuite) TestStdoutWithWrongPath(c *C) {
 func (s *TestSuite) TestStdoutWithWrongKindTmp(c *C) {
        _, _, err := s.stdoutErrorRunHelper(c, `{
     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "tmp", "path":"/tmp/a.out"} },
-    "output_path": "/tmp"
+    "output_path": "/tmp",
+    "state": "Locked"
 }`, func(t *TestDockerClient) {})
 
        c.Check(err, NotNil)
@@ -1628,7 +1664,8 @@ func (s *TestSuite) TestStdoutWithWrongKindTmp(c *C) {
 func (s *TestSuite) TestStdoutWithWrongKindCollection(c *C) {
        _, _, err := s.stdoutErrorRunHelper(c, `{
     "mounts": {"/tmp": {"kind": "tmp"}, "stdout": {"kind": "collection", "path":"/tmp/a.out"} },
-    "output_path": "/tmp"
+    "output_path": "/tmp",
+    "state": "Locked"
 }`, func(t *TestDockerClient) {})
 
        c.Check(err, NotNil)
@@ -1646,7 +1683,8 @@ func (s *TestSuite) TestFullRunWithAPI(c *C) {
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-    "runtime_constraints": {"API": true}
+    "runtime_constraints": {"API": true},
+    "state": "Locked"
 }`, nil, 0, func(t *TestDockerClient) {
                t.logWriter.Write(dockerLog(1, t.env[1][17:]+"\n"))
                t.logWriter.Close()
@@ -1669,7 +1707,8 @@ func (s *TestSuite) TestFullRunSetOutput(c *C) {
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-    "runtime_constraints": {"API": true}
+    "runtime_constraints": {"API": true},
+    "state": "Locked"
 }`, nil, 0, func(t *TestDockerClient) {
                t.api.Container.Output = "d4ab34d3d4f8a72f5c4973051ae69fab+122"
                t.logWriter.Close()
@@ -1696,7 +1735,8 @@ func (s *TestSuite) TestStdoutWithExcludeFromOutputMountPointUnderOutputDir(c *C
     },
                "output_path": "/tmp",
                "priority": 1,
-               "runtime_constraints": {}
+               "runtime_constraints": {},
+               "state": "Locked"
        }`
 
        extraMounts := []string{"a3e8f74c6f101eae01fa08bfb4e49b3a+54"}
@@ -1727,7 +1767,8 @@ func (s *TestSuite) TestStdoutWithMultipleMountPointsUnderOutputDir(c *C) {
     },
                "output_path": "/tmp",
                "priority": 1,
-               "runtime_constraints": {}
+               "runtime_constraints": {},
+               "state": "Locked"
        }`
 
        extraMounts := []string{
@@ -1781,7 +1822,8 @@ func (s *TestSuite) TestStdoutWithMountPointsUnderOutputDirDenormalizedManifest(
     },
                "output_path": "/tmp",
                "priority": 1,
-               "runtime_constraints": {}
+               "runtime_constraints": {},
+               "state": "Locked"
        }`
 
        extraMounts := []string{
@@ -1816,11 +1858,12 @@ func (s *TestSuite) TestOutputError(c *C) {
                "cwd": "/bin",
                "environment": {"FROBIZ": "bilbo"},
                "mounts": {
-        "/tmp": {"kind": "tmp"}
-    },
+                       "/tmp": {"kind": "tmp"}
+               },
                "output_path": "/tmp",
                "priority": 1,
-               "runtime_constraints": {}
+               "runtime_constraints": {},
+               "state": "Locked"
        }`
 
        extraMounts := []string{}
@@ -1846,7 +1889,8 @@ func (s *TestSuite) TestStdinCollectionMountPoint(c *C) {
     },
                "output_path": "/tmp",
                "priority": 1,
-               "runtime_constraints": {}
+               "runtime_constraints": {},
+               "state": "Locked"
        }`
 
        extraMounts := []string{
@@ -1885,7 +1929,8 @@ func (s *TestSuite) TestStdinJsonMountPoint(c *C) {
     },
                "output_path": "/tmp",
                "priority": 1,
-               "runtime_constraints": {}
+               "runtime_constraints": {},
+               "state": "Locked"
        }`
 
        api, _, _ := s.fullRunHelper(c, helperRecord, nil, 0, func(t *TestDockerClient) {
@@ -1918,7 +1963,8 @@ func (s *TestSuite) TestStderrMount(c *C) {
                "stderr": {"kind": "file", "path": "/tmp/b/err.txt"}},
     "output_path": "/tmp",
     "priority": 1,
-    "runtime_constraints": {}
+    "runtime_constraints": {},
+    "state": "Locked"
 }`, nil, 1, func(t *TestDockerClient) {
                t.logWriter.Write(dockerLog(1, "hello\n"))
                t.logWriter.Write(dockerLog(2, "oops\n"))
@@ -1968,7 +2014,8 @@ exec echo killme
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-    "runtime_constraints": {}
+    "runtime_constraints": {},
+    "state": "Locked"
 }`, nil, 2, func(t *TestDockerClient) {
                t.logWriter.Write(dockerLog(1, "hello world\n"))
                t.logWriter.Close()
@@ -1993,7 +2040,8 @@ func (s *TestSuite) TestFullBrokenDocker2(c *C) {
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-    "runtime_constraints": {}
+    "runtime_constraints": {},
+    "state": "Locked"
 }`, nil, 2, func(t *TestDockerClient) {
                t.logWriter.Write(dockerLog(1, "hello world\n"))
                t.logWriter.Close()
@@ -2016,7 +2064,8 @@ func (s *TestSuite) TestFullBrokenDocker3(c *C) {
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-    "runtime_constraints": {}
+    "runtime_constraints": {},
+    "state": "Locked"
 }`, nil, 3, func(t *TestDockerClient) {
                t.logWriter.Write(dockerLog(1, "hello world\n"))
                t.logWriter.Close()
@@ -2038,7 +2087,8 @@ func (s *TestSuite) TestBadCommand1(c *C) {
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-    "runtime_constraints": {}
+    "runtime_constraints": {},
+    "state": "Locked"
 }`, nil, 4, func(t *TestDockerClient) {
                t.logWriter.Write(dockerLog(1, "hello world\n"))
                t.logWriter.Close()
@@ -2060,7 +2110,8 @@ func (s *TestSuite) TestBadCommand2(c *C) {
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-    "runtime_constraints": {}
+    "runtime_constraints": {},
+    "state": "Locked"
 }`, nil, 5, func(t *TestDockerClient) {
                t.logWriter.Write(dockerLog(1, "hello world\n"))
                t.logWriter.Close()
@@ -2082,7 +2133,8 @@ func (s *TestSuite) TestBadCommand3(c *C) {
     "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
-    "runtime_constraints": {}
+    "runtime_constraints": {},
+    "state": "Locked"
 }`, nil, 6, func(t *TestDockerClient) {
                t.logWriter.Write(dockerLog(1, "hello world\n"))
                t.logWriter.Close()
@@ -2106,7 +2158,8 @@ func (s *TestSuite) TestSecretTextMountPoint(c *C) {
                 },
                "output_path": "/tmp",
                "priority": 1,
-               "runtime_constraints": {}
+               "runtime_constraints": {},
+               "state": "Locked"
        }`
 
        api, cr, _ := s.fullRunHelper(c, helperRecord, nil, 0, func(t *TestDockerClient) {
@@ -2134,7 +2187,8 @@ func (s *TestSuite) TestSecretTextMountPoint(c *C) {
                 },
                "output_path": "/tmp",
                "priority": 1,
-               "runtime_constraints": {}
+               "runtime_constraints": {},
+               "state": "Locked"
        }`
 
        api, cr, _ = s.fullRunHelper(c, helperRecord, nil, 0, func(t *TestDockerClient) {
index c2afc178392ebe5d13a5503d38e50e830d8c1b43..06a9ba7087892e12e1daeab396d82f463c43409b 100755 (executable)
@@ -20,6 +20,12 @@ fi
 uuid_prefix=$(cat /var/lib/arvados/api_uuid_prefix)
 database_pw=$(cat /var/lib/arvados/api_database_pw)
 
+if test -s /var/lib/arvados/api_rails_env ; then
+  database_env=$(cat /var/lib/arvados/api_rails_env)
+else
+  database_env=development
+fi
+
 mkdir -p /etc/arvados
 
 cat >/var/lib/arvados/cluster_config.yml <<EOF
@@ -39,7 +45,7 @@ Clusters:
         Host: localhost
         User: arvados
         Password: ${database_pw}
-        DBName: arvados_development
+        DBName: arvados_${database_env}
         client_encoding: utf8
 EOF
 
index af7b2e92ebeb0cb60697ac03dacc25e33553782b..93e0dd5d21f91c08f1f5e685d1f2de1aa6627918 100644 (file)
@@ -510,8 +510,8 @@ func GetRemoteGroups(cfg *ConfigParams, allUsers map[string]arvados.User) (remot
                                Operand:  group.UUID,
                        }, {
                                Attr:     "head_uuid",
-                               Operator: "like",
-                               Operand:  "%-tpzed-%",
+                               Operator: "is_a",
+                               Operand:  "arvados#user",
                        }},
                }
                // User -> Group filter
@@ -534,8 +534,8 @@ func GetRemoteGroups(cfg *ConfigParams, allUsers map[string]arvados.User) (remot
                                Operand:  group.UUID,
                        }, {
                                Attr:     "tail_uuid",
-                               Operator: "like",
-                               Operand:  "%-tpzed-%",
+                               Operator: "is_a",
+                               Operand:  "arvados#user",
                        }},
                }
                g2uLinks, err := GetAll(cfg.Client, "links", g2uFilter, &LinkList{})
index c0e6fee5dbe63ed053c70fcc1ac0875f13813381..ec296d21d1c25a92c14639c36658c4b0aa10bce5 100644 (file)
                        "revision": "b8bc1bf767474819792c23f32d8286a45736f1c6",
                        "revisionTime": "2016-12-03T19:45:07Z"
                },
+               {
+                       "checksumSHA1": "ewGq4nGalpCQOHcmBTdAEQx1wW0=",
+                       "path": "github.com/mitchellh/mapstructure",
+                       "revision": "bb74f1db0675b241733089d5a1faa5dd8b0ef57b",
+                       "revisionTime": "2018-05-11T14:21:26Z"
+               },
                {
                        "checksumSHA1": "OFNit1Qx2DdWhotfREKodDNUwCM=",
                        "path": "github.com/opencontainers/go-digest",