Merge branch '8497-datamanager-batchsize-1000' of https://github.com/wtsi-hgi/arvados...
authorradhika <radhika@curoverse.com>
Thu, 3 Mar 2016 22:03:05 +0000 (17:03 -0500)
committerradhika <radhika@curoverse.com>
Thu, 3 Mar 2016 22:03:05 +0000 (17:03 -0500)
152 files changed:
README [deleted file]
README.md [new file with mode: 0644]
apps/workbench/.gitignore
apps/workbench/app/controllers/application_controller.rb
apps/workbench/app/models/job.rb
apps/workbench/app/views/jobs/_show_log.html.erb
apps/workbench/app/views/projects/_choose.html.erb
apps/workbench/config/application.default.yml
apps/workbench/test/integration/websockets_test.rb
crunch_scripts/crunchrunner [new file with mode: 0755]
doc/_config.yml
doc/_includes/_run_command_foreach_example.liquid
doc/install/arvbox.html.textile.liquid [new file with mode: 0644]
doc/install/index.html.textile.liquid
doc/install/install-crunch-dispatch.html.textile.liquid
sdk/cli/bin/crunch-job
sdk/cli/test/binstub_clean_fail/mount
sdk/cli/test/test_crunch-job.rb
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/setup.py
sdk/go/crunchrunner/crunchrunner.go
sdk/perl/.gitignore [new file with mode: 0644]
sdk/python/arvados/api.py
sdk/python/arvados/commands/arv_copy.py
sdk/python/arvados/commands/run.py
sdk/python/arvados/events.py
sdk/python/setup.py
services/api/.gitignore
services/api/config/application.default.yml
services/api/lib/crunch_dispatch.rb
services/api/test/fixtures/collections.yml
services/api/test/unit/crunch_dispatch_test.rb
services/crunch-dispatch-local/crunch-dispatch-local.go
services/crunch-run/crunchrun.go
services/crunch-run/crunchrun_test.go
services/crunch-run/logging.go
services/crunch-run/logging_test.go
services/crunch-run/upload.go
services/crunch-run/upload_test.go [new file with mode: 0644]
services/datamanager/collection/collection.go
services/nodemanager/arvnodeman/computenode/driver/__init__.py
services/nodemanager/arvnodeman/computenode/driver/ec2.py
services/nodemanager/arvnodeman/computenode/driver/gce.py
services/nodemanager/arvnodeman/config.py
services/nodemanager/arvnodeman/fullstopactor.py [new file with mode: 0644]
services/nodemanager/tests/test_failure.py [new file with mode: 0644]
tools/arvbox/bin/arvbox [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/Dockerfile.base [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/Dockerfile.demo [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/Dockerfile.dev [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/application_yml_override.py [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/common.sh [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/createusers.sh [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/crunch-setup.sh [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/fuse.conf [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/gitolite.rc [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/keep-setup.sh [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/logger [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/runit-docker/.gitignore [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/runit-docker/LICENSE [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/runit-docker/Makefile [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/runit-docker/README.md [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/runit-docker/debian/changelog [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/runit-docker/debian/compat [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/runit-docker/debian/control [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/runit-docker/debian/copyright [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/runit-docker/debian/docs [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/runit-docker/debian/rules [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/runit-docker/debian/source/format [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/runit-docker/runit-docker [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/runit-docker/runit-docker.c [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/runsu.sh [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/api/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/api/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/api/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/api/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/arv-git-httpd/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/arv-git-httpd/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/arv-git-httpd/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/arv-git-httpd/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/doc/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/doc/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/doc/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/doc/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/docker/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/docker/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/docker/run [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/gitolite/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/gitolite/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/gitolite/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/gitolite/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/keep-web/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/keep-web/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/keep-web/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/keep-web/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/keepproxy/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/keepproxy/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/keepproxy/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/keepproxy/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/keepstore0/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/keepstore0/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/keepstore0/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/keepstore0/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/keepstore1/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/keepstore1/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/keepstore1/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/keepstore1/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/postgres/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/postgres/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/postgres/run [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/postgres/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/ready/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/ready/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/sdk/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/sdk/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/sdk/run [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/sdk/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/slurmctld/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/slurmctld/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/slurmctld/run [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/slurmd/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/slurmd/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/slurmd/run [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/ssh/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/ssh/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/ssh/run [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/sso/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/sso/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/sso/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/sso/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/vm/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/vm/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/vm/run [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/vm/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/workbench/log/main/.gitstub [new file with mode: 0644]
tools/arvbox/lib/arvbox/docker/service/workbench/log/run [new symlink]
tools/arvbox/lib/arvbox/docker/service/workbench/run [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/service/workbench/run-service [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/waitforpostgres.sh [new file with mode: 0755]
tools/crunchstat-summary/crunchstat_summary/summarizer.py

diff --git a/README b/README
deleted file mode 100644 (file)
index 7a2c5ef..0000000
--- a/README
+++ /dev/null
@@ -1,21 +0,0 @@
-Welcome to Arvados!
-
-The main Arvados web site is 
-  https://arvados.org
-
-The Arvados public wiki is located at 
-  https://dev.arvados.org/projects/arvados/wiki
-
-The Arvados public bug tracker is located at 
-  https://dev.arvados.org/projects/arvados/issues
-
-For support see 
-  http://doc.arvados.org/user/getting_started/community.html
-
-Installation documentation is located at 
-  http://doc.arvados.org/install
-
-If you wish to build the documentation yourself, follow the instructions in
-doc/README to build the documentation, then consult the "Install Guide".
-
-See COPYING for information about Arvados Free Software licenses.
diff --git a/README.md b/README.md
new file mode 100644 (file)
index 0000000..629c2f0
--- /dev/null
+++ b/README.md
@@ -0,0 +1,74 @@
+[Arvados](https://arvados.org) is a free software distributed computing platform
+for bioinformatics, data science, and high throughput analysis of massive data
+sets.  Arvados supports a variety of cloud, cluster and HPC environments.
+
+Arvados consists of:
+
+* *Keep*: a petabyte-scale content-addressed distributed storage system for managing and
+  storing collections of files, accessible via HTTP and FUSE mount.
+
+* *Crunch*: a Docker-based cluster and HPC workflow engine designed to provide
+  strong versioning, reproducibility, and provenance of computations.
+
+* Related services and components including a web workbench for managing files
+  and compute jobs, REST APIs, SDKs, and other tools.
+
+## Quick start
+
+Curoverse maintains an Arvados public cloud demo at
+[https://cloud.curoverse.com](https://cloud.curoverse.com).  A Google account
+is required to log in.
+
+To try out Arvados on your local workstation, you can use Arvbox, which
+provides Arvados components pre-installed in a Docker container (requires
+Docker 1.9+).  After cloning the Arvados git repository:
+
+```
+$ cd arvados/tools/arvbox/bin
+$ ./arvbox start localdemo
+```
+
+In this mode you will only be able to connect to Arvbox from the same host.  To
+configure Arvbox to be accessible over a network and for other options see
+http://doc.arvados.org/install/arvbox.html for details.
+
+## Documentation
+
+Complete documentation, including a User Guide, Installation documentation and
+API documentation is available at http://doc.arvados.org/
+
+If you wish to build the Arvados documentation from a local git clone, see
+doc/README.textile for instructions.
+
+## Community
+
+The [#arvados](irc://irc.oftc.net:6667/#arvados) IRC (Internet Relay Chat)
+channel at the
+[Open and Free Technology Community (irc.oftc.net)](http://www.oftc.net/oftc/)
+is available for live discussion and support.  You can use a traditional IRC
+client or [join OFTC over the web.](https://webchat.oftc.net/?channels=arvados)
+
+The
+[Arvados user mailing list](http://lists.arvados.org/mailman/listinfo/arvados)
+is a forum for general discussion, questions, and news about Arvados
+development.  The
+[Arvados developer mailing list](http://lists.arvados.org/mailman/listinfo/arvados-dev)
+is a forum for more technical discussion, intended for developers and
+contributors to Arvados.
+
+## Development
+
+[![Build Status](https://ci.curoverse.com/buildStatus/icon?job=arvados-api-server)](https://ci.curoverse.com/job/arvados-api-server/)
+
+The Arvados public bug tracker is located at https://dev.arvados.org/projects/arvados/issues
+
+Continuous integration is hosted at https://ci.curoverse.com/
+
+Instructions for setting up a development environment and working on specific
+components can be found on the
+["Hacking Arvados" page of the Arvados wiki](https://dev.arvados.org/projects/arvados/wiki/Hacking).
+
+## Licensing
+
+Arvados is Free Software.  See COPYING for information about Arvados Free
+Software licenses.
index 9bef02bbfda670595750fd99a4461005ce5b8f12..a27ac31580a1d6b5cc81ab47e60c8deb649a2f85 100644 (file)
@@ -36,3 +36,6 @@
 # Dev/test SSL certificates
 /self-signed.key
 /self-signed.pem
+
+# Generated git-commit.version file
+/git-commit.version
index 1fc15807c9568e05876a349cc38ed152887dc943..4c3d3f852eb2a737049f0a734e88de738a6f0b95 100644 (file)
@@ -89,6 +89,7 @@ class ApplicationController < ActionController::Base
     # exception here than in a template.)
     unless current_user.nil?
       begin
+        my_starred_projects current_user
         build_my_wanted_projects_tree current_user
       rescue ArvadosApiClient::ApiError
         # Fall back to the default-setting code later.
@@ -96,8 +97,6 @@ class ApplicationController < ActionController::Base
     end
     @starred_projects ||= []
     @my_wanted_projects_tree ||= []
-    @my_project_tree ||= []
-    @shared_project_tree ||= []
     render_error(err_opts)
   end
 
@@ -850,7 +849,7 @@ class ApplicationController < ActionController::Base
     links = Link.filter([['tail_uuid', '=', user.uuid],
                          ['link_class', '=', 'star'],
                          ['head_uuid', 'is_a', 'arvados#group']]).select(%w(head_uuid))
-    uuids =links.collect { |x| x.head_uuid }
+    uuids = links.collect { |x| x.head_uuid }
     starred_projects = Group.filter([['uuid', 'in', uuids]]).order('name')
     @starred_projects = starred_projects.results
   end
@@ -928,57 +927,6 @@ class ApplicationController < ActionController::Base
       sorted_paths.call buildtree.call(children_of, 'me')
   end
 
-  helper_method :my_project_tree
-  def my_project_tree
-    build_project_trees
-    @my_project_tree
-  end
-
-  helper_method :shared_project_tree
-  def shared_project_tree
-    build_project_trees
-    @shared_project_tree
-  end
-
-  def build_project_trees
-    return if @my_project_tree and @shared_project_tree
-    parent_of = {current_user.uuid => 'me'}
-    all_projects.each do |ob|
-      parent_of[ob.uuid] = ob.owner_uuid
-    end
-    children_of = {false => [], 'me' => [current_user]}
-    all_projects.each do |ob|
-      if ob.owner_uuid != current_user.uuid and
-          not parent_of.has_key? ob.owner_uuid
-        parent_of[ob.uuid] = false
-      end
-      children_of[parent_of[ob.uuid]] ||= []
-      children_of[parent_of[ob.uuid]] << ob
-    end
-    buildtree = lambda do |children_of, root_uuid=false|
-      tree = {}
-      children_of[root_uuid].andand.each do |ob|
-        tree[ob] = buildtree.call(children_of, ob.uuid)
-      end
-      tree
-    end
-    sorted_paths = lambda do |tree, depth=0|
-      paths = []
-      tree.keys.sort_by { |ob|
-        ob.is_a?(String) ? ob : ob.friendly_link_name
-      }.each do |ob|
-        paths << {object: ob, depth: depth}
-        paths += sorted_paths.call tree[ob], depth+1
-      end
-      paths
-    end
-    @my_project_tree =
-      sorted_paths.call buildtree.call(children_of, 'me')
-    @shared_project_tree =
-      sorted_paths.call({'Projects shared with me' =>
-                          buildtree.call(children_of, false)})
-  end
-
   helper_method :get_object
   def get_object uuid
     if @get_object.nil? and @objects
index 3ece865959f10aabf70acfab6cde1dd9145cf6d5..6566aeb7cd5f82c4aa9e8f16a88b4d287f1afc24 100644 (file)
@@ -41,4 +41,16 @@ class Job < ArvadosBase
   def textile_attributes
     [ 'description' ]
   end
+
+  def stderr_log_query(limit=nil)
+    query = Log.where(event_type: "stderr", object_uuid: self.uuid)
+               .order("id DESC")
+    query = query.limit(limit) if limit
+    query
+  end
+
+  def stderr_log_lines(limit=2000)
+    stderr_log_query(limit).results.reverse.
+      flat_map { |log| log.properties[:text].split("\n") rescue [] }
+  end
 end
index 7d67b74210812895347d21d6c08f4ceb15c50db0..02ad2b73986856185e6b66bbaacd6d8a1a5826c7 100644 (file)
@@ -8,7 +8,8 @@
 <div id="event_log_div"
      class="arv-log-event-listener arv-log-event-handler-append-logs arv-job-log-window"
      data-object-uuid="<%= @object.uuid %>"
-     ></div>
+  ><%= @object.stderr_log_lines(Rails.configuration.running_job_log_records_to_fetch).join("\n") %>
+</div>
 
 <%# Applying a long throttle suppresses the auto-refresh of this
     partial that would normally be triggered by arv-log-event. %>
index c0759ed2e3ac1da813acd7afa744f62f543185a7..badaa24983f2640e60a2d2e8397d9842941f74ea 100644 (file)
 
       <div class="modal-body">
         <div class="selectable-container" style="height: 15em; overflow-y: scroll">
-          <% [my_project_tree, shared_project_tree].each do |tree| %>
-            <% tree.each do |projectnode| %>
-              <% if projectnode[:object].is_a? String %>
-                <div class="row" style="padding-left: <%= 1 + projectnode[:depth] %>em; margin-right: 0px">
-                  <i class="fa fa-fw fa-share-alt"></i>
-                  <%= projectnode[:object] %>
-                </div>
-              <% else
-                 row_selectable = !params[:editable] || projectnode[:object].editable?
-                 if projectnode[:object].uuid == current_user.uuid
-                   row_name = "Home"
-                   row_selectable = true
-                 else
-                   row_name = projectnode[:object].friendly_link_name || 'New project'
-                 end %>
-                <div class="<%= 'selectable project' if row_selectable %> row"
-                     style="padding-left: <%= 1 + projectnode[:depth] %>em; margin-right: 0px" data-object-uuid="<%= projectnode[:object].uuid %>">
-                  <i class="fa fa-fw fa-folder-o"></i> <%= row_name %>
-                </div>
-              <% end %>
+          <% starred_projects = my_starred_projects current_user%>
+          <% if starred_projects.andand.any? %>
+            <% writable_projects = starred_projects.select(&:editable?) %>
+            <% writable_projects.each do |projectnode| %>
+              <% row_name = projectnode.friendly_link_name || 'New project' %>
+              <div class="selectable project row"
+                   style="padding-left: 1em; margin-right: 0px"
+                   data-object-uuid="<%= projectnode.uuid %>">
+                <i class="fa fa-fw fa-folder-o"></i> <%= row_name %> <i class="fa fa-fw fa-star"></i>
+              </div>
             <% end %>
           <% end %>
+
+          <% my_projects = my_wanted_projects_tree(current_user) %>
+          <% my_projects[0].each do |projectnode| %>
+            <% if projectnode[:object].uuid == current_user.uuid
+                 row_name = "Home"
+               else
+                 row_name = projectnode[:object].friendly_link_name || 'New project'
+               end %>
+            <div class="selectable project row"
+                 style="padding-left: <%= 1 + projectnode[:depth] %>em; margin-right: 0px"
+                 data-object-uuid="<%= projectnode[:object].uuid %>">
+              <i class="fa fa-fw fa-folder-o"></i> <%= row_name %>
+            </div>
+          <% end %>
         </div>
+
+        <% if my_projects[1] or my_projects[2] or my_projects[0].size > 200 %>
+          <div>Some of your projects are omitted. Add projects of interest to favorites.</div>
+        <% end %>
       </div>
 
       <div class="modal-footer">
index 239ffcd225da24a0c444851c16e2484c1293ba96..5400debbfdaf55e1f64c004adf70f98ca4037cb1 100644 (file)
@@ -272,3 +272,7 @@ common:
   #
   # The default setting (false) is appropriate for a multi-user site.
   trust_all_content: false
+
+  # Maximum number of historic log records of a running job to fetch
+  # and display in the Log tab, while subscribing to web sockets.
+  running_job_log_records_to_fetch: 2000
index 648d59c69000b19cfca089d4aac154da5e2529c7..655ad92c94d1d18f23988990c59cb832a817fb9e 100644 (file)
@@ -211,4 +211,68 @@ class WebsocketTest < ActionDispatch::IntegrationTest
     datum = page.evaluate_script("jobGraphData[jobGraphData.length-1]['#{series}']")
     assert_in_epsilon value, datum.to_f
   end
+
+  test "test running job with just a few previous log records" do
+    Thread.current[:arvados_api_token] = @@API_AUTHS["admin"]['api_token']
+    job = Job.where(uuid: api_fixture("jobs")['running']['uuid']).results.first
+    visit page_with_token("admin", "/jobs/#{job.uuid}")
+
+    api = ArvadosApiClient.new
+
+    # Create just one old log record
+    api.api("logs", "", {log: {
+                object_uuid: job.uuid,
+                event_type: "stderr",
+                properties: {"text" => "Historic log message"}}})
+
+    click_link("Log")
+
+    # Expect "all" historic log records because we have less than
+    # default Rails.configuration.running_job_log_records_to_fetch count
+    assert_text 'Historic log message'
+
+    # Create new log record and expect it to show up in log tab
+    api.api("logs", "", {log: {
+                object_uuid: job.uuid,
+                event_type: "stderr",
+                properties: {"text" => "Log message after subscription"}}})
+    assert_text 'Log message after subscription'
+  end
+
+  test "test running job with too many previous log records" do
+    Rails.configuration.running_job_log_records_to_fetch = 5
+
+    Thread.current[:arvados_api_token] = @@API_AUTHS["admin"]['api_token']
+    job = Job.where(uuid: api_fixture("jobs")['running']['uuid']).results.first
+
+    visit page_with_token("admin", "/jobs/#{job.uuid}")
+
+    api = ArvadosApiClient.new
+
+    # Create Rails.configuration.running_job_log_records_to_fetch + 1 log records
+    (0..Rails.configuration.running_job_log_records_to_fetch).each do |count|
+      api.api("logs", "", {log: {
+                object_uuid: job.uuid,
+                event_type: "stderr",
+                properties: {"text" => "Old log message #{count}"}}})
+    end
+
+    # Go to log tab, which results in subscribing to websockets
+    click_link("Log")
+
+    # Expect all but the first historic log records,
+    # because that was one too many than fetch count.
+    (1..Rails.configuration.running_job_log_records_to_fetch).each do |count|
+      assert_text "Old log message #{count}"
+    end
+    assert_no_text 'Old log message 0'
+
+    # Create one more log record after subscription
+    api.api("logs", "", {log: {
+                object_uuid: job.uuid,
+                event_type: "stderr",
+                properties: {"text" => "Life goes on!"}}})
+    # Expect it to show up in log tab
+    assert_text 'Life goes on!'
+  end
 end
diff --git a/crunch_scripts/crunchrunner b/crunch_scripts/crunchrunner
new file mode 100755 (executable)
index 0000000..71c10c9
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/sh
+exec $TASK_KEEPMOUNT/$JOB_PARAMETER_CRUNCHRUNNER
index 05b7437d750286c9e6c8978f080467e084c527de..2ee1f5dcf39fbb381cde41fa273fd6c4294a525f 100644 (file)
@@ -144,6 +144,8 @@ navbar:
   installguide:
     - Overview:
       - install/index.html.textile.liquid
+    - Docker quick start:
+      - install/arvbox.html.textile.liquid
     - Manual installation:
       - install/install-manual-prerequisites.html.textile.liquid
       - install/install-sso.html.textile.liquid
index 3fb754f9db139d4a659f5aba0dec9487f1fe488c..20fe6c2c2fd15e69522c9ebd4d5c2ad1c784d1ab 100644 (file)
@@ -27,7 +27,7 @@
                     "required": true,
                     "dataclass": "Collection"
                 },
-                "sample_subdir": "$(dir $(samples))",
+                "sample_subdir": "$(dir $(sample))",
                 "read_pair": {
                     "value": {
                         "group": "sample_subdir",
diff --git a/doc/install/arvbox.html.textile.liquid b/doc/install/arvbox.html.textile.liquid
new file mode 100644 (file)
index 0000000..2420ff7
--- /dev/null
@@ -0,0 +1,136 @@
+---
+layout: default
+navsection: installguide
+title: Arvados-in-a-box
+...
+
+Arvbox is a Docker-based self-contained development, demonstration and testing environment for Arvados.  It is not intended for production use.
+
+h2. Quick start
+
+<pre>
+$ git clone https://github.com/curoverse/arvados.git
+$ cd arvados/tools/arvbox/bin
+$ ./arvbox start localdemo
+</pre>
+
+h2. Requirements
+
+* Linux 3.x+ and Docker 1.9+
+* Minimum of 3 GiB of RAM  + additional memory to run jobs
+* Minimum of 3 GiB of disk + storage for actual data
+
+h2. Usage
+
+<pre>
+Arvados-in-a-box
+
+arvbox (build|start|run|open|shell|ip|stop|reboot|reset|destroy|log|svrestart)
+
+build <config>      build arvbox Docker image
+start|run <config>  start arvbox container
+open       open arvbox workbench in a web browser
+shell      enter arvbox shell
+ip         print arvbox ip address
+status     print some information about current arvbox
+stop       stop arvbox container
+restart <config>  stop, then run again
+reboot  <config>  stop, build arvbox Docker image, run
+reset      delete arvbox arvados data (be careful!)
+destroy    delete all arvbox code and data (be careful!)
+log       <service> tail log of specified service
+sv        <start|stop|restart> <service> change state of service inside arvbox
+clone <from> <to>   clone an arvbox
+</pre>
+
+h2. Configs
+
+h3. dev
+
+Development configuration.  Boots a complete Arvados environment inside the container.  The "arvados", "arvados-dev" and "sso-devise-omniauth-provider" code directories along with data directories "postgres", "var", "passenger" and "gems" are bind mounted from the host file system for easy access and persistence across container rebuilds.  Services are bound to the Docker container's network IP address and can only be accessed on the local host.
+
+In "dev" mode, you can override the default autogenerated settings of Rails projects by adding "application.yml.override" to any Rails project (sso, api, workbench).  This can be used to test out API server settings or point Workbench at an alternate API server.
+
+h3. localdemo
+
+Demo configuration.  Boots a complete Arvados environment inside the container. Unlike the development configuration, code directories are included in the demo image, and data directories are stored in a separate data volume container. Services are bound to the Docker container's network IP address and can only be accessed on the local host.
+
+h3. test
+
+Run the test suite.
+
+h3. publicdev
+
+Publicly accessible development configuration.  Similar to 'dev' except that service ports are published to the host's IP address and can be accessed by anyone who can connect to the host system.  See below for more information.  WARNING! The public arvbox configuration is NOT SECURE and must not be placed on a public IP address or used for production work.
+
+h3. publicdemo
+
+Publicly accessible demo configuration.  Similar to 'localdemo' except that service ports are published to the host's IP address and can be accessed by anyone who can connect to the host system.  See below for more information.  WARNING! The public arvbox configuration is NOT SECURE and must not be placed on a public IP address or used for production work.
+
+h2. Environment variables
+
+h3. ARVBOX_DOCKER
+
+The location of Dockerfile.base and associated files used by "arvbox build".
+default: result of $(readlink -f $(dirname $0)/../lib/arvbox/docker)
+
+h3. ARVBOX_CONTAINER
+
+The name of the Docker container to manipulate.
+default: arvbox
+
+h3. ARVBOX_BASE
+
+The base directory to store persistent data for arvbox containers.
+default: $HOME/.arvbox
+
+h3. ARVBOX_DATA
+
+The base directory to store persistent data for the current container.
+default: $ARVBOX_BASE/$ARVBOX_CONTAINER
+
+h3. ARVADOS_ROOT
+
+The root directory of the Arvados source tree
+default: $ARVBOX_DATA/arvados
+
+h3. ARVADOS_DEV_ROOT
+
+The root directory of the Arvados-dev source tree
+default: $ARVBOX_DATA/arvados-dev
+
+h3. SSO_ROOT
+
+The root directory of the SSO source tree
+default: $ARVBOX_DATA/sso-devise-omniauth-provider
+
+h3. ARVBOX_PUBLISH_IP
+
+The IP address on which to publish services when running in public configuration.  Overrides default detection of the host's IP address.
+
+h2. Using Arvbox for Arvados development
+
+The "Arvbox section of Hacking Arvados":https://dev.arvados.org/projects/arvados/wiki/Arvbox has information about using Arvbox for Arvados development.
+
+h2. Making Arvbox accessible from other hosts
+
+In "dev" and "localdemo" mode, Arvbox can only be accessed on the same host it is running.  To publish Arvbox service ports to the host's service ports and advertise the host's IP address for services, use @publicdev@ or @publicdemo@:
+
+<pre>
+$ arvbox reboot publicdemo
+</pre>
+
+This attempts to auto-detect the correct IP address to use by taking the IP address of the default route device.  If the auto-detection is wrong, if you want to publish a hostname instead of a raw address, or if you need to access it through a different device (such as a router or firewall), set @ARVBOX_PUBLISH_IP@ to the desired hostname or IP address.
+
+<pre>
+$ export ARVBOX_PUBLISH_IP=example.com
+$ arvbox reboot publicdemo
+</pre>
+
+Note: this expects to bind the host's port 80 (http) for workbench, so you cannot have a conflicting web server already running on the host.  It does not attempt to bind the host's port 22 (ssh); as a result, the arvbox ssh port is not published.
+
+h2. Notes
+
+Services are designed to install and auto-configure on start or restart.  For example, the service script for keepstore always compiles keepstore from source and registers the daemon with the API server.
+
+Services are run with process supervision, so a service which exits will be restarted.  Dependencies between services are handled by repeatedly trying and failing the service script until dependencies are fulfilled (by other service scripts) enabling the service script to complete.
index edd2d854f0914a12b073f7ca6f4889b81fbbe339..7580318077e29b9ae7025a39c2203f9b55146ac6 100644 (file)
@@ -8,4 +8,5 @@ Arvados components run on GNU/Linux systems, and do not depend on any particular
 
 Arvados components can be installed and configured in a number of different ways.  Step-by-step instructions are available to perform a production installation from packages with manual configuration.  This method assumes you have several (virtual) machines at your disposal for running the various Arvados components.
 
+* "Docker quick start":arvbox.html
 * "Manual installation":install-manual-prerequisites.html
index 0e5be9411a28f435a738e694dd6c398b91ae2b1e..fda0769abfe6c34d94e0359ce395d2e6bf9b0d48 100644 (file)
@@ -184,6 +184,10 @@ export CRUNCH_DISPATCH_LOCKFILE=/var/lock/crunch-dispatch
 export HOME=$(pwd)
 export RAILS_ENV=production
 
+## Uncomment and edit this line if your compute nodes have cgroup info
+## somewhere other than /sys/fs/cgroup (e.g., "/cgroup" for CentOS 6)
+#export CRUNCH_CGROUP_ROOT="/sys/fs/cgroup"
+
 ## Uncomment this line if your cluster uses self-signed SSL certificates:
 #export ARVADOS_API_HOST_INSECURE=yes
 
index ae210a6f447e42d69ecd9302f414866bb4da6e23..e473710c243683f0e520b575dfbe49a2bec99bc6 100755 (executable)
@@ -126,6 +126,7 @@ my $jobspec;
 my $job_api_token;
 my $no_clear_tmp;
 my $resume_stash;
+my $cgroup_root = "/sys/fs/cgroup";
 my $docker_bin = "docker.io";
 my $docker_run_args = "";
 GetOptions('force-unlock' => \$force_unlock,
@@ -134,6 +135,7 @@ GetOptions('force-unlock' => \$force_unlock,
            'job-api-token=s' => \$job_api_token,
            'no-clear-tmp' => \$no_clear_tmp,
            'resume-stash=s' => \$resume_stash,
+           'cgroup-root=s' => \$cgroup_root,
            'docker-bin=s' => \$docker_bin,
            'docker-run-args=s' => \$docker_run_args,
     );
@@ -183,11 +185,12 @@ if (($Job || $local_job)->{docker_image_locator}) {
   $cmd = [$docker_bin, 'ps', '-q'];
 }
 Log(undef, "Sanity check is `@$cmd`");
-srun(["srun", "--nodes=\Q$ENV{SLURM_NNODES}\E", "--ntasks-per-node=1"],
-     $cmd,
-     {fork => 1});
-if ($? != 0) {
-  Log(undef, "Sanity check failed: ".exit_status_s($?));
+my ($exited, $stdout, $stderr) = srun_sync(
+  ["srun", "--nodes=\Q$ENV{SLURM_NNODES}\E", "--ntasks-per-node=1"],
+  $cmd,
+  {label => "sanity check"});
+if ($exited != 0) {
+  Log(undef, "Sanity check failed: ".exit_status_s($exited));
   exit EX_TEMPFAIL;
 }
 Log(undef, "Sanity check OK");
@@ -386,28 +389,17 @@ my $nodelist = join(",", @node);
 my $git_tar_count = 0;
 
 if (!defined $no_clear_tmp) {
-  # Clean out crunch_tmp/work, crunch_tmp/opt, crunch_tmp/src*
-  Log (undef, "Clean work dirs");
-
-  my $cleanpid = fork();
-  if ($cleanpid == 0)
-  {
-    # Find FUSE mounts under $CRUNCH_TMP and unmount them.
-    # Then clean up work directories.
-    # TODO: When #5036 is done and widely deployed, we can limit mount's
-    # -t option to simply fuse.keep.
-    srun (["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
-          ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk "(index(\$3, \"$CRUNCH_TMP\") == 1){print \$3}" | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
-    exit (1);
-  }
-  while (1)
-  {
-    last if $cleanpid == waitpid (-1, WNOHANG);
-    freeze_if_want_freeze ($cleanpid);
-    select (undef, undef, undef, 0.1);
-  }
-  if ($?) {
-    Log(undef, "Clean work dirs: exit ".exit_status_s($?));
+  # Find FUSE mounts under $CRUNCH_TMP and unmount them.  Then clean
+  # up work directories crunch_tmp/work, crunch_tmp/opt,
+  # crunch_tmp/src*.
+  #
+  # TODO: When #5036 is done and widely deployed, we can limit mount's
+  # -t option to simply fuse.keep.
+  my ($exited, $stdout, $stderr) = srun_sync(
+    ["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
+    ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk "(index(\$3, \"$CRUNCH_TMP\") == 1){print \$3}" | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid'],
+    {label => "clean work dirs"});
+  if ($exited != 0) {
     exit(EX_RETRY_UNLOCKED);
   }
 }
@@ -428,30 +420,22 @@ if ! $docker_bin images -q --no-trunc --all | grep -qxF \Q$docker_hash\E; then
     arv-get \Q$docker_locator$docker_stream/$docker_hash.tar\E | $docker_bin load
 fi
 };
-  my $docker_pid = fork();
-  if ($docker_pid == 0)
-  {
-    srun (["srun", "--nodelist=" . join(',', @node)],
-          ["/bin/bash", "-o", "pipefail", "-ec", $docker_install_script]);
-    exit ($?);
-  }
-  while (1)
-  {
-    last if $docker_pid == waitpid (-1, WNOHANG);
-    freeze_if_want_freeze ($docker_pid);
-    select (undef, undef, undef, 0.1);
-  }
-  if ($? != 0)
+
+  my ($exited, $stdout, $stderr) = srun_sync(
+    ["srun", "--nodelist=" . join(',', @node)],
+    ["/bin/bash", "-o", "pipefail", "-ec", $docker_install_script],
+    {label => "load docker image"});
+  if ($exited != 0)
   {
-    Log(undef, "Installing Docker image from $docker_locator exited " . exit_status_s($?));
     exit(EX_RETRY_UNLOCKED);
   }
 
   # Determine whether this version of Docker supports memory+swap limits.
-  srun(["srun", "--nodelist=" . $node[0]],
-       ["/bin/sh", "-ec", "$docker_bin run --help | grep -qe --memory-swap="],
-      {fork => 1});
-  $docker_limitmem = ($? == 0);
+  ($exited, $stdout, $stderr) = srun_sync(
+    ["srun", "--nodelist=" . $node[0]],
+    [$docker_bin, 'run', '--help'],
+    {label => "check --memory-swap feature"});
+  $docker_limitmem = ($stdout =~ /--memory-swap/);
 
   # Find a non-root Docker user to use.
   # Tries the default user for the container, then 'crunch', then 'nobody',
@@ -461,20 +445,22 @@ fi
   # Docker containers.
   my @tryusers = ("", "crunch", "nobody");
   foreach my $try_user (@tryusers) {
+    my $label;
     my $try_user_arg;
     if ($try_user eq "") {
-      Log(undef, "Checking if container default user is not UID 0");
+      $label = "check whether default user is UID 0";
       $try_user_arg = "";
     } else {
-      Log(undef, "Checking if user '$try_user' is not UID 0");
+      $label = "check whether user '$try_user' is UID 0";
       $try_user_arg = "--user=$try_user";
     }
-    srun(["srun", "--nodelist=" . $node[0]],
-         ["/bin/sh", "-ec",
-          "a=`$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user` && " .
-          " test \$a -ne 0"],
-         {fork => 1});
-    if ($? == 0) {
+    my ($exited, $stdout, $stderr) = srun_sync(
+      ["srun", "--nodelist=" . $node[0]],
+      ["/bin/sh", "-ec",
+       "$docker_bin run $docker_run_args $try_user_arg $docker_hash id --user"],
+      {label => $label});
+    chomp($stdout);
+    if ($exited == 0 && $stdout =~ /^\d+$/ && $stdout > 0) {
       $dockeruserarg = $try_user_arg;
       if ($try_user eq "") {
         Log(undef, "Container will run with default user");
@@ -664,11 +650,9 @@ if (!defined $git_archive) {
   }
 }
 else {
-  my $install_exited;
+  my $exited;
   my $install_script_tries_left = 3;
   for (my $attempts = 0; $attempts < 3; $attempts++) {
-    Log(undef, "Run install script on all workers");
-
     my @srunargs = ("srun",
                     "--nodelist=$nodelist",
                     "-D", $ENV{'TMPDIR'}, "--job-name=$job_id");
@@ -676,59 +660,21 @@ else {
                     "mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
 
     $ENV{"CRUNCH_GIT_ARCHIVE_HASH"} = md5_hex($git_archive);
-    my ($install_stderr_r, $install_stderr_w);
-    pipe $install_stderr_r, $install_stderr_w or croak("pipe() failed: $!");
-    set_nonblocking($install_stderr_r);
-    my $installpid = fork();
-    if ($installpid == 0)
-    {
-      close($install_stderr_r);
-      fcntl($install_stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
-      open(STDOUT, ">&", $install_stderr_w);
-      open(STDERR, ">&", $install_stderr_w);
-      srun (\@srunargs, \@execargs, {}, $build_script . $git_archive);
-      exit (1);
-    }
-    close($install_stderr_w);
-    # Tell freeze_if_want_freeze how to kill the child, otherwise the
-    # "waitpid(installpid)" loop won't get interrupted by a freeze:
-    $proc{$installpid} = {};
-    my $stderr_buf = '';
-    # Track whether anything appears on stderr other than slurm errors
-    # ("srun: ...") and the "starting: ..." message printed by the
-    # srun subroutine itself:
+    my ($stdout, $stderr);
+    ($exited, $stdout, $stderr) = srun_sync(
+      \@srunargs, \@execargs,
+      {label => "run install script on all workers"},
+      $build_script . $git_archive);
+
     my $stderr_anything_from_script = 0;
-    my $match_our_own_errors = '^(srun: error: |starting: \[)';
-    while ($installpid != waitpid(-1, WNOHANG)) {
-      freeze_if_want_freeze ($installpid);
-      # Wait up to 0.1 seconds for something to appear on stderr, then
-      # do a non-blocking read.
-      my $bits = fhbits($install_stderr_r);
-      select ($bits, undef, $bits, 0.1);
-      if (0 < sysread ($install_stderr_r, $stderr_buf, 8192, length($stderr_buf)))
-      {
-        while ($stderr_buf =~ /^(.*?)\n/) {
-          my $line = $1;
-          substr $stderr_buf, 0, 1+length($line), "";
-          Log(undef, "stderr $line");
-          if ($line !~ /$match_our_own_errors/) {
-            $stderr_anything_from_script = 1;
-          }
-        }
-      }
-    }
-    delete $proc{$installpid};
-    $install_exited = $?;
-    close($install_stderr_r);
-    if (length($stderr_buf) > 0) {
-      if ($stderr_buf !~ /$match_our_own_errors/) {
+    for my $line (split(/\n/, $stderr)) {
+      if ($line !~ /^(srun: error: |starting: \[)/) {
         $stderr_anything_from_script = 1;
       }
-      Log(undef, "stderr $stderr_buf")
     }
 
-    Log (undef, "Install script exited ".exit_status_s($install_exited));
-    last if $install_exited == 0 || $main::please_freeze;
+    last if $exited == 0 || $main::please_freeze;
+
     # If the install script fails but doesn't print an error message,
     # the next thing anyone is likely to do is just run it again in
     # case it was a transient problem like "slurm communication fails
@@ -744,7 +690,7 @@ else {
     unlink($tar_filename);
   }
 
-  if ($install_exited != 0) {
+  if ($exited != 0) {
     croak("Giving up");
   }
 }
@@ -803,6 +749,7 @@ if ($initial_tasks_this_level < @node) {
   @freeslot = (0..$#slot);
 }
 my $round_num_freeslots = scalar(@freeslot);
+print STDERR "crunch-job have ${round_num_freeslots} free slots for ${initial_tasks_this_level} initial tasks at this level, ".scalar(@node)." nodes, and ".scalar(@slot)." slots\n";
 
 my %round_max_slots = ();
 for (my $ii = $#freeslot; $ii >= 0; $ii--) {
@@ -915,7 +862,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     {
       my $containername = "$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}";
       my $cidfile = "$ENV{CRUNCH_TMP}/$containername.cid";
-      $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
+      $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
       $command .= "$docker_bin run $docker_run_args --name=$containername --attach=stdout --attach=stderr --attach=stdin -i \Q$dockeruserarg\E --cidfile=$cidfile --sig-proxy ";
       # We only set memory limits if Docker lets us limit both memory and swap.
       # Memory limits alone have been supported longer, but subprocesses tend
@@ -995,7 +942,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
       }
     } else {
       # Non-docker run
-      $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -poll=10000 ";
+      $command .= "crunchstat -cgroup-root=\Q$cgroup_root\E -poll=10000 ";
       $command .= $stdbuf;
       $command .= "perl - $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
     }
@@ -1013,11 +960,12 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     next;
   }
   shift @freeslot;
-  $proc{$childpid} = { jobstep => $id,
-                      time => time,
-                      slot => $childslot,
-                      jobstepname => "$job_id.$id.$childpid",
-                    };
+  $proc{$childpid} = {
+    jobstepidx => $id,
+    time => time,
+    slot => $childslot,
+    jobstepname => "$job_id.$id.$childpid",
+  };
   croak ("assert failed: \$slot[$childslot]->{'pid'} exists") if exists $slot[$childslot]->{pid};
   $slot[$childslot]->{pid} = $childpid;
 
@@ -1185,109 +1133,126 @@ sub update_progress_stats
 
 sub reapchildren
 {
-  my $pid = waitpid (-1, WNOHANG);
-  return 0 if $pid <= 0;
-
-  my $whatslot = ($slot[$proc{$pid}->{slot}]->{node}->{name}
-                 . "."
-                 . $slot[$proc{$pid}->{slot}]->{cpu});
-  my $jobstepid = $proc{$pid}->{jobstep};
-  my $elapsed = time - $proc{$pid}->{time};
-  my $Jobstep = $jobstep[$jobstepid];
-
-  my $childstatus = $?;
-  my $exitvalue = $childstatus >> 8;
-  my $exitinfo = "exit ".exit_status_s($childstatus);
-  $Jobstep->{'arvados_task'}->reload;
-  my $task_success = $Jobstep->{'arvados_task'}->{success};
-
-  Log ($jobstepid, "child $pid on $whatslot $exitinfo success=$task_success");
-
-  if (!defined $task_success) {
-    # task did not indicate one way or the other --> fail
-    Log($jobstepid, sprintf(
-          "ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.",
-          exit_status_s($childstatus)));
-    $Jobstep->{'arvados_task'}->{success} = 0;
-    $Jobstep->{'arvados_task'}->save;
-    $task_success = 0;
-  }
+  my $children_reaped = 0;
+  my @successful_task_uuids = ();
 
-  if (!$task_success)
+  while((my $pid = waitpid (-1, WNOHANG)) > 0)
   {
-    my $temporary_fail;
-    $temporary_fail ||= $Jobstep->{tempfail};
-    $temporary_fail ||= ($exitvalue == TASK_TEMPFAIL);
-
-    ++$thisround_failed;
-    ++$thisround_failed_multiple if $Jobstep->{'failures'} >= 1;
-
-    # Check for signs of a failed or misconfigured node
-    if (++$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} >=
-       2+$slot[$proc{$pid}->{slot}]->{node}->{ncpus}) {
-      # Don't count this against jobstep failure thresholds if this
-      # node is already suspected faulty and srun exited quickly
-      if ($slot[$proc{$pid}->{slot}]->{node}->{hold_until} &&
-         $elapsed < 5) {
-       Log ($jobstepid, "blaming failure on suspect node " .
-             $slot[$proc{$pid}->{slot}]->{node}->{name});
-        $temporary_fail ||= 1;
-      }
-      ban_node_by_slot($proc{$pid}->{slot});
+    my $childstatus = $?;
+
+    my $whatslot = ($slot[$proc{$pid}->{slot}]->{node}->{name}
+                    . "."
+                    . $slot[$proc{$pid}->{slot}]->{cpu});
+    my $jobstepidx = $proc{$pid}->{jobstepidx};
+
+    if (!WIFEXITED($childstatus))
+    {
+      # child did not exit (may be temporarily stopped)
+      Log ($jobstepidx, "child $pid did not actually exit in reapchildren, ignoring for now.");
+      next;
     }
 
-    Log ($jobstepid, sprintf('failure (#%d, %s) after %d seconds',
-                             ++$Jobstep->{'failures'},
-                             $temporary_fail ? 'temporary' : 'permanent',
-                             $elapsed));
+    $children_reaped++;
+    my $elapsed = time - $proc{$pid}->{time};
+    my $Jobstep = $jobstep[$jobstepidx];
+
+    my $exitvalue = $childstatus >> 8;
+    my $exitinfo = "exit ".exit_status_s($childstatus);
+    $Jobstep->{'arvados_task'}->reload;
+    my $task_success = $Jobstep->{'arvados_task'}->{success};
+
+    Log ($jobstepidx, "child $pid on $whatslot $exitinfo success=$task_success");
+
+    if (!defined $task_success) {
+      # task did not indicate one way or the other --> fail
+      Log($jobstepidx, sprintf(
+            "ERROR: Task process exited %s, but never updated its task record to indicate success and record its output.",
+            exit_status_s($childstatus)));
+      $Jobstep->{'arvados_task'}->{success} = 0;
+      $Jobstep->{'arvados_task'}->save;
+      $task_success = 0;
+    }
 
-    if (!$temporary_fail || $Jobstep->{'failures'} >= 3) {
-      # Give up on this task, and the whole job
-      $main::success = 0;
+    if (!$task_success)
+    {
+      my $temporary_fail;
+      $temporary_fail ||= $Jobstep->{tempfail};
+      $temporary_fail ||= ($exitvalue == TASK_TEMPFAIL);
+
+      ++$thisround_failed;
+      ++$thisround_failed_multiple if $Jobstep->{'failures'} >= 1;
+
+      # Check for signs of a failed or misconfigured node
+      if (++$slot[$proc{$pid}->{slot}]->{node}->{losing_streak} >=
+          2+$slot[$proc{$pid}->{slot}]->{node}->{ncpus}) {
+        # Don't count this against jobstep failure thresholds if this
+        # node is already suspected faulty and srun exited quickly
+        if ($slot[$proc{$pid}->{slot}]->{node}->{hold_until} &&
+            $elapsed < 5) {
+          Log ($jobstepidx, "blaming failure on suspect node " .
+               $slot[$proc{$pid}->{slot}]->{node}->{name});
+          $temporary_fail ||= 1;
+        }
+        ban_node_by_slot($proc{$pid}->{slot});
+      }
+
+      Log ($jobstepidx, sprintf('failure (#%d, %s) after %d seconds',
+                                ++$Jobstep->{'failures'},
+                                $temporary_fail ? 'temporary' : 'permanent',
+                                $elapsed));
+
+      if (!$temporary_fail || $Jobstep->{'failures'} >= 3) {
+        # Give up on this task, and the whole job
+        $main::success = 0;
+      }
+      # Put this task back on the todo queue
+      push @jobstep_todo, $jobstepidx;
+      $Job->{'tasks_summary'}->{'failed'}++;
     }
-    # Put this task back on the todo queue
-    push @jobstep_todo, $jobstepid;
-    $Job->{'tasks_summary'}->{'failed'}++;
+    else # task_success
+    {
+      push @successful_task_uuids, $Jobstep->{'arvados_task'}->{uuid};
+      ++$thisround_succeeded;
+      $slot[$proc{$pid}->{slot}]->{node}->{losing_streak} = 0;
+      $slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
+      $slot[$proc{$pid}->{slot}]->{node}->{fail_count} = 0;
+      push @jobstep_done, $jobstepidx;
+      Log ($jobstepidx, "success in $elapsed seconds");
+    }
+    $Jobstep->{exitcode} = $childstatus;
+    $Jobstep->{finishtime} = time;
+    $Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime});
+    $Jobstep->{'arvados_task'}->save;
+    process_stderr_final ($jobstepidx);
+    Log ($jobstepidx, sprintf("task output (%d bytes): %s",
+                              length($Jobstep->{'arvados_task'}->{output}),
+                              $Jobstep->{'arvados_task'}->{output}));
+
+    close $reader{$jobstepidx};
+    delete $reader{$jobstepidx};
+    delete $slot[$proc{$pid}->{slot}]->{pid};
+    push @freeslot, $proc{$pid}->{slot};
+    delete $proc{$pid};
+
+    $progress_is_dirty = 1;
   }
-  else
+
+  if (scalar(@successful_task_uuids) > 0)
   {
-    ++$thisround_succeeded;
-    $slot[$proc{$pid}->{slot}]->{node}->{losing_streak} = 0;
-    $slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
-    $slot[$proc{$pid}->{slot}]->{node}->{fail_count} = 0;
-    push @jobstep_done, $jobstepid;
-    Log ($jobstepid, "success in $elapsed seconds");
-  }
-  $Jobstep->{exitcode} = $childstatus;
-  $Jobstep->{finishtime} = time;
-  $Jobstep->{'arvados_task'}->{finished_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{finishtime});
-  $Jobstep->{'arvados_task'}->save;
-  process_stderr ($jobstepid, $task_success);
-  Log ($jobstepid, sprintf("task output (%d bytes): %s",
-                           length($Jobstep->{'arvados_task'}->{output}),
-                           $Jobstep->{'arvados_task'}->{output}));
-
-  close $reader{$jobstepid};
-  delete $reader{$jobstepid};
-  delete $slot[$proc{$pid}->{slot}]->{pid};
-  push @freeslot, $proc{$pid}->{slot};
-  delete $proc{$pid};
-
-  if ($task_success) {
+    Log (undef, sprintf("%d tasks exited (%d succeeded), checking for new tasks from API server.", $children_reaped, scalar(@successful_task_uuids)));
     # Load new tasks
     my $newtask_list = [];
     my $newtask_results;
     do {
       $newtask_results = api_call(
         "job_tasks/list",
-        'where' => {
-          'created_by_job_task_uuid' => $Jobstep->{'arvados_task'}->{uuid}
-        },
+        'filters' => [["created_by_job_task_uuid","in",\@successful_task_uuids]],
         'order' => 'qsequence',
         'offset' => scalar(@$newtask_list),
-      );
+          );
       push(@$newtask_list, @{$newtask_results->{items}});
     } while (@{$newtask_results->{items}});
+    Log (undef, sprintf("Got %d new tasks from API server.", scalar(@$newtask_list)));
     foreach my $arvados_task (@$newtask_list) {
       my $jobstep = {
         'level' => $arvados_task->{'sequence'},
@@ -1299,14 +1264,16 @@ sub reapchildren
     }
   }
 
-  $progress_is_dirty = 1;
-  1;
+  return $children_reaped;
 }
 
 sub check_refresh_wanted
 {
   my @stat = stat $ENV{"CRUNCH_REFRESH_TRIGGER"};
-  if (@stat && $stat[9] > $latest_refresh) {
+  if (@stat &&
+      $stat[9] > $latest_refresh &&
+      # ...and we have actually locked the job record...
+      $job_id eq $Job->{'uuid'}) {
     $latest_refresh = scalar time;
     my $Job2 = api_call("jobs/get", uuid => $jobspec);
     for my $attr ('cancelled_at',
@@ -1344,10 +1311,13 @@ sub check_squeue
   # squeue check interval (15s) this should make the squeue check an
   # infrequent event.
   my $silent_procs = 0;
-  for my $procinfo (values %proc)
+  for my $js (map {$jobstep[$_->{jobstepidx}]} values %proc)
   {
-    my $jobstep = $jobstep[$procinfo->{jobstep}];
-    if ($jobstep->{stderr_at} < $last_squeue_check)
+    if (!exists($js->{stderr_at}))
+    {
+      $js->{stderr_at} = 0;
+    }
+    if ($js->{stderr_at} < $last_squeue_check)
     {
       $silent_procs++;
     }
@@ -1357,16 +1327,16 @@ sub check_squeue
   # use killem() on procs whose killtime is reached
   while (my ($pid, $procinfo) = each %proc)
   {
-    my $jobstep = $jobstep[$procinfo->{jobstep}];
+    my $js = $jobstep[$procinfo->{jobstepidx}];
     if (exists $procinfo->{killtime}
         && $procinfo->{killtime} <= time
-        && $jobstep->{stderr_at} < $last_squeue_check)
+        && $js->{stderr_at} < $last_squeue_check)
     {
       my $sincewhen = "";
-      if ($jobstep->{stderr_at}) {
-        $sincewhen = " in last " . (time - $jobstep->{stderr_at}) . "s";
+      if ($js->{stderr_at}) {
+        $sincewhen = " in last " . (time - $js->{stderr_at}) . "s";
       }
-      Log($procinfo->{jobstep}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
+      Log($procinfo->{jobstepidx}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
       killem ($pid);
     }
   }
@@ -1416,7 +1386,7 @@ sub check_squeue
       # error/delay has caused the task to die without notifying srun,
       # and we'll kill srun ourselves.
       $procinfo->{killtime} = time + 30;
-      Log($procinfo->{jobstep}, "notice: task is not in slurm queue but srun process $pid has not exited");
+      Log($procinfo->{jobstepidx}, "notice: task is not in slurm queue but srun process $pid has not exited");
     }
   }
 }
@@ -1435,70 +1405,99 @@ sub release_allocation
 sub readfrompipes
 {
   my $gotsome = 0;
-  foreach my $job (keys %reader)
+  my %fd_job;
+  my $sel = IO::Select->new();
+  foreach my $jobstepidx (keys %reader)
+  {
+    my $fd = $reader{$jobstepidx};
+    $sel->add($fd);
+    $fd_job{$fd} = $jobstepidx;
+
+    if (my $stdout_fd = $jobstep[$jobstepidx]->{stdout_r}) {
+      $sel->add($stdout_fd);
+      $fd_job{$stdout_fd} = $jobstepidx;
+    }
+  }
+  # select on all reader fds with 0.1s timeout
+  my @ready_fds = $sel->can_read(0.1);
+  foreach my $fd (@ready_fds)
   {
     my $buf;
-    if (0 < sysread ($reader{$job}, $buf, 65536))
+    if (0 < sysread ($fd, $buf, 65536))
     {
+      $gotsome = 1;
       print STDERR $buf if $ENV{CRUNCH_DEBUG};
-      $jobstep[$job]->{stderr_at} = time;
-      $jobstep[$job]->{stderr} .= $buf;
+
+      my $jobstepidx = $fd_job{$fd};
+      if ($jobstep[$jobstepidx]->{stdout_r} == $fd) {
+        $jobstep[$jobstepidx]->{stdout_captured} .= $buf;
+        next;
+      }
+
+      $jobstep[$jobstepidx]->{stderr_at} = time;
+      $jobstep[$jobstepidx]->{stderr} .= $buf;
 
       # Consume everything up to the last \n
-      preprocess_stderr ($job);
+      preprocess_stderr ($jobstepidx);
 
-      if (length ($jobstep[$job]->{stderr}) > 16384)
+      if (length ($jobstep[$jobstepidx]->{stderr}) > 16384)
       {
         # If we get a lot of stderr without a newline, chop off the
         # front to avoid letting our buffer grow indefinitely.
-        substr ($jobstep[$job]->{stderr},
-                0, length($jobstep[$job]->{stderr}) - 8192) = "";
+        substr ($jobstep[$jobstepidx]->{stderr},
+                0, length($jobstep[$jobstepidx]->{stderr}) - 8192) = "";
       }
-      $gotsome = 1;
     }
   }
   return $gotsome;
 }
 
 
+# Consume all full lines of stderr for a jobstep. Everything after the
+# last newline will remain in $jobstep[$jobstepidx]->{stderr} after
+# returning.
 sub preprocess_stderr
 {
-  my $job = shift;
+  my $jobstepidx = shift;
 
-  while ($jobstep[$job]->{stderr} =~ /^(.*?)\n/) {
+  while ($jobstep[$jobstepidx]->{stderr} =~ /^(.*?)\n/) {
     my $line = $1;
-    substr $jobstep[$job]->{stderr}, 0, 1+length($line), "";
-    Log ($job, "stderr $line");
+    substr $jobstep[$jobstepidx]->{stderr}, 0, 1+length($line), "";
+    Log ($jobstepidx, "stderr $line");
     if ($line =~ /srun: error: (SLURM job $ENV{SLURM_JOB_ID} has expired|Unable to confirm allocation for job $ENV{SLURM_JOB_ID})/) {
       # whoa.
       $main::please_freeze = 1;
     }
+    elsif (!exists $jobstep[$jobstepidx]->{slotindex}) {
+      # Skip the following tempfail checks if this srun proc isn't
+      # attached to a particular worker slot.
+    }
     elsif ($line =~ /srun: error: (Node failure on|Aborting, .*\bio error\b)/) {
-      my $job_slot_index = $jobstep[$job]->{slotindex};
+      my $job_slot_index = $jobstep[$jobstepidx]->{slotindex};
       $slot[$job_slot_index]->{node}->{fail_count}++;
-      $jobstep[$job]->{tempfail} = 1;
+      $jobstep[$jobstepidx]->{tempfail} = 1;
       ban_node_by_slot($job_slot_index);
     }
     elsif ($line =~ /srun: error: (Unable to create job step|.*: Communication connection failure)/) {
-      $jobstep[$job]->{tempfail} = 1;
-      ban_node_by_slot($jobstep[$job]->{slotindex});
+      $jobstep[$jobstepidx]->{tempfail} = 1;
+      ban_node_by_slot($jobstep[$jobstepidx]->{slotindex});
     }
-    elsif ($line =~ /arvados\.errors\.Keep/) {
-      $jobstep[$job]->{tempfail} = 1;
+    elsif ($line =~ /\bKeep(Read|Write|Request)Error:/) {
+      $jobstep[$jobstepidx]->{tempfail} = 1;
     }
   }
 }
 
 
-sub process_stderr
+sub process_stderr_final
 {
-  my $job = shift;
-  my $task_success = shift;
-  preprocess_stderr ($job);
+  my $jobstepidx = shift;
+  preprocess_stderr ($jobstepidx);
 
   map {
-    Log ($job, "stderr $_");
-  } split ("\n", $jobstep[$job]->{stderr});
+    Log ($jobstepidx, "stderr $_");
+  } split ("\n", $jobstep[$jobstepidx]->{stderr});
+  $jobstep[$jobstepidx]->{stderr} = '';
 }
 
 sub fetch_block
@@ -1636,7 +1635,7 @@ sub killem
     }
     if (!exists $proc{$_}->{"sent_$sig"})
     {
-      Log ($proc{$_}->{jobstep}, "sending 2x signal $sig to pid $_");
+      Log ($proc{$_}->{jobstepidx}, "sending 2x signal $sig to pid $_");
       kill $sig, $_;
       select (undef, undef, undef, 0.1);
       if ($sig == 2)
@@ -1760,16 +1759,21 @@ sub log_writer_is_active() {
   return $log_pipe_pid;
 }
 
-sub Log                                # ($jobstep_id, $logmessage)
+sub Log                                # ($jobstepidx, $logmessage)
 {
-  if ($_[1] =~ /\n/) {
+  my ($jobstepidx, $logmessage) = @_;
+  if ($logmessage =~ /\n/) {
     for my $line (split (/\n/, $_[1])) {
-      Log ($_[0], $line);
+      Log ($jobstepidx, $line);
     }
     return;
   }
   my $fh = select STDERR; $|=1; select $fh;
-  my $message = sprintf ("%s %d %s %s", $job_id, $$, @_);
+  my $task_qseq = '';
+  if (defined($jobstepidx) && exists($jobstep[$jobstepidx]->{arvados_task})) {
+    $task_qseq = $jobstepidx;
+  }
+  my $message = sprintf ("%s %d %s %s", $job_id, $$, $task_qseq, $logmessage);
   $message =~ s{([^ -\176])}{"\\" . sprintf ("%03o", ord($1))}ge;
   $message .= "\n";
   my $datetime;
@@ -1893,6 +1897,83 @@ sub freezeunquote
 }
 
 
+sub srun_sync
+{
+  my $srunargs = shift;
+  my $execargs = shift;
+  my $opts = shift || {};
+  my $stdin = shift;
+
+  my $label = exists $opts->{label} ? $opts->{label} : "@$execargs";
+  Log (undef, "$label: start");
+
+  my ($stderr_r, $stderr_w);
+  pipe $stderr_r, $stderr_w or croak("pipe() failed: $!");
+
+  my ($stdout_r, $stdout_w);
+  pipe $stdout_r, $stdout_w or croak("pipe() failed: $!");
+
+  my $srunpid = fork();
+  if ($srunpid == 0)
+  {
+    close($stderr_r);
+    close($stdout_r);
+    fcntl($stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
+    fcntl($stdout_w, F_SETFL, 0) or croak($!);
+    open(STDERR, ">&", $stderr_w);
+    open(STDOUT, ">&", $stdout_w);
+    srun ($srunargs, $execargs, $opts, $stdin);
+    exit (1);
+  }
+  close($stderr_w);
+  close($stdout_w);
+
+  set_nonblocking($stderr_r);
+  set_nonblocking($stdout_r);
+
+  # Add entries to @jobstep and %proc so check_squeue() and
+  # freeze_if_want_freeze() can treat it like a job task process.
+  push @jobstep, {
+    stderr => '',
+    stderr_at => 0,
+    stderr_captured => '',
+    stdout_r => $stdout_r,
+    stdout_captured => '',
+  };
+  my $jobstepidx = $#jobstep;
+  $proc{$srunpid} = {
+    jobstepidx => $jobstepidx,
+  };
+  $reader{$jobstepidx} = $stderr_r;
+
+  while ($srunpid != waitpid ($srunpid, WNOHANG)) {
+    my $busy = readfrompipes();
+    if (!$busy || ($latest_refresh + 2 < scalar time)) {
+      check_refresh_wanted();
+      check_squeue();
+    }
+    if (!$busy) {
+      select(undef, undef, undef, 0.1);
+    }
+    killem(keys %proc) if $main::please_freeze;
+  }
+  my $exited = $?;
+
+  1 while readfrompipes();
+  process_stderr_final ($jobstepidx);
+
+  Log (undef, "$label: exit ".exit_status_s($exited));
+
+  close($stdout_r);
+  close($stderr_r);
+  delete $proc{$srunpid};
+  delete $reader{$jobstepidx};
+
+  my $j = pop @jobstep;
+  return ($exited, $j->{stdout_captured}, $j->{stderr_captured});
+}
+
+
 sub srun
 {
   my $srunargs = shift;
index 961ac28a9f97888b8230e6bb27e9dabef31f58b0..52a7353f67a9e5f0040b31f083a5f719164a063c 100755 (executable)
@@ -1,3 +1,3 @@
 #!/bin/sh
 echo >&2 Failing mount stub was called
-exit 1
+exit 44
index 22d756a8c81f8ae64d8602925ab25fda9bbe156b..0fbff2e6de670da112461da20107fa5eeaae1e9b 100644 (file)
@@ -91,7 +91,7 @@ class TestCrunchJob < Minitest::Test
       tryjobrecord j, binstubs: ['clean_fail']
     end
     assert_match /Failing mount stub was called/, err
-    assert_match /Clean work dirs: exit 1\n$/, err
+    assert_match /clean work dirs: exit 44\n$/, err
     assert_equal SPECIAL_EXIT[:EX_RETRY_UNLOCKED], $?.exitstatus
   end
 
index 4198c34482ccd0f6fa54daa9e5a9d1d143db2ee1..885497171e8828852c27f0ece6c455cc75c4ee49 100644 (file)
@@ -5,21 +5,35 @@ import arvados
 import arvados.events
 import arvados.commands.keepdocker
 import arvados.commands.run
+import arvados.collection
+import arvados.util
 import cwltool.draft2tool
 import cwltool.workflow
 import cwltool.main
+from cwltool.process import shortname
 import threading
 import cwltool.docker
 import fnmatch
 import logging
 import re
 import os
+import sys
 
 from cwltool.process import get_feature
+from arvados.api import OrderedJsonModel
 
 logger = logging.getLogger('arvados.cwl-runner')
 logger.setLevel(logging.INFO)
 
+crunchrunner_pdh = "83db29f08544e1c319572a6bd971088a+140"
+crunchrunner_download = "https://cloud.curoverse.com/collections/download/qr1hi-4zz18-n3m1yxd0vx78jic/1i1u2qtq66k1atziv4ocfgsg5nu5tj11n4r6e0bhvjg03rix4m/crunchrunner"
+certs_download = "https://cloud.curoverse.com/collections/download/qr1hi-4zz18-n3m1yxd0vx78jic/1i1u2qtq66k1atziv4ocfgsg5nu5tj11n4r6e0bhvjg03rix4m/ca-certificates.crt"
+
+tmpdirre = re.compile(r"^\S+ \S+ \d+ \d+ stderr \S+ \S+ crunchrunner: \$\(task\.tmpdir\)=(.*)")
+outdirre = re.compile(r"^\S+ \S+ \d+ \d+ stderr \S+ \S+ crunchrunner: \$\(task\.outdir\)=(.*)")
+keepre = re.compile(r"^\S+ \S+ \d+ \d+ stderr \S+ \S+ crunchrunner: \$\(task\.keep\)=(.*)")
+
+
 def arv_docker_get_image(api_client, dockerRequirement, pull_image):
     if "dockerImageId" not in dockerRequirement and "dockerPull" in dockerRequirement:
         dockerRequirement["dockerImageId"] = dockerRequirement["dockerPull"]
@@ -37,6 +51,7 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image):
         args = [image_name]
         if image_tag:
             args.append(image_tag)
+        logger.info("Uploading Docker image %s", ":".join(args))
         arvados.commands.keepdocker.main(args)
 
     return dockerRequirement["dockerImageId"]
@@ -140,15 +155,21 @@ class ArvadosJob(object):
         try:
             response = self.arvrunner.api.jobs().create(body={
                 "script": "crunchrunner",
-                "repository": kwargs["repository"],
-                "script_version": "master",
-                "script_parameters": {"tasks": [script_parameters]},
+                "repository": "arvados",
+                "script_version": "8488-cwl-crunchrunner-collection",
+                "script_parameters": {"tasks": [script_parameters], "crunchrunner": crunchrunner_pdh+"/crunchrunner"},
                 "runtime_constraints": runtime_constraints
-            }, find_or_create=kwargs.get("enable_reuse", True)).execute()
+            }, find_or_create=kwargs.get("enable_reuse", True)).execute(num_retries=self.arvrunner.num_retries)
 
             self.arvrunner.jobs[response["uuid"]] = self
 
-            logger.info("Job %s is %s", response["uuid"], response["state"])
+            self.arvrunner.pipeline["components"][self.name] = {"job": response}
+            self.arvrunner.pipeline = self.arvrunner.api.pipeline_instances().update(uuid=self.arvrunner.pipeline["uuid"],
+                                                                                     body={
+                                                                                         "components": self.arvrunner.pipeline["components"]
+                                                                                     }).execute(num_retries=self.arvrunner.num_retries)
+
+            logger.info("Job %s (%s) is %s", self.name, response["uuid"], response["state"])
 
             if response["state"] in ("Complete", "Failed", "Cancelled"):
                 self.done(response)
@@ -156,8 +177,19 @@ class ArvadosJob(object):
             logger.error("Got error %s" % str(e))
             self.output_callback({}, "permanentFail")
 
+    def update_pipeline_component(self, record):
+        self.arvrunner.pipeline["components"][self.name] = {"job": record}
+        self.arvrunner.pipeline = self.arvrunner.api.pipeline_instances().update(uuid=self.arvrunner.pipeline["uuid"],
+                                                                                 body={
+                                                                                    "components": self.arvrunner.pipeline["components"]
+                                                                                 }).execute(num_retries=self.arvrunner.num_retries)
 
     def done(self, record):
+        try:
+            self.update_pipeline_component(record)
+        except:
+            pass
+
         try:
             if record["state"] == "Complete":
                 processStatus = "success"
@@ -166,7 +198,28 @@ class ArvadosJob(object):
 
             try:
                 outputs = {}
-                outputs = self.collect_outputs("keep:" + record["output"])
+                if record["output"]:
+                    logc = arvados.collection.Collection(record["log"])
+                    log = logc.open(logc.keys()[0])
+                    tmpdir = None
+                    outdir = None
+                    keepdir = None
+                    for l in log.readlines():
+                        g = tmpdirre.match(l)
+                        if g:
+                            tmpdir = g.group(1)
+                        g = outdirre.match(l)
+                        if g:
+                            outdir = g.group(1)
+                        g = keepre.match(l)
+                        if g:
+                            keepdir = g.group(1)
+                        if tmpdir and outdir and keepdir:
+                            break
+
+                    self.builder.outdir = outdir
+                    self.builder.pathmapper.keepdir = keepdir
+                    outputs = self.collect_outputs("keep:" + record["output"])
             except Exception as e:
                 logger.exception("Got exception while collecting job outputs:")
                 processStatus = "permanentFail"
@@ -188,7 +241,7 @@ class ArvPathMapper(cwltool.pathmapper.PathMapper):
                 self._pathmap[src] = (src, "$(task.keep)/%s" % src[5:])
             if src not in self._pathmap:
                 ab = cwltool.pathmapper.abspath(src, basedir)
-                st = arvados.commands.run.statfile("", ab)
+                st = arvados.commands.run.statfile("", ab, fnPattern="$(task.keep)/%s/%s")
                 if kwargs.get("conformance_test"):
                     self._pathmap[src] = (src, ab)
                 elif isinstance(st, arvados.commands.run.UploadFile):
@@ -209,11 +262,20 @@ class ArvPathMapper(cwltool.pathmapper.PathMapper):
             arvrunner.add_uploaded(src, (ab, st.fn))
             self._pathmap[src] = (ab, st.fn)
 
+        self.keepdir = None
+
+    def reversemap(self, target):
+        if target.startswith("keep:"):
+            return target
+        elif self.keepdir and target.startswith(self.keepdir):
+            return "keep:" + target[len(self.keepdir)+1:]
+        else:
+            return super(ArvPathMapper, self).reversemap(target)
 
 
 class ArvadosCommandTool(cwltool.draft2tool.CommandLineTool):
     def __init__(self, arvrunner, toolpath_object, **kwargs):
-        super(ArvadosCommandTool, self).__init__(toolpath_object, outdir="$(task.outdir)", tmpdir="$(task.tmpdir)", **kwargs)
+        super(ArvadosCommandTool, self).__init__(toolpath_object, **kwargs)
         self.arvrunner = arvrunner
 
     def makeJobRunner(self):
@@ -231,6 +293,7 @@ class ArvCwlRunner(object):
         self.cond = threading.Condition(self.lock)
         self.final_output = None
         self.uploaded = {}
+        self.num_retries = 4
 
     def arvMakeTool(self, toolpath_object, **kwargs):
         if "class" in toolpath_object and toolpath_object["class"] == "CommandLineTool":
@@ -241,22 +304,33 @@ class ArvCwlRunner(object):
     def output_callback(self, out, processStatus):
         if processStatus == "success":
             logger.info("Overall job status is %s", processStatus)
+            self.api.pipeline_instances().update(uuid=self.pipeline["uuid"],
+                                                 body={"state": "Complete"}).execute(num_retries=self.num_retries)
+
         else:
             logger.warn("Overall job status is %s", processStatus)
+            self.api.pipeline_instances().update(uuid=self.pipeline["uuid"],
+                                                 body={"state": "Failed"}).execute(num_retries=self.num_retries)
         self.final_output = out
 
+
     def on_message(self, event):
         if "object_uuid" in event:
                 if event["object_uuid"] in self.jobs and event["event_type"] == "update":
                     if event["properties"]["new_attributes"]["state"] == "Running" and self.jobs[event["object_uuid"]].running is False:
-                        logger.info("Job %s is Running", event["object_uuid"])
+                        uuid = event["object_uuid"]
                         with self.lock:
-                            self.jobs[event["object_uuid"]].running = True
+                            j = self.jobs[uuid]
+                            logger.info("Job %s (%s) is Running", j.name, uuid)
+                            j.running = True
+                            j.update_pipeline_component(event["properties"]["new_attributes"])
                     elif event["properties"]["new_attributes"]["state"] in ("Complete", "Failed", "Cancelled"):
-                        logger.info("Job %s is %s", event["object_uuid"], event["properties"]["new_attributes"]["state"])
+                        uuid = event["object_uuid"]
                         try:
                             self.cond.acquire()
-                            self.jobs[event["object_uuid"]].done(event["properties"]["new_attributes"])
+                            j = self.jobs[uuid]
+                            logger.info("Job %s (%s) is %s", j.name, uuid, event["properties"]["new_attributes"]["state"])
+                            j.done(event["properties"]["new_attributes"])
                             self.cond.notify()
                         finally:
                             self.cond.release()
@@ -270,52 +344,84 @@ class ArvCwlRunner(object):
     def arvExecutor(self, tool, job_order, input_basedir, args, **kwargs):
         events = arvados.events.subscribe(arvados.api('v1'), [["object_uuid", "is_a", "arvados#job"]], self.on_message)
 
+        try:
+            self.api.collections().get(uuid=crunchrunner_pdh).execute()
+        except arvados.errors.ApiError as e:
+            import httplib2
+            h = httplib2.Http(ca_certs=arvados.util.ca_certs_path())
+            resp, content = h.request(crunchrunner_download, "GET")
+            resp2, content2 = h.request(certs_download, "GET")
+            with arvados.collection.Collection() as col:
+                with col.open("crunchrunner", "w") as f:
+                    f.write(content)
+                with col.open("ca-certificates.crt", "w") as f:
+                    f.write(content2)
+
+                col.save_new("crunchrunner binary", ensure_unique_name=True)
+
         self.fs_access = CollectionFsAccess(input_basedir)
 
         kwargs["fs_access"] = self.fs_access
         kwargs["enable_reuse"] = args.enable_reuse
-        kwargs["repository"] = args.repository
+
+        kwargs["outdir"] = "$(task.outdir)"
+        kwargs["tmpdir"] = "$(task.tmpdir)"
 
         if kwargs.get("conformance_test"):
             return cwltool.main.single_job_executor(tool, job_order, input_basedir, args, **kwargs)
         else:
-            jobiter = tool.job(job_order,
-                            input_basedir,
-                            self.output_callback,
-                            **kwargs)
-
-            for runnable in jobiter:
-                if runnable:
-                    with self.lock:
-                        runnable.run(**kwargs)
-                else:
-                    if self.jobs:
-                        try:
-                            self.cond.acquire()
-                            self.cond.wait()
-                        finally:
-                            self.cond.release()
-                    else:
-                        logger.error("Workflow cannot make any more progress.")
-                        break
-
-            while self.jobs:
-                try:
-                    self.cond.acquire()
-                    self.cond.wait()
-                finally:
-                    self.cond.release()
+            self.pipeline = self.api.pipeline_instances().create(body={"name": shortname(tool.tool["id"]),
+                                                                   "components": {},
+                                                                   "state": "RunningOnClient"}).execute(num_retries=self.num_retries)
 
-            events.close()
+            jobiter = tool.job(job_order,
+                               input_basedir,
+                               self.output_callback,
+                               docker_outdir="$(task.outdir)",
+                               **kwargs)
 
-            if self.final_output is None:
-                raise cwltool.workflow.WorkflowException("Workflow did not return a result.")
+            try:
+                for runnable in jobiter:
+                    if runnable:
+                        with self.lock:
+                            runnable.run(**kwargs)
+                    else:
+                        if self.jobs:
+                            try:
+                                self.cond.acquire()
+                                self.cond.wait(1)
+                            except RuntimeError:
+                                pass
+                            finally:
+                                self.cond.release()
+                        else:
+                            logger.error("Workflow cannot make any more progress.")
+                            break
+
+                while self.jobs:
+                    try:
+                        self.cond.acquire()
+                        self.cond.wait(1)
+                    except RuntimeError:
+                        pass
+                    finally:
+                        self.cond.release()
+
+                events.close()
+
+                if self.final_output is None:
+                    raise cwltool.workflow.WorkflowException("Workflow did not return a result.")
+
+            except:
+                if sys.exc_info()[0] is not KeyboardInterrupt:
+                    logger.exception("Caught unhandled exception, marking pipeline as failed")
+                self.api.pipeline_instances().update(uuid=self.pipeline["uuid"],
+                                                     body={"state": "Failed"}).execute(num_retries=self.num_retries)
 
             return self.final_output
 
 
 def main(args, stdout, stderr, api_client=None):
-    runner = ArvCwlRunner(api_client=arvados.api('v1'))
     args.insert(0, "--leave-outputs")
     parser = cwltool.main.arg_parser()
     exgroup = parser.add_mutually_exclusive_group()
@@ -326,6 +432,10 @@ def main(args, stdout, stderr, api_client=None):
                         default=False, dest="enable_reuse",
                         help="")
 
-    parser.add_argument('--repository', type=str, default="peter/crunchrunner", help="Repository containing the 'crunchrunner' program.")
+    try:
+        runner = ArvCwlRunner(api_client=arvados.api('v1', model=OrderedJsonModel()))
+    except Exception as e:
+        logger.error(e)
+        return 1
 
     return cwltool.main.main(args, executor=runner.arvExecutor, makeTool=runner.arvMakeTool, parser=parser)
index bcf6b963830aca8570545045ab112ee79aa8216d..cdbb41be17160861e672eb94995ad315f7b6e461 100644 (file)
@@ -30,8 +30,8 @@ setup(name='arvados-cwl-runner',
           'bin/arvados-cwl-runner'
       ],
       install_requires=[
-          'cwltool>=1.0.20151026181844',
-          'arvados-python-client>=0.1.20151023214338'
+          'cwltool>=1.0.20160302134341',
+          'arvados-python-client>=0.1.20160219154918'
       ],
       zip_safe=True,
       cmdclass={'egg_info': tagger},
index 8e24e18fda845866909aff7f6bba1bd02234d53c..226cf9122be430d8c08c03c595447a3448a19a22 100644 (file)
@@ -1,13 +1,17 @@
 package main
 
 import (
+       "crypto/x509"
        "fmt"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "io/ioutil"
        "log"
+       "net/http"
        "os"
        "os/exec"
        "os/signal"
+       "path"
        "strings"
        "syscall"
 )
@@ -209,6 +213,10 @@ func runner(api IArvadosClient,
                "$(task.outdir)": outdir,
                "$(task.keep)":   keepmount}
 
+       log.Printf("crunchrunner: $(task.tmpdir)=%v", tmpdir)
+       log.Printf("crunchrunner: $(task.outdir)=%v", outdir)
+       log.Printf("crunchrunner: $(task.keep)=%v", keepmount)
+
        // Set up subprocess
        for k, v := range taskp.Command {
                taskp.Command[k] = substitute(v, replacements)
@@ -317,6 +325,15 @@ func main() {
                log.Fatal(err)
        }
 
+       certpath := path.Join(path.Dir(os.Args[0]), "ca-certificates.crt")
+       certdata, err := ioutil.ReadFile(certpath)
+       if err == nil {
+               log.Printf("Using TLS certificates at %v", certpath)
+               certs := x509.NewCertPool()
+               certs.AppendCertsFromPEM(certdata)
+               api.Client.Transport.(*http.Transport).TLSClientConfig.RootCAs = certs
+       }
+
        jobUuid := os.Getenv("JOB_UUID")
        taskUuid := os.Getenv("TASK_UUID")
        tmpdir := os.Getenv("TASK_WORK")
diff --git a/sdk/perl/.gitignore b/sdk/perl/.gitignore
new file mode 100644 (file)
index 0000000..7c32f55
--- /dev/null
@@ -0,0 +1 @@
+install
index e2e8ba19b64823af233c25eba1f90880dc9f4067..f24b1ed8142d3e3678ff819bfe4823ebe2add874 100644 (file)
@@ -207,6 +207,7 @@ def api(version=None, cache=True, host=None, token=None, insecure=False, **kwarg
 
     svc = apiclient_discovery.build('arvados', version, **kwargs)
     svc.api_token = token
+    svc.insecure = insecure
     kwargs['http'].max_request_size = svc._rootDesc.get('maxRequestSize', 0)
     kwargs['http'].cache = None
     return svc
index a10eb2b348aff4499648d6738b8042e5a9d4fa11..2ee97b9867036ded89244d1f11c2294efccb9ef3 100755 (executable)
@@ -24,6 +24,7 @@ import shutil
 import sys
 import logging
 import tempfile
+import urlparse
 
 import arvados
 import arvados.config
@@ -87,6 +88,13 @@ def main():
     copy_opts.add_argument(
         '--project-uuid', dest='project_uuid',
         help='The UUID of the project at the destination to which the pipeline should be copied.')
+    copy_opts.add_argument(
+        '--allow-git-http-src', action="store_true",
+        help='Allow cloning git repositories over insecure http')
+    copy_opts.add_argument(
+        '--allow-git-http-dst', action="store_true",
+        help='Allow pushing git repositories over insecure http')
+
     copy_opts.add_argument(
         'object_uuid',
         help='The UUID of the object to be copied.')
@@ -583,6 +591,55 @@ def copy_collection(obj_uuid, src, dst, args):
     c['manifest_text'] = dst_manifest
     return create_collection_from(c, src, dst, args)
 
+def select_git_url(api, repo_name, retries, allow_insecure_http, allow_insecure_http_opt):
+    r = api.repositories().list(
+        filters=[['name', '=', repo_name]]).execute(num_retries=retries)
+    if r['items_available'] != 1:
+        raise Exception('cannot identify repo {}; {} repos found'
+                        .format(repo_name, r['items_available']))
+
+    https_url = [c for c in r['items'][0]["clone_urls"] if c.startswith("https:")]
+    http_url = [c for c in r['items'][0]["clone_urls"] if c.startswith("http:")]
+    other_url = [c for c in r['items'][0]["clone_urls"] if not c.startswith("http")]
+
+    priority = https_url + other_url + http_url
+
+    git_config = []
+    git_url = None
+    for url in priority:
+        if url.startswith("http"):
+            u = urlparse.urlsplit(url)
+            baseurl = urlparse.urlunsplit((u.scheme, u.netloc, "", "", ""))
+            git_config = ["-c", "credential.%s/.username=none" % baseurl,
+                          "-c", "credential.%s/.helper=!cred(){ cat >/dev/null; if [ \"$1\" = get ]; then echo password=$ARVADOS_API_TOKEN; fi; };cred" % baseurl]
+        else:
+            git_config = []
+
+        try:
+            logger.debug("trying %s", url)
+            arvados.util.run_command(["git"] + git_config + ["ls-remote", url],
+                                      env={"HOME": os.environ["HOME"],
+                                           "ARVADOS_API_TOKEN": api.api_token,
+                                           "GIT_ASKPASS": "/bin/false"})
+        except arvados.errors.CommandFailedError:
+            pass
+        else:
+            git_url = url
+            break
+
+    if not git_url:
+        raise Exception('Cannot access git repository, tried {}'
+                        .format(priority))
+
+    if git_url.startswith("http:"):
+        if allow_insecure_http:
+            logger.warn("Using insecure git url %s but will allow this because %s", git_url, allow_insecure_http_opt)
+        else:
+            raise Exception("Refusing to use insecure git url %s, use %s if you really want this." % (git_url, allow_insecure_http_opt))
+
+    return (git_url, git_config)
+
+
 # copy_git_repo(src_git_repo, src, dst, dst_git_repo, script_version, args)
 #
 #    Copies commits from git repository 'src_git_repo' on Arvados
@@ -600,21 +657,12 @@ def copy_collection(obj_uuid, src, dst, args):
 #
 def copy_git_repo(src_git_repo, src, dst, dst_git_repo, script_version, args):
     # Identify the fetch and push URLs for the git repositories.
-    r = src.repositories().list(
-        filters=[['name', '=', src_git_repo]]).execute(num_retries=args.retries)
-    if r['items_available'] != 1:
-        raise Exception('cannot identify source repo {}; {} repos found'
-                        .format(src_git_repo, r['items_available']))
-    src_git_url = r['items'][0]['fetch_url']
-    logger.debug('src_git_url: {}'.format(src_git_url))
 
-    r = dst.repositories().list(
-        filters=[['name', '=', dst_git_repo]]).execute(num_retries=args.retries)
-    if r['items_available'] != 1:
-        raise Exception('cannot identify destination repo {}; {} repos found'
-                        .format(dst_git_repo, r['items_available']))
-    dst_git_push_url  = r['items'][0]['push_url']
-    logger.debug('dst_git_push_url: {}'.format(dst_git_push_url))
+    (src_git_url, src_git_config) = select_git_url(src, src_git_repo, args.retries, args.allow_git_http_src, "--allow-git-http-src")
+    (dst_git_url, dst_git_config) = select_git_url(dst, dst_git_repo, args.retries, args.allow_git_http_dst, "--allow-git-http-dst")
+
+    logger.debug('src_git_url: {}'.format(src_git_url))
+    logger.debug('dst_git_url: {}'.format(dst_git_url))
 
     dst_branch = re.sub(r'\W+', '_', "{}_{}".format(src_git_url, script_version))
 
@@ -622,17 +670,23 @@ def copy_git_repo(src_git_repo, src, dst, dst_git_repo, script_version, args):
     if src_git_repo not in local_repo_dir:
         local_repo_dir[src_git_repo] = tempfile.mkdtemp()
         arvados.util.run_command(
-            ["git""clone", "--bare", src_git_url,
+            ["git"] + src_git_config + ["clone", "--bare", src_git_url,
              local_repo_dir[src_git_repo]],
-            cwd=os.path.dirname(local_repo_dir[src_git_repo]))
+            cwd=os.path.dirname(local_repo_dir[src_git_repo]),
+            env={"HOME": os.environ["HOME"],
+                 "ARVADOS_API_TOKEN": src.api_token,
+                 "GIT_ASKPASS": "/bin/false"})
         arvados.util.run_command(
-            ["git", "remote", "add", "dst", dst_git_push_url],
+            ["git", "remote", "add", "dst", dst_git_url],
             cwd=local_repo_dir[src_git_repo])
     arvados.util.run_command(
         ["git", "branch", dst_branch, script_version],
         cwd=local_repo_dir[src_git_repo])
-    arvados.util.run_command(["git", "push", "dst", dst_branch],
-                             cwd=local_repo_dir[src_git_repo])
+    arvados.util.run_command(["git"] + dst_git_config + ["push", "dst", dst_branch],
+                             cwd=local_repo_dir[src_git_repo],
+                             env={"HOME": os.environ["HOME"],
+                                  "ARVADOS_API_TOKEN": dst.api_token,
+                                  "GIT_ASKPASS": "/bin/false"})
 
 def copy_docker_images(pipeline, src, dst, args):
     """Copy any docker images named in the pipeline components'
index 5c8bced513c160dd64e2cdbf3f4433d72ce89fe6..ef39be81a4650cda86e20c6d13a7d23848398ecb 100644 (file)
@@ -81,7 +81,7 @@ def determine_project(root, current_user):
 # ArvFile() (file already exists in a collection), UploadFile() (file needs to
 # be uploaded to a collection), or simply returns prefix+fn (which yields the
 # original parameter string).
-def statfile(prefix, fn):
+def statfile(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)"):
     absfn = os.path.abspath(fn)
     if os.path.exists(absfn):
         st = os.stat(absfn)
@@ -89,7 +89,7 @@ def statfile(prefix, fn):
             sp = os.path.split(absfn)
             (pdh, branch) = is_in_collection(sp[0], sp[1])
             if pdh:
-                return ArvFile(prefix, "$(file %s/%s)" % (pdh, branch))
+                return ArvFile(prefix, fnPattern % (pdh, branch))
             else:
                 # trim leading '/' for path prefix test later
                 return UploadFile(prefix, absfn[1:])
@@ -97,7 +97,7 @@ def statfile(prefix, fn):
             sp = os.path.split(absfn)
             (pdh, branch) = is_in_collection(sp[0], sp[1])
             if pdh:
-                return ArvFile(prefix, "$(dir %s/%s/)" % (pdh, branch))
+                return ArvFile(prefix, dirPattern % (pdh, branch))
 
     return prefix+fn
 
index 94b8a9d06cfaec73b718b8514adcd3ba08ab2991..df824a331ea41a2fd702587be9c5d2828884ffb5 100644 (file)
@@ -190,7 +190,10 @@ def subscribe(api, filters, on_event, poll_fallback=15, last_log_id=None):
         return _subscribe_websocket(api, filters, on_event, last_log_id)
 
     try:
-        return _subscribe_websocket(api, filters, on_event, last_log_id)
+        if not config.flag_is_true('ARVADOS_DISABLE_WEBSOCKETS'):
+            return _subscribe_websocket(api, filters, on_event, last_log_id)
+        else:
+            _logger.info("Using polling because ARVADOS_DISABLE_WEBSOCKETS is true")
     except Exception as e:
         _logger.warn("Falling back to polling after websocket error: %s" % e)
     p = PollClient(api, filters, on_event, poll_fallback, last_log_id)
index b6518f95a17495851c72985af0e8b16a97e3f1b6..759e8ff67edf1ec8b99b0de86ee8a3e4602b73b7 100644 (file)
@@ -39,8 +39,9 @@ setup(name='arvados-python-client',
           ('share/doc/arvados-python-client', ['LICENSE-2.0.txt', 'README.rst']),
       ],
       install_requires=[
+          'google-api-python-client==1.4.2',
+          'oauth2client >=1.4.6, <2',
           'ciso8601',
-          'google-api-python-client',
           'httplib2',
           'pycurl >=7.19.5.1, <7.21.5',
           'python-gflags<3.0',
index 4ad5e10faa46b96222d4291596d2f47f686bf18c..29eb939002fa9dab98cb8feff4987d46151d0bc8 100644 (file)
@@ -28,3 +28,6 @@
 # Dev/test SSL certificates
 /self-signed.key
 /self-signed.pem
+
+# Generated git-commit.version file
+/git-commit.version
index 40c9ef6dfc742d5cb5a4256183ed29aff4b8bb30..66916836c23fb9538281400ad84b1f1f672247e2 100644 (file)
@@ -338,6 +338,7 @@ common:
   # "git log".
   source_version: false
 
+  crunch_log_partial_line_throttle_period: 5
 
 development:
   force_ssl: false
index b59279e554643c5015fb371ec3774e34bc28f99f..ce94f737a2467f855a7156ba76873db57cd183ee 100644 (file)
@@ -24,6 +24,7 @@ class CrunchDispatch
 
     @docker_bin = ENV['CRUNCH_JOB_DOCKER_BIN']
     @docker_run_args = ENV['CRUNCH_JOB_DOCKER_RUN_ARGS']
+    @cgroup_root = ENV['CRUNCH_CGROUP_ROOT']
 
     @arvados_internal = Rails.configuration.git_internal_dir
     if not File.exists? @arvados_internal
@@ -384,6 +385,10 @@ class CrunchDispatch
                    '--job', job.uuid,
                    '--git-dir', @arvados_internal]
 
+      if @cgroup_root
+        cmd_args += ['--cgroup-root', @cgroup_root]
+      end
+
       if @docker_bin
         cmd_args += ['--docker-bin', @docker_bin]
       end
@@ -429,6 +434,8 @@ class CrunchDispatch
         log_throttle_bytes_so_far: 0,
         log_throttle_lines_so_far: 0,
         log_throttle_bytes_skipped: 0,
+        log_throttle_partial_line_last_at: Time.new(0),
+        log_throttle_first_partial_line: true,
       }
       i.close
       @todo_job_retries.delete(job.uuid)
@@ -443,9 +450,23 @@ class CrunchDispatch
     message = false
     linesize = line.size
     if running_job[:log_throttle_is_open]
-      running_job[:log_throttle_lines_so_far] += 1
-      running_job[:log_throttle_bytes_so_far] += linesize
-      running_job[:bytes_logged] += linesize
+      partial_line = false
+      skip_counts = false
+      matches = line.match(/^\S+ \S+ \d+ \d+ stderr (.*)/)
+      if matches and matches[1] and matches[1].start_with?('[...]') and matches[1].end_with?('[...]')
+        partial_line = true
+        if Time.now > running_job[:log_throttle_partial_line_last_at] + Rails.configuration.crunch_log_partial_line_throttle_period
+          running_job[:log_throttle_partial_line_last_at] = Time.now
+        else
+          skip_counts = true
+        end
+      end
+
+      if !skip_counts
+        running_job[:log_throttle_lines_so_far] += 1
+        running_job[:log_throttle_bytes_so_far] += linesize
+        running_job[:bytes_logged] += linesize
+      end
 
       if (running_job[:bytes_logged] >
           Rails.configuration.crunch_limit_log_bytes_per_job)
@@ -456,14 +477,18 @@ class CrunchDispatch
       elsif (running_job[:log_throttle_bytes_so_far] >
              Rails.configuration.crunch_log_throttle_bytes)
         remaining_time = running_job[:log_throttle_reset_time] - Time.now
-        message = "Exceeded rate #{Rails.configuration.crunch_log_throttle_bytes} bytes per #{Rails.configuration.crunch_log_throttle_period} seconds (crunch_log_throttle_bytes). Logging will be silenced for the next #{remaining_time.round} seconds.\n"
+        message = "Exceeded rate #{Rails.configuration.crunch_log_throttle_bytes} bytes per #{Rails.configuration.crunch_log_throttle_period} seconds (crunch_log_throttle_bytes). Logging will be silenced for the next #{remaining_time.round} seconds."
         running_job[:log_throttle_is_open] = false
 
       elsif (running_job[:log_throttle_lines_so_far] >
              Rails.configuration.crunch_log_throttle_lines)
         remaining_time = running_job[:log_throttle_reset_time] - Time.now
-        message = "Exceeded rate #{Rails.configuration.crunch_log_throttle_lines} lines per #{Rails.configuration.crunch_log_throttle_period} seconds (crunch_log_throttle_lines), logging will be silenced for the next #{remaining_time.round} seconds.\n"
+        message = "Exceeded rate #{Rails.configuration.crunch_log_throttle_lines} lines per #{Rails.configuration.crunch_log_throttle_period} seconds (crunch_log_throttle_lines), logging will be silenced for the next #{remaining_time.round} seconds."
         running_job[:log_throttle_is_open] = false
+
+      elsif partial_line and running_job[:log_throttle_first_partial_line]
+        running_job[:log_throttle_first_partial_line] = false
+        message = "Rate-limiting partial segments of long lines to one every #{Rails.configuration.crunch_log_partial_line_throttle_period} seconds."
       end
     end
 
@@ -475,8 +500,11 @@ class CrunchDispatch
     if message
       # Yes, write to logs, but use our "rate exceeded" message
       # instead of the log message that exceeded the limit.
+      message += " A complete log is still being written to Keep, and will be available when the job finishes.\n"
       line.replace message
       true
+    elsif partial_line
+      false
     else
       running_job[:log_throttle_is_open]
     end
@@ -501,6 +529,8 @@ class CrunchDispatch
         j[:log_throttle_lines_so_far] = 0
         j[:log_throttle_bytes_skipped] = 0
         j[:log_throttle_is_open] = true
+        j[:log_throttle_partial_line_last_at] = Time.new(0)
+        j[:log_throttle_first_partial_line] = true
       end
 
       j[:buf].each do |stream, streambuf|
index 556a5c047120923aa557a5fe38eab002313aaad3..b1154a8399e478d9fd2147fbb5fdb6cc8d46e016 100644 (file)
@@ -244,6 +244,7 @@ real_log_collection:
   uuid: zzzzz-4zz18-op4e2lbej01tcvu
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
   created_at: 2014-09-01 12:00:00
+  modified_at: 2014-09-01 12:00:00
   portable_data_hash: 0b9a7787660e1fce4a93f33e01376ba6+81
   manifest_text: ". cdd549ae79fe6640fa3d5c6261d8303c+195 0:195:zzzzz-8i9sb-0vsrcqi7whchuil.log.txt\n"
   name: real_log_collection
@@ -278,6 +279,7 @@ collection_owned_by_foo:
   manifest_text: ". 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz\n"
   owner_uuid: zzzzz-tpzed-81hsbo6mk8nl05c
   created_at: 2014-02-03T17:22:54Z
+  modified_at: 2014-02-03T17:22:54Z
   name: collection_owned_by_foo
 
 collection_to_remove_from_subproject:
@@ -287,6 +289,7 @@ collection_to_remove_from_subproject:
   manifest_text: ". 8258b505536a9ab47baa2f4281cb932a+9 0:9:missingno\n"
   owner_uuid: zzzzz-j7d0g-axqo7eu9pwvna1x
   created_at: 2014-10-15T10:45:00
+  modified_at: 2014-10-15T10:45:00
   name: Collection to remove from subproject
 
 collection_with_files_in_subdir:
@@ -307,6 +310,8 @@ graph_test_collection1:
   portable_data_hash: fa7aeb5140e2848d39b416daeef4ffc5+45
   manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n"
   name: bar_file
+  created_at: 2014-02-03T17:22:54Z
+  modified_at: 2014-02-03T17:22:54Z
 
 graph_test_collection2:
   uuid: zzzzz-4zz18-uukreo9rbgwsujx
@@ -314,6 +319,8 @@ graph_test_collection2:
   portable_data_hash: 65b17c95fdbc9800fc48acda4e9dcd0b+93
   manifest_text: ". 6a4ff0499484c6c79c95cd8c566bd25f+249025 0:249025:FOO_General_Public_License,_version_3.pdf\n"
   name: "FOO General Public License, version 3"
+  created_at: 2014-02-03T17:22:54Z
+  modified_at: 2014-02-03T17:22:54Z
 
 graph_test_collection3:
   uuid: zzzzz-4zz18-uukreo9rbgwsujj
@@ -321,6 +328,8 @@ graph_test_collection3:
   portable_data_hash: ea10d51bcf88862dbcc36eb292017dfd+45
   manifest_text: ". 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz\n"
   name: "baz file"
+  created_at: 2014-02-03T17:22:54Z
+  modified_at: 2014-02-03T17:22:54Z
 
 collection_1_owned_by_fuse:
   uuid: zzzzz-4zz18-ovx05bfzormx3bg
@@ -557,6 +566,7 @@ collection_<%=i%>_of_10:
   uuid: zzzzz-4zz18-10gneyn6brkx<%= i.to_s.rjust(3, '0') %>
   owner_uuid: zzzzz-j7d0g-0010collections
   created_at: <%= i.minute.ago.to_s(:db) %>
+  modified_at: <%= i.minute.ago.to_s(:db) %>
 <% end %>
 
 # collections in project_with_201_collections
@@ -568,6 +578,7 @@ collection_<%=i%>_of_201:
   uuid: zzzzz-4zz18-201gneyn6brd<%= i.to_s.rjust(3, '0') %>
   owner_uuid: zzzzz-j7d0g-0201collections
   created_at: <%= i.minute.ago.to_s(:db) %>
+  modified_at: <%= i.minute.ago.to_s(:db) %>
 <% end %>
 
 # Do not add your fixtures below this line as the rest of this file will be trimmed by test_helper
index 09f4af6373fa03a2f4bdb79e332ea7b38130ff0a..b7f0a96376e446bde2230dc35ba9fb8486a4916f 100644 (file)
@@ -1,7 +1,10 @@
 require 'test_helper'
 require 'crunch_dispatch'
+require 'helpers/git_test_helper'
 
 class CrunchDispatchTest < ActiveSupport::TestCase
+  include GitTestHelper
+
   test 'choose cheaper nodes first' do
     act_as_system_user do
       # Replace test fixtures with a set suitable for testing dispatch
@@ -100,6 +103,30 @@ class CrunchDispatchTest < ActiveSupport::TestCase
     end
   end
 
+  test 'override --cgroup-root with CRUNCH_CGROUP_ROOT' do
+    ENV['CRUNCH_CGROUP_ROOT'] = '/path/to/cgroup'
+    Rails.configuration.crunch_job_wrapper = :none
+    act_as_system_user do
+      j = Job.create(repository: 'active/foo',
+                     script: 'hash',
+                     script_version: '4fe459abe02d9b365932b8f5dc419439ab4e2577',
+                     script_parameters: {})
+      ok = false
+      Open3.expects(:popen3).at_least_once.with do |*args|
+        if args.index(j.uuid)
+          ok = ((i = args.index '--cgroup-root') and
+                (args[i+1] == '/path/to/cgroup'))
+        end
+        true
+      end.raises(StandardError.new('all is well'))
+      dispatch = CrunchDispatch.new
+      dispatch.parse_argv ['--jobs']
+      dispatch.refresh_todo
+      dispatch.start_jobs
+      assert ok
+    end
+  end
+
   def assert_with_timeout timeout, message
     t = 0
     while (t += 0.1) < timeout
@@ -116,4 +143,62 @@ class CrunchDispatchTest < ActiveSupport::TestCase
       return f.flock(File::LOCK_EX|File::LOCK_NB)
     end
   end
+
+  test 'rate limit of partial line segments' do
+    act_as_system_user do
+      Rails.configuration.crunch_log_partial_line_throttle_period = 1
+
+      job = {}
+      job[:bytes_logged] = 0
+      job[:log_throttle_bytes_so_far] = 0
+      job[:log_throttle_lines_so_far] = 0
+      job[:log_throttle_bytes_skipped] = 0
+      job[:log_throttle_is_open] = true
+      job[:log_throttle_partial_line_last_at] = Time.new(0)
+      job[:log_throttle_first_partial_line] = true
+
+      dispatch = CrunchDispatch.new
+
+      line = "first log line"
+      limit = dispatch.rate_limit(job, line)
+      assert_equal true, limit
+      assert_equal "first log line", line
+      assert_equal 1, job[:log_throttle_lines_so_far]
+
+      # first partial line segment is skipped and counted towards skipped lines
+      now = Time.now.strftime('%Y-%m-%d-%H:%M:%S')
+      line = "#{now} localhost 100 0 stderr [...] this is first partial line segment [...]"
+      limit = dispatch.rate_limit(job, line)
+      assert_equal true, limit
+      assert_includes line, "Rate-limiting partial segments of long lines", line
+      assert_equal 2, job[:log_throttle_lines_so_far]
+
+      # next partial line segment within throttle interval is skipped but not counted towards skipped lines
+      line = "#{now} localhost 100 0 stderr [...] second partial line segment within the interval [...]"
+      limit = dispatch.rate_limit(job, line)
+      assert_equal false, limit
+      assert_equal 2, job[:log_throttle_lines_so_far]
+
+      # next partial line after interval is counted towards skipped lines
+      sleep(1)
+      line = "#{now} localhost 100 0 stderr [...] third partial line segment after the interval [...]"
+      limit = dispatch.rate_limit(job, line)
+      assert_equal false, limit
+      assert_equal 3, job[:log_throttle_lines_so_far]
+
+      # this is not a valid line segment
+      line = "#{now} localhost 100 0 stderr [...] does not end with [...] and is not a partial segment"
+      limit = dispatch.rate_limit(job, line)
+      assert_equal true, limit
+      assert_equal "#{now} localhost 100 0 stderr [...] does not end with [...] and is not a partial segment", line
+      assert_equal 4, job[:log_throttle_lines_so_far]
+
+      # this also is not a valid line segment
+      line = "#{now} localhost 100 0 stderr does not start correctly but ends with [...]"
+      limit = dispatch.rate_limit(job, line)
+      assert_equal true, limit
+      assert_equal "#{now} localhost 100 0 stderr does not start correctly but ends with [...]", line
+      assert_equal 5, job[:log_throttle_lines_so_far]
+    end
+  end
 end
index be1fef86e1928165e161b57c0788e95c922ba541..e05c0c5da4439e44931837ea5a259885624b80d8 100644 (file)
@@ -215,4 +215,6 @@ func run(uuid string, crunchRunCommand string, priorityPollInterval int) {
                        log.Printf("Error updating container state to Complete for %v: %q", uuid, err)
                }
        }
+
+       log.Printf("Finished container run for %v", uuid)
 }
index 640ac88ca9a0aa70c1cc305012ab3bb18962ae6e..01edb0a516fadd33c175df030c7c4c330b2985ba 100644 (file)
@@ -1,19 +1,24 @@
 package main
 
 import (
+       "encoding/json"
        "errors"
        "flag"
+       "fmt"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
        "git.curoverse.com/arvados.git/sdk/go/manifest"
        "github.com/curoverse/dockerclient"
        "io"
+       "io/ioutil"
        "log"
        "os"
+       "os/exec"
        "os/signal"
        "strings"
        "sync"
        "syscall"
+       "time"
 )
 
 // IArvadosClient is the minimal Arvados API methods used by crunch-run.
@@ -33,11 +38,18 @@ type IKeepClient interface {
 }
 
 // Mount describes the mount points to create inside the container.
-type Mount struct{}
+type Mount struct {
+       Kind             string `json:"kind"`
+       Writable         bool   `json:"writable"`
+       PortableDataHash string `json:"portable_data_hash"`
+       UUID             string `json:"uuid"`
+       DeviceType       string `json:"device_type"`
+}
 
 // Collection record returned by the API server.
-type Collection struct {
-       ManifestText string `json:"manifest_text"`
+type CollectionRecord struct {
+       ManifestText     string `json:"manifest_text"`
+       PortableDataHash string `json:"portable_data_hash"`
 }
 
 // ContainerRecord is the container record returned by the API server.
@@ -52,11 +64,16 @@ type ContainerRecord struct {
        Priority           int                    `json:"priority"`
        RuntimeConstraints map[string]interface{} `json:"runtime_constraints"`
        State              string                 `json:"state"`
+       Output             string                 `json:"output"`
 }
 
 // NewLogWriter is a factory function to create a new log writer.
 type NewLogWriter func(name string) io.WriteCloser
 
+type RunArvMount func([]string) (*exec.Cmd, error)
+
+type MkTempDir func(string, string) (string, error)
+
 // ThinDockerClient is the minimal Docker client interface used by crunch-run.
 type ThinDockerClient interface {
        StopContainer(id string, timeout int) error
@@ -64,7 +81,7 @@ type ThinDockerClient interface {
        LoadImage(reader io.Reader) error
        CreateContainer(config *dockerclient.ContainerConfig, name string, authConfig *dockerclient.AuthConfig) (string, error)
        StartContainer(id string, config *dockerclient.HostConfig) error
-       ContainerLogs(id string, options *dockerclient.LogOptions) (io.ReadCloser, error)
+       AttachContainer(id string, options *dockerclient.AttachOptions) (io.ReadCloser, error)
        Wait(id string) <-chan dockerclient.WaitResult
        RemoveImage(name string, force bool) ([]*dockerclient.ImageDelete, error)
 }
@@ -86,15 +103,24 @@ type ContainerRunner struct {
        Stderr        *ThrottledLogger
        LogCollection *CollectionWriter
        LogsPDH       *string
-       CancelLock    sync.Mutex
-       Cancelled     bool
-       SigChan       chan os.Signal
-       finalState    string
+       RunArvMount
+       MkTempDir
+       ArvMount       *exec.Cmd
+       ArvMountPoint  string
+       HostOutputDir  string
+       CleanupTempDir []string
+       Binds          []string
+       OutputPDH      *string
+       CancelLock     sync.Mutex
+       Cancelled      bool
+       SigChan        chan os.Signal
+       ArvMountExit   chan error
+       finalState     string
 }
 
 // SetupSignals sets up signal handling to gracefully terminate the underlying
 // Docker container and update state when receiving a TERM, INT or QUIT signal.
-func (runner *ContainerRunner) SetupSignals() error {
+func (runner *ContainerRunner) SetupSignals() {
        runner.SigChan = make(chan os.Signal, 1)
        signal.Notify(runner.SigChan, syscall.SIGTERM)
        signal.Notify(runner.SigChan, syscall.SIGINT)
@@ -112,8 +138,6 @@ func (runner *ContainerRunner) SetupSignals() error {
                        }
                }
        }(runner.SigChan)
-
-       return nil
 }
 
 // LoadImage determines the docker image id from the container record and
@@ -123,17 +147,17 @@ func (runner *ContainerRunner) LoadImage() (err error) {
 
        runner.CrunchLog.Printf("Fetching Docker image from collection '%s'", runner.ContainerRecord.ContainerImage)
 
-       var collection Collection
+       var collection CollectionRecord
        err = runner.ArvClient.Get("collections", runner.ContainerRecord.ContainerImage, nil, &collection)
        if err != nil {
-               return err
+               return fmt.Errorf("While getting container image collection: %v", err)
        }
        manifest := manifest.Manifest{Text: collection.ManifestText}
        var img, imageID string
        for ms := range manifest.StreamIter() {
                img = ms.FileStreamSegments[0].Name
                if !strings.HasSuffix(img, ".tar") {
-                       return errors.New("First file in the collection does not end in .tar")
+                       return fmt.Errorf("First file in the container image collection does not end in .tar")
                }
                imageID = img[:len(img)-4]
        }
@@ -147,12 +171,12 @@ func (runner *ContainerRunner) LoadImage() (err error) {
                var readCloser io.ReadCloser
                readCloser, err = runner.Kc.ManifestFileReader(manifest, img)
                if err != nil {
-                       return err
+                       return fmt.Errorf("While creating ManifestFileReader for container image: %v", err)
                }
 
                err = runner.Docker.LoadImage(readCloser)
                if err != nil {
-                       return err
+                       return fmt.Errorf("While loading container image into Docker: %v", err)
                }
        } else {
                runner.CrunchLog.Print("Docker image is available")
@@ -163,6 +187,210 @@ func (runner *ContainerRunner) LoadImage() (err error) {
        return nil
 }
 
+func (runner *ContainerRunner) ArvMountCmd(arvMountCmd []string) (c *exec.Cmd, err error) {
+       c = exec.Command("arv-mount", arvMountCmd...)
+       nt := NewThrottledLogger(runner.NewLogWriter("arv-mount"))
+       c.Stdout = nt
+       c.Stderr = nt
+
+       err = c.Start()
+       if err != nil {
+               return nil, err
+       }
+
+       statReadme := make(chan bool)
+       runner.ArvMountExit = make(chan error)
+
+       keepStatting := true
+       go func() {
+               for keepStatting {
+                       time.Sleep(100 * time.Millisecond)
+                       _, err = os.Stat(fmt.Sprintf("%s/by_id/README", runner.ArvMountPoint))
+                       if err == nil {
+                               keepStatting = false
+                               statReadme <- true
+                       }
+               }
+               close(statReadme)
+       }()
+
+       go func() {
+               runner.ArvMountExit <- c.Wait()
+               close(runner.ArvMountExit)
+       }()
+
+       select {
+       case <-statReadme:
+               break
+       case err := <-runner.ArvMountExit:
+               runner.ArvMount = nil
+               keepStatting = false
+               return nil, err
+       }
+
+       return c, nil
+}
+
+func (runner *ContainerRunner) SetupMounts() (err error) {
+       runner.ArvMountPoint, err = runner.MkTempDir("", "keep")
+       if err != nil {
+               return fmt.Errorf("While creating keep mount temp dir: %v", err)
+       }
+
+       runner.CleanupTempDir = append(runner.CleanupTempDir, runner.ArvMountPoint)
+
+       pdhOnly := true
+       tmpcount := 0
+       arvMountCmd := []string{"--foreground", "--allow-other", "--read-write"}
+       collectionPaths := []string{}
+       runner.Binds = nil
+
+       for bind, mnt := range runner.ContainerRecord.Mounts {
+               if mnt.Kind == "collection" {
+                       var src string
+                       if mnt.UUID != "" && mnt.PortableDataHash != "" {
+                               return fmt.Errorf("Cannot specify both 'uuid' and 'portable_data_hash' for a collection mount")
+                       }
+                       if mnt.UUID != "" {
+                               if mnt.Writable {
+                                       return fmt.Errorf("Writing to existing collections currently not permitted.")
+                               }
+                               pdhOnly = false
+                               src = fmt.Sprintf("%s/by_id/%s", runner.ArvMountPoint, mnt.UUID)
+                       } else if mnt.PortableDataHash != "" {
+                               if mnt.Writable {
+                                       return fmt.Errorf("Can never write to a collection specified by portable data hash")
+                               }
+                               src = fmt.Sprintf("%s/by_id/%s", runner.ArvMountPoint, mnt.PortableDataHash)
+                       } else {
+                               src = fmt.Sprintf("%s/tmp%d", runner.ArvMountPoint, tmpcount)
+                               arvMountCmd = append(arvMountCmd, "--mount-tmp")
+                               arvMountCmd = append(arvMountCmd, fmt.Sprintf("tmp%d", tmpcount))
+                               tmpcount += 1
+                       }
+                       if mnt.Writable {
+                               if bind == runner.ContainerRecord.OutputPath {
+                                       runner.HostOutputDir = src
+                               }
+                               runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s", src, bind))
+                       } else {
+                               runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s:ro", src, bind))
+                       }
+                       collectionPaths = append(collectionPaths, src)
+               } else if mnt.Kind == "tmp" {
+                       if bind == runner.ContainerRecord.OutputPath {
+                               runner.HostOutputDir, err = runner.MkTempDir("", "")
+                               if err != nil {
+                                       return fmt.Errorf("While creating mount temp dir: %v", err)
+                               }
+                               st, staterr := os.Stat(runner.HostOutputDir)
+                               if staterr != nil {
+                                       return fmt.Errorf("While Stat on temp dir: %v", staterr)
+                               }
+                               err = os.Chmod(runner.HostOutputDir, st.Mode()|os.ModeSetgid|0777)
+                               if staterr != nil {
+                                       return fmt.Errorf("While Chmod temp dir: %v", err)
+                               }
+                               runner.CleanupTempDir = append(runner.CleanupTempDir, runner.HostOutputDir)
+                               runner.Binds = append(runner.Binds, fmt.Sprintf("%s:%s", runner.HostOutputDir, bind))
+                       } else {
+                               runner.Binds = append(runner.Binds, bind)
+                       }
+               } else {
+                       return fmt.Errorf("Unknown mount kind '%s'", mnt.Kind)
+               }
+       }
+
+       if runner.HostOutputDir == "" {
+               return fmt.Errorf("Output path does not correspond to a writable mount point")
+       }
+
+       if pdhOnly {
+               arvMountCmd = append(arvMountCmd, "--mount-by-pdh", "by_id")
+       } else {
+               arvMountCmd = append(arvMountCmd, "--mount-by-id", "by_id")
+       }
+       arvMountCmd = append(arvMountCmd, runner.ArvMountPoint)
+
+       runner.ArvMount, err = runner.RunArvMount(arvMountCmd)
+       if err != nil {
+               return fmt.Errorf("While trying to start arv-mount: %v", err)
+       }
+
+       for _, p := range collectionPaths {
+               _, err = os.Stat(p)
+               if err != nil {
+                       return fmt.Errorf("While checking that input files exist: %v", err)
+               }
+       }
+
+       return nil
+}
+
+func (runner *ContainerRunner) ProcessDockerAttach(containerReader io.Reader) {
+       // Handle docker log protocol
+       // https://docs.docker.com/engine/reference/api/docker_remote_api_v1.15/#attach-to-a-container
+
+       header := make([]byte, 8)
+       for {
+               _, readerr := io.ReadAtLeast(containerReader, header, 8)
+
+               if readerr == nil {
+                       readsize := int64(header[7]) | (int64(header[6]) << 8) | (int64(header[5]) << 16) | (int64(header[4]) << 24)
+                       if header[0] == 1 {
+                               // stdout
+                               _, readerr = io.CopyN(runner.Stdout, containerReader, readsize)
+                       } else {
+                               // stderr
+                               _, readerr = io.CopyN(runner.Stderr, containerReader, readsize)
+                       }
+               }
+
+               if readerr != nil {
+                       if readerr != io.EOF {
+                               runner.CrunchLog.Printf("While reading docker logs: %v", readerr)
+                       }
+
+                       closeerr := runner.Stdout.Close()
+                       if closeerr != nil {
+                               runner.CrunchLog.Printf("While closing stdout logs: %v", closeerr)
+                       }
+
+                       closeerr = runner.Stderr.Close()
+                       if closeerr != nil {
+                               runner.CrunchLog.Printf("While closing stderr logs: %v", closeerr)
+                       }
+
+                       runner.loggingDone <- true
+                       close(runner.loggingDone)
+                       return
+               }
+       }
+}
+
+// AttachLogs connects the docker container stdout and stderr logs to the
+// Arvados logger which logs to Keep and the API server logs table.
+func (runner *ContainerRunner) AttachStreams() (err error) {
+
+       runner.CrunchLog.Print("Attaching container streams")
+
+       var containerReader io.Reader
+       containerReader, err = runner.Docker.AttachContainer(runner.ContainerID,
+               &dockerclient.AttachOptions{Stream: true, Stdout: true, Stderr: true})
+       if err != nil {
+               return fmt.Errorf("While attaching container stdout/stderr streams: %v", err)
+       }
+
+       runner.loggingDone = make(chan bool)
+
+       runner.Stdout = NewThrottledLogger(runner.NewLogWriter("stdout"))
+       runner.Stderr = NewThrottledLogger(runner.NewLogWriter("stderr"))
+
+       go runner.ProcessDockerAttach(containerReader)
+
+       return nil
+}
+
 // StartContainer creates the container and runs it.
 func (runner *ContainerRunner) StartContainer() (err error) {
        runner.CrunchLog.Print("Creating Docker container")
@@ -181,67 +409,126 @@ func (runner *ContainerRunner) StartContainer() (err error) {
        for k, v := range runner.ContainerRecord.Environment {
                runner.ContainerConfig.Env = append(runner.ContainerConfig.Env, k+"="+v)
        }
+       runner.ContainerConfig.NetworkDisabled = true
        runner.ContainerID, err = runner.Docker.CreateContainer(&runner.ContainerConfig, "", nil)
        if err != nil {
-               return
+               return fmt.Errorf("While creating container: %v", err)
        }
-       hostConfig := &dockerclient.HostConfig{}
+       hostConfig := &dockerclient.HostConfig{Binds: runner.Binds,
+               LogConfig: dockerclient.LogConfig{Type: "none"}}
 
-       runner.CrunchLog.Printf("Starting Docker container id '%s'", runner.ContainerID)
-       err = runner.Docker.StartContainer(runner.ContainerID, hostConfig)
+       err = runner.AttachStreams()
        if err != nil {
-               return
+               return err
        }
 
-       return nil
-}
-
-// AttachLogs connects the docker container stdout and stderr logs to the
-// Arvados logger which logs to Keep and the API server logs table.
-func (runner *ContainerRunner) AttachLogs() (err error) {
-
-       runner.CrunchLog.Print("Attaching container logs")
-
-       var stderrReader, stdoutReader io.Reader
-       stderrReader, err = runner.Docker.ContainerLogs(runner.ContainerID, &dockerclient.LogOptions{Follow: true, Stderr: true})
-       if err != nil {
-               return
-       }
-       stdoutReader, err = runner.Docker.ContainerLogs(runner.ContainerID, &dockerclient.LogOptions{Follow: true, Stdout: true})
+       runner.CrunchLog.Printf("Starting Docker container id '%s'", runner.ContainerID)
+       err = runner.Docker.StartContainer(runner.ContainerID, hostConfig)
        if err != nil {
-               return
+               return fmt.Errorf("While starting container: %v", err)
        }
 
-       runner.loggingDone = make(chan bool)
-
-       runner.Stdout = NewThrottledLogger(runner.NewLogWriter("stdout"))
-       runner.Stderr = NewThrottledLogger(runner.NewLogWriter("stderr"))
-       go ReadWriteLines(stdoutReader, runner.Stdout, runner.loggingDone)
-       go ReadWriteLines(stderrReader, runner.Stderr, runner.loggingDone)
-
        return nil
 }
 
 // WaitFinish waits for the container to terminate, capture the exit code, and
 // close the stdout/stderr logging.
 func (runner *ContainerRunner) WaitFinish() error {
+       runner.CrunchLog.Print("Waiting for container to finish")
+
        result := runner.Docker.Wait(runner.ContainerID)
        wr := <-result
        if wr.Error != nil {
-               return wr.Error
+               return fmt.Errorf("While waiting for container to finish: %v", wr.Error)
        }
        runner.ExitCode = &wr.ExitCode
 
-       // drain stdout/stderr
-       <-runner.loggingDone
+       // wait for stdout/stderr to complete
        <-runner.loggingDone
 
-       runner.Stdout.Close()
-       runner.Stderr.Close()
+       return nil
+}
+
+// HandleOutput sets the output, unmounts the FUSE mount, and deletes temporary directories
+func (runner *ContainerRunner) CaptureOutput() error {
+       if runner.finalState != "Complete" {
+               return nil
+       }
+
+       if runner.HostOutputDir == "" {
+               return nil
+       }
+
+       _, err := os.Stat(runner.HostOutputDir)
+       if err != nil {
+               return fmt.Errorf("While checking host output path: %v", err)
+       }
+
+       var manifestText string
+
+       collectionMetafile := fmt.Sprintf("%s/.arvados#collection", runner.HostOutputDir)
+       _, err = os.Stat(collectionMetafile)
+       if err != nil {
+               // Regular directory
+               cw := CollectionWriter{runner.Kc, nil, sync.Mutex{}}
+               manifestText, err = cw.WriteTree(runner.HostOutputDir, runner.CrunchLog.Logger)
+               if err != nil {
+                       return fmt.Errorf("While uploading output files: %v", err)
+               }
+       } else {
+               // FUSE mount directory
+               file, openerr := os.Open(collectionMetafile)
+               if openerr != nil {
+                       return fmt.Errorf("While opening FUSE metafile: %v", err)
+               }
+               defer file.Close()
+
+               rec := CollectionRecord{}
+               err = json.NewDecoder(file).Decode(&rec)
+               if err != nil {
+                       return fmt.Errorf("While reading FUSE metafile: %v", err)
+               }
+               manifestText = rec.ManifestText
+       }
+
+       var response CollectionRecord
+       err = runner.ArvClient.Create("collections",
+               arvadosclient.Dict{
+                       "collection": arvadosclient.Dict{
+                               "manifest_text": manifestText}},
+               &response)
+       if err != nil {
+               return fmt.Errorf("While creating output collection: %v", err)
+       }
+
+       runner.OutputPDH = new(string)
+       *runner.OutputPDH = response.PortableDataHash
 
        return nil
 }
 
+func (runner *ContainerRunner) CleanupDirs() {
+       if runner.ArvMount != nil {
+               umount := exec.Command("fusermount", "-z", "-u", runner.ArvMountPoint)
+               umnterr := umount.Run()
+               if umnterr != nil {
+                       runner.CrunchLog.Printf("While running fusermount: %v", umnterr)
+               }
+
+               mnterr := <-runner.ArvMountExit
+               if mnterr != nil {
+                       runner.CrunchLog.Printf("Arv-mount exit error: %v", mnterr)
+               }
+       }
+
+       for _, tmpdir := range runner.CleanupTempDir {
+               rmerr := os.RemoveAll(tmpdir)
+               if rmerr != nil {
+                       runner.CrunchLog.Printf("While cleaning up temporary directory %s: %v", tmpdir, rmerr)
+               }
+       }
+}
+
 // CommitLogs posts the collection containing the final container logs.
 func (runner *ContainerRunner) CommitLogs() error {
        runner.CrunchLog.Print(runner.finalState)
@@ -256,28 +543,30 @@ func (runner *ContainerRunner) CommitLogs() error {
 
        mt, err := runner.LogCollection.ManifestText()
        if err != nil {
-               return err
+               return fmt.Errorf("While creating log manifest: %v", err)
        }
 
-       response := make(map[string]string)
+       var response CollectionRecord
        err = runner.ArvClient.Create("collections",
-               arvadosclient.Dict{"name": "logs for " + runner.ContainerRecord.UUID,
-                       "manifest_text": mt},
-               response)
+               arvadosclient.Dict{
+                       "collection": arvadosclient.Dict{
+                               "name":          "logs for " + runner.ContainerRecord.UUID,
+                               "manifest_text": mt}},
+               &response)
        if err != nil {
-               return err
+               return fmt.Errorf("While creating log collection: %v", err)
        }
 
        runner.LogsPDH = new(string)
-       *runner.LogsPDH = response["portable_data_hash"]
+       *runner.LogsPDH = response.PortableDataHash
 
        return nil
 }
 
 // UpdateContainerRecordRunning updates the container state to "Running"
 func (runner *ContainerRunner) UpdateContainerRecordRunning() error {
-       update := arvadosclient.Dict{"state": "Running"}
-       return runner.ArvClient.Update("containers", runner.ContainerRecord.UUID, update, nil)
+       return runner.ArvClient.Update("containers", runner.ContainerRecord.UUID,
+               arvadosclient.Dict{"container": arvadosclient.Dict{"state": "Running"}}, nil)
 }
 
 // UpdateContainerRecordComplete updates the container record state on API
@@ -290,10 +579,13 @@ func (runner *ContainerRunner) UpdateContainerRecordComplete() error {
        if runner.ExitCode != nil {
                update["exit_code"] = *runner.ExitCode
        }
+       if runner.OutputPDH != nil {
+               update["output"] = runner.OutputPDH
+       }
 
        update["state"] = runner.finalState
 
-       return runner.ArvClient.Update("containers", runner.ContainerRecord.UUID, update, nil)
+       return runner.ArvClient.Update("containers", runner.ContainerRecord.UUID, arvadosclient.Dict{"container": update}, nil)
 }
 
 // NewArvLogWriter creates an ArvLogWriter
@@ -302,8 +594,15 @@ func (runner *ContainerRunner) NewArvLogWriter(name string) io.WriteCloser {
 }
 
 // Run the full container lifecycle.
-func (runner *ContainerRunner) Run(containerUUID string) (err error) {
-       runner.CrunchLog.Printf("Executing container '%s'", containerUUID)
+func (runner *ContainerRunner) Run() (err error) {
+       runner.CrunchLog.Printf("Executing container '%s'", runner.ContainerRecord.UUID)
+
+       hostname, hosterr := os.Hostname()
+       if hosterr != nil {
+               runner.CrunchLog.Printf("Error getting hostname '%v'", hosterr)
+       } else {
+               runner.CrunchLog.Printf("Executing on host '%s'", hostname)
+       }
 
        var runerr, waiterr error
 
@@ -318,13 +617,22 @@ func (runner *ContainerRunner) Run(containerUUID string) (err error) {
                        runner.finalState = "Complete"
                }
 
-               // (6) write logs
+               // (6) capture output
+               outputerr := runner.CaptureOutput()
+               if outputerr != nil {
+                       runner.CrunchLog.Print(outputerr)
+               }
+
+               // (7) clean up temporary directories
+               runner.CleanupDirs()
+
+               // (8) write logs
                logerr := runner.CommitLogs()
                if logerr != nil {
                        runner.CrunchLog.Print(logerr)
                }
 
-               // (7) update container record with results
+               // (9) update container record with results
                updateerr := runner.UpdateContainerRecordComplete()
                if updateerr != nil {
                        runner.CrunchLog.Print(updateerr)
@@ -336,7 +644,7 @@ func (runner *ContainerRunner) Run(containerUUID string) (err error) {
                        if runerr != nil {
                                err = runerr
                        } else if waiterr != nil {
-                               err = runerr
+                               err = waiterr
                        } else if logerr != nil {
                                err = logerr
                        } else if updateerr != nil {
@@ -345,24 +653,27 @@ func (runner *ContainerRunner) Run(containerUUID string) (err error) {
                }
        }()
 
-       err = runner.ArvClient.Get("containers", containerUUID, nil, &runner.ContainerRecord)
+       err = runner.ArvClient.Get("containers", runner.ContainerRecord.UUID, nil, &runner.ContainerRecord)
        if err != nil {
-               return
+               return fmt.Errorf("While getting container record: %v", err)
        }
 
-       // (0) setup signal handling
-       err = runner.SetupSignals()
+       // (1) setup signal handling
+       runner.SetupSignals()
+
+       // (2) check for and/or load image
+       err = runner.LoadImage()
        if err != nil {
-               return
+               return fmt.Errorf("While loading container image: %v", err)
        }
 
-       // (1) check for and/or load image
-       err = runner.LoadImage()
+       // (3) set up FUSE mount and binds
+       err = runner.SetupMounts()
        if err != nil {
-               return
+               return fmt.Errorf("While setting up mounts: %v", err)
        }
 
-       // (2) start container
+       // (3) create and start container
        err = runner.StartContainer()
        if err != nil {
                if err == ErrCancelled {
@@ -371,18 +682,12 @@ func (runner *ContainerRunner) Run(containerUUID string) (err error) {
                return
        }
 
-       // (3) update container record state
+       // (4) update container record state
        err = runner.UpdateContainerRecordRunning()
        if err != nil {
                runner.CrunchLog.Print(err)
        }
 
-       // (4) attach container logs
-       runerr = runner.AttachLogs()
-       if runerr != nil {
-               runner.CrunchLog.Print(runerr)
-       }
-
        // (5) wait for container to finish
        waiterr = runner.WaitFinish()
 
@@ -392,42 +697,49 @@ func (runner *ContainerRunner) Run(containerUUID string) (err error) {
 // NewContainerRunner creates a new container runner.
 func NewContainerRunner(api IArvadosClient,
        kc IKeepClient,
-       docker ThinDockerClient) *ContainerRunner {
+       docker ThinDockerClient,
+       containerUUID string) *ContainerRunner {
 
        cr := &ContainerRunner{ArvClient: api, Kc: kc, Docker: docker}
        cr.NewLogWriter = cr.NewArvLogWriter
-       cr.LogCollection = &CollectionWriter{kc, nil}
+       cr.RunArvMount = cr.ArvMountCmd
+       cr.MkTempDir = ioutil.TempDir
+       cr.LogCollection = &CollectionWriter{kc, nil, sync.Mutex{}}
+       cr.ContainerRecord.UUID = containerUUID
        cr.CrunchLog = NewThrottledLogger(cr.NewLogWriter("crunch-run"))
+       cr.CrunchLog.Immediate = log.New(os.Stderr, containerUUID+" ", 0)
        return cr
 }
 
 func main() {
        flag.Parse()
 
+       containerId := flag.Arg(0)
+
        api, err := arvadosclient.MakeArvadosClient()
        if err != nil {
-               log.Fatal(err)
+               log.Fatalf("%s: %v", containerId, err)
        }
        api.Retries = 8
 
        var kc *keepclient.KeepClient
        kc, err = keepclient.MakeKeepClient(&api)
        if err != nil {
-               log.Fatal(err)
+               log.Fatalf("%s: %v", containerId, err)
        }
        kc.Retries = 4
 
        var docker *dockerclient.DockerClient
        docker, err = dockerclient.NewDockerClient("unix:///var/run/docker.sock", nil)
        if err != nil {
-               log.Fatal(err)
+               log.Fatalf("%s: %v", containerId, err)
        }
 
-       cr := NewContainerRunner(api, kc, docker)
+       cr := NewContainerRunner(api, kc, docker, containerId)
 
-       err = cr.Run(flag.Arg(0))
+       err = cr.Run()
        if err != nil {
-               log.Fatal(err)
+               log.Fatalf("%s: %v", containerId, err)
        }
 
 }
index 1946e5c403e02756f18589bc9a37a8333e2ca7a4..53cdbbc54e956538c084ce68e3aba89a29973825 100644 (file)
@@ -13,6 +13,10 @@ import (
        . "gopkg.in/check.v1"
        "io"
        "io/ioutil"
+       "log"
+       "os"
+       "os/exec"
+       "sort"
        "strings"
        "syscall"
        "testing"
@@ -51,22 +55,19 @@ var otherManifest = ". 68a84f561b1d1708c6baff5e019a9ab3+46+Ae5d0af96944a3690becb
 var otherPDH = "a3e8f74c6f101eae01fa08bfb4e49b3a+54"
 
 type TestDockerClient struct {
-       imageLoaded  string
-       stdoutReader io.ReadCloser
-       stderrReader io.ReadCloser
-       stdoutWriter io.WriteCloser
-       stderrWriter io.WriteCloser
-       fn           func(t *TestDockerClient)
-       finish       chan dockerclient.WaitResult
-       stop         chan bool
-       cwd          string
-       env          []string
+       imageLoaded string
+       logReader   io.ReadCloser
+       logWriter   io.WriteCloser
+       fn          func(t *TestDockerClient)
+       finish      chan dockerclient.WaitResult
+       stop        chan bool
+       cwd         string
+       env         []string
 }
 
 func NewTestDockerClient() *TestDockerClient {
        t := &TestDockerClient{}
-       t.stdoutReader, t.stdoutWriter = io.Pipe()
-       t.stderrReader, t.stderrWriter = io.Pipe()
+       t.logReader, t.logWriter = io.Pipe()
        t.finish = make(chan dockerclient.WaitResult)
        t.stop = make(chan bool)
        t.cwd = "/"
@@ -113,14 +114,8 @@ func (t *TestDockerClient) StartContainer(id string, config *dockerclient.HostCo
        }
 }
 
-func (t *TestDockerClient) ContainerLogs(id string, options *dockerclient.LogOptions) (io.ReadCloser, error) {
-       if options.Stdout {
-               return t.stdoutReader, nil
-       }
-       if options.Stderr {
-               return t.stderrReader, nil
-       }
-       return nil, nil
+func (t *TestDockerClient) AttachContainer(id string, options *dockerclient.AttachOptions) (io.ReadCloser, error) {
+       return t.logReader, nil
 }
 
 func (t *TestDockerClient) Wait(id string) <-chan dockerclient.WaitResult {
@@ -139,20 +134,20 @@ func (this *ArvTestClient) Create(resourceType string,
        this.Content = parameters
 
        if resourceType == "logs" {
-               et := parameters["event_type"].(string)
+               et := parameters["log"].(arvadosclient.Dict)["event_type"].(string)
                if this.Logs == nil {
                        this.Logs = make(map[string]*bytes.Buffer)
                }
                if this.Logs[et] == nil {
                        this.Logs[et] = &bytes.Buffer{}
                }
-               this.Logs[et].Write([]byte(parameters["properties"].(map[string]string)["text"]))
+               this.Logs[et].Write([]byte(parameters["log"].(arvadosclient.Dict)["properties"].(map[string]string)["text"]))
        }
 
        if resourceType == "collections" && output != nil {
-               mt := parameters["manifest_text"].(string)
-               outmap := output.(map[string]string)
-               outmap["portable_data_hash"] = fmt.Sprintf("%x+%d", md5.Sum([]byte(mt)), len(mt))
+               mt := parameters["collection"].(arvadosclient.Dict)["manifest_text"].(string)
+               outmap := output.(*CollectionRecord)
+               outmap.PortableDataHash = fmt.Sprintf("%x+%d", md5.Sum([]byte(mt)), len(mt))
        }
 
        return nil
@@ -161,9 +156,9 @@ func (this *ArvTestClient) Create(resourceType string,
 func (this *ArvTestClient) Get(resourceType string, uuid string, parameters arvadosclient.Dict, output interface{}) error {
        if resourceType == "collections" {
                if uuid == hwPDH {
-                       output.(*Collection).ManifestText = hwManifest
+                       output.(*CollectionRecord).ManifestText = hwManifest
                } else if uuid == otherPDH {
-                       output.(*Collection).ManifestText = otherManifest
+                       output.(*CollectionRecord).ManifestText = otherManifest
                }
        }
        if resourceType == "containers" {
@@ -176,7 +171,7 @@ func (this *ArvTestClient) Update(resourceType string, uuid string, parameters a
 
        this.Content = parameters
        if resourceType == "containers" {
-               if parameters["state"] == "Running" {
+               if parameters["container"].(arvadosclient.Dict)["state"] == "Running" {
                        this.WasSetRunning = true
                }
 
@@ -210,7 +205,7 @@ func (this *KeepTestClient) ManifestFileReader(m manifest.Manifest, filename str
 func (s *TestSuite) TestLoadImage(c *C) {
        kc := &KeepTestClient{}
        docker := NewTestDockerClient()
-       cr := NewContainerRunner(&ArvTestClient{}, kc, docker)
+       cr := NewContainerRunner(&ArvTestClient{}, kc, docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
 
        _, err := cr.Docker.RemoveImage(hwImageId, true)
 
@@ -266,7 +261,7 @@ func (this ArvErrorTestClient) Update(resourceType string, uuid string, paramete
 }
 
 func (this KeepErrorTestClient) PutHB(hash string, buf []byte) (string, int, error) {
-       return "", 0, nil
+       return "", 0, errors.New("KeepError")
 }
 
 func (this KeepErrorTestClient) ManifestFileReader(m manifest.Manifest, filename string) (keepclient.ReadCloserWithLen, error) {
@@ -297,36 +292,36 @@ func (this KeepReadErrorTestClient) ManifestFileReader(m manifest.Manifest, file
 
 func (s *TestSuite) TestLoadImageArvError(c *C) {
        // (1) Arvados error
-       cr := NewContainerRunner(ArvErrorTestClient{}, &KeepTestClient{}, nil)
+       cr := NewContainerRunner(ArvErrorTestClient{}, &KeepTestClient{}, nil, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
        cr.ContainerRecord.ContainerImage = hwPDH
 
        err := cr.LoadImage()
-       c.Check(err.Error(), Equals, "ArvError")
+       c.Check(err.Error(), Equals, "While getting container image collection: ArvError")
 }
 
 func (s *TestSuite) TestLoadImageKeepError(c *C) {
        // (2) Keep error
        docker := NewTestDockerClient()
-       cr := NewContainerRunner(&ArvTestClient{}, KeepErrorTestClient{}, docker)
+       cr := NewContainerRunner(&ArvTestClient{}, KeepErrorTestClient{}, docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
        cr.ContainerRecord.ContainerImage = hwPDH
 
        err := cr.LoadImage()
-       c.Check(err.Error(), Equals, "KeepError")
+       c.Check(err.Error(), Equals, "While creating ManifestFileReader for container image: KeepError")
 }
 
 func (s *TestSuite) TestLoadImageCollectionError(c *C) {
        // (3) Collection doesn't contain image
-       cr := NewContainerRunner(&ArvTestClient{}, KeepErrorTestClient{}, nil)
+       cr := NewContainerRunner(&ArvTestClient{}, KeepErrorTestClient{}, nil, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
        cr.ContainerRecord.ContainerImage = otherPDH
 
        err := cr.LoadImage()
-       c.Check(err.Error(), Equals, "First file in the collection does not end in .tar")
+       c.Check(err.Error(), Equals, "First file in the container image collection does not end in .tar")
 }
 
 func (s *TestSuite) TestLoadImageKeepReadError(c *C) {
        // (4) Collection doesn't contain image
        docker := NewTestDockerClient()
-       cr := NewContainerRunner(&ArvTestClient{}, KeepReadErrorTestClient{}, docker)
+       cr := NewContainerRunner(&ArvTestClient{}, KeepReadErrorTestClient{}, docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
        cr.ContainerRecord.ContainerImage = hwPDH
 
        err := cr.LoadImage()
@@ -356,15 +351,23 @@ func (this *TestLogs) NewTestLoggingWriter(logstr string) io.WriteCloser {
        return nil
 }
 
+func dockerLog(fd byte, msg string) []byte {
+       by := []byte(msg)
+       header := make([]byte, 8+len(by))
+       header[0] = fd
+       header[7] = byte(len(by))
+       copy(header[8:], by)
+       return header
+}
+
 func (s *TestSuite) TestRunContainer(c *C) {
        docker := NewTestDockerClient()
        docker.fn = func(t *TestDockerClient) {
-               t.stdoutWriter.Write([]byte("Hello world\n"))
-               t.stdoutWriter.Close()
-               t.stderrWriter.Close()
+               t.logWriter.Write(dockerLog(1, "Hello world\n"))
+               t.logWriter.Close()
                t.finish <- dockerclient.WaitResult{}
        }
-       cr := NewContainerRunner(&ArvTestClient{}, &KeepTestClient{}, docker)
+       cr := NewContainerRunner(&ArvTestClient{}, &KeepTestClient{}, docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
 
        var logs TestLogs
        cr.NewLogWriter = logs.NewTestLoggingWriter
@@ -376,9 +379,6 @@ func (s *TestSuite) TestRunContainer(c *C) {
        err = cr.StartContainer()
        c.Check(err, IsNil)
 
-       err = cr.AttachLogs()
-       c.Check(err, IsNil)
-
        err = cr.WaitFinish()
        c.Check(err, IsNil)
 
@@ -389,8 +389,7 @@ func (s *TestSuite) TestRunContainer(c *C) {
 func (s *TestSuite) TestCommitLogs(c *C) {
        api := &ArvTestClient{}
        kc := &KeepTestClient{}
-       cr := NewContainerRunner(api, kc, nil)
-       cr.ContainerRecord.UUID = "zzzzz-zzzzz-zzzzzzzzzzzzzzz"
+       cr := NewContainerRunner(api, kc, nil, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
        cr.CrunchLog.Timestamper = (&TestTimestamper{}).Timestamp
 
        cr.CrunchLog.Print("Hello world!")
@@ -400,28 +399,26 @@ func (s *TestSuite) TestCommitLogs(c *C) {
        err := cr.CommitLogs()
        c.Check(err, IsNil)
 
-       c.Check(api.Content["name"], Equals, "logs for zzzzz-zzzzz-zzzzzzzzzzzzzzz")
-       c.Check(api.Content["manifest_text"], Equals, ". 744b2e4553123b02fa7b452ec5c18993+123 0:123:crunch-run.txt\n")
+       c.Check(api.Content["collection"].(arvadosclient.Dict)["name"], Equals, "logs for zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+       c.Check(api.Content["collection"].(arvadosclient.Dict)["manifest_text"], Equals, ". 744b2e4553123b02fa7b452ec5c18993+123 0:123:crunch-run.txt\n")
        c.Check(*cr.LogsPDH, Equals, "63da7bdacf08c40f604daad80c261e9a+60")
 }
 
 func (s *TestSuite) TestUpdateContainerRecordRunning(c *C) {
        api := &ArvTestClient{}
        kc := &KeepTestClient{}
-       cr := NewContainerRunner(api, kc, nil)
-       cr.ContainerRecord.UUID = "zzzzz-zzzzz-zzzzzzzzzzzzzzz"
+       cr := NewContainerRunner(api, kc, nil, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
 
        err := cr.UpdateContainerRecordRunning()
        c.Check(err, IsNil)
 
-       c.Check(api.Content["state"], Equals, "Running")
+       c.Check(api.Content["container"].(arvadosclient.Dict)["state"], Equals, "Running")
 }
 
 func (s *TestSuite) TestUpdateContainerRecordComplete(c *C) {
        api := &ArvTestClient{}
        kc := &KeepTestClient{}
-       cr := NewContainerRunner(api, kc, nil)
-       cr.ContainerRecord.UUID = "zzzzz-zzzzz-zzzzzzzzzzzzzzz"
+       cr := NewContainerRunner(api, kc, nil, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
 
        cr.LogsPDH = new(string)
        *cr.LogsPDH = "d3a229d2fe3690c2c3e75a71a153c6a3+60"
@@ -433,25 +430,24 @@ func (s *TestSuite) TestUpdateContainerRecordComplete(c *C) {
        err := cr.UpdateContainerRecordComplete()
        c.Check(err, IsNil)
 
-       c.Check(api.Content["log"], Equals, *cr.LogsPDH)
-       c.Check(api.Content["exit_code"], Equals, *cr.ExitCode)
-       c.Check(api.Content["state"], Equals, "Complete")
+       c.Check(api.Content["container"].(arvadosclient.Dict)["log"], Equals, *cr.LogsPDH)
+       c.Check(api.Content["container"].(arvadosclient.Dict)["exit_code"], Equals, *cr.ExitCode)
+       c.Check(api.Content["container"].(arvadosclient.Dict)["state"], Equals, "Complete")
 }
 
 func (s *TestSuite) TestUpdateContainerRecordCancelled(c *C) {
        api := &ArvTestClient{}
        kc := &KeepTestClient{}
-       cr := NewContainerRunner(api, kc, nil)
-       cr.ContainerRecord.UUID = "zzzzz-zzzzz-zzzzzzzzzzzzzzz"
+       cr := NewContainerRunner(api, kc, nil, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
        cr.Cancelled = true
        cr.finalState = "Cancelled"
 
        err := cr.UpdateContainerRecordComplete()
        c.Check(err, IsNil)
 
-       c.Check(api.Content["log"], IsNil)
-       c.Check(api.Content["exit_code"], IsNil)
-       c.Check(api.Content["state"], Equals, "Cancelled")
+       c.Check(api.Content["container"].(arvadosclient.Dict)["log"], IsNil)
+       c.Check(api.Content["container"].(arvadosclient.Dict)["exit_code"], IsNil)
+       c.Check(api.Content["container"].(arvadosclient.Dict)["state"], Equals, "Cancelled")
 }
 
 // Used by the TestFullRun*() test below to DRY up boilerplate setup to do full
@@ -466,13 +462,15 @@ func FullRunHelper(c *C, record string, fn func(t *TestDockerClient)) (api *ArvT
        docker.RemoveImage(hwImageId, true)
 
        api = &ArvTestClient{ContainerRecord: rec}
-       cr = NewContainerRunner(api, &KeepTestClient{}, docker)
+       cr = NewContainerRunner(api, &KeepTestClient{}, docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+       am := &ArvMountCmdLine{}
+       cr.RunArvMount = am.ArvMountTest
 
-       err = cr.Run("zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+       err = cr.Run()
        c.Check(err, IsNil)
        c.Check(api.WasSetRunning, Equals, true)
 
-       c.Check(api.Content["log"], NotNil)
+       c.Check(api.Content["container"].(arvadosclient.Dict)["log"], NotNil)
 
        if err != nil {
                for k, v := range api.Logs {
@@ -490,19 +488,18 @@ func (s *TestSuite) TestFullRunHello(c *C) {
     "container_image": "d4ab34d3d4f8a72f5c4973051ae69fab+122",
     "cwd": ".",
     "environment": {},
-    "mounts": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
     "runtime_constraints": {}
 }`, func(t *TestDockerClient) {
-               t.stdoutWriter.Write([]byte("hello world\n"))
-               t.stdoutWriter.Close()
-               t.stderrWriter.Close()
+               t.logWriter.Write(dockerLog(1, "hello world\n"))
+               t.logWriter.Close()
                t.finish <- dockerclient.WaitResult{}
        })
 
-       c.Check(api.Content["exit_code"], Equals, 0)
-       c.Check(api.Content["state"], Equals, "Complete")
+       c.Check(api.Content["container"].(arvadosclient.Dict)["exit_code"], Equals, 0)
+       c.Check(api.Content["container"].(arvadosclient.Dict)["state"], Equals, "Complete")
 
        c.Check(strings.HasSuffix(api.Logs["stdout"].String(), "hello world\n"), Equals, true)
 
@@ -514,21 +511,20 @@ func (s *TestSuite) TestFullRunStderr(c *C) {
     "container_image": "d4ab34d3d4f8a72f5c4973051ae69fab+122",
     "cwd": ".",
     "environment": {},
-    "mounts": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
     "runtime_constraints": {}
 }`, func(t *TestDockerClient) {
-               t.stdoutWriter.Write([]byte("hello\n"))
-               t.stderrWriter.Write([]byte("world\n"))
-               t.stdoutWriter.Close()
-               t.stderrWriter.Close()
+               t.logWriter.Write(dockerLog(1, "hello\n"))
+               t.logWriter.Write(dockerLog(2, "world\n"))
+               t.logWriter.Close()
                t.finish <- dockerclient.WaitResult{ExitCode: 1}
        })
 
-       c.Check(api.Content["log"], NotNil)
-       c.Check(api.Content["exit_code"], Equals, 1)
-       c.Check(api.Content["state"], Equals, "Complete")
+       c.Check(api.Content["container"].(arvadosclient.Dict)["log"], NotNil)
+       c.Check(api.Content["container"].(arvadosclient.Dict)["exit_code"], Equals, 1)
+       c.Check(api.Content["container"].(arvadosclient.Dict)["state"], Equals, "Complete")
 
        c.Check(strings.HasSuffix(api.Logs["stdout"].String(), "hello\n"), Equals, true)
        c.Check(strings.HasSuffix(api.Logs["stderr"].String(), "world\n"), Equals, true)
@@ -540,19 +536,20 @@ func (s *TestSuite) TestFullRunDefaultCwd(c *C) {
     "container_image": "d4ab34d3d4f8a72f5c4973051ae69fab+122",
     "cwd": ".",
     "environment": {},
-    "mounts": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
     "runtime_constraints": {}
 }`, func(t *TestDockerClient) {
-               t.stdoutWriter.Write([]byte(t.cwd + "\n"))
-               t.stdoutWriter.Close()
-               t.stderrWriter.Close()
+               t.logWriter.Write(dockerLog(1, t.cwd+"\n"))
+               t.logWriter.Close()
                t.finish <- dockerclient.WaitResult{ExitCode: 0}
        })
 
-       c.Check(api.Content["exit_code"], Equals, 0)
-       c.Check(api.Content["state"], Equals, "Complete")
+       c.Check(api.Content["container"].(arvadosclient.Dict)["exit_code"], Equals, 0)
+       c.Check(api.Content["container"].(arvadosclient.Dict)["state"], Equals, "Complete")
+
+       log.Print(api.Logs["stdout"].String())
 
        c.Check(strings.HasSuffix(api.Logs["stdout"].String(), "/\n"), Equals, true)
 }
@@ -563,19 +560,18 @@ func (s *TestSuite) TestFullRunSetCwd(c *C) {
     "container_image": "d4ab34d3d4f8a72f5c4973051ae69fab+122",
     "cwd": "/bin",
     "environment": {},
-    "mounts": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
     "runtime_constraints": {}
 }`, func(t *TestDockerClient) {
-               t.stdoutWriter.Write([]byte(t.cwd + "\n"))
-               t.stdoutWriter.Close()
-               t.stderrWriter.Close()
+               t.logWriter.Write(dockerLog(1, t.cwd+"\n"))
+               t.logWriter.Close()
                t.finish <- dockerclient.WaitResult{ExitCode: 0}
        })
 
-       c.Check(api.Content["exit_code"], Equals, 0)
-       c.Check(api.Content["state"], Equals, "Complete")
+       c.Check(api.Content["container"].(arvadosclient.Dict)["exit_code"], Equals, 0)
+       c.Check(api.Content["container"].(arvadosclient.Dict)["state"], Equals, "Complete")
 
        c.Check(strings.HasSuffix(api.Logs["stdout"].String(), "/bin\n"), Equals, true)
 }
@@ -586,7 +582,7 @@ func (s *TestSuite) TestCancel(c *C) {
     "container_image": "d4ab34d3d4f8a72f5c4973051ae69fab+122",
     "cwd": ".",
     "environment": {},
-    "mounts": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
     "runtime_constraints": {}
@@ -599,15 +595,16 @@ func (s *TestSuite) TestCancel(c *C) {
        docker := NewTestDockerClient()
        docker.fn = func(t *TestDockerClient) {
                <-t.stop
-               t.stdoutWriter.Write([]byte("foo\n"))
-               t.stdoutWriter.Close()
-               t.stderrWriter.Close()
+               t.logWriter.Write(dockerLog(1, "foo\n"))
+               t.logWriter.Close()
                t.finish <- dockerclient.WaitResult{ExitCode: 0}
        }
        docker.RemoveImage(hwImageId, true)
 
        api := &ArvTestClient{ContainerRecord: rec}
-       cr := NewContainerRunner(api, &KeepTestClient{}, docker)
+       cr := NewContainerRunner(api, &KeepTestClient{}, docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+       am := &ArvMountCmdLine{}
+       cr.RunArvMount = am.ArvMountTest
 
        go func() {
                for cr.ContainerID == "" {
@@ -616,11 +613,11 @@ func (s *TestSuite) TestCancel(c *C) {
                cr.SigChan <- syscall.SIGINT
        }()
 
-       err = cr.Run("zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+       err = cr.Run()
 
        c.Check(err, IsNil)
 
-       c.Check(api.Content["log"], NotNil)
+       c.Check(api.Content["container"].(arvadosclient.Dict)["log"], NotNil)
 
        if err != nil {
                for k, v := range api.Logs {
@@ -629,7 +626,7 @@ func (s *TestSuite) TestCancel(c *C) {
                }
        }
 
-       c.Check(api.Content["state"], Equals, "Cancelled")
+       c.Check(api.Content["container"].(arvadosclient.Dict)["state"], Equals, "Cancelled")
 
        c.Check(strings.HasSuffix(api.Logs["stdout"].String(), "foo\n"), Equals, true)
 
@@ -641,19 +638,90 @@ func (s *TestSuite) TestFullRunSetEnv(c *C) {
     "container_image": "d4ab34d3d4f8a72f5c4973051ae69fab+122",
     "cwd": "/bin",
     "environment": {"FROBIZ": "bilbo"},
-    "mounts": {},
+    "mounts": {"/tmp": {"kind": "tmp"} },
     "output_path": "/tmp",
     "priority": 1,
     "runtime_constraints": {}
 }`, func(t *TestDockerClient) {
-               t.stdoutWriter.Write([]byte(t.env[0][7:] + "\n"))
-               t.stdoutWriter.Close()
-               t.stderrWriter.Close()
+               t.logWriter.Write(dockerLog(1, t.env[0][7:]+"\n"))
+               t.logWriter.Close()
                t.finish <- dockerclient.WaitResult{ExitCode: 0}
        })
 
-       c.Check(api.Content["exit_code"], Equals, 0)
-       c.Check(api.Content["state"], Equals, "Complete")
+       c.Check(api.Content["container"].(arvadosclient.Dict)["exit_code"], Equals, 0)
+       c.Check(api.Content["container"].(arvadosclient.Dict)["state"], Equals, "Complete")
 
        c.Check(strings.HasSuffix(api.Logs["stdout"].String(), "bilbo\n"), Equals, true)
 }
+
+type ArvMountCmdLine struct {
+       Cmd []string
+}
+
+func (am *ArvMountCmdLine) ArvMountTest(c []string) (*exec.Cmd, error) {
+       am.Cmd = c
+       return nil, nil
+}
+
+func (s *TestSuite) TestSetupMounts(c *C) {
+       api := &ArvTestClient{}
+       kc := &KeepTestClient{}
+       cr := NewContainerRunner(api, kc, nil, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+       am := &ArvMountCmdLine{}
+       cr.RunArvMount = am.ArvMountTest
+
+       i := 0
+       cr.MkTempDir = func(string, string) (string, error) {
+               i += 1
+               d := fmt.Sprintf("/tmp/mktmpdir%d", i)
+               os.Mkdir(d, os.ModePerm)
+               return d, nil
+       }
+
+       {
+               cr.ContainerRecord.Mounts = make(map[string]Mount)
+               cr.ContainerRecord.Mounts["/tmp"] = Mount{Kind: "tmp"}
+               cr.OutputPath = "/tmp"
+
+               err := cr.SetupMounts()
+               c.Check(err, IsNil)
+               c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other", "--read-write", "--mount-by-pdh", "by_id", "/tmp/mktmpdir1"})
+               c.Check(cr.Binds, DeepEquals, []string{"/tmp/mktmpdir2:/tmp"})
+               cr.CleanupDirs()
+       }
+
+       {
+               i = 0
+               cr.ContainerRecord.Mounts = make(map[string]Mount)
+               cr.ContainerRecord.Mounts["/keeptmp"] = Mount{Kind: "collection", Writable: true}
+               cr.OutputPath = "/keeptmp"
+
+               os.MkdirAll("/tmp/mktmpdir1/tmp0", os.ModePerm)
+
+               err := cr.SetupMounts()
+               c.Check(err, IsNil)
+               c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other", "--read-write", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "/tmp/mktmpdir1"})
+               c.Check(cr.Binds, DeepEquals, []string{"/tmp/mktmpdir1/tmp0:/keeptmp"})
+               cr.CleanupDirs()
+       }
+
+       {
+               i = 0
+               cr.ContainerRecord.Mounts = make(map[string]Mount)
+               cr.ContainerRecord.Mounts["/keepinp"] = Mount{Kind: "collection", PortableDataHash: "59389a8f9ee9d399be35462a0f92541c+53"}
+               cr.ContainerRecord.Mounts["/keepout"] = Mount{Kind: "collection", Writable: true}
+               cr.OutputPath = "/keepout"
+
+               os.MkdirAll("/tmp/mktmpdir1/by_id/59389a8f9ee9d399be35462a0f92541c+53", os.ModePerm)
+               os.MkdirAll("/tmp/mktmpdir1/tmp0", os.ModePerm)
+
+               err := cr.SetupMounts()
+               c.Check(err, IsNil)
+               c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other", "--read-write", "--mount-tmp", "tmp0", "--mount-by-pdh", "by_id", "/tmp/mktmpdir1"})
+               var ss sort.StringSlice = cr.Binds
+               ss.Sort()
+               c.Check(cr.Binds, DeepEquals, []string{"/tmp/mktmpdir1/by_id/59389a8f9ee9d399be35462a0f92541c+53:/keepinp:ro",
+                       "/tmp/mktmpdir1/tmp0:/keepout"})
+               cr.CleanupDirs()
+       }
+}
index 9d97384109597d9b0e7e56ad96657c1171fafbf5..20928dbef769b0d4dd419ec0f8693541c93ba369 100644 (file)
@@ -35,6 +35,7 @@ type ThrottledLogger struct {
        stop        bool
        flusherDone chan bool
        Timestamper
+       Immediate *log.Logger
 }
 
 // RFC3339Fixed is a fixed-width version of RFC3339 with microsecond precision,
@@ -59,6 +60,9 @@ func (tl *ThrottledLogger) Write(p []byte) (n int, err error) {
        sc := bufio.NewScanner(bytes.NewBuffer(p))
        for sc.Scan() {
                _, err = fmt.Fprintf(tl.buf, "%s %s\n", now, sc.Text())
+               if tl.Immediate != nil {
+                       tl.Immediate.Printf("%s %s\n", now, sc.Text())
+               }
        }
        return len(p), err
 }
@@ -180,9 +184,10 @@ func (arvlog *ArvLogWriter) Write(p []byte) (n int, err error) {
        }
 
        // write to API
-       lr := arvadosclient.Dict{"object_uuid": arvlog.UUID,
-               "event_type": arvlog.loggingStream,
-               "properties": map[string]string{"text": string(p)}}
+       lr := arvadosclient.Dict{"log": arvadosclient.Dict{
+               "object_uuid": arvlog.UUID,
+               "event_type":  arvlog.loggingStream,
+               "properties":  map[string]string{"text": string(p)}}}
        err2 := arvlog.ArvClient.Create("logs", lr, nil)
 
        if err1 != nil || err2 != nil {
index d8fdaa31650c3e5b51930bfdb32d520cb89b86b5..bce324d478571aefe0dddf0a0647199a3f0fc1e4 100644 (file)
@@ -2,6 +2,7 @@ package main
 
 import (
        "fmt"
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        . "gopkg.in/check.v1"
        "time"
 )
@@ -23,7 +24,7 @@ var _ = Suite(&LoggingTestSuite{})
 func (s *LoggingTestSuite) TestWriteLogs(c *C) {
        api := &ArvTestClient{}
        kc := &KeepTestClient{}
-       cr := NewContainerRunner(api, kc, nil)
+       cr := NewContainerRunner(api, kc, nil, "zzzzz-zzzzzzzzzzzzzzz")
        cr.CrunchLog.Timestamper = (&TestTimestamper{}).Timestamp
 
        cr.CrunchLog.Print("Hello world!")
@@ -39,16 +40,17 @@ func (s *LoggingTestSuite) TestWriteLogs(c *C) {
        logtext := "2015-12-29T15:51:45.000000001Z Hello world!\n" +
                "2015-12-29T15:51:45.000000002Z Goodbye\n"
 
-       c.Check(api.Content["event_type"], Equals, "crunch-run")
-       c.Check(api.Content["properties"].(map[string]string)["text"], Equals, logtext)
+       c.Check(api.Content["log"].(arvadosclient.Dict)["event_type"], Equals, "crunch-run")
+       c.Check(api.Content["log"].(arvadosclient.Dict)["properties"].(map[string]string)["text"], Equals, logtext)
        c.Check(string(kc.Content), Equals, logtext)
 }
 
 func (s *LoggingTestSuite) TestWriteLogsLarge(c *C) {
        api := &ArvTestClient{}
        kc := &KeepTestClient{}
-       cr := NewContainerRunner(api, kc, nil)
+       cr := NewContainerRunner(api, kc, nil, "zzzzz-zzzzzzzzzzzzzzz")
        cr.CrunchLog.Timestamper = (&TestTimestamper{}).Timestamp
+       cr.CrunchLog.Immediate = nil
 
        for i := 0; i < 2000000; i += 1 {
                cr.CrunchLog.Printf("Hello %d", i)
@@ -67,7 +69,7 @@ func (s *LoggingTestSuite) TestWriteLogsLarge(c *C) {
 func (s *LoggingTestSuite) TestWriteMultipleLogs(c *C) {
        api := &ArvTestClient{}
        kc := &KeepTestClient{}
-       cr := NewContainerRunner(api, kc, nil)
+       cr := NewContainerRunner(api, kc, nil, "zzzzz-zzzzzzzzzzzzzzz")
        ts := &TestTimestamper{}
        cr.CrunchLog.Timestamper = ts.Timestamp
        stdout := NewThrottledLogger(cr.NewLogWriter("stdout"))
@@ -81,14 +83,14 @@ func (s *LoggingTestSuite) TestWriteMultipleLogs(c *C) {
        cr.CrunchLog.Close()
        logtext1 := "2015-12-29T15:51:45.000000001Z Hello world!\n" +
                "2015-12-29T15:51:45.000000003Z Goodbye\n"
-       c.Check(api.Content["event_type"], Equals, "crunch-run")
-       c.Check(api.Content["properties"].(map[string]string)["text"], Equals, logtext1)
+       c.Check(api.Content["log"].(arvadosclient.Dict)["event_type"], Equals, "crunch-run")
+       c.Check(api.Content["log"].(arvadosclient.Dict)["properties"].(map[string]string)["text"], Equals, logtext1)
 
        stdout.Close()
        logtext2 := "2015-12-29T15:51:45.000000002Z Doing stuff\n" +
                "2015-12-29T15:51:45.000000004Z Blurb\n"
-       c.Check(api.Content["event_type"], Equals, "stdout")
-       c.Check(api.Content["properties"].(map[string]string)["text"], Equals, logtext2)
+       c.Check(api.Content["log"].(arvadosclient.Dict)["event_type"], Equals, "stdout")
+       c.Check(api.Content["log"].(arvadosclient.Dict)["properties"].(map[string]string)["text"], Equals, logtext2)
 
        mt, err := cr.LogCollection.ManifestText()
        c.Check(err, IsNil)
index 4a2693a6788f473adbe90570b7faeffe7357b222..a068a2a77b3c6805b1eecdfc73d1df2eefd50ed0 100644 (file)
@@ -17,7 +17,11 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
        "git.curoverse.com/arvados.git/sdk/go/manifest"
        "io"
+       "log"
+       "os"
+       "path/filepath"
        "strings"
+       "sync"
 )
 
 // Block is a data block in a manifest stream
@@ -77,6 +81,12 @@ func (m *CollectionFileWriter) Close() error {
        return nil
 }
 
+func (m *CollectionFileWriter) NewFile(fn string) {
+       m.offset += m.length
+       m.length = 0
+       m.fn = fn
+}
+
 func (m *CollectionFileWriter) goUpload() {
        var errors []error
        uploader := m.uploader
@@ -93,11 +103,12 @@ func (m *CollectionFileWriter) goUpload() {
        finish <- errors
 }
 
-// CollectionWriter makes implements creating new Keep collections by opening files
+// CollectionWriter implements creating new Keep collections by opening files
 // and writing to them.
 type CollectionWriter struct {
        IKeepClient
        Streams []*CollectionFileWriter
+       mtx     sync.Mutex
 }
 
 // Open a new file for writing in the Keep collection.
@@ -125,6 +136,8 @@ func (m *CollectionWriter) Open(path string) io.WriteCloser {
                fn}
        go fw.goUpload()
 
+       m.mtx.Lock()
+       defer m.mtx.Unlock()
        m.Streams = append(m.Streams, fw)
 
        return fw
@@ -133,6 +146,9 @@ func (m *CollectionWriter) Open(path string) io.WriteCloser {
 // Finish writing the collection, wait for all blocks to complete uploading.
 func (m *CollectionWriter) Finish() error {
        var errstring string
+       m.mtx.Lock()
+       defer m.mtx.Unlock()
+
        for _, stream := range m.Streams {
                if stream.uploader == nil {
                        continue
@@ -168,7 +184,12 @@ func (m *CollectionWriter) ManifestText() (mt string, err error) {
 
        var buf bytes.Buffer
 
+       m.mtx.Lock()
+       defer m.mtx.Unlock()
        for _, v := range m.Streams {
+               if len(v.FileStreamSegments) == 0 {
+                       continue
+               }
                k := v.StreamName
                if k == "." {
                        buf.WriteString(".")
@@ -177,9 +198,13 @@ func (m *CollectionWriter) ManifestText() (mt string, err error) {
                        k = strings.Replace(k, "\n", "", -1)
                        buf.WriteString("./" + k)
                }
-               for _, b := range v.Blocks {
-                       buf.WriteString(" ")
-                       buf.WriteString(b)
+               if len(v.Blocks) > 0 {
+                       for _, b := range v.Blocks {
+                               buf.WriteString(" ")
+                               buf.WriteString(b)
+                       }
+               } else {
+                       buf.WriteString(" d41d8cd98f00b204e9800998ecf8427e+0")
                }
                for _, f := range v.FileStreamSegments {
                        buf.WriteString(" ")
@@ -191,3 +216,83 @@ func (m *CollectionWriter) ManifestText() (mt string, err error) {
        }
        return buf.String(), nil
 }
+
+type WalkUpload struct {
+       kc          IKeepClient
+       stripPrefix string
+       streamMap   map[string]*CollectionFileWriter
+       status      *log.Logger
+}
+
+// WalkFunc walks a directory tree, uploads each file found and adds it to the
+// CollectionWriter.
+func (m *WalkUpload) WalkFunc(path string, info os.FileInfo, err error) error {
+
+       if info.IsDir() {
+               return nil
+       }
+
+       var dir string
+       if len(path) > (len(m.stripPrefix) + len(info.Name()) + 1) {
+               dir = path[len(m.stripPrefix)+1 : (len(path) - len(info.Name()) - 1)]
+       }
+       if dir == "" {
+               dir = "."
+       }
+
+       fn := path[(len(path) - len(info.Name())):]
+
+       if m.streamMap[dir] == nil {
+               m.streamMap[dir] = &CollectionFileWriter{
+                       m.kc,
+                       &manifest.ManifestStream{StreamName: dir},
+                       0,
+                       0,
+                       nil,
+                       make(chan *Block),
+                       make(chan []error),
+                       ""}
+               go m.streamMap[dir].goUpload()
+       }
+
+       fileWriter := m.streamMap[dir]
+
+       // Reset the CollectionFileWriter for a new file
+       fileWriter.NewFile(fn)
+
+       file, err := os.Open(path)
+       if err != nil {
+               return err
+       }
+       defer file.Close()
+
+       m.status.Printf("Uploading %v/%v (%v bytes)", dir, fn, info.Size())
+
+       _, err = io.Copy(fileWriter, file)
+       if err != nil {
+               return err
+       }
+
+       // Commits the current file.  Legal to call this repeatedly.
+       fileWriter.Close()
+
+       return nil
+}
+
+func (cw *CollectionWriter) WriteTree(root string, status *log.Logger) (manifest string, err error) {
+       streamMap := make(map[string]*CollectionFileWriter)
+       wu := &WalkUpload{cw.IKeepClient, root, streamMap, status}
+       err = filepath.Walk(root, wu.WalkFunc)
+
+       if err != nil {
+               return "", err
+       }
+
+       cw.mtx.Lock()
+       for _, st := range streamMap {
+               cw.Streams = append(cw.Streams, st)
+       }
+       cw.mtx.Unlock()
+
+       return cw.ManifestText()
+}
diff --git a/services/crunch-run/upload_test.go b/services/crunch-run/upload_test.go
new file mode 100644 (file)
index 0000000..b4b1efd
--- /dev/null
@@ -0,0 +1,145 @@
+package main
+
+import (
+       . "gopkg.in/check.v1"
+       "io/ioutil"
+       "log"
+       "os"
+       "sync"
+)
+
+type UploadTestSuite struct{}
+
+// Gocheck boilerplate
+var _ = Suite(&UploadTestSuite{})
+
+func (s *UploadTestSuite) TestSimpleUpload(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
+
+       cw := CollectionWriter{&KeepTestClient{}, nil, sync.Mutex{}}
+       str, err := cw.WriteTree(tmpdir, log.New(os.Stdout, "", 0))
+       c.Check(err, IsNil)
+       c.Check(str, Equals, ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:file1.txt\n")
+}
+
+func (s *UploadTestSuite) TestSimpleUploadTwofiles(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
+       ioutil.WriteFile(tmpdir+"/"+"file2.txt", []byte("bar"), 0600)
+
+       cw := CollectionWriter{&KeepTestClient{}, nil, sync.Mutex{}}
+       str, err := cw.WriteTree(tmpdir, log.New(os.Stdout, "", 0))
+
+       c.Check(err, IsNil)
+       c.Check(str, Equals, ". 3858f62230ac3c915f300c664312c63f+6 0:3:file1.txt 3:3:file2.txt\n")
+}
+
+func (s *UploadTestSuite) TestSimpleUploadSubdir(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       os.Mkdir(tmpdir+"/subdir", 0700)
+
+       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
+       ioutil.WriteFile(tmpdir+"/subdir/file2.txt", []byte("bar"), 0600)
+
+       cw := CollectionWriter{&KeepTestClient{}, nil, sync.Mutex{}}
+       str, err := cw.WriteTree(tmpdir, log.New(os.Stdout, "", 0))
+
+       c.Check(err, IsNil)
+
+       // streams can get added in either order because of scheduling
+       // of goroutines.
+       if str != `. acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:file1.txt
+./subdir 37b51d194a7513e45b56f6524f2d51f2+3 0:3:file2.txt
+` && str != `./subdir 37b51d194a7513e45b56f6524f2d51f2+3 0:3:file2.txt
+. acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:file1.txt
+` {
+               c.Error("Did not get expected manifest text")
+       }
+}
+
+func (s *UploadTestSuite) TestSimpleUploadLarge(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       file, _ := os.Create(tmpdir + "/" + "file1.txt")
+       data := make([]byte, 1024*1024-1)
+       for i := range data {
+               data[i] = byte(i % 10)
+       }
+       for i := 0; i < 65; i++ {
+               file.Write(data)
+       }
+       file.Close()
+
+       ioutil.WriteFile(tmpdir+"/"+"file2.txt", []byte("bar"), 0600)
+
+       cw := CollectionWriter{&KeepTestClient{}, nil, sync.Mutex{}}
+       str, err := cw.WriteTree(tmpdir, log.New(os.Stdout, "", 0))
+
+       c.Check(err, IsNil)
+       c.Check(str, Equals, ". 00ecf01e0d93385115c9f8bed757425d+67108864 485cd630387b6b1846fe429f261ea05f+1048514 0:68157375:file1.txt 68157375:3:file2.txt\n")
+}
+
+func (s *UploadTestSuite) TestUploadEmptySubdir(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       os.Mkdir(tmpdir+"/subdir", 0700)
+
+       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
+
+       cw := CollectionWriter{&KeepTestClient{}, nil, sync.Mutex{}}
+       str, err := cw.WriteTree(tmpdir, log.New(os.Stdout, "", 0))
+
+       c.Check(err, IsNil)
+       c.Check(str, Equals, `. acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:file1.txt
+`)
+}
+
+func (s *UploadTestSuite) TestUploadEmptyFile(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte(""), 0600)
+
+       cw := CollectionWriter{&KeepTestClient{}, nil, sync.Mutex{}}
+       str, err := cw.WriteTree(tmpdir, log.New(os.Stdout, "", 0))
+
+       c.Check(err, IsNil)
+       c.Check(str, Equals, `. d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1.txt
+`)
+}
+
+func (s *UploadTestSuite) TestUploadError(c *C) {
+       tmpdir, _ := ioutil.TempDir("", "")
+       defer func() {
+               os.RemoveAll(tmpdir)
+       }()
+
+       ioutil.WriteFile(tmpdir+"/"+"file1.txt", []byte("foo"), 0600)
+
+       cw := CollectionWriter{&KeepErrorTestClient{}, nil, sync.Mutex{}}
+       str, err := cw.WriteTree(tmpdir, log.New(os.Stdout, "", 0))
+
+       c.Check(err, NotNil)
+       c.Check(str, Equals, "")
+}
index 1229f2917e21f9b17d897db8fe85e7adb4e43429..9b7eb7543a4ebed086aba2d409f44fcc789ef222 100644 (file)
@@ -130,8 +130,9 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
 
        sdkParams := arvadosclient.Dict{
                "select":  fieldsWanted,
-               "order":   []string{"modified_at ASC"},
-               "filters": [][]string{[]string{"modified_at", ">=", "1900-01-01T00:00:00Z"}}}
+               "order":   []string{"modified_at ASC", "uuid ASC"},
+               "filters": [][]string{[]string{"modified_at", ">=", "1900-01-01T00:00:00Z"}},
+               "offset":  0}
 
        if params.BatchSize > 0 {
                sdkParams["limit"] = params.BatchSize
@@ -176,9 +177,10 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
 
        // These values are just for getting the loop to run the first time,
        // afterwards they'll be set to real values.
-       previousTotalCollections := -1
-       totalCollections := 0
-       for totalCollections > previousTotalCollections {
+       remainingCollections := 1
+       var totalCollections int
+       var previousTotalCollections int
+       for remainingCollections > 0 {
                // We're still finding new collections
 
                // Write the heap profile for examining memory usage
@@ -193,6 +195,16 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
                if err != nil {
                        return
                }
+               batchCollections := len(collections.Items)
+
+               // We must always have at least one collection in the batch
+               if batchCollections < 1 {
+                       err = fmt.Errorf("API query returned no collections for %+v", sdkParams)
+                       return
+               }
+
+               // Update count of remaining collections
+               remainingCollections = collections.ItemsAvailable - sdkParams["offset"].(int) - batchCollections
 
                // Process collection and update our date filter.
                latestModificationDate, maxManifestSize, totalManifestSize, err := ProcessCollections(params.Logger,
@@ -202,16 +214,24 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
                if err != nil {
                        return results, err
                }
-               sdkParams["filters"].([][]string)[0][2] = latestModificationDate.Format(time.RFC3339)
+               if sdkParams["filters"].([][]string)[0][2] != latestModificationDate.Format(time.RFC3339) {
+                       sdkParams["filters"].([][]string)[0][2] = latestModificationDate.Format(time.RFC3339)
+                       sdkParams["offset"] = 0
+               } else {
+                       sdkParams["offset"] = sdkParams["offset"].(int) + batchCollections
+               }
 
                // update counts
                previousTotalCollections = totalCollections
                totalCollections = len(results.UUIDToCollection)
 
-               log.Printf("%d collections read, %d new in last batch, "+
+               log.Printf("%d collections read, %d (%d new) in last batch, "+
+                       "%d remaining, "+
                        "%s latest modified date, %.0f %d %d avg,max,total manifest size",
                        totalCollections,
+                       batchCollections,
                        totalCollections-previousTotalCollections,
+                       remainingCollections,
                        sdkParams["filters"].([][]string)[0][2],
                        float32(totalManifestSize)/float32(totalCollections),
                        maxManifestSize, totalManifestSize)
@@ -227,6 +247,25 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections, err e
                }
        }
 
+       // Make one final API request to verify that we have processed all collections available up to the latest modification date
+       var collections SdkCollectionList
+       sdkParams["filters"].([][]string)[0][1] = "<="
+       sdkParams["limit"] = 0
+       err = params.Client.List("collections", sdkParams, &collections)
+       if err != nil {
+               return
+       }
+       if totalCollections < collections.ItemsAvailable {
+               err = fmt.Errorf("API server indicates a total of %d collections "+
+                               "available up to %v, but we only retrieved %d. "+
+                               "Refusing to continue as this could indicate an "+
+                               "otherwise undetected failure.",
+                               collections.ItemsAvailable,
+                               sdkParams["filters"].([][]string)[0][2],
+                               totalCollections)
+               return
+       }
+
        // Write the heap profile for examining memory usage
        err = WriteHeapProfile()
 
index c98c95af66d89b257be6f7e79d4fd4371138281c..9bdcc5f7a123e5a0d15dc237e2144bfe5358fdb4 100644 (file)
@@ -131,12 +131,9 @@ class BaseComputeNodeDriver(RetryMixin):
             self.ping_host, arvados_node['uuid'],
             arvados_node['info']['ping_secret'])
 
-    def find_node(self, name):
-        node = [n for n in self.list_nodes() if n.name == name]
-        if node:
-            return node[0]
-        else:
-            return None
+    @staticmethod
+    def _name_key(cloud_object):
+        return cloud_object.name
 
     def create_node(self, size, arvados_node):
         try:
@@ -151,15 +148,12 @@ class BaseComputeNodeDriver(RetryMixin):
             # loop forever because subsequent create_node attempts will fail
             # due to node name collision.  So check if the node we intended to
             # create shows up in the cloud node list and return it if found.
-            try:
-                node = self.find_node(kwargs['name'])
-                if node:
-                    return node
-            except:
-                # Ignore possible exception from find_node in favor of
-                # re-raising the original create_node exception.
-                pass
-            raise
+            node = self.search_for(kwargs['name'], 'list_nodes', self._name_key)
+            if node:
+                return node
+            else:
+                # something else went wrong, re-raise the exception
+                raise
 
     def post_create_node(self, cloud_node):
         # ComputeNodeSetupActor calls this method after the cloud node is
index d89c48e270bcc119638c70fc3d5f2928fbe1f8e3..991a2983c7217f1a29368293513587d117d01d59 100644 (file)
@@ -75,9 +75,6 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         self.real.ex_create_tags(cloud_node,
                                  {'Name': arvados_node_fqdn(arvados_node)})
 
-    def find_node(self, name):
-        raise NotImplementedError("ec2.ComputeNodeDriver.find_node")
-
     def list_nodes(self):
         # Need to populate Node.size
         nodes = super(ComputeNodeDriver, self).list_nodes()
index c5bf0b8cda42d211adcfbb61ffb3d73f460a7830..bbabdd4c761b5a0e3809449878227adea7db0a5a 100644 (file)
@@ -38,10 +38,6 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         self._disktype_links = {dt.name: self._object_link(dt)
                                 for dt in self.real.ex_list_disktypes()}
 
-    @staticmethod
-    def _name_key(cloud_object):
-        return cloud_object.name
-
     @staticmethod
     def _object_link(cloud_object):
         return cloud_object.extra.get('selfLink')
index dd45165deaa1514789c42428dacbb4bcf862b5a5..dcfe1ceb133e671527e70967dd25a783d64210a6 100644 (file)
@@ -12,13 +12,15 @@ import httplib2
 import pykka
 from apiclient import errors as apierror
 
+from .fullstopactor import FullStopActor
+
 # IOError is the base class for socket.error, ssl.SSLError, and friends.
 # It seems like it hits the sweet spot for operations we want to retry:
 # it's low-level, but unlikely to catch code bugs.
 NETWORK_ERRORS = (IOError,)
 ARVADOS_ERRORS = NETWORK_ERRORS + (apierror.Error,)
 
-actor_class = pykka.ThreadingActor
+actor_class = FullStopActor
 
 class NodeManagerConfig(ConfigParser.SafeConfigParser):
     """Node Manager Configuration class.
diff --git a/services/nodemanager/arvnodeman/fullstopactor.py b/services/nodemanager/arvnodeman/fullstopactor.py
new file mode 100644 (file)
index 0000000..07e0625
--- /dev/null
@@ -0,0 +1,17 @@
+from __future__ import absolute_import, print_function
+
+import errno
+import logging
+import os
+import threading
+import traceback
+
+import pykka
+
+class FullStopActor(pykka.ThreadingActor):
+    def on_failure(self, exception_type, exception_value, tb):
+        lg = getattr(self, "_logger", logging)
+        if (exception_type in (threading.ThreadError, MemoryError) or
+            exception_type is OSError and exception_value.errno == errno.ENOMEM):
+            lg.critical("Unhandled exception is a fatal error, killing Node Manager")
+            os.killpg(os.getpgid(0), 9)
diff --git a/services/nodemanager/tests/test_failure.py b/services/nodemanager/tests/test_failure.py
new file mode 100644 (file)
index 0000000..afebb9c
--- /dev/null
@@ -0,0 +1,48 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function
+
+import errno
+import logging
+import threading
+import unittest
+
+import mock
+import pykka
+
+from . import testutil
+
+import arvnodeman.fullstopactor
+
+class BogusActor(arvnodeman.fullstopactor.FullStopActor):
+    def __init__(self, e):
+        super(BogusActor, self).__init__()
+        self.exp = e
+
+    def doStuff(self):
+        raise self.exp
+
+class ActorUnhandledExceptionTest(unittest.TestCase):
+    def test1(self):
+        for e in (MemoryError(), threading.ThreadError(), OSError(errno.ENOMEM, "")):
+            with mock.patch('os.killpg') as killpg_mock:
+                act = BogusActor.start(e)
+                act.tell({
+                    'command': 'pykka_call',
+                    'attr_path': ("doStuff",),
+                    'args': [],
+                    'kwargs': {}
+                })
+                act.stop(block=True)
+                self.assertTrue(killpg_mock.called)
+
+        with mock.patch('os.killpg') as killpg_mock:
+            act = BogusActor.start(OSError(errno.ENOENT, ""))
+            act.tell({
+                'command': 'pykka_call',
+                'attr_path': ("doStuff",),
+                'args': [],
+                'kwargs': {}
+            })
+            act.stop(block=True)
+            self.assertFalse(killpg_mock.called)
diff --git a/tools/arvbox/bin/arvbox b/tools/arvbox/bin/arvbox
new file mode 100755 (executable)
index 0000000..d790cb6
--- /dev/null
@@ -0,0 +1,402 @@
+#!/bin/sh
+
+set -e
+
+if ! test -d /sys/fs/cgroup ; then
+     echo "Arvbox requires cgroups to be mounted at /sys/fs/cgroup in order to use"
+     echo "Docker-in-Docker.  Older operating systems that put cgroups in other"
+     echo "places (such as /cgroup) are not supported."
+     exit 1
+fi
+
+if ! which docker >/dev/null 2>/dev/null ; then
+  echo "Arvbox requires Docker.  To install, run the following command as root:"
+  echo "curl -sSL https://get.docker.com/ | sh"
+  exit 1
+fi
+
+if test -z "$ARVBOX_DOCKER" ; then
+    if which greadlink >/dev/null 2>/dev/null ; then
+        ARVBOX_DOCKER=$(greadlink -f $(dirname $0)/../lib/arvbox/docker)
+    else
+        ARVBOX_DOCKER=$(readlink -f $(dirname $0)/../lib/arvbox/docker)
+    fi
+fi
+
+if test -z "$ARVBOX_CONTAINER" ; then
+    ARVBOX_CONTAINER=arvbox
+fi
+
+if test -z "$ARVBOX_BASE" ; then
+    ARVBOX_BASE="$HOME/.arvbox"
+fi
+
+if test -z "$ARVBOX_DATA" ; then
+    ARVBOX_DATA="$ARVBOX_BASE/$ARVBOX_CONTAINER"
+fi
+
+if test -z "$ARVADOS_ROOT" ; then
+    ARVADOS_ROOT="$ARVBOX_DATA/arvados"
+fi
+
+if test -z "$ARVADOS_DEV_ROOT" ; then
+    ARVADOS_DEV_ROOT="$ARVBOX_DATA/arvados-dev"
+fi
+
+if test -z "$SSO_ROOT" ; then
+    SSO_ROOT="$ARVBOX_DATA/sso-devise-omniauth-provider"
+fi
+
+PG_DATA="$ARVBOX_DATA/postgres"
+VAR_DATA="$ARVBOX_DATA/var"
+PASSENGER="$ARVBOX_DATA/passenger"
+GEMS="$ARVBOX_DATA/gems"
+
+getip() {
+    docker inspect $ARVBOX_CONTAINER | grep \"IPAddress\" | head -n1 | tr -d ' ":,\n' | cut -c10-
+}
+
+updateconf() {
+    if test -f ~/.config/arvados/$ARVBOX_CONTAINER.conf ; then
+        sed "s/ARVADOS_API_HOST=.*/ARVADOS_API_HOST=$(getip):8000/" <$HOME/.config/arvados/$ARVBOX_CONTAINER.conf >$HOME/.config/arvados/$ARVBOX_CONTAINER.conf.tmp
+        mv ~/.config/arvados/$ARVBOX_CONTAINER.conf.tmp ~/.config/arvados/$ARVBOX_CONTAINER.conf
+    else
+        mkdir -p $HOME/.config/arvados
+        cat >$HOME/.config/arvados/$ARVBOX_CONTAINER.conf <<EOF
+ARVADOS_API_HOST=$(getip):8000
+ARVADOS_API_TOKEN=
+ARVADOS_API_HOST_INSECURE=true
+EOF
+    fi
+}
+
+wait_for_arvbox() {
+    FF=/tmp/arvbox-fifo-$$
+    mkfifo $FF
+    docker logs -f $ARVBOX_CONTAINER > $FF &
+    LOGPID=$!
+    while read line ; do
+        echo $line
+        if echo $line | grep "Workbench is running at" >/dev/null ; then
+            kill $LOGPID
+        fi
+    done < $FF
+    rm $FF
+    echo
+    if test -n "$localip" ; then
+        echo "export ARVADOS_API_HOST=$localip:8000"
+    else
+        echo "export ARVADOS_API_HOST=$(getip):8000"
+    fi
+}
+
+run() {
+    if docker ps -a | grep -E "$ARVBOX_CONTAINER$" -q ; then
+        echo "Container $ARVBOX_CONTAINER is already running, use stop, restart or reboot"
+        exit 0
+    fi
+
+    if echo "$1" | grep '^public' ; then
+        if test -n "$ARVBOX_PUBLISH_IP" ; then
+            localip=$ARVBOX_PUBLISH_IP
+        else
+            defaultdev=$(/sbin/ip route|awk '/default/ { print $5 }')
+            localip=$(ip addr show $defaultdev | grep 'inet ' | sed 's/ *inet \(.*\)\/.*/\1/')
+        fi
+        iptemp=$(tempfile)
+        echo $localip > $iptemp
+        chmod og+r $iptemp
+        PUBLIC="--volume=$iptemp:/var/run/localip_override
+              --publish=80:80
+              --publish=8000:8000
+              --publish=8900:8900
+              --publish=9001:9001
+              --publish=9002:9002
+              --publish=25100:25100
+              --publish=25107:25107
+              --publish=25108:25108
+              --publish=8001:8001"
+    else
+        PUBLIC=""
+    fi
+
+    if echo "$1" | grep 'demo$' ; then
+        if test -d "$ARVBOX_DATA" ; then
+            echo "It looks like you already have a development container named $ARVBOX_CONTAINER."
+            echo "Set ARVBOX_CONTAINER to set a different name for your demo container"
+            exit 1
+        fi
+
+        if ! (docker ps -a | grep -E "$ARVBOX_CONTAINER-data$" -q) ; then
+            docker create -v /var/lib/postgresql -v /var/lib/arvados --name $ARVBOX_CONTAINER-data arvados/arvbox-demo /bin/true
+        fi
+
+        docker run \
+               --detach \
+               --name=$ARVBOX_CONTAINER \
+               --privileged \
+               --volumes-from $ARVBOX_CONTAINER-data \
+               $PUBLIC \
+               arvados/arvbox-demo
+        updateconf
+        wait_for_arvbox
+    else
+        mkdir -p "$PG_DATA" "$VAR_DATA" "$PASSENGER" "$GEMS"
+
+        if ! test -d "$ARVADOS_ROOT" ; then
+            git clone https://github.com/curoverse/arvados.git "$ARVADOS_ROOT"
+        fi
+        if ! test -d "$SSO_ROOT" ; then
+            git clone https://github.com/curoverse/sso-devise-omniauth-provider.git "$SSO_ROOT"
+        fi
+
+        if test "$1" = test ; then
+            shift
+
+            if ! test -d "$ARVADOS_DEV_ROOT" ; then
+                git clone https://github.com/curoverse/arvados-dev.git "$ARVADOS_DEV_ROOT"
+            fi
+
+            mkdir -p $VAR_DATA/test
+
+            docker run \
+                   --detach \
+                   --name=$ARVBOX_CONTAINER \
+                   --privileged \
+                   "--volume=$ARVADOS_ROOT:/usr/src/arvados:rw" \
+                   "--volume=$ARVADOS_DEV_ROOT:/usr/src/arvados-dev:rw" \
+                   "--volume=$SSO_ROOT:/usr/src/sso:rw" \
+                   "--volume=$PG_DATA:/var/lib/postgresql:rw" \
+                   "--volume=$VAR_DATA:/var/lib/arvados:rw" \
+                   "--volume=$PASSENGER:/var/lib/passenger:rw" \
+                   "--volume=$GEMS:/var/lib/gems:rw" \
+                   arvados/arvbox-dev \
+                   /usr/local/bin/runsvinit -svdir=/etc/test-service
+
+            docker exec -ti \
+                    $ARVBOX_CONTAINER \
+                    /usr/local/lib/arvbox/runsu.sh \
+                    /usr/local/lib/arvbox/waitforpostgres.sh
+
+            docker exec -ti \
+                   $ARVBOX_CONTAINER \
+                   /usr/local/lib/arvbox/runsu.sh \
+                   /var/lib/arvbox/service/sso/run-service --only-setup
+
+            docker exec -ti \
+                   $ARVBOX_CONTAINER \
+                   /usr/local/lib/arvbox/runsu.sh \
+                   /var/lib/arvbox/service/api/run-service --only-setup
+
+            docker exec -ti \
+                   $ARVBOX_CONTAINER \
+                   /usr/local/lib/arvbox/runsu.sh \
+                   /usr/src/arvados-dev/jenkins/run-tests.sh \
+                   --temp /var/lib/arvados/test \
+                   WORKSPACE=/usr/src/arvados \
+                   GEM_HOME=/var/lib/gems \
+                   "$@"
+        elif echo "$1" | grep 'dev$' ; then
+            docker run \
+                   --detach \
+                   --name=$ARVBOX_CONTAINER \
+                   --privileged \
+                   "--volume=$ARVADOS_ROOT:/usr/src/arvados:rw" \
+                   "--volume=$SSO_ROOT:/usr/src/sso:rw" \
+                   "--volume=$PG_DATA:/var/lib/postgresql:rw" \
+                   "--volume=$VAR_DATA:/var/lib/arvados:rw" \
+                   "--volume=$PASSENGER:/var/lib/passenger:rw" \
+                   "--volume=$GEMS:/var/lib/gems:rw" \
+                   $PUBLIC \
+                   arvados/arvbox-dev
+            updateconf
+            wait_for_arvbox
+            echo "The Arvados source code is checked out at: $ARVADOS_ROOT"
+        else
+            echo "Unknown configuration '$1'"
+        fi
+    fi
+}
+
+stop() {
+    if docker ps -a --filter "status=running" | grep -E "$ARVBOX_CONTAINER$" -q ; then
+        docker stop $ARVBOX_CONTAINER
+    fi
+
+    VOLUMES=--volumes=true
+    if docker ps -a --filter "status=created" | grep -E "$ARVBOX_CONTAINER$" -q ; then
+        docker rm $VOLUMES $ARVBOX_CONTAINER
+    fi
+    if docker ps -a --filter "status=exited" | grep -E "$ARVBOX_CONTAINER$" -q ; then
+        docker rm $VOLUMES $ARVBOX_CONTAINER
+    fi
+}
+
+build() {
+    if ! test -f "$ARVBOX_DOCKER/Dockerfile.base" ;  then
+        echo "Could not find Dockerfile ($ARVBOX_DOCKER/Dockerfile.base)"
+        exit 1
+    fi
+    docker build -t arvados/arvbox-base -f "$ARVBOX_DOCKER/Dockerfile.base" "$ARVBOX_DOCKER"
+    if test "$1" = localdemo -o "$1" = publicdemo ; then
+        docker build -t arvados/arvbox-demo -f "$ARVBOX_DOCKER/Dockerfile.demo" "$ARVBOX_DOCKER"
+    else
+        docker build -t arvados/arvbox-dev -f "$ARVBOX_DOCKER/Dockerfile.dev" "$ARVBOX_DOCKER"
+    fi
+}
+
+check() {
+    case "$1" in
+        localdemo|publicdemo|dev|publicdev|test)
+            true
+            ;;
+        *)
+            echo "Argument to $subcmd must be one of localdemo, publicdemo, dev, publicdev, test"
+            exit 1
+        ;;
+    esac
+}
+
+subcmd="$1"
+if test -n "$subcmd" ; then
+    shift
+fi
+case "$subcmd" in
+    build)
+        check $@
+        build $@
+        ;;
+
+    start|run)
+        check $@
+        run $@
+        ;;
+
+    sh*)
+        docker exec -ti $ARVBOX_CONTAINER /usr/bin/env TERM=$TERM GEM_HOME=/var/lib/gems /bin/bash
+        ;;
+
+    stop)
+        stop
+        ;;
+
+    restart)
+        check $@
+        stop
+        run $@
+        ;;
+
+    reboot)
+        check $@
+        stop
+        build $@
+        run $@
+        ;;
+
+    ip|open)
+        if test "$subcmd" = 'ip' ; then
+            echo $(getip)
+        else
+            xdg-open http://$(getip)
+        fi
+        ;;
+
+    status)
+        echo "Selected: $ARVBOX_CONTAINER"
+        if docker ps -a --filter "status=running" | grep -E "$ARVBOX_CONTAINER$" -q ; then
+            echo "Status: running"
+            echo "IP: $(getip)"
+        else
+            echo "Status: not running"
+        fi
+        if test -d "$ARVBOX_DATA" ; then
+            echo "Data: $ARVBOX_DATA"
+        elif docker ps -a | grep -E "$ARVBOX_CONTAINER-data$" -q ; then
+            echo "Data: $ARVBOX_CONTAINER-data"
+        else
+            echo "Data: none"
+        fi
+        ;;
+
+    reset|destroy)
+        stop
+        if test -d "$ARVBOX_DATA" ; then
+            if test "$subcmd" = destroy ; then
+                if test "$1" != -f ; then
+                    echo "WARNING!  This will delete your entire arvbox ($ARVBOX_DATA)."
+                    echo "Use destroy -f if you really mean it."
+                    exit 1
+                fi
+                set -x
+                rm -rf "$ARVBOX_DATA"
+            else
+                if test "$1" != -f ; then
+                    echo "WARNING!  This will delete your arvbox data ($ARVBOX_DATA)."
+                    echo "Code and downloaded packages will be preserved."
+                    echo "Use reset -f if you really mean it."
+                    exit 1
+                fi
+                set -x
+                rm -rf "$ARVBOX_DATA/postgres"
+                rm -rf "$ARVBOX_DATA/var"
+            fi
+        else
+            if test "$1" != -f ; then
+                echo "WARNING!  This will delete your data container $ARVBOX_CONTAINER-data.  Use -f if you really mean it."
+                exit 1
+            fi
+            set -x
+            docker rm "$ARVBOX_CONTAINER-data"
+        fi
+        ;;
+
+    log)
+        if test -n "$1" ; then
+            docker exec -ti $ARVBOX_CONTAINER /usr/bin/env TERM=$TERM less --follow-name +GF "/etc/service/$1/log/main/current"
+        else
+            docker exec -ti $ARVBOX_CONTAINER /usr/bin/env TERM=$TERM tail $(docker exec -ti $ARVBOX_CONTAINER find -L /etc -path '/etc/service/*/log/main/current' -printf " %p")
+        fi
+        ;;
+
+    sv)
+        if test -n "$1" ; then
+            docker exec -ti $ARVBOX_CONTAINER sv "$1" "$2"
+        else
+            echo "Usage: $0 $subcmd <service>"
+            echo "Available services:"
+            docker exec -ti $ARVBOX_CONTAINER ls /etc/service
+        fi
+        ;;
+
+    clone)
+        if test -n "$2" ; then
+            cp -r "$ARVBOX_BASE/$1" "$ARVBOX_BASE/$2"
+            echo "Created new arvbox $2"
+            echo "export ARVBOX_CONTAINER=$2"
+        else
+            echo "clone <from> <to>   clone an arvbox"
+            echo "available arvboxes: $(ls $ARVBOX_BASE)"
+        fi
+        ;;
+
+    *)
+        echo "Arvados-in-a-box"
+        echo
+        echo "$(basename $0) (build|start|run|open|shell|ip|status|stop|restart|reboot|reset|destroy|log|sv|clone)"
+        echo
+        echo "build <config>      build arvbox Docker image"
+        echo "start|run <config>  start $ARVBOX_CONTAINER container"
+        echo "open       open arvbox workbench in a web browser"
+        echo "shell      enter arvbox shell"
+        echo "ip         print arvbox ip address"
+        echo "status     print some information about current arvbox"
+        echo "stop       stop arvbox container"
+        echo "restart <config>  stop, then run again"
+        echo "reboot  <config>  stop, build arvbox Docker image, run"
+        echo "reset      delete arvbox arvados data (be careful!)"
+        echo "destroy    delete all arvbox code and data (be careful!)"
+        echo "log       <service> tail log of specified service"
+        echo "sv        <start|stop|restart> <service> change state of service inside arvbox"
+        echo "clone <from> <to>   clone an arvbox"
+        ;;
+esac
diff --git a/tools/arvbox/lib/arvbox/docker/Dockerfile.base b/tools/arvbox/lib/arvbox/docker/Dockerfile.base
new file mode 100644 (file)
index 0000000..280ac68
--- /dev/null
@@ -0,0 +1,42 @@
+FROM debian:8
+
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get -yq install \
+    postgresql-9.4 git gcc golang-go runit \
+    ruby rake bundler curl libpq-dev \
+    libcurl4-openssl-dev libssl-dev zlib1g-dev libpcre3-dev \
+    openssh-server python-setuptools netcat-traditional \
+    libpython-dev fuse libfuse-dev python-pip python-yaml \
+    pkg-config libattr1-dev python-llfuse python-pycurl \
+    libwww-perl libio-socket-ssl-perl libcrypt-ssleay-perl \
+    libjson-perl nginx gitolite3 lsof python-epydoc graphviz \
+    apt-transport-https ca-certificates slurm-wlm
+
+VOLUME /var/lib/docker
+VOLUME /var/log/nginx
+VOLUME /etc/ssl/private
+
+RUN apt-key adv --keyserver hkp://pool.sks-keyservers.net:80 --recv-keys 58118E89F3A912897C070ADBF76221572C52609D || \
+    apt-key adv --keyserver hkp://pgp.mit.edu:80 --recv-keys 58118E89F3A912897C070ADBF76221572C52609D
+
+RUN mkdir -p /etc/apt/sources.list.d && \
+    echo deb https://apt.dockerproject.org/repo debian-jessie main > /etc/apt/sources.list.d/docker.list && \
+    apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get -yq install docker-engine=1.9.1-0~jessie
+
+RUN rm -rf /var/lib/postgresql && mkdir -p /var/lib/postgresql
+
+RUN cd /root && \
+    GOPATH=$PWD go get github.com/curoverse/runsvinit && \
+    install bin/runsvinit /usr/local/bin
+
+ADD fuse.conf /etc/
+
+ADD crunch-setup.sh gitolite.rc \
+    keep-setup.sh common.sh createusers.sh \
+    logger runsu.sh waitforpostgres.sh \
+    application_yml_override.py \
+    /usr/local/lib/arvbox/
+
+# Start the supervisor.
+CMD ["/usr/local/bin/runsvinit"]
diff --git a/tools/arvbox/lib/arvbox/docker/Dockerfile.demo b/tools/arvbox/lib/arvbox/docker/Dockerfile.demo
new file mode 100644 (file)
index 0000000..1f13415
--- /dev/null
@@ -0,0 +1,17 @@
+FROM arvados/arvbox-base
+
+RUN cd /usr/src && \
+    git clone https://github.com/curoverse/arvados.git && \
+    git clone https://github.com/curoverse/sso-devise-omniauth-provider.git sso
+
+ADD service/ /var/lib/arvbox/service
+RUN rmdir /etc/service && ln -sf /var/lib/arvbox/service /etc
+
+RUN chown -R 1000:1000 /usr/src && /usr/local/lib/arvbox/createusers.sh
+
+RUN sudo -u arvbox /var/lib/arvbox/service/sso/run-service --only-deps
+RUN sudo -u arvbox /var/lib/arvbox/service/api/run-service --only-deps
+RUN sudo -u arvbox /var/lib/arvbox/service/workbench/run-service --only-deps
+RUN sudo -u arvbox /var/lib/arvbox/service/doc/run-service --only-deps
+RUN sudo -u arvbox /var/lib/arvbox/service/vm/run-service --only-deps
+RUN sudo -u arvbox /var/lib/arvbox/service/sdk/run-service
diff --git a/tools/arvbox/lib/arvbox/docker/Dockerfile.dev b/tools/arvbox/lib/arvbox/docker/Dockerfile.dev
new file mode 100644 (file)
index 0000000..051c274
--- /dev/null
@@ -0,0 +1,16 @@
+FROM arvados/arvbox-base
+
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get -yq install \
+    python-virtualenv python3-virtualenv linkchecker xvfb iceweasel
+
+RUN set -e && \
+ PJS=phantomjs-1.9.7-linux-x86_64 && \
+ curl -L -o/tmp/$PJS.tar.bz2 https://bitbucket.org/ariya/phantomjs/downloads/$PJS.tar.bz2 && \
+ tar -C /usr/local -xjf /tmp/$PJS.tar.bz2 && \
+ ln -s ../$PJS/bin/phantomjs /usr/local/bin/
+
+ADD service/ /var/lib/arvbox/service
+RUN rmdir /etc/service && ln -sf /var/lib/arvbox/service /etc
+
+RUN mkdir /etc/test-service && ln -sf /var/lib/arvbox/service/postgres /etc/test-service
diff --git a/tools/arvbox/lib/arvbox/docker/application_yml_override.py b/tools/arvbox/lib/arvbox/docker/application_yml_override.py
new file mode 100755 (executable)
index 0000000..98a8e48
--- /dev/null
@@ -0,0 +1,24 @@
+#!/usr/bin/env python
+
+import yaml
+
+try:
+    with open("application.yml.override") as f:
+        b = yaml.load(f)
+except IOError:
+    exit()
+
+with open("application.yml") as f:
+    a = yaml.load(f)
+
+def recursiveMerge(a, b):
+    if isinstance(a, dict) and isinstance(b, dict):
+        for k in b:
+            print k
+            a[k] = recursiveMerge(a.get(k), b[k])
+        return a
+    else:
+        return b
+
+with open("application.yml", "w") as f:
+    yaml.dump(recursiveMerge(a, b), f)
diff --git a/tools/arvbox/lib/arvbox/docker/common.sh b/tools/arvbox/lib/arvbox/docker/common.sh
new file mode 100644 (file)
index 0000000..4c2de47
--- /dev/null
@@ -0,0 +1,62 @@
+
+if test -s /var/run/localip_override ; then
+    localip=$(cat /var/run/localip_override)
+else
+    defaultdev=$(/sbin/ip route|awk '/default/ { print $5 }')
+    localip=$(ip addr show $defaultdev | grep 'inet ' | sed 's/ *inet \(.*\)\/.*/\1/')
+fi
+
+export GEM_HOME=/var/lib/gems
+export GEM_PATH=/var/lib/gems
+
+declare -A services
+services=(
+  [workbench]=80
+  [api]=8000
+  [sso]=8900
+  [arv-git-httpd]=9001
+  [keep-web]=9002
+  [keepproxy]=25100
+  [keepstore0]=25107
+  [keepstore1]=25108
+  [ssh]=22
+  [doc]=8001
+)
+
+if test "$(id arvbox -u 2>/dev/null)" = 0 ; then
+    PGUSER=postgres
+    PGGROUP=postgres
+else
+    PGUSER=arvbox
+    PGGROUP=arvbox
+fi
+
+run_bundler() {
+    if test -f Gemfile.lock ; then
+        frozen=--frozen
+    else
+        frozen=""
+    fi
+    if ! flock /var/lib/arvados/gems.lock bundle install --path $GEM_HOME --local --no-deployment $frozen "$@" ; then
+        flock /var/lib/arvados/gems.lock bundle install --path $GEM_HOME --no-deployment $frozen "$@"
+    fi
+}
+
+pip_install() {
+    pushd /var/lib/arvados/pip
+    for p in $(ls http*.tar.gz) ; do
+        if test -f $p ; then
+            ln -sf $p $(echo $p | sed 's/.*%2F\(.*\)/\1/')
+        fi
+    done
+    for p in $(ls http*.whl) ; do
+        if test -f $p ; then
+            ln -sf $p $(echo $p | sed 's/.*%2F\(.*\)/\1/')
+        fi
+    done
+    popd
+
+    if ! pip install --no-index --find-links /var/lib/arvados/pip $1 ; then
+        pip install $1
+    fi
+}
diff --git a/tools/arvbox/lib/arvbox/docker/createusers.sh b/tools/arvbox/lib/arvbox/docker/createusers.sh
new file mode 100755 (executable)
index 0000000..b77c9c2
--- /dev/null
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+set -e -o pipefail
+
+if ! grep "^arvbox:" /etc/passwd >/dev/null 2>/dev/null ; then
+    HOSTUID=$(ls -nd /usr/src/arvados | sed 's/ */ /' | cut -d' ' -f4)
+    HOSTGID=$(ls -nd /usr/src/arvados | sed 's/ */ /' | cut -d' ' -f5)
+    FUSEGID=$(ls -nd /dev/fuse | sed 's/ */ /' | cut -d' ' -f5)
+
+    mkdir -p /var/lib/arvados/git /var/lib/gems /var/lib/passenger
+
+    groupadd --gid $HOSTGID --non-unique arvbox
+    groupadd --gid $FUSEGID --non-unique fuse
+    groupadd --gid $HOSTGID --non-unique git
+    useradd --home-dir /var/lib/arvados \
+            --uid $HOSTUID --gid $HOSTGID \
+            --non-unique \
+            --groups docker,fuse \
+            arvbox
+    useradd --home-dir /var/lib/arvados/git --uid $HOSTUID --gid $HOSTGID --non-unique git
+    useradd --groups docker,fuse crunch
+
+    chown arvbox:arvbox -R /usr/local /var/lib/arvados /var/lib/gems \
+          /var/lib/passenger /var/lib/postgresql \
+          /var/lib/nginx /var/log/nginx /etc/ssl/private
+
+    mkdir -p /var/lib/gems/ruby/2.1.0
+    chown arvbox:arvbox -R /var/lib/gems/ruby/2.1.0
+
+    mkdir -p /tmp/crunch0 /tmp/crunch1
+    chown crunch:crunch -R /tmp/crunch0 /tmp/crunch1
+
+    echo "arvbox    ALL=(crunch) NOPASSWD: ALL" >> /etc/sudoers
+fi
diff --git a/tools/arvbox/lib/arvbox/docker/crunch-setup.sh b/tools/arvbox/lib/arvbox/docker/crunch-setup.sh
new file mode 100755 (executable)
index 0000000..178fec1
--- /dev/null
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+exec 2>&1
+set -eux -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+
+mkdir -p /var/lib/arvados/gostuff
+cd /var/lib/arvados/gostuff
+
+export GOPATH=$PWD
+mkdir -p "$GOPATH/src/git.curoverse.com"
+ln -sfn "/usr/src/arvados" "$GOPATH/src/git.curoverse.com/arvados.git"
+flock /var/lib/arvados/gostuff.lock go get -t "git.curoverse.com/arvados.git/services/crunchstat"
+install bin/crunchstat /usr/local/bin
+
+export ARVADOS_API_HOST=$localip:${services[api]}
+export ARVADOS_API_HOST_INSECURE=1
+export ARVADOS_API_TOKEN=$(cat /usr/src/arvados/services/api/superuser_token)
+export CRUNCH_JOB_BIN=/usr/src/arvados/sdk/cli/bin/crunch-job
+export PERLLIB=/usr/src/arvados/sdk/perl/lib
+export CRUNCH_TMP=/tmp/$1
+export CRUNCH_DISPATCH_LOCKFILE=/var/lock/$1-dispatch
+export CRUNCH_JOB_DOCKER_BIN=docker
+export HOME=/tmp/$1
+
+cd /usr/src/arvados/services/api
+exec bundle exec ./script/crunch-dispatch.rb development
diff --git a/tools/arvbox/lib/arvbox/docker/fuse.conf b/tools/arvbox/lib/arvbox/docker/fuse.conf
new file mode 100644 (file)
index 0000000..a439ab8
--- /dev/null
@@ -0,0 +1 @@
+user_allow_other
diff --git a/tools/arvbox/lib/arvbox/docker/gitolite.rc b/tools/arvbox/lib/arvbox/docker/gitolite.rc
new file mode 100644 (file)
index 0000000..03c4b29
--- /dev/null
@@ -0,0 +1,213 @@
+# This is based on the default Gitolite configuration file with the following
+# changes applied as described here:
+# http://doc.arvados.org/install/install-arv-git-httpd.html
+
+# configuration variables for gitolite
+
+# This file is in perl syntax.  But you do NOT need to know perl to edit it --
+# just mind the commas, use single quotes unless you know what you're doing,
+# and make sure the brackets and braces stay matched up!
+
+# (Tip: perl allows a comma after the last item in a list also!)
+
+# HELP for commands can be had by running the command with "-h".
+
+# HELP for all the other FEATURES can be found in the documentation (look for
+# "list of non-core programs shipped with gitolite" in the master index) or
+# directly in the corresponding source file.
+
+my $repo_aliases;
+my $aliases_src = "$ENV{HOME}/.gitolite/arvadosaliases.pl";
+if ($ENV{HOME} && (-e $aliases_src)) {
+    $repo_aliases = do $aliases_src;
+}
+$repo_aliases ||= {};
+
+%RC = (
+
+    REPO_ALIASES => $repo_aliases,
+
+    # ------------------------------------------------------------------
+
+    # default umask gives you perms of '0700'; see the rc file docs for
+    # how/why you might change this
+    UMASK                           =>  0022,
+
+    # look for "git-config" in the documentation
+    GIT_CONFIG_KEYS                 =>  '',
+
+    # comment out if you don't need all the extra detail in the logfile
+    LOG_EXTRA                       =>  1,
+    # logging options
+    # 1. leave this section as is for 'normal' gitolite logging (default)
+    # 2. uncomment this line to log ONLY to syslog:
+    # LOG_DEST                      => 'syslog',
+    # 3. uncomment this line to log to syslog and the normal gitolite log:
+    # LOG_DEST                      => 'syslog,normal',
+    # 4. prefixing "repo-log," to any of the above will **also** log just the
+    #    update records to "gl-log" in the bare repo directory:
+    # LOG_DEST                      => 'repo-log,normal',
+    # LOG_DEST                      => 'repo-log,syslog',
+    # LOG_DEST                      => 'repo-log,syslog,normal',
+
+    # roles.  add more roles (like MANAGER, TESTER, ...) here.
+    #   WARNING: if you make changes to this hash, you MUST run 'gitolite
+    #   compile' afterward, and possibly also 'gitolite trigger POST_COMPILE'
+    ROLES => {
+        READERS                     =>  1,
+        WRITERS                     =>  1,
+    },
+
+    # enable caching (currently only Redis).  PLEASE RTFM BEFORE USING!!!
+    # CACHE                         =>  'Redis',
+
+    # ------------------------------------------------------------------
+
+    # rc variables used by various features
+
+    # the 'info' command prints this as additional info, if it is set
+        # SITE_INFO                 =>  'Please see http://blahblah/gitolite for more help',
+
+    # the CpuTime feature uses these
+        # display user, system, and elapsed times to user after each git operation
+        # DISPLAY_CPU_TIME          =>  1,
+        # display a warning if total CPU times (u, s, cu, cs) crosses this limit
+        # CPU_TIME_WARN_LIMIT       =>  0.1,
+
+    # the Mirroring feature needs this
+        # HOSTNAME                  =>  "foo",
+
+    # TTL for redis cache; PLEASE SEE DOCUMENTATION BEFORE UNCOMMENTING!
+        # CACHE_TTL                 =>  600,
+
+    # ------------------------------------------------------------------
+
+    # suggested locations for site-local gitolite code (see cust.html)
+
+        # this one is managed directly on the server
+        # LOCAL_CODE                =>  "$ENV{HOME}/local",
+
+        # or you can use this, which lets you put everything in a subdirectory
+        # called "local" in your gitolite-admin repo.  For a SECURITY WARNING
+        # on this, see http://gitolite.com/gitolite/non-core.html#pushcode
+        # LOCAL_CODE                =>  "$rc{GL_ADMIN_BASE}/local",
+
+    # ------------------------------------------------------------------
+
+    # List of commands and features to enable
+
+    ENABLE => [
+
+        # COMMANDS
+
+            # These are the commands enabled by default
+            'help',
+            'desc',
+            'info',
+            'perms',
+            'writable',
+
+            # Uncomment or add new commands here.
+            # 'create',
+            # 'fork',
+            # 'mirror',
+            # 'readme',
+            # 'sskm',
+            # 'D',
+
+        # These FEATURES are enabled by default.
+
+            # essential (unless you're using smart-http mode)
+            'ssh-authkeys',
+
+            # creates git-config entries from gitolite.conf file entries like 'config foo.bar = baz'
+            'git-config',
+
+            # creates git-daemon-export-ok files; if you don't use git-daemon, comment this out
+            'daemon',
+
+            # creates projects.list file; if you don't use gitweb, comment this out
+            'gitweb',
+
+        # These FEATURES are disabled by default; uncomment to enable.  If you
+        # need to add new ones, ask on the mailing list :-)
+
+        # user-visible behaviour
+
+            # prevent wild repos auto-create on fetch/clone
+            # 'no-create-on-read',
+            # no auto-create at all (don't forget to enable the 'create' command!)
+            # 'no-auto-create',
+
+            # access a repo by another (possibly legacy) name
+            'Alias',
+
+            # give some users direct shell access.  See documentation in
+            # sts.html for details on the following two choices.
+            # "Shell $ENV{HOME}/.gitolite.shell-users",
+            # 'Shell alice bob',
+
+            # set default roles from lines like 'option default.roles-1 = ...', etc.
+            # 'set-default-roles',
+
+            # show more detailed messages on deny
+            # 'expand-deny-messages',
+
+            # show a message of the day
+            # 'Motd',
+
+        # system admin stuff
+
+            # enable mirroring (don't forget to set the HOSTNAME too!)
+            # 'Mirroring',
+
+            # allow people to submit pub files with more than one key in them
+            # 'ssh-authkeys-split',
+
+            # selective read control hack
+            # 'partial-copy',
+
+            # manage local, gitolite-controlled, copies of read-only upstream repos
+            # 'upstream',
+
+            # updates 'description' file instead of 'gitweb.description' config item
+            # 'cgit',
+
+            # allow repo-specific hooks to be added
+            # 'repo-specific-hooks',
+
+        # performance, logging, monitoring...
+
+            # be nice
+            # 'renice 10',
+
+            # log CPU times (user, system, cumulative user, cumulative system)
+            # 'CpuTime',
+
+        # syntactic_sugar for gitolite.conf and included files
+
+            # allow backslash-escaped continuation lines in gitolite.conf
+            # 'continuation-lines',
+
+            # create implicit user groups from directory names in keydir/
+            # 'keysubdirs-as-groups',
+
+            # allow simple line-oriented macros
+            # 'macros',
+
+        # Kindergarten mode
+
+            # disallow various things that sensible people shouldn't be doing anyway
+            # 'Kindergarten',
+    ],
+
+);
+
+# ------------------------------------------------------------------------------
+# per perl rules, this should be the last line in such a file:
+1;
+
+# Local variables:
+# mode: perl
+# End:
+# vim: set syn=perl:
diff --git a/tools/arvbox/lib/arvbox/docker/keep-setup.sh b/tools/arvbox/lib/arvbox/docker/keep-setup.sh
new file mode 100755 (executable)
index 0000000..b66463f
--- /dev/null
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+exec 2>&1
+sleep 2
+set -eux -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+
+mkdir -p /var/lib/arvados/gostuff
+cd /var/lib/arvados/gostuff
+
+export GOPATH=$PWD
+mkdir -p "$GOPATH/src/git.curoverse.com"
+ln -sfn "/usr/src/arvados" "$GOPATH/src/git.curoverse.com/arvados.git"
+flock /var/lib/arvados/gostuff.lock go get -t "git.curoverse.com/arvados.git/services/keepstore"
+install bin/keepstore /usr/local/bin
+
+mkdir -p /var/lib/arvados/$1
+
+export ARVADOS_API_HOST=$localip:${services[api]}
+export ARVADOS_API_HOST_INSECURE=1
+export ARVADOS_API_TOKEN=$(cat /var/lib/arvados/superuser_token)
+
+set +e
+read -rd $'\000' keepservice <<EOF
+{
+ "service_host":"$localip",
+ "service_port":$2,
+ "service_ssl_flag":false,
+ "service_type":"disk"
+}
+EOF
+set -e
+
+if test -s /var/lib/arvados/$1-uuid ; then
+    keep_uuid=$(cat /var/lib/arvados/$1-uuid)
+    arv keep_service update --uuid $keep_uuid --keep-service "$keepservice"
+else
+    UUID=$(arv --format=uuid keep_service create --keep-service "$keepservice")
+    echo $UUID > /var/lib/arvados/$1-uuid
+fi
+
+set +e
+killall -HUP keepproxy
+
+exec /usr/local/bin/keepstore \
+     -listen=:$2 \
+     -enforce-permissions=true \
+     -blob-signing-key-file=/var/lib/arvados/blob_signing_key \
+     -max-buffers=20 \
+     -volume=/var/lib/arvados/$1
diff --git a/tools/arvbox/lib/arvbox/docker/logger b/tools/arvbox/lib/arvbox/docker/logger
new file mode 100755 (executable)
index 0000000..a79a518
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/sh
+exec svlogd -tt ./main
diff --git a/tools/arvbox/lib/arvbox/docker/runit-docker/.gitignore b/tools/arvbox/lib/arvbox/docker/runit-docker/.gitignore
new file mode 100644 (file)
index 0000000..bbf313b
--- /dev/null
@@ -0,0 +1,32 @@
+# Object files
+*.o
+*.ko
+*.obj
+*.elf
+
+# Precompiled Headers
+*.gch
+*.pch
+
+# Libraries
+*.lib
+*.a
+*.la
+*.lo
+
+# Shared objects (inc. Windows DLLs)
+*.dll
+*.so
+*.so.*
+*.dylib
+
+# Executables
+*.exe
+*.out
+*.app
+*.i*86
+*.x86_64
+*.hex
+
+# Debug files
+*.dSYM/
diff --git a/tools/arvbox/lib/arvbox/docker/runit-docker/LICENSE b/tools/arvbox/lib/arvbox/docker/runit-docker/LICENSE
new file mode 100644 (file)
index 0000000..d158667
--- /dev/null
@@ -0,0 +1,28 @@
+Copyright (c) 2015, Kosma Moczek
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+* Neither the name of runit-docker nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
diff --git a/tools/arvbox/lib/arvbox/docker/runit-docker/Makefile b/tools/arvbox/lib/arvbox/docker/runit-docker/Makefile
new file mode 100644 (file)
index 0000000..9a28963
--- /dev/null
@@ -0,0 +1,18 @@
+CFLAGS=-std=c99 -Wall -O2 -fPIC -D_POSIX_SOURCE -D_GNU_SOURCE
+LDLIBS=-ldl
+
+PROGNAME=runit-docker
+
+all: $(PROGNAME).so
+
+%.so: %.c
+       gcc -shared $(CFLAGS) $(LDLIBS) -o $@ $^
+
+install: runit-docker.so
+       mkdir -p $(DESTDIR)/sbin
+       mkdir -p $(DESTDIR)/lib
+       install -m 755 $(PROGNAME) $(DESTDIR)/sbin/
+       install -m 755 $(PROGNAME).so $(DESTDIR)/lib/
+
+clean:
+       $(RM) $(PROGNAME).so
diff --git a/tools/arvbox/lib/arvbox/docker/runit-docker/README.md b/tools/arvbox/lib/arvbox/docker/runit-docker/README.md
new file mode 100644 (file)
index 0000000..1bcb8cc
--- /dev/null
@@ -0,0 +1,24 @@
+# runit-docker
+
+Docker and `runsvdir` don't quite agree on what each signal means, causing
+TONS of frustration when attempting to use `runsvdir` as init under Docker.
+`runit-docker` is a plug'n'play adapter library which does signal translation
+without the overhead and nuisance of running a nanny process.
+
+## Features
+
+* Pressing Ctrl-C does a clean shutdown.
+* `docker stop` does a clean shutdown.
+
+Under the hood, `runit-docker` translates `SIGTERM` and `SIGINT` to `SIGHUP`.
+
+## Usage
+
+* Build with `make`, install with `make install`.
+* Add `CMD ["/sbin/runit-docker"]` to your `Dockerfile`.
+* Run `debian/rules clean build binary` to build a Debian package.
+
+## Author
+
+runit-docker was written by Kosma Moczek &lt;kosma.moczek@pixers.pl&gt; during a single Scrum
+planning meeting. Damn meetings.
diff --git a/tools/arvbox/lib/arvbox/docker/runit-docker/debian/changelog b/tools/arvbox/lib/arvbox/docker/runit-docker/debian/changelog
new file mode 100644 (file)
index 0000000..7d8689f
--- /dev/null
@@ -0,0 +1,12 @@
+runit-docker (1.1) unstable; urgency=low
+
+  * Simplify logic.
+  * Install for SIGINT as well.
+
+ -- Kosma Moczek <kosma@kosma.pl>  Mon, 11 May 2015 12:23:59 +0000
+
+runit-docker (1.0) unstable; urgency=low
+
+  * Initial release
+
+ -- Kosma Moczek <kosma@kosma.pl>  Mon, 11 May 2015 12:23:59 +0000
diff --git a/tools/arvbox/lib/arvbox/docker/runit-docker/debian/compat b/tools/arvbox/lib/arvbox/docker/runit-docker/debian/compat
new file mode 100644 (file)
index 0000000..ec63514
--- /dev/null
@@ -0,0 +1 @@
+9
diff --git a/tools/arvbox/lib/arvbox/docker/runit-docker/debian/control b/tools/arvbox/lib/arvbox/docker/runit-docker/debian/control
new file mode 100644 (file)
index 0000000..4060915
--- /dev/null
@@ -0,0 +1,14 @@
+Source: runit-docker
+Section: contrib/admin
+Priority: optional
+Maintainer: Kosma Moczek <kosma@kosma.pl>
+Build-Depends: debhelper (>= 9)
+Standards-Version: 3.9.5
+Homepage: https://github.com/kosma/runit-docker
+#Vcs-Git: git://anonscm.debian.org/collab-maint/runit-docker.git
+#Vcs-Browser: http://anonscm.debian.org/?p=collab-maint/runit-docker.git;a=summary
+
+Package: runit-docker
+Architecture: any
+Depends: ${shlibs:Depends}, ${misc:Depends}
+Description: painlessly use runit in Docker containers
diff --git a/tools/arvbox/lib/arvbox/docker/runit-docker/debian/copyright b/tools/arvbox/lib/arvbox/docker/runit-docker/debian/copyright
new file mode 100644 (file)
index 0000000..8679a6a
--- /dev/null
@@ -0,0 +1,31 @@
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: runit-docker
+Source: https://github.com/kosma/runit-docker
+
+Files: *
+Copyright: 2015 Kosma Moczek <kosma@kosma.pl>
+License: MIT
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+ * Neither the name of runit-docker nor the names of its
+   contributors may be used to endorse or promote products derived from
+   this software without specific prior written permission.
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/tools/arvbox/lib/arvbox/docker/runit-docker/debian/docs b/tools/arvbox/lib/arvbox/docker/runit-docker/debian/docs
new file mode 100644 (file)
index 0000000..b43bf86
--- /dev/null
@@ -0,0 +1 @@
+README.md
diff --git a/tools/arvbox/lib/arvbox/docker/runit-docker/debian/rules b/tools/arvbox/lib/arvbox/docker/runit-docker/debian/rules
new file mode 100755 (executable)
index 0000000..ce15cce
--- /dev/null
@@ -0,0 +1,32 @@
+#!/usr/bin/make -f
+# See debhelper(7) (uncomment to enable)
+# output every command that modifies files on the build system.
+#DH_VERBOSE = 1
+
+# see EXAMPLES in dpkg-buildflags(1) and read /usr/share/dpkg/*
+DPKG_EXPORT_BUILDFLAGS = 1
+include /usr/share/dpkg/default.mk
+
+# see FEATURE AREAS in dpkg-buildflags(1)
+#export DEB_BUILD_MAINT_OPTIONS = hardening=+all
+
+# see ENVIRONMENT in dpkg-buildflags(1)
+# package maintainers to append CFLAGS
+#export DEB_CFLAGS_MAINT_APPEND  = -Wall -pedantic
+# package maintainers to append LDFLAGS
+#export DEB_LDFLAGS_MAINT_APPEND = -Wl,--as-needed
+
+
+# main packaging script based on dh7 syntax
+%:
+       dh $@ 
+
+# debmake generated override targets
+# This is example for Cmake (See http://bugs.debian.org/641051 )
+#override_dh_auto_configure:
+#      dh_auto_configure -- \
+#      -DCMAKE_LIBRARY_PATH=$(DEB_HOST_MULTIARCH)
+
+
+
+
diff --git a/tools/arvbox/lib/arvbox/docker/runit-docker/debian/source/format b/tools/arvbox/lib/arvbox/docker/runit-docker/debian/source/format
new file mode 100644 (file)
index 0000000..163aaf8
--- /dev/null
@@ -0,0 +1 @@
+3.0 (quilt)
diff --git a/tools/arvbox/lib/arvbox/docker/runit-docker/runit-docker b/tools/arvbox/lib/arvbox/docker/runit-docker/runit-docker
new file mode 100755 (executable)
index 0000000..fdbaad5
--- /dev/null
@@ -0,0 +1,4 @@
+#!/bin/sh
+
+export LD_PRELOAD=/lib/runit-docker.so
+exec runsvdir /etc/service
diff --git a/tools/arvbox/lib/arvbox/docker/runit-docker/runit-docker.c b/tools/arvbox/lib/arvbox/docker/runit-docker/runit-docker.c
new file mode 100644 (file)
index 0000000..825a35f
--- /dev/null
@@ -0,0 +1,32 @@
+#include <signal.h>
+#include <dlfcn.h>
+#include <stdlib.h>
+
+
+int sigaction(int signum, const struct sigaction *act, struct sigaction *oldact)
+{
+  static int (*real_sigaction)(int signum, const struct sigaction *act, struct sigaction *oldact) = NULL;
+
+  // Retrieve the real sigaction we just shadowed.
+  if (real_sigaction == NULL) {
+    real_sigaction = (void *) dlsym(RTLD_NEXT, "sigaction");
+    // Prevent further shadowing in children.
+    unsetenv("LD_PRELOAD");
+  }
+
+  if (signum == SIGTERM) {
+    // Skip this handler, it doesn't do what we want.
+    return 0;
+  }
+
+  if (signum == SIGHUP) {
+    // Install this handler for others as well.
+    real_sigaction(SIGTERM, act, oldact);
+    real_sigaction(SIGINT, act, oldact);
+  }
+
+  // Forward the call to the real sigaction.
+  return real_sigaction(signum, act, oldact);
+}
+
+// vim: ts=2 sw=2 et
diff --git a/tools/arvbox/lib/arvbox/docker/runsu.sh b/tools/arvbox/lib/arvbox/docker/runsu.sh
new file mode 100755 (executable)
index 0000000..1557d09
--- /dev/null
@@ -0,0 +1,14 @@
+#!/bin/sh
+
+HOSTUID=$(ls -nd /usr/src/arvados | sed 's/ */ /' | cut -d' ' -f4)
+HOSTGID=$(ls -nd /usr/src/arvados | sed 's/ */ /' | cut -d' ' -f5)
+
+flock /var/lib/arvados/createusers.lock /usr/local/lib/arvbox/createusers.sh
+
+export HOME=/var/lib/arvados
+
+if test -z "$1" ; then
+    exec chpst -u arvbox:arvbox:docker $0-service
+else
+    exec chpst -u arvbox:arvbox:docker $@
+fi
diff --git a/tools/arvbox/lib/arvbox/docker/service/api/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/api/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/api/log/run b/tools/arvbox/lib/arvbox/docker/service/api/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/api/run b/tools/arvbox/lib/arvbox/docker/service/api/run
new file mode 120000 (symlink)
index 0000000..a388c8b
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/api/run-service b/tools/arvbox/lib/arvbox/docker/service/api/run-service
new file mode 100755 (executable)
index 0000000..058939c
--- /dev/null
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+exec 2>&1
+set -ex -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+
+cd /usr/src/arvados/services/api
+export RAILS_ENV=development
+
+run_bundler --without=development
+bundle exec passenger start --runtime-check-only --runtime-dir=/var/lib/passenger
+
+if test "$1" = "--only-deps" ; then
+    exit
+fi
+
+set -u
+
+if ! test -s /var/lib/arvados/api_uuid_prefix ; then
+    ruby -e 'puts "#{rand(2**64).to_s(36)[0,5]}"' > /var/lib/arvados/api_uuid_prefix
+fi
+uuid_prefix=$(cat /var/lib/arvados/api_uuid_prefix)
+
+if ! test -s /var/lib/arvados/api_secret_token ; then
+    ruby -e 'puts rand(2**400).to_s(36)' > /var/lib/arvados/api_secret_token
+fi
+secret_token=$(cat /var/lib/arvados/api_secret_token)
+
+if ! test -s /var/lib/arvados/blob_signing_key ; then
+    ruby -e 'puts rand(2**400).to_s(36)' > /var/lib/arvados/blob_signing_key
+fi
+blob_signing_key=$(cat /var/lib/arvados/blob_signing_key)
+
+# self signed key will be created by SSO server script.
+test -s /var/lib/arvados/self-signed.key
+
+sso_app_secret=$(cat /var/lib/arvados/sso_app_secret)
+
+if test -s /var/lib/arvados/vm-uuid ; then
+    vm_uuid=$(cat /var/lib/arvados/vm-uuid)
+else
+    vm_uuid=$uuid_prefix-2x53u-$(ruby -e 'puts rand(2**400).to_s(36)[0,15]')
+    echo $vm_uuid > /var/lib/arvados/vm-uuid
+fi
+
+cat >config/application.yml <<EOF
+development:
+  uuid_prefix: $uuid_prefix
+  secret_token: $secret_token
+  blob_signing_key: $blob_signing_key
+  sso_app_secret: $sso_app_secret
+  sso_app_id: arvados-server
+  sso_provider_url: "https://$localip:${services[sso]}"
+  sso_insecure: true
+  workbench_address: "http://$localip/"
+  git_repo_ssh_base: "git@$localip:"
+  git_repo_https_base: "http://$localip:${services[arv-git-httpd]}/"
+  new_users_are_active: true
+  auto_admin_first_user: true
+  auto_setup_new_users: true
+  auto_setup_new_users_with_vm_uuid: $vm_uuid
+  auto_setup_new_users_with_repository: true
+  default_collection_replication: 1
+EOF
+
+(cd config && /usr/local/lib/arvbox/application_yml_override.py)
+
+if ! test -f /var/lib/arvados/api_database_pw ; then
+    ruby -e 'puts rand(2**128).to_s(36)' > /var/lib/arvados/api_database_pw
+fi
+database_pw=$(cat /var/lib/arvados/api_database_pw)
+
+if ! (psql postgres -c "\du" | grep "^ arvados ") >/dev/null ; then
+    psql postgres -c "create user arvados with password '$database_pw'"
+    psql postgres -c "ALTER USER arvados CREATEDB;"
+fi
+
+sed "s/password:.*/password: $database_pw/" <config/database.yml.example >config/database.yml
+
+if ! test -f /var/lib/arvados/api_database_setup ; then
+   bundle exec rake db:setup
+   touch /var/lib/arvados/api_database_setup
+fi
+
+if ! test -s /var/lib/arvados/superuser_token ; then
+    bundle exec ./script/create_superuser_token.rb > /var/lib/arvados/superuser_token
+fi
+
+rm -rf tmp
+
+bundle exec rake db:migrate
+
+set +u
+if test "$1" = "--only-setup" ; then
+    exit
+fi
+
+ARVADOS_WEBSOCKETS=1 exec bundle exec passenger start --port=${services[api]} \
+                  --runtime-dir=/var/lib/passenger \
+                  --ssl --ssl-certificate=/var/lib/arvados/self-signed.pem \
+                  --ssl-certificate-key=/var/lib/arvados/self-signed.key
diff --git a/tools/arvbox/lib/arvbox/docker/service/arv-git-httpd/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/arv-git-httpd/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/arv-git-httpd/log/run b/tools/arvbox/lib/arvbox/docker/service/arv-git-httpd/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/arv-git-httpd/run b/tools/arvbox/lib/arvbox/docker/service/arv-git-httpd/run
new file mode 120000 (symlink)
index 0000000..a388c8b
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/arv-git-httpd/run-service b/tools/arvbox/lib/arvbox/docker/service/arv-git-httpd/run-service
new file mode 100755 (executable)
index 0000000..854464e
--- /dev/null
@@ -0,0 +1,27 @@
+#!/bin/bash
+
+exec 2>&1
+set -eux -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+
+mkdir -p /var/lib/arvados/gostuff
+cd /var/lib/arvados/gostuff
+
+export GOPATH=$PWD
+mkdir -p "$GOPATH/src/git.curoverse.com"
+ln -sfn "/usr/src/arvados" "$GOPATH/src/git.curoverse.com/arvados.git"
+flock /var/lib/arvados/gostuff.lock go get -t "git.curoverse.com/arvados.git/services/arv-git-httpd"
+install bin/arv-git-httpd /usr/local/bin
+
+export ARVADOS_API_HOST=$localip:${services[api]}
+export ARVADOS_API_HOST_INSECURE=1
+export GITOLITE_HTTP_HOME=/var/lib/arvados/git
+export GL_BYPASS_ACCESS_CHECKS=1
+export PATH="$PATH:/var/lib/arvados/git/bin"
+cd ~git
+
+exec /usr/local/bin/arv-git-httpd \
+     -address=:${services[arv-git-httpd]} \
+     -git-command=/usr/share/gitolite3/gitolite-shell \
+     -repo-root=/var/lib/arvados/git/repositories
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/log/run b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/run b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/run
new file mode 120000 (symlink)
index 0000000..a388c8b
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/run-service b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch-local/run-service
new file mode 100755 (executable)
index 0000000..211b438
--- /dev/null
@@ -0,0 +1,22 @@
+#!/bin/bash
+
+exec 2>&1
+set -eux -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+
+mkdir -p /var/lib/arvados/gostuff
+cd /var/lib/arvados/gostuff
+
+export GOPATH=$PWD
+mkdir -p "$GOPATH/src/git.curoverse.com"
+ln -sfn "/usr/src/arvados" "$GOPATH/src/git.curoverse.com/arvados.git"
+flock /var/lib/arvados/gostuff.lock go get -t "git.curoverse.com/arvados.git/services/crunch-run"
+flock /var/lib/arvados/gostuff.lock go get -t "git.curoverse.com/arvados.git/services/crunch-dispatch-local"
+install bin/crunch-run bin/crunch-dispatch-local /usr/local/bin
+
+export ARVADOS_API_HOST=$localip:${services[api]}
+export ARVADOS_API_HOST_INSECURE=1
+export ARVADOS_API_TOKEN=$(cat /var/lib/arvados/superuser_token)
+
+exec /usr/local/bin/crunch-dispatch-local -crunch-run-command=/usr/local/bin/crunch-run
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/log/run b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/run b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/run
new file mode 120000 (symlink)
index 0000000..a388c8b
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/run-service b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch0/run-service
new file mode 100755 (executable)
index 0000000..fa3a73a
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/sh
+exec /usr/local/lib/arvbox/crunch-setup.sh crunch0
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/log/run b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/run b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/run
new file mode 120000 (symlink)
index 0000000..a388c8b
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/run-service b/tools/arvbox/lib/arvbox/docker/service/crunch-dispatch1/run-service
new file mode 100755 (executable)
index 0000000..6430e9c
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/sh
+sleep 1
+exec /usr/local/lib/arvbox/crunch-setup.sh crunch1
diff --git a/tools/arvbox/lib/arvbox/docker/service/doc/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/doc/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/doc/log/run b/tools/arvbox/lib/arvbox/docker/service/doc/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/doc/run b/tools/arvbox/lib/arvbox/docker/service/doc/run
new file mode 120000 (symlink)
index 0000000..a388c8b
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/doc/run-service b/tools/arvbox/lib/arvbox/docker/service/doc/run-service
new file mode 100755 (executable)
index 0000000..1ee62f4
--- /dev/null
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+exec 2>&1
+set -ex -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+
+cd /usr/src/arvados/doc
+run_bundler --without=development
+
+if test "$1" = "--only-deps" ; then
+    exit
+fi
+
+set -u
+
+cat <<EOF >/var/lib/arvados/doc-nginx.conf
+worker_processes auto;
+pid /var/lib/arvados/doc-nginx.pid;
+
+error_log stderr;
+daemon off;
+
+events {
+       worker_connections 64;
+}
+
+http {
+     access_log off;
+     include /etc/nginx/mime.types;
+     default_type application/octet-stream;
+     server {
+            listen ${services[doc]} default_server;
+            listen [::]:${services[doc]} default_server;
+            root /usr/src/arvados/doc/.site;
+            index index.html;
+            server_name _;
+     }
+}
+EOF
+
+bundle exec rake generate baseurl=http://$localip:${services[doc]} arvados_api_host=$localip:${services[api]} arvados_workbench_host=http://$localip
+
+exec nginx -c /var/lib/arvados/doc-nginx.conf
diff --git a/tools/arvbox/lib/arvbox/docker/service/docker/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/docker/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/docker/log/run b/tools/arvbox/lib/arvbox/docker/service/docker/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/docker/run b/tools/arvbox/lib/arvbox/docker/service/docker/run
new file mode 100755 (executable)
index 0000000..1ecdc16
--- /dev/null
@@ -0,0 +1,102 @@
+#!/bin/bash
+
+# Taken from https://github.com/jpetazzo/dind
+
+exec 2>&1
+
+# Ensure that all nodes in /dev/mapper correspond to mapped devices currently loaded by the device-mapper kernel driver
+dmsetup mknodes
+
+: ${LOG:=stdio}
+
+# First, make sure that cgroups are mounted correctly.
+CGROUP=/sys/fs/cgroup
+[ -d $CGROUP ] || mkdir $CGROUP
+
+if mountpoint -q $CGROUP ; then
+    true
+else
+    mount -n -t tmpfs -o uid=0,gid=0,mode=0755 cgroup $CGROUP
+fi
+
+if ! mountpoint -q $CGROUP ; then
+    echo "Could not find or mount cgroups. Tried /sys/fs/cgroup and /cgroup.  Did you use --privileged?"
+    exit 1
+fi
+
+if [ -d /sys/kernel/security ] && ! mountpoint -q /sys/kernel/security
+then
+    mount -t securityfs none /sys/kernel/security || {
+        echo "Could not mount /sys/kernel/security."
+        echo "AppArmor detection and --privileged mode might break."
+    }
+fi
+
+# Mount the cgroup hierarchies exactly as they are in the parent system.
+for SUBSYS in $(cut -d: -f2 /proc/1/cgroup)
+do
+        [ -d $CGROUP/$SUBSYS ] || mkdir $CGROUP/$SUBSYS
+        mountpoint -q $CGROUP/$SUBSYS ||
+                mount -n -t cgroup -o $SUBSYS cgroup $CGROUP/$SUBSYS
+
+        # The two following sections address a bug which manifests itself
+        # by a cryptic "lxc-start: no ns_cgroup option specified" when
+        # trying to start containers withina container.
+        # The bug seems to appear when the cgroup hierarchies are not
+        # mounted on the exact same directories in the host, and in the
+        # container.
+
+        # Named, control-less cgroups are mounted with "-o name=foo"
+        # (and appear as such under /proc/<pid>/cgroup) but are usually
+        # mounted on a directory named "foo" (without the "name=" prefix).
+        # Systemd and OpenRC (and possibly others) both create such a
+        # cgroup. To avoid the aforementioned bug, we symlink "foo" to
+        # "name=foo". This shouldn't have any adverse effect.
+        echo $SUBSYS | grep -q ^name= && {
+                NAME=$(echo $SUBSYS | sed s/^name=//)
+                ln -s $SUBSYS $CGROUP/$NAME
+        }
+
+        # Likewise, on at least one system, it has been reported that
+        # systemd would mount the CPU and CPU accounting controllers
+        # (respectively "cpu" and "cpuacct") with "-o cpuacct,cpu"
+        # but on a directory called "cpu,cpuacct" (note the inversion
+        # in the order of the groups). This tries to work around it.
+        [ $SUBSYS = cpuacct,cpu ] && ln -s $SUBSYS $CGROUP/cpu,cpuacct
+done
+
+# Note: as I write those lines, the LXC userland tools cannot setup
+# a "sub-container" properly if the "devices" cgroup is not in its
+# own hierarchy. Let's detect this and issue a warning.
+grep -q :devices: /proc/1/cgroup ||
+       echo "WARNING: the 'devices' cgroup should be in its own hierarchy."
+grep -qw devices /proc/1/cgroup ||
+       echo "WARNING: it looks like the 'devices' cgroup is not mounted."
+
+# Now, close extraneous file descriptors.
+pushd /proc/self/fd >/dev/null
+for FD in *
+do
+       case "$FD" in
+       # Keep stdin/stdout/stderr
+       [012])
+               ;;
+       # Nuke everything else
+       *)
+               eval exec "$FD>&-"
+               ;;
+       esac
+done
+popd >/dev/null
+
+
+# If a pidfile is still around (for example after a container restart),
+# delete it so that docker can start.
+rm -rf /var/run/docker.pid
+
+read pid cmd state ppid pgrp session tty_nr tpgid rest < /proc/self/stat
+trap "kill -TERM -$pgrp; exit" EXIT TERM QUIT
+
+if ! docker daemon --storage-driver=overlay $DOCKER_DAEMON_ARGS ; then
+    docker daemon $DOCKER_DAEMON_ARGS
+fi
diff --git a/tools/arvbox/lib/arvbox/docker/service/gitolite/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/gitolite/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/gitolite/log/run b/tools/arvbox/lib/arvbox/docker/service/gitolite/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/gitolite/run b/tools/arvbox/lib/arvbox/docker/service/gitolite/run
new file mode 120000 (symlink)
index 0000000..a388c8b
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/gitolite/run-service b/tools/arvbox/lib/arvbox/docker/service/gitolite/run-service
new file mode 100755 (executable)
index 0000000..e0e8771
--- /dev/null
@@ -0,0 +1,116 @@
+#!/bin/bash
+
+exec 2>&1
+set -eux -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+
+mkdir -p /var/lib/arvados/git
+
+export ARVADOS_API_HOST=$localip:${services[api]}
+export ARVADOS_API_HOST_INSECURE=1
+export ARVADOS_API_TOKEN=$(cat /var/lib/arvados/superuser_token)
+
+export USER=git
+export USERNAME=git
+export LOGNAME=git
+export HOME=/var/lib/arvados/git
+
+cd ~arvbox
+
+mkdir -p ~arvbox/.ssh ~git/.ssh
+chmod 0700 ~arvbox/.ssh ~git/.ssh
+
+if ! test -s ~arvbox/.ssh/id_rsa ; then
+    ssh-keygen -t rsa -P '' -f .ssh/id_rsa
+    cp ~arvbox/.ssh/id_rsa ~arvbox/.ssh/id_rsa.pub ~git/.ssh
+fi
+
+if test -s ~arvbox/.ssh/known_hosts ; then
+    ssh-keygen -f ".ssh/known_hosts" -R localhost
+fi
+
+if ! test -f /var/lib/arvados/gitolite-setup ; then
+    cd ~git
+
+    # Do a no-op login to populate known_hosts
+    # with the hostkey, so it won't try to ask
+    # about it later.
+    cp .ssh/id_rsa.pub .ssh/authorized_keys
+    ssh -o stricthostkeychecking=no git@localhost true
+    rm .ssh/authorized_keys
+
+    cp /usr/local/lib/arvbox/gitolite.rc .gitolite.rc
+
+    gitolite setup -pk .ssh/id_rsa.pub
+
+    if ! test -d gitolite-admin ; then
+        git clone git@localhost:gitolite-admin
+    fi
+
+    cd gitolite-admin
+    git config user.email arvados
+    git config user.name arvados
+    git config push.default simple
+    git push
+
+    touch /var/lib/arvados/gitolite-setup
+else
+    # Do a no-op login to populate known_hosts
+    # with the hostkey, so it won't try to ask
+    # about it later.  Don't run anything,
+    # get the default gitolite behavior.
+    ssh -o stricthostkeychecking=no git@localhost
+fi
+
+prefix=$(arv --format=uuid user current | cut -d- -f1)
+
+if ! test -s /var/lib/arvados/arvados-git-uuid ; then
+    repo_uuid=$(arv --format=uuid repository create --repository "{\"owner_uuid\":\"$prefix-tpzed-000000000000000\", \"name\":\"arvados\"}")
+    echo $repo_uuid > /var/lib/arvados/arvados-git-uuid
+fi
+
+repo_uuid=$(cat /var/lib/arvados/arvados-git-uuid)
+
+if ! test -s /var/lib/arvados/arvados-git-link-uuid ; then
+    all_users_group_uuid="$prefix-j7d0g-fffffffffffffff"
+
+    set +e
+    read -rd $'\000' newlink <<EOF
+{
+ "tail_uuid":"$all_users_group_uuid",
+ "head_uuid":"$repo_uuid",
+ "link_class":"permission",
+ "name":"can_read"
+}
+EOF
+    set -e
+    link_uuid=$(arv --format=uuid link create --link "$newlink")
+    echo $link_uuid > /var/lib/arvados/arvados-git-link-uuid
+fi
+
+if ! test -d /var/lib/arvados/git/repositories/$repo_uuid.git ; then
+    git clone --bare /usr/src/arvados /var/lib/arvados/git/repositories/$repo_uuid.git
+else
+    git --git-dir=/var/lib/arvados/git/repositories/$repo_uuid.git fetch -f /usr/src/arvados master:master
+fi
+
+cd /usr/src/arvados/services/api
+export RAILS_ENV=development
+
+git_user_key=$(cat ~git/.ssh/id_rsa.pub)
+
+cat > config/arvados-clients.yml <<EOF
+development:
+  gitolite_url: /var/lib/arvados/git/repositories/gitolite-admin.git
+  gitolite_tmp: /var/lib/arvados/git
+  arvados_api_host: $localip:${services[api]}
+  arvados_api_token: "$ARVADOS_API_TOKEN"
+  arvados_api_host_insecure: true
+  gitolite_arvados_git_user_key: "$git_user_key"
+EOF
+
+while true ; do
+    bundle exec script/arvados-git-sync.rb development
+    sleep 120
+done
diff --git a/tools/arvbox/lib/arvbox/docker/service/keep-web/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/keep-web/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/keep-web/log/run b/tools/arvbox/lib/arvbox/docker/service/keep-web/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/keep-web/run b/tools/arvbox/lib/arvbox/docker/service/keep-web/run
new file mode 120000 (symlink)
index 0000000..a388c8b
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/keep-web/run-service b/tools/arvbox/lib/arvbox/docker/service/keep-web/run-service
new file mode 100755 (executable)
index 0000000..a2c6aa1
--- /dev/null
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+exec 2>&1
+set -eux -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+
+mkdir -p /var/lib/arvados/gostuff
+cd /var/lib/arvados/gostuff
+
+export GOPATH=$PWD
+mkdir -p "$GOPATH/src/git.curoverse.com"
+ln -sfn "/usr/src/arvados" "$GOPATH/src/git.curoverse.com/arvados.git"
+flock /var/lib/arvados/gostuff.lock go get -t "git.curoverse.com/arvados.git/services/keep-web"
+install bin/keep-web /usr/local/bin
+
+export ARVADOS_API_HOST=$localip:${services[api]}
+export ARVADOS_API_HOST_INSECURE=1
+export ARVADOS_API_TOKEN=$(cat /var/lib/arvados/superuser_token)
+
+exec /usr/local/bin/keep-web -trust-all-content -listen=:${services[keep-web]}
diff --git a/tools/arvbox/lib/arvbox/docker/service/keepproxy/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/keepproxy/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/keepproxy/log/run b/tools/arvbox/lib/arvbox/docker/service/keepproxy/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/keepproxy/run b/tools/arvbox/lib/arvbox/docker/service/keepproxy/run
new file mode 120000 (symlink)
index 0000000..a388c8b
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/keepproxy/run-service b/tools/arvbox/lib/arvbox/docker/service/keepproxy/run-service
new file mode 100755 (executable)
index 0000000..413a67e
--- /dev/null
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+exec 2>&1
+sleep 2
+set -eux -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+
+mkdir -p /var/lib/arvados/gostuff
+cd /var/lib/arvados/gostuff
+
+export GOPATH=$PWD
+mkdir -p "$GOPATH/src/git.curoverse.com"
+ln -sfn "/usr/src/arvados" "$GOPATH/src/git.curoverse.com/arvados.git"
+flock /var/lib/arvados/gostuff.lock go get -t "git.curoverse.com/arvados.git/services/keepproxy"
+install bin/keepproxy /usr/local/bin
+
+export ARVADOS_API_HOST=$localip:${services[api]}
+export ARVADOS_API_HOST_INSECURE=1
+export ARVADOS_API_TOKEN=$(cat /var/lib/arvados/superuser_token)
+
+set +e
+read -rd $'\000' keepservice <<EOF
+{
+ "service_host":"$localip",
+ "service_port":${services[keepproxy]},
+ "service_ssl_flag":false,
+ "service_type":"proxy"
+}
+EOF
+set -e
+
+if test -s /var/lib/arvados/keepproxy-uuid ; then
+    keep_uuid=$(cat /var/lib/arvados/keepproxy-uuid)
+    arv keep_service update --uuid $keep_uuid --keep-service "$keepservice"
+else
+    UUID=$(arv --format=uuid keep_service create --keep-service "$keepservice")
+    echo $UUID > /var/lib/arvados/keepproxy-uuid
+fi
+
+exec /usr/local/bin/keepproxy -listen=:${services[keepproxy]}
diff --git a/tools/arvbox/lib/arvbox/docker/service/keepstore0/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/keepstore0/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/keepstore0/log/run b/tools/arvbox/lib/arvbox/docker/service/keepstore0/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/keepstore0/run b/tools/arvbox/lib/arvbox/docker/service/keepstore0/run
new file mode 120000 (symlink)
index 0000000..a388c8b
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/keepstore0/run-service b/tools/arvbox/lib/arvbox/docker/service/keepstore0/run-service
new file mode 100755 (executable)
index 0000000..cf411e4
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/bash
+. /usr/local/lib/arvbox/common.sh
+exec /usr/local/lib/arvbox/keep-setup.sh keep0 ${services[keepstore0]}
diff --git a/tools/arvbox/lib/arvbox/docker/service/keepstore1/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/keepstore1/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/keepstore1/log/run b/tools/arvbox/lib/arvbox/docker/service/keepstore1/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/keepstore1/run b/tools/arvbox/lib/arvbox/docker/service/keepstore1/run
new file mode 120000 (symlink)
index 0000000..a388c8b
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/keepstore1/run-service b/tools/arvbox/lib/arvbox/docker/service/keepstore1/run-service
new file mode 100755 (executable)
index 0000000..8d34d06
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/bash
+. /usr/local/lib/arvbox/common.sh
+exec /usr/local/lib/arvbox/keep-setup.sh keep1 ${services[keepstore1]}
diff --git a/tools/arvbox/lib/arvbox/docker/service/postgres/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/postgres/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/postgres/log/run b/tools/arvbox/lib/arvbox/docker/service/postgres/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/postgres/run b/tools/arvbox/lib/arvbox/docker/service/postgres/run
new file mode 100755 (executable)
index 0000000..4918bd7
--- /dev/null
@@ -0,0 +1,12 @@
+#!/bin/bash
+
+flock /var/lib/arvados/createusers.lock /usr/local/lib/arvbox/createusers.sh
+
+. /usr/local/lib/arvbox/common.sh
+
+chown -R $PGUSER:$PGGROUP /var/lib/postgresql
+chown -R $PGUSER:$PGGROUP /var/run/postgresql
+chown -R $PGUSER:$PGGROUP /etc/postgresql
+chown -R $PGUSER:$PGGROUP /etc/ssl/private
+
+exec chpst -u $PGUSER:$PGGROUP $0-service
diff --git a/tools/arvbox/lib/arvbox/docker/service/postgres/run-service b/tools/arvbox/lib/arvbox/docker/service/postgres/run-service
new file mode 100755 (executable)
index 0000000..a05be62
--- /dev/null
@@ -0,0 +1,14 @@
+#!/bin/bash
+
+exec 2>&1
+set -eux -o pipefail
+
+if ! test -d /var/lib/postgresql/9.4/main ; then
+    /usr/lib/postgresql/9.4/bin/initdb -D /var/lib/postgresql/9.4/main
+    sh -c "while ! (psql postgres -c'\du' | grep '^ arvbox ') >/dev/null ; do createuser -s arvbox ; sleep 1 ; done" &
+fi
+mkdir -p /var/run/postgresql/9.4-main.pg_stat_tmp
+
+rm -f /var/lib/postgresql/9.4/main/postmaster.pid
+
+exec /usr/lib/postgresql/9.4/bin/postgres -D /var/lib/postgresql/9.4/main -c config_file=/etc/postgresql/9.4/main/postgresql.conf
diff --git a/tools/arvbox/lib/arvbox/docker/service/ready/run b/tools/arvbox/lib/arvbox/docker/service/ready/run
new file mode 120000 (symlink)
index 0000000..a388c8b
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/ready/run-service b/tools/arvbox/lib/arvbox/docker/service/ready/run-service
new file mode 100755 (executable)
index 0000000..f560de0
--- /dev/null
@@ -0,0 +1,96 @@
+#!/bin/bash
+
+. /usr/local/lib/arvbox/common.sh
+
+set -eu -o pipefail
+
+if ! [[ -d /tmp/arvbox-ready ]] ; then
+   echo
+   echo "Arvados-in-a-box starting"
+   echo
+   echo "Note: if this is a fresh arvbox installation, it may take 10-15 minutes (or longer) to download and"
+   echo "install dependencies.  Use \"arvbox log\" to monitor the progress of specific services."
+   echo
+   mkdir -p /tmp/arvbox-ready
+fi
+
+sleep 3
+
+waiting=""
+
+for s in "${!services[@]}"
+do
+  if ! [[ -f /tmp/arvbox-ready/$s ]] ; then
+    if nc -z localhost ${services[$s]} ; then
+      echo "$s is ready at $localip:${services[$s]}"
+      touch /tmp/arvbox-ready/$s
+    else
+      waiting="$waiting $s"
+    fi
+  fi
+done
+
+if ! docker version >/dev/null 2>/dev/null ; then
+  waiting="$waiting docker"
+fi
+
+if ! which arv >/dev/null ; then
+  waiting="$waiting sdk"
+elif ! which arv-get >/dev/null ; then
+  waiting="$waiting sdk"
+fi
+
+if ! (ps x | grep -v grep | grep "crunch-dispatch") > /dev/null ; then
+    waiting="$waiting crunch-dispatch"
+fi
+
+export ARVADOS_API_HOST=$localip:${services[api]}
+export ARVADOS_API_HOST_INSECURE=1
+
+vm_ok=0
+if test -s /var/lib/arvados/vm-uuid -a -s /var/lib/arvados/superuser_token; then
+    vm_uuid=$(cat /var/lib/arvados/vm-uuid)
+    export ARVADOS_API_TOKEN=$(cat /var/lib/arvados/superuser_token)
+    if (which arv && arv virtual_machine get --uuid $vm_uuid) >/dev/null 2>/dev/null ; then
+        vm_ok=1
+    fi
+fi
+
+if test $vm_ok = 0 ; then
+    waiting="$waiting vm"
+fi
+
+if ! [[ -z "$waiting" ]] ; then
+    if ps x | grep -v grep | grep "bundle install" > /dev/null; then
+        gemcount=$(ls /var/lib/gems/ruby/2.1.0/gems 2>/dev/null | wc -l)
+
+        gemlockcount=0
+        for l in /usr/src/arvados/services/api/Gemfile.lock \
+                     /usr/src/arvados/apps/workbench/Gemfile.lock \
+                     /usr/src/sso/Gemfile.lock ; do
+            gc=$(cat $l \
+                        | grep -vE "(GEM|PLATFORMS|DEPENDENCIES|$^|remote:|specs:)" \
+                        | sed 's/^ *//' | sed 's/(.*)//' | sed 's/ *$//' | sort | uniq | wc -l)
+            gemlockcount=$(($gemlockcount + $gc))
+        done
+        waiting="$waiting (installing ruby gems $gemcount/$gemlockcount)"
+    fi
+
+    if ps x | grep -v grep | grep "c++.*/var/lib/passenger" > /dev/null ; then
+        waiting="$waiting (compiling passenger)"
+    fi
+
+    if ps x | grep -v grep | grep "pip install" > /dev/null; then
+        waiting="$waiting (installing python packages)"
+    fi
+    echo "    Waiting for$waiting ..."
+    exit 1
+fi
+
+echo
+echo "Your Arvados-in-a-box is ready!"
+echo "Workbench is running at http://$localip"
+
+rm -r /tmp/arvbox-ready
+
+sv stop ready >/dev/null
diff --git a/tools/arvbox/lib/arvbox/docker/service/sdk/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/sdk/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/sdk/log/run b/tools/arvbox/lib/arvbox/docker/service/sdk/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/sdk/run b/tools/arvbox/lib/arvbox/docker/service/sdk/run
new file mode 100755 (executable)
index 0000000..816b166
--- /dev/null
@@ -0,0 +1,5 @@
+#!/bin/sh
+set -e
+
+/usr/local/lib/arvbox/runsu.sh $0-service
+sv stop sdk
diff --git a/tools/arvbox/lib/arvbox/docker/service/sdk/run-service b/tools/arvbox/lib/arvbox/docker/service/sdk/run-service
new file mode 100755 (executable)
index 0000000..b51f0fc
--- /dev/null
@@ -0,0 +1,24 @@
+#!/bin/bash
+
+exec 2>&1
+set -eux -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+
+mkdir -p ~/.pip /var/lib/arvados/pip
+cat > ~/.pip/pip.conf <<EOF
+[global]
+download_cache = /var/lib/arvados/pip
+EOF
+
+cd /usr/src/arvados/sdk/cli
+run_bundler --binstubs=$PWD/binstubs
+ln -sf /usr/src/arvados/sdk/cli/binstubs/arv /usr/local/bin/arv
+
+cd /usr/src/arvados/sdk/python
+python setup.py sdist
+pip_install $(ls dist/arvados-python-client-*.tar.gz | tail -n1)
+
+cd /usr/src/arvados/services/fuse
+python setup.py sdist
+pip_install $(ls dist/arvados_fuse-*.tar.gz | tail -n1)
diff --git a/tools/arvbox/lib/arvbox/docker/service/slurmctld/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/slurmctld/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/slurmctld/log/run b/tools/arvbox/lib/arvbox/docker/service/slurmctld/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/slurmctld/run b/tools/arvbox/lib/arvbox/docker/service/slurmctld/run
new file mode 100755 (executable)
index 0000000..bd75bd0
--- /dev/null
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+exec 2>&1
+set -eux -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+
+cat > /etc/slurm-llnl/slurm.conf  <<EOF
+ControlMachine=$HOSTNAME
+ControlAddr=$HOSTNAME
+AuthType=auth/munge
+DefaultStorageLoc=/var/log/slurm-llnl
+SelectType=select/cons_res
+SelectTypeParameters=CR_CPU_Memory
+SlurmUser=arvbox
+SlurmdUser=arvbox
+SlurmctldPort=7002
+SlurmctldTimeout=300
+SlurmdPort=7003
+SlurmdSpoolDir=/var/tmp/slurmd.spool
+SlurmdTimeout=300
+StateSaveLocation=/var/tmp/slurm.state
+NodeName=$HOSTNAME
+PartitionName=compute State=UP Default=YES Nodes=$HOSTNAME
+EOF
+
+mkdir -p /var/run/munge
+
+/usr/sbin/munged -f
+
+exec /usr/sbin/slurmctld -v -D
diff --git a/tools/arvbox/lib/arvbox/docker/service/slurmd/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/slurmd/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/slurmd/log/run b/tools/arvbox/lib/arvbox/docker/service/slurmd/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/slurmd/run b/tools/arvbox/lib/arvbox/docker/service/slurmd/run
new file mode 100755 (executable)
index 0000000..865a7e6
--- /dev/null
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+exec 2>&1
+set -eux -o pipefail
+
+exec /usr/local/lib/arvbox/runsu.sh /usr/sbin/slurmd -v -D
diff --git a/tools/arvbox/lib/arvbox/docker/service/ssh/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/ssh/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/ssh/log/run b/tools/arvbox/lib/arvbox/docker/service/ssh/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/ssh/run b/tools/arvbox/lib/arvbox/docker/service/ssh/run
new file mode 100755 (executable)
index 0000000..0f23542
--- /dev/null
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+exec 2>&1
+set -eux -o pipefail
+
+if ! test -d /var/run/sshd ; then
+   mkdir /var/run/sshd
+   chmod 0755 /var/run/sshd
+fi
+/usr/sbin/sshd -D
diff --git a/tools/arvbox/lib/arvbox/docker/service/sso/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/sso/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/sso/log/run b/tools/arvbox/lib/arvbox/docker/service/sso/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/sso/run b/tools/arvbox/lib/arvbox/docker/service/sso/run
new file mode 120000 (symlink)
index 0000000..a388c8b
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/runsu.sh
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/sso/run-service b/tools/arvbox/lib/arvbox/docker/service/sso/run-service
new file mode 100755 (executable)
index 0000000..da413e0
--- /dev/null
@@ -0,0 +1,87 @@
+#!/bin/bash
+
+exec 2>&1
+set -ex -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+
+cd /usr/src/sso
+export RAILS_ENV=development
+
+run_bundler --without=development
+bundle exec passenger start --runtime-check-only --runtime-dir=/var/lib/passenger
+
+if test "$1" = "--only-deps" ; then
+    exit
+fi
+
+set -u
+
+if ! test -s /var/lib/arvados/sso_uuid_prefix ; then
+  ruby -e 'puts "#{rand(2**64).to_s(36)[0,5]}"' > /var/lib/arvados/sso_uuid_prefix
+fi
+uuid_prefix=$(cat /var/lib/arvados/sso_uuid_prefix)
+
+if ! test -s /var/lib/arvados/sso_secret_token ; then
+  ruby -e 'puts rand(2**400).to_s(36)' > /var/lib/arvados/sso_secret_token
+fi
+secret_token=$(cat /var/lib/arvados/sso_secret_token)
+
+if ! test -s /var/lib/arvados/self-signed.key ; then
+  openssl req -new -x509 -nodes -out /var/lib/arvados/self-signed.pem -keyout /var/lib/arvados/self-signed.key -days 365 -subj '/CN=localhost'
+fi
+
+cat >config/application.yml <<EOF
+development:
+  uuid_prefix: $uuid_prefix
+  secret_token: $secret_token
+  default_link_url: "http://$localip"
+  allow_account_registration: true
+EOF
+
+(cd config && /usr/local/lib/arvbox/application_yml_override.py)
+
+if ! test -f /var/lib/arvados/sso_database_pw ; then
+    ruby -e 'puts rand(2**128).to_s(36)' > /var/lib/arvados/sso_database_pw
+fi
+database_pw=$(cat /var/lib/arvados/sso_database_pw)
+
+if ! (psql postgres -c "\du" | grep "^ arvados_sso ") >/dev/null ; then
+    psql postgres -c "create user arvados_sso with password '$database_pw'"
+    psql postgres -c "ALTER USER arvados_sso CREATEDB;"
+fi
+
+sed "s/password:.*/password: $database_pw/" <config/database.yml.example >config/database.yml
+
+if ! test -f /var/lib/arvados/sso_database_setup ; then
+   bundle exec rake db:setup
+
+   if ! test -s /var/lib/arvados/sso_app_secret ; then
+       ruby -e 'puts rand(2**400).to_s(36)' > /var/lib/arvados/sso_app_secret
+   fi
+   app_secret=$(cat /var/lib/arvados/sso_app_secret)
+
+   bundle exec rails console <<EOF
+c = Client.new
+c.name = "joshid"
+c.app_id = "arvados-server"
+c.app_secret = "$app_secret"
+c.save!
+EOF
+
+   touch /var/lib/arvados/sso_database_setup
+fi
+
+rm -rf tmp
+
+bundle exec rake db:migrate
+
+set +u
+if test "$1" = "--only-setup" ; then
+    exit
+fi
+
+exec bundle exec passenger start --port=${services[sso]} \
+     --runtime-dir=/var/lib/passenger \
+     --ssl --ssl-certificate=/var/lib/arvados/self-signed.pem \
+     --ssl-certificate-key=/var/lib/arvados/self-signed.key
diff --git a/tools/arvbox/lib/arvbox/docker/service/vm/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/vm/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/vm/log/run b/tools/arvbox/lib/arvbox/docker/service/vm/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/vm/run b/tools/arvbox/lib/arvbox/docker/service/vm/run
new file mode 100755 (executable)
index 0000000..0c10cb0
--- /dev/null
@@ -0,0 +1,21 @@
+#!/bin/bash
+set -e
+
+. /usr/local/lib/arvbox/common.sh
+
+git config --system "credential.http://$localip:${services[arv-git-httpd]}/.username" none
+git config --system "credential.http://$localip:${services[arv-git-httpd]}/.helper" '!cred(){ cat >/dev/null; if [ "$1" = get ]; then echo password=$ARVADOS_API_TOKEN; fi; };cred'
+
+/usr/local/lib/arvbox/runsu.sh $0-service
+
+cd /usr/src/arvados/services/login-sync
+
+export ARVADOS_API_HOST=$localip:${services[api]}
+export ARVADOS_API_HOST_INSECURE=1
+export ARVADOS_API_TOKEN=$(cat /var/lib/arvados/superuser_token)
+export ARVADOS_VIRTUAL_MACHINE_UUID=$(cat /var/lib/arvados/vm-uuid)
+
+while true ; do
+      bundle exec arvados-login-sync
+      sleep 120
+done
diff --git a/tools/arvbox/lib/arvbox/docker/service/vm/run-service b/tools/arvbox/lib/arvbox/docker/service/vm/run-service
new file mode 100755 (executable)
index 0000000..fb209f5
--- /dev/null
@@ -0,0 +1,36 @@
+#!/bin/bash
+
+exec 2>&1
+sleep 2
+set -ex -o pipefail
+
+. /usr/local/lib/arvbox/common.sh
+
+cd /usr/src/arvados/services/login-sync
+run_bundler
+
+if test "$1" = "--only-deps" ; then
+    exit
+fi
+
+set -u
+
+export ARVADOS_API_HOST=$localip:${services[api]}
+export ARVADOS_API_HOST_INSECURE=1
+export ARVADOS_API_TOKEN=$(cat /var/lib/arvados/superuser_token)
+export ARVADOS_VIRTUAL_MACHINE_UUID=$(cat /var/lib/arvados/vm-uuid)
+
+set +e
+read -rd $'\000' vm <<EOF
+{
+ "uuid": "$ARVADOS_VIRTUAL_MACHINE_UUID",
+ "hostname":"$localip"
+}
+EOF
+set -e
+
+if arv virtual_machine get --uuid $ARVADOS_VIRTUAL_MACHINE_UUID ; then
+    arv virtual_machine update --uuid $ARVADOS_VIRTUAL_MACHINE_UUID --virtual-machine "$vm"
+else
+    arv virtual_machine create --virtual-machine "$vm"
+fi
diff --git a/tools/arvbox/lib/arvbox/docker/service/workbench/log/main/.gitstub b/tools/arvbox/lib/arvbox/docker/service/workbench/log/main/.gitstub
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/tools/arvbox/lib/arvbox/docker/service/workbench/log/run b/tools/arvbox/lib/arvbox/docker/service/workbench/log/run
new file mode 120000 (symlink)
index 0000000..d6aef4a
--- /dev/null
@@ -0,0 +1 @@
+/usr/local/lib/arvbox/logger
\ No newline at end of file
diff --git a/tools/arvbox/lib/arvbox/docker/service/workbench/run b/tools/arvbox/lib/arvbox/docker/service/workbench/run
new file mode 100755 (executable)
index 0000000..6ac0476
--- /dev/null
@@ -0,0 +1,15 @@
+#!/bin/sh
+set -e
+
+/usr/local/lib/arvbox/runsu.sh $0-service $1
+
+cd /usr/src/arvados/apps/workbench
+
+rm -rf tmp
+mkdir tmp
+chown arvbox:arvbox tmp
+
+if test "$1" != "--only-deps" ; then
+    exec bundle exec passenger start --port 80 \
+         --user arvbox --runtime-dir=/var/lib/passenger
+fi
diff --git a/tools/arvbox/lib/arvbox/docker/service/workbench/run-service b/tools/arvbox/lib/arvbox/docker/service/workbench/run-service
new file mode 100755 (executable)
index 0000000..850022a
--- /dev/null
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+exec 2>&1
+set -ex -o pipefail
+
+.  /usr/local/lib/arvbox/common.sh
+
+cd /usr/src/arvados/apps/workbench
+export RAILS_ENV=development
+
+run_bundler --without=development
+bundle exec passenger start --runtime-check-only --runtime-dir=/var/lib/passenger
+
+if test "$1" = "--only-deps" ; then
+    exit
+fi
+
+set -u
+
+if ! test -s /var/lib/arvados/workbench_secret_token ; then
+  ruby -e 'puts rand(2**400).to_s(36)' > /var/lib/arvados/workbench_secret_token
+fi
+secret_token=$(cat /var/lib/arvados/workbench_secret_token)
+
+if ! test -s self-signed.key ; then
+  openssl req -new -x509 -nodes -out self-signed.pem -keyout self-signed.key -days 365 -subj '/CN=localhost'
+fi
+
+cat >config/application.yml <<EOF
+development:
+  secret_token: $secret_token
+  arvados_login_base: https://$localip:${services[api]}/login
+  arvados_v1_base: https://$localip:${services[api]}/arvados/v1
+  arvados_insecure_https: true
+  keep_web_download_url: http://$localip:${services[keep-web]}/c=%{uuid_or_pdh}
+  keep_web_url: http://$localip:${services[keep-web]}/c=%{uuid_or_pdh}
+  arvados_docsite: http://$localip:${services[doc]}/
+EOF
+
+(cd config && /usr/local/lib/arvbox/application_yml_override.py)
diff --git a/tools/arvbox/lib/arvbox/docker/waitforpostgres.sh b/tools/arvbox/lib/arvbox/docker/waitforpostgres.sh
new file mode 100755 (executable)
index 0000000..84d9904
--- /dev/null
@@ -0,0 +1,4 @@
+#!/bin/sh
+while ! psql postgres -c\\du >/dev/null 2>/dev/null ; do
+    sleep 1
+done
index f422501b10ff1858f9b636621aaaba4bad662d5b..2ac12abcba23e381073589cf209915b88a9d8cef 100644 (file)
@@ -21,6 +21,11 @@ from crunchstat_summary import logger
 AVAILABLE_RAM_RATIO = 0.95
 
 
+# Workaround datetime.datetime.strptime() thread-safety bug by calling
+# it once before starting threads.  https://bugs.python.org/issue7980
+datetime.datetime.strptime('1999-12-31_23:59:59', '%Y-%m-%d_%H:%M:%S')
+
+
 class Task(object):
     def __init__(self):
         self.starttime = None