10383: Merge branch 'master' into 10383-arv-put-incremental-upload
authorLucas Di Pentima <lucas@curoverse.com>
Fri, 9 Dec 2016 22:30:02 +0000 (19:30 -0300)
committerLucas Di Pentima <lucas@curoverse.com>
Fri, 9 Dec 2016 22:30:02 +0000 (19:30 -0300)
208 files changed:
README.md
apps/workbench/Gemfile
apps/workbench/Gemfile.lock
apps/workbench/app/controllers/application_controller.rb
apps/workbench/app/controllers/projects_controller.rb
apps/workbench/app/controllers/work_units_controller.rb
apps/workbench/app/models/container_work_unit.rb
apps/workbench/app/models/proxy_work_unit.rb
apps/workbench/app/views/container_requests/_show_inputs.html.erb
apps/workbench/app/views/pipeline_instances/_running_component.html.erb
apps/workbench/app/views/pipeline_instances/_show_components_running.html.erb
apps/workbench/app/views/projects/_show_dashboard.html.erb
apps/workbench/app/views/projects/_show_processes.html.erb [new file with mode: 0644]
apps/workbench/app/views/projects/show.html.erb
apps/workbench/test/controllers/disabled_api_test.rb
apps/workbench/test/controllers/projects_controller_test.rb
apps/workbench/test/integration/application_layout_test.rb
apps/workbench/test/integration/container_requests_test.rb
apps/workbench/test/integration/pipeline_instances_test.rb
apps/workbench/test/integration/projects_test.rb
apps/workbench/test/integration/work_units_test.rb
apps/workbench/test/performance/browsing_test.rb
build/README
build/build-dev-docker-jobs-image.sh [new file with mode: 0755]
build/run-build-packages-one-target.sh
build/run-build-packages.sh
build/run-library.sh
build/run-tests.sh
doc/_config.yml
doc/_includes/_arvados_cwl_runner.liquid [new file with mode: 0644]
doc/_includes/_container_runtime_constraints.liquid
doc/_includes/_container_scheduling_parameters.liquid [new file with mode: 0644]
doc/_includes/_register_cwl_workflow.liquid [new file with mode: 0644]
doc/_includes/_what_is_cwl.liquid [new file with mode: 0644]
doc/api/methods/container_requests.html.textile.liquid
doc/api/methods/containers.html.textile.liquid
doc/images/upload-using-workbench.png
doc/images/workbench-dashboard.png
doc/images/workbench-move-selected.png
doc/user/cwl/cwl-runner.html.textile.liquid
doc/user/getting_started/workbench.html.textile.liquid
doc/user/topics/arv-copy.html.textile.liquid
doc/user/topics/running-workflow-command-line.html.textile.liquid [new file with mode: 0644]
doc/user/tutorials/tutorial-pipeline-workbench.html.textile.liquid [deleted file]
doc/user/tutorials/tutorial-workflow-workbench.html.textile.liquid [new file with mode: 0644]
doc/user/tutorials/writing-cwl-workflow.html.textile.liquid [new file with mode: 0644]
sdk/cli/bin/crunch-job
sdk/cli/test/test_arv-keep-get.rb
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arvcontainer.py
sdk/cwl/arvados_cwl/arvdocker.py
sdk/cwl/arvados_cwl/arvjob.py
sdk/cwl/arvados_cwl/arvworkflow.py
sdk/cwl/arvados_cwl/done.py
sdk/cwl/arvados_cwl/runner.py
sdk/cwl/setup.py
sdk/cwl/tests/test_container.py
sdk/cwl/tests/test_submit.py
sdk/dev-jobs.dockerfile [new file with mode: 0644]
sdk/go/httpserver/id_generator.go [new file with mode: 0644]
sdk/go/httpserver/request_limiter.go
sdk/go/logger/logger.go [deleted file]
sdk/go/logger/util.go [deleted file]
sdk/go/util/util.go [deleted file]
sdk/python/arvados/_version.py [new file with mode: 0644]
sdk/python/arvados/arvfile.py
sdk/python/arvados/commands/arv_copy.py
sdk/python/arvados/commands/keepdocker.py
sdk/python/arvados/commands/ls.py
sdk/python/arvados/commands/put.py
sdk/python/arvados/commands/run.py
sdk/python/arvados/commands/ws.py
sdk/python/arvados/keep.py
sdk/python/bin/arv-get
sdk/python/bin/arv-normalize
sdk/python/setup.py
sdk/python/tests/arvados_testutil.py
sdk/python/tests/test_arv_copy.py [new file with mode: 0644]
sdk/python/tests/test_arv_keepdocker.py [new file with mode: 0644]
sdk/python/tests/test_arv_ls.py
sdk/python/tests/test_arv_normalize.py [new file with mode: 0644]
sdk/python/tests/test_arv_put.py
sdk/python/tests/test_arv_run.py [new file with mode: 0644]
sdk/python/tests/test_arv_ws.py
sdk/python/tests/test_events.py
sdk/python/tests/test_keep_client.py
services/api/Gemfile
services/api/Gemfile.lock
services/api/app/controllers/application_controller.rb
services/api/app/controllers/arvados/v1/collections_controller.rb
services/api/app/controllers/arvados/v1/groups_controller.rb
services/api/app/controllers/arvados/v1/schema_controller.rb
services/api/app/controllers/arvados/v1/user_agreements_controller.rb
services/api/app/controllers/arvados/v1/users_controller.rb
services/api/app/controllers/arvados/v1/virtual_machines_controller.rb
services/api/app/controllers/database_controller.rb
services/api/app/middlewares/arvados_api_token.rb
services/api/app/models/arvados_model.rb
services/api/app/models/blob.rb
services/api/app/models/collection.rb
services/api/app/models/commit_ancestor.rb
services/api/app/models/job.rb
services/api/app/models/link.rb
services/api/app/models/log.rb
services/api/app/models/node.rb
services/api/app/models/repository.rb
services/api/app/models/user.rb
services/api/config/boot.rb
services/api/config/initializers/inflections.rb
services/api/config/initializers/load_config.rb
services/api/config/initializers/preload_all_models.rb
services/api/lib/crunch_dispatch.rb
services/api/lib/current_api_client.rb
services/api/lib/eventbus.rb
services/api/lib/load_param.rb
services/api/lib/salvage_collection.rb
services/api/script/arvados-git-sync.rb
services/api/script/migrate-gitolite-to-uuid-storage.rb
services/api/test/factories/user.rb
services/api/test/fixtures/container_requests.yml
services/api/test/fixtures/pipeline_templates.yml
services/api/test/fixtures/workflows.yml
services/api/test/functional/arvados/v1/api_client_authorizations_controller_test.rb
services/api/test/functional/arvados/v1/collections_controller_test.rb
services/api/test/functional/arvados/v1/filters_test.rb
services/api/test/functional/arvados/v1/groups_controller_test.rb
services/api/test/functional/arvados/v1/jobs_controller_test.rb
services/api/test/functional/arvados/v1/links_controller_test.rb
services/api/test/functional/arvados/v1/repositories_controller_test.rb
services/api/test/functional/arvados/v1/schema_controller_test.rb
services/api/test/functional/arvados/v1/users_controller_test.rb
services/api/test/functional/arvados/v1/virtual_machines_controller_test.rb
services/api/test/helpers/users_test_helper.rb
services/api/test/integration/collections_api_test.rb
services/api/test/integration/collections_performance_test.rb
services/api/test/integration/cross_origin_test.rb
services/api/test/integration/database_reset_test.rb
services/api/test/integration/user_sessions_test.rb
services/api/test/integration/websocket_test.rb
services/api/test/test_helper.rb
services/api/test/unit/app_version_test.rb
services/api/test/unit/authorized_key_test.rb
services/api/test/unit/collection_performance_test.rb
services/api/test/unit/collection_test.rb
services/api/test/unit/commit_test.rb
services/api/test/unit/container_test.rb
services/api/test/unit/fail_jobs_test.rb
services/api/test/unit/job_test.rb
services/api/test/unit/log_test.rb
services/api/test/unit/node_test.rb
services/api/test/unit/owner_test.rb
services/api/test/unit/permission_test.rb
services/api/test/unit/pipeline_instance_test.rb
services/api/test/unit/salvage_collection_test.rb
services/api/test/unit/user_test.rb
services/api/test/websocket_runner.rb [deleted file]
services/crunch-dispatch-slurm/crunch-dispatch-slurm.go
services/crunch-dispatch-slurm/crunch-dispatch-slurm_test.go
services/crunch-dispatch-slurm/squeue.go
services/crunch-run/crunchrun.go
services/datamanager/collection/collection.go [deleted file]
services/datamanager/collection/collection_test.go [deleted file]
services/datamanager/collection/testing.go [deleted file]
services/datamanager/datamanager.go [deleted file]
services/datamanager/datamanager_test.go [deleted file]
services/datamanager/experimental/datamanager.py [deleted file]
services/datamanager/experimental/datamanager_test.py [deleted file]
services/datamanager/keep/keep.go [deleted file]
services/datamanager/keep/keep_test.go [deleted file]
services/datamanager/loggerutil/loggerutil.go [deleted file]
services/datamanager/summary/canonical_string.go [deleted file]
services/datamanager/summary/file.go [deleted file]
services/datamanager/summary/pull_list.go [deleted file]
services/datamanager/summary/pull_list_test.go [deleted file]
services/datamanager/summary/summary.go [deleted file]
services/datamanager/summary/summary_test.go [deleted file]
services/datamanager/summary/trash_list.go [deleted file]
services/datamanager/summary/trash_list_test.go [deleted file]
services/fuse/arvados_fuse/_version.py [new file with mode: 0644]
services/fuse/arvados_fuse/command.py
services/fuse/setup.py
services/fuse/tests/test_command_args.py
services/keep-balance/balance.go
services/keepstore/azure_blob_volume.go
services/keepstore/azure_blob_volume_test.go
services/keepstore/bufferpool.go
services/keepstore/config.go
services/keepstore/config_test.go
services/keepstore/handler_test.go
services/keepstore/handlers.go
services/keepstore/keepstore.go
services/keepstore/logging_router.go
services/keepstore/pull_worker.go
services/keepstore/s3_volume.go
services/keepstore/s3_volume_test.go
services/keepstore/trash_worker.go
services/keepstore/usage.go
services/keepstore/volume_unix.go
services/nodemanager/arvnodeman/_version.py [new file with mode: 0644]
services/nodemanager/arvnodeman/computenode/driver/gce.py
services/nodemanager/arvnodeman/launcher.py
services/nodemanager/setup.py
services/nodemanager/tests/test_arguments.py [new file with mode: 0644]
services/nodemanager/tests/testutil.py
tools/arvbash/arvbash.sh [new file with mode: 0755]
tools/arvbox/lib/arvbox/docker/Dockerfile.base
tools/arvbox/lib/arvbox/docker/common.sh
tools/arvbox/lib/arvbox/docker/service/api/run-service

index cf09171b024716b4a57a412746c7d1a9decbb87e..419ca15957cc4cd2aa1b00048dd61feb2c6375f3 100644 (file)
--- a/README.md
+++ b/README.md
@@ -59,6 +59,7 @@ contributers to Arvados.
 ## Development
 
 [![Build Status](https://ci.curoverse.com/buildStatus/icon?job=run-tests)](https://ci.curoverse.com/job/run-tests/)
+[![Go Report Card](https://goreportcard.com/badge/github.com/curoverse/arvados)](https://goreportcard.com/report/github.com/curoverse/arvados)
 
 The Arvados public bug tracker is located at https://dev.arvados.org/projects/arvados/issues
 
index 20d64d17a16dc87b2a076c13b2ad6c356b0a041a..eac4fdf601c8d4033db80e99b72c71a02cfa4e0e 100644 (file)
@@ -1,6 +1,6 @@
 source 'https://rubygems.org'
 
-gem 'rails', '~> 4.1.0'
+gem 'rails', '~> 4.1'
 gem 'arvados', '>= 0.1.20150511150219'
 
 gem 'activerecord-nulldb-adapter'
@@ -19,7 +19,7 @@ gem 'coffee-rails'
 # in production environments by default.
 group :assets do
   gem 'sass-rails'
-  gem 'uglifier', '>= 1.0.3'
+  gem 'uglifier', '~> 2.0'
 
   # See https://github.com/sstephenson/execjs#readme for more supported runtimes
   gem 'therubyracer', :platforms => :ruby
@@ -33,7 +33,7 @@ group :development do
 end
 
 group :test, :diagnostics, :performance do
-  gem 'minitest', '>= 5.0.0'
+  gem 'minitest', '~> 5.0'
   gem 'selenium-webdriver'
   gem 'capybara'
   gem 'poltergeist'
index a8431a7dfd373d0357053df0e06df901498ca0ca..0abe868ccf75bac683e44529ce939a421e772d45 100644 (file)
@@ -92,7 +92,7 @@ GEM
     deep_merge (1.0.1)
     docile (1.1.5)
     erubis (2.7.0)
-    execjs (2.2.2)
+    execjs (2.7.0)
     extlib (0.9.16)
     faraday (0.9.2)
       multipart-post (>= 1.2, < 3)
@@ -123,7 +123,7 @@ GEM
       signet (~> 0.7)
     headless (1.0.2)
     highline (1.6.21)
-    httpclient (2.6.0.1)
+    httpclient (2.8.2.4)
     i18n (0.7.0)
     jquery-rails (3.1.2)
       railties (>= 3.0, < 5.0)
@@ -158,7 +158,7 @@ GEM
       metaclass (~> 0.0.1)
     morrisjs-rails (0.5.1)
       railties (> 3.1, < 5)
-    multi_json (1.12.0)
+    multi_json (1.12.1)
     multipart-post (2.0.0)
     net-scp (1.2.1)
       net-ssh (>= 2.6.5)
@@ -257,7 +257,7 @@ GEM
     tilt (1.4.1)
     tzinfo (1.2.2)
       thread_safe (~> 0.1)
-    uglifier (2.7.0)
+    uglifier (2.7.2)
       execjs (>= 0.3.0)
       json (>= 1.8.0)
     websocket (1.2.2)
@@ -292,7 +292,7 @@ DEPENDENCIES
   less-rails
   lograge
   logstash-event
-  minitest (>= 5.0.0)
+  minitest (~> 5.0)
   mocha
   morrisjs-rails
   multi_json
@@ -301,7 +301,7 @@ DEPENDENCIES
   piwik_analytics
   poltergeist
   rack-mini-profiler
-  rails (~> 4.1.0)
+  rails (~> 4.1)
   rails-perftest
   raphael-rails
   ruby-debug-passenger
@@ -316,8 +316,8 @@ DEPENDENCIES
   sshkey
   themes_for_rails!
   therubyracer
-  uglifier (>= 1.0.3)
+  uglifier (~> 2.0)
   wiselinks
 
 BUNDLED WITH
-   1.12.1
+   1.13.2
index c9ce8ce0b748a9473d2cd5f80739d070f1f8aef5..ee3ac4d6810588b7d630705a5efe4cd6e08bd6ae 100644 (file)
@@ -907,7 +907,7 @@ class ApplicationController < ActionController::Base
   # from the top three levels.
   # That is: get toplevel projects under home, get subprojects of
   # these projects, and so on until we hit the limit.
-  def my_wanted_projects user, page_size=100
+  def my_wanted_projects(user, page_size=100)
     return @my_wanted_projects if @my_wanted_projects
 
     from_top = []
@@ -922,7 +922,7 @@ class ApplicationController < ActionController::Base
       break if current_level.results.size == 0
       @too_many_projects = true if current_level.items_available > current_level.results.size
       from_top.concat current_level.results
-      uuids = current_level.results.collect { |x| x.uuid }
+      uuids = current_level.results.collect(&:uuid)
       depth += 1
       if depth >= 3
         @reached_level_limit = true
@@ -933,12 +933,12 @@ class ApplicationController < ActionController::Base
   end
 
   helper_method :my_wanted_projects_tree
-  def my_wanted_projects_tree user, page_size=100
-    build_my_wanted_projects_tree user, page_size
+  def my_wanted_projects_tree(user, page_size=100)
+    build_my_wanted_projects_tree(user, page_size)
     [@my_wanted_projects_tree, @too_many_projects, @reached_level_limit]
   end
 
-  def build_my_wanted_projects_tree user, page_size=100
+  def build_my_wanted_projects_tree(user, page_size=100)
     return @my_wanted_projects_tree if @my_wanted_projects_tree
 
     parent_of = {user.uuid => 'me'}
index 0a2044a0e23e96b741d77658dfa91057fe57bdfa..48b2c421fb08385e648d95ba8cac809e948eb7b8 100644 (file)
@@ -55,8 +55,10 @@ class ProjectsController < ApplicationController
     pane_list = []
 
     procs = ["arvados#containerRequest"]
+    procs_pane_name = 'Processes'
     if PipelineInstance.api_exists?(:index)
       procs << "arvados#pipelineInstance"
+      procs_pane_name = 'Pipelines_and_processes'
     end
 
     workflows = ["arvados#workflow"]
@@ -76,7 +78,7 @@ class ProjectsController < ApplicationController
       }
     pane_list <<
       {
-        :name => 'Pipelines_and_processes',
+        :name => procs_pane_name,
         :filters => [%w(uuid is_a) + [procs]]
       }
     pane_list <<
index 3b611aa25b74e28663d9b7ecc2b0647670f066c8..550bdb7e953f7fe47a899cbe674a39ed457a9529 100644 (file)
@@ -57,7 +57,7 @@ class WorkUnitsController < ApplicationController
       workflow = Workflow.find? template_uuid
       if workflow.definition
         begin
-          wf_json = YAML::load(workflow.definition)
+          wf_json = ActiveSupport::HashWithIndifferentAccess.new YAML::load(workflow.definition)
         rescue => e
           logger.error "Error converting definition yaml to json: #{e.message}"
           raise ArgumentError, "Error converting definition yaml to json: #{e.message}"
@@ -77,11 +77,21 @@ class WorkUnitsController < ApplicationController
       attrs['cwd'] = "/var/spool/cwl"
       attrs['output_path'] = "/var/spool/cwl"
 
+      input_defaults = {}
+      if wf_json
+        inputs = get_cwl_inputs(wf_json)
+        inputs.each do |input|
+          if input[:default]
+            input_defaults[cwl_shortname(input[:id])] = input[:default]
+          end
+        end
+      end
+
       # mounts
       mounts = {
         "/var/lib/cwl/cwl.input.json" => {
           "kind" => "json",
-          "content" => {}
+          "content" => input_defaults
         },
         "stdout" => {
           "kind" => "file",
index 88aab306cedc8b9ea5a8a94a27cc394f2022780b..ed82f18036c1025bffb16e5267701045039bb167 100644 (file)
@@ -99,12 +99,21 @@ class ContainerWorkUnit < ProxyWorkUnit
   end
 
   def log_collection
-    get_combined(:log)
+    if @proxied.is_a?(ContainerRequest)
+      get(:log_uuid)
+    else
+      get(:log)
+    end
   end
 
   def outputs
     items = []
-    items << get_combined(:output) if get_combined(:output)
+    if @proxied.is_a?(ContainerRequest)
+      out = get(:output_uuid)
+    else
+      out = get(:output)
+    end
+    items << out if out
     items
   end
 
index 48bc3a04bc95dd41915e317449e7287ed4e42bce..771fdac47617fdd8855b8f0c00c10d5625064404 100644 (file)
@@ -189,7 +189,7 @@ class ProxyWorkUnit < WorkUnit
   def cputime
     if state_label != "Queued"
       if started_at
-        (runtime_constraints.andand[:min_nodes] || 1) * ((finished_at || Time.now()) - started_at)
+        (runtime_constraints.andand[:min_nodes] || 1).to_i * ((finished_at || Time.now()) - started_at)
       end
     end
   end
@@ -276,14 +276,14 @@ class ProxyWorkUnit < WorkUnit
       if children.any?
         cpu_time = children.map { |c|
           if c.started_at
-             (c.runtime_constraints.andand[:min_nodes] || 1) * ((c.finished_at || Time.now()) - c.started_at)
+             (c.runtime_constraints.andand[:min_nodes] || 1).to_i * ((c.finished_at || Time.now()) - c.started_at)
           else
             0
           end
         }.reduce(:+) || 0
       else
         if started_at
-          cpu_time = (runtime_constraints.andand[:min_nodes] || 1) * ((finished_at || Time.now()) - started_at)
+          cpu_time = (runtime_constraints.andand[:min_nodes] || 1).to_i * ((finished_at || Time.now()) - started_at)
         end
       end
 
index a6c4bffacd2fc1add6043a16226e28bbae15affa..b2fb245454aae2ead67ca1851ba5f57700678512 100644 (file)
@@ -1,22 +1,30 @@
-<% n_inputs = cwl_inputs_required(@object, get_cwl_inputs(@object.mounts[:"/var/lib/cwl/workflow.json"][:content]), [:mounts, :"/var/lib/cwl/cwl.input.json", :content]) %>
+<%
+n_inputs = if @object.mounts[:"/var/lib/cwl/workflow.json"] && @object.mounts[:"/var/lib/cwl/cwl.input.json"]
+             cwl_inputs_required(@object, get_cwl_inputs(@object.mounts[:"/var/lib/cwl/workflow.json"][:content]), [:mounts, :"/var/lib/cwl/cwl.input.json", :content])
+           else
+             0
+           end
+%>
 
 <% content_for :pi_input_form do %>
 <form role="form" style="width:60%">
   <div class="form-group">
-    <% workflow = @object.mounts[:"/var/lib/cwl/workflow.json"][:content] %>
-    <% inputs = get_cwl_inputs(workflow) %>
-    <% inputs.each do |input| %>
-      <label for="#input-<%= cwl_shortname(input[:id]) %>">
-        <%= input[:label] || cwl_shortname(input[:id]) %>
-      </label>
-      <div>
-        <p class="form-control-static">
-          <%= render_cwl_input @object, input, [:mounts, :"/var/lib/cwl/cwl.input.json", :content] %>
+    <% workflow = @object.mounts[:"/var/lib/cwl/workflow.json"].andand[:content] %>
+    <% if workflow %>
+      <% inputs = get_cwl_inputs(workflow) %>
+      <% inputs.each do |input| %>
+        <label for="#input-<%= cwl_shortname(input[:id]) %>">
+          <%= input[:label] || cwl_shortname(input[:id]) %>
+        </label>
+        <div>
+          <p class="form-control-static">
+            <%= render_cwl_input @object, input, [:mounts, :"/var/lib/cwl/cwl.input.json", :content] %>
+          </p>
+        </div>
+        <p class="help-block">
+          <%= input[:doc] %>
         </p>
-      </div>
-      <p class="help-block">
-        <%= input[:doc] %>
-      </p>
+      <% end %>
     <% end %>
   </div>
 </form>
index ded535ef3ad5109e81a33ea1fd9815cde8ac6905..06ed01ee6efd71282f6c1e647bdd542c717b1569 100644 (file)
@@ -48,7 +48,7 @@
           <div class="col-md-3">
             <% if current_job[:started_at] %>
               <% walltime = ((if current_job[:finished_at] then current_job[:finished_at] else Time.now() end) - current_job[:started_at]) %>
-              <% cputime = (current_job[:runtime_constraints].andand[:min_nodes] || 1) *
+              <% cputime = (current_job[:runtime_constraints].andand[:min_nodes] || 1).to_i *
                            ((current_job[:finished_at] || Time.now()) - current_job[:started_at]) %>
               <%= render_runtime(walltime, false) %>
               <% if cputime > 0 %> / <%= render_runtime(cputime, false) %> (<%= (cputime/walltime).round(1) %>&Cross;)<% end %>
index 4343f2e57b5adbb64dfb0fbabe177b9d7f937b7a..a4eb6ffb2abad2b959c9bfe48718d5f951227b59 100644 (file)
@@ -66,7 +66,7 @@
     <%
         cputime = pipeline_jobs.map { |j|
         if j[:job][:started_at]
-          (j[:job][:runtime_constraints].andand[:min_nodes] || 1) * ((j[:job][:finished_at] || Time.now()) - j[:job][:started_at])
+          (j[:job][:runtime_constraints].andand[:min_nodes] || 1).to_i * ((j[:job][:finished_at] || Time.now()) - j[:job][:started_at])
         else
           0
         end
index e0093bf6de320a3aacd471e8dbc3c4128983aed5..ab6eb16f5153862061a40c6b59f85bd89819c4f5 100644 (file)
   preload_links_for_objects(collection_pdhs + collection_uuids)
 %>
 
+<%
+  if !PipelineInstance.api_exists?(:index)
+    recent_procs_title = 'Recent processes'
+    run_proc_title = 'Choose a workflow to run:'
+  else
+    recent_procs_title = 'Recent pipelines and processes'
+    run_proc_title = 'Choose a pipeline or workflow to run:'
+  end
+%>
+
   <div class="row">
     <div class="col-md-6">
       <div class="panel panel-default" style="min-height: 10.5em">
         <div class="panel-heading">
-          <span class="panel-title">Recent pipelines and processes</span>
+          <span class="panel-title"><%=recent_procs_title%></span>
           <% if current_user.andand.is_active %>
             <span class="pull-right recent-processes-actions">
               <span>
                 <%= link_to(
                 choose_work_unit_templates_path(
-                  title: 'Choose a pipeline or workflow to run:',
+                  title: run_proc_title,
                   action_name: 'Next: choose inputs <i class="fa fa-fw fa-arrow-circle-right"></i>',
                   action_href: work_units_path,
                   action_method: 'post',
                   action_data: {'selection_param' => 'work_unit[template_uuid]', 'work_unit[owner_uuid]' => current_user.uuid, 'success' => 'redirect-to-created-object'}.to_json),
                 { class: "btn btn-primary btn-xs", remote: true }) do %>
-                  <i class="fa fa-fw fa-gear"></i> Run a pipeline...
+                  <i class="fa fa-fw fa-gear"></i> Run a process...
                 <% end %>
               </span>
               <span>
diff --git a/apps/workbench/app/views/projects/_show_processes.html.erb b/apps/workbench/app/views/projects/_show_processes.html.erb
new file mode 100644 (file)
index 0000000..71f6a89
--- /dev/null
@@ -0,0 +1,5 @@
+<%= render_pane 'tab_contents', to_string: true, locals: {
+      limit: 50,
+      filters: [['uuid', 'is_a', ["arvados#containerRequest"]]],
+      sortable_columns: { 'name' => 'container_requests.name', 'description' => 'container_requests.description' }
+    }.merge(local_assigns) %>
index e52d826cf60da778f9b343d1d44578c6cc5b0c7f..56055645170b8640b69c5691352ca858c45205ce 100644 (file)
@@ -9,6 +9,16 @@
   </h2>
 <% end %>
 
+<%
+  if !PipelineInstance.api_exists?(:index)
+    run_proc_title = 'Choose a workflow to run:'
+    run_proc_hover = 'Run a workflow in this project'
+  else
+    run_proc_title = 'Choose a pipeline or workflow to run:'
+    run_proc_hover = 'Run a pipeline or workflow in this project'
+  end
+%>
+
 <% content_for :tab_line_buttons do %>
   <% if @object.editable? %>
     <div class="btn-group btn-group-sm">
     </div>
     <%= link_to(
           choose_work_unit_templates_path(
-            title: 'Choose a pipeline or workflow to run:',
+            title: run_proc_title,
             action_name: 'Next: choose inputs <i class="fa fa-fw fa-arrow-circle-right"></i>',
             action_href: work_units_path,
             action_method: 'post',
             action_data: {'selection_param' => 'work_unit[template_uuid]', 'work_unit[owner_uuid]' => @object.uuid, 'success' => 'redirect-to-created-object'}.to_json),
-          { class: "btn btn-primary btn-sm", remote: true, title: "Run a pipeline or workflow in this project" }) do %>
-      <i class="fa fa-fw fa-gear"></i> Run a pipeline...
+          { class: "btn btn-primary btn-sm", remote: true, title: run_proc_hover }) do %>
+      <i class="fa fa-fw fa-gear"></i> Run a process...
     <% end %>
     <%= link_to projects_path({'project[owner_uuid]' => @object.uuid, 'options' => {'ensure_unique_name' => true}}), method: :post, title: "Add a subproject to this project", class: 'btn btn-sm btn-primary' do %>
       <i class="fa fa-fw fa-plus"></i>
index a41d87f31ab34187804bbfb25beb12da310f9aea..47276c02e835419cf89601e78c153f4f5431df21 100644 (file)
@@ -12,6 +12,7 @@ class DisabledApiTest < ActionController::TestCase
     get :index, {}, session_for(:active)
     assert_includes @response.body, "zzzzz-xvhdp-cr4runningcntnr" # expect crs
     assert_not_includes @response.body, "zzzzz-d1hrv-"   # expect no pipelines
+    assert_includes @response.body, "Run a process"
   end
 
   [
@@ -33,6 +34,7 @@ class DisabledApiTest < ActionController::TestCase
   end
 
   [
+    :admin,
     :active,
     nil,
   ].each do |user|
@@ -58,6 +60,7 @@ class DisabledApiTest < ActionController::TestCase
       assert_includes resp, "href=\"#Pipelines_and_processes\""
       assert_includes resp, "href=\"#Workflows\""
       assert_not_includes resp, "href=\"#Pipeline_templates\""
+      assert_includes @response.body, "Run a process" if user == :admin
     end
   end
 end
index d31d6e3458a94f629bc21329ba3fa5db1b79061e..d0b1e287ff6c26f923c319f55c8cf30039ca24bf 100644 (file)
@@ -490,27 +490,28 @@ class ProjectsControllerTest < ActionController::TestCase
     ["user1_with_load", 2, ["project_with_10_collections"], "project_with_2_pipelines_and_60_crs"],
     ["admin", 5, ["anonymously_accessible_project", "subproject_in_anonymous_accessible_project"], "aproject"],
   ].each do |user, page_size, tree_segment, unexpected|
+    # Note: this test is sensitive to database collation. It passes
+    # with en_US.UTF-8.
     test "build my projects tree for #{user} user and verify #{unexpected} is omitted" do
       use_token user
-      ctrl = ProjectsController.new
-
-      current_user = User.find(api_fixture('users')[user]['uuid'])
 
-      my_tree = ctrl.send :my_wanted_projects_tree, current_user, page_size
+      tree, _, _ = @controller.send(:my_wanted_projects_tree,
+                                    User.current,
+                                    page_size)
 
       tree_segment_at_depth_1 = api_fixture('groups')[tree_segment[0]]
       tree_segment_at_depth_2 = api_fixture('groups')[tree_segment[1]] if tree_segment[1]
 
-      tree_nodes = {}
-      my_tree[0].each do |x|
-        tree_nodes[x[:object]['uuid']] = x[:depth]
+      node_depth = {}
+      tree.each do |x|
+        node_depth[x[:object]['uuid']] = x[:depth]
       end
 
-      assert_equal(1, tree_nodes[tree_segment_at_depth_1['uuid']])
-      assert_equal(2, tree_nodes[tree_segment_at_depth_2['uuid']]) if tree_segment[1]
+      assert_equal(1, node_depth[tree_segment_at_depth_1['uuid']])
+      assert_equal(2, node_depth[tree_segment_at_depth_2['uuid']]) if tree_segment[1]
 
       unexpected_project = api_fixture('groups')[unexpected]
-      assert_nil(tree_nodes[unexpected_project['uuid']])
+      assert_nil(node_depth[unexpected_project['uuid']], node_depth.inspect)
     end
   end
 
index c4eb941b08894bb4cf58c88f75ba7e721e120f3f..b49cbf91c7f52f701ab7d898b866ed3619c9c5c6 100644 (file)
@@ -251,12 +251,12 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
 
       assert_text 'Recent pipelines and processes' # seeing dashboard now
       within('.recent-processes-actions') do
-        assert page.has_link?('Run a pipeline')
+        assert page.has_link?('Run a process')
         assert page.has_link?('All processes')
       end
 
       within('.recent-processes') do
-        assert_text 'pipeline_with_job'
+        assert_text 'running'
 
         within('.row-zzzzz-xvhdp-cr4runningcntnr') do
           assert_text 'requester_for_running_cr'
@@ -269,7 +269,7 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
 
         assert_text 'completed container request'
         within('.row-zzzzz-xvhdp-cr4completedctr')do
-          assert page.has_link? '1f4b0bc7583c2a7f9102c395f4ffc5e3+45'
+          assert page.has_link? 'foo_file'
         end
       end
 
index df6584ebb6490cedac2fe439a1a77110a9feeb84..bd3a813f72af4b8ea77c35568b6f737a1389237d 100644 (file)
@@ -96,4 +96,17 @@ class ContainerRequestsTest < ActionDispatch::IntegrationTest
     wait_for_ajax
     assert_text 'This container is queued'
   end
+
+  test "Run button enabled when workflow is empty and no inputs are needed" do
+    visit page_with_token("active")
+
+    find('.btn', text: 'Run a process').click
+    within('.modal-dialog') do
+      find('.selectable', text: 'Valid workflow with no definition yaml').click
+      find('.btn', text: 'Next: choose inputs').click
+    end
+
+    assert_text 'This workflow does not need any further inputs'
+    page.assert_selector 'a', text: 'Run'
+  end
 end
index 171580bbaa2bc9816a9ba1061e40142d0487c8e9..338280684ecd3a07a5f8e5f244c528a57d7b51b5 100644 (file)
@@ -391,7 +391,7 @@ class PipelineInstancesTest < ActionDispatch::IntegrationTest
     collection = api_fixture('collections', collection_fixture)
 
     # create a pipeline instance
-    find('.btn', text: 'Run a pipeline').click
+    find('.btn', text: 'Run a process').click
     within('.modal-dialog') do
       find('.selectable', text: template_name).click
       find('.btn', text: 'Next: choose inputs').click
index e5877aca6d1e88824b2575ba571eda21de403ee3..27eac8ab566ca916d42569c52ceba15e41e7fcd1 100644 (file)
@@ -746,7 +746,7 @@ class ProjectsTest < ActionDispatch::IntegrationTest
       project = api_fixture('groups')['aproject']
       visit page_with_token 'active', '/projects/' + project['uuid']
 
-      find('.btn', text: 'Run a pipeline').click
+      find('.btn', text: 'Run a process').click
 
       # in the chooser, verify preview and click Next button
       within('.modal-dialog') do
index f04616dd383ac49f927a544fb2e7d372c30b8acb..5b5848ee7766580003ee10be1c28ba944070bf36 100644 (file)
@@ -109,8 +109,8 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest
   end
 
   [
-    ['Two Part Pipeline Template', 'part-one', 'Provide a value for the following'],
-    ['Workflow with input specifications', 'this workflow has inputs specified', 'Provide a value for the following'],
+    ['Pipeline with default input specifications', 'part-one', 'Provide values for the following'],
+    ['Workflow with default input specifications', 'this workflow has inputs specified', 'Provide a value for the following'],
   ].each do |template_name, preview_txt, process_txt|
     test "run a process using template #{template_name} from dashboard" do
       visit page_with_token('admin')
@@ -118,7 +118,7 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest
 
       within('.recent-processes-actions') do
         assert page.has_link?('All processes')
-        find('a', text: 'Run a pipeline').click
+        find('a', text: 'Run a process').click
       end
 
       # in the chooser, verify preview and click Next button
@@ -131,6 +131,10 @@ class WorkUnitsTest < ActionDispatch::IntegrationTest
       # in the process page now
       assert_text process_txt
       assert_selector 'a', text: template_name
+
+      assert_equal "Set value for ex_string_def", find('div.form-group > div > p.form-control-static > a', text: "hello-testing-123")[:"data-title"]
+
+      page.assert_selector 'a.disabled,button.disabled', text: 'Run'
     end
   end
 
index d068ee2aaf08f3e7451ae817599e7dcaaca0d535..dcfd7d86e050532319662f13666db55dd69bf06c 100644 (file)
@@ -19,7 +19,7 @@ class BrowsingTest < WorkbenchPerformanceTest
   test "home page" do
     visit_page_with_token
     assert_text 'Dashboard'
-    assert_selector 'a', text: 'Run a pipeline'
+    assert_selector 'a', text: 'Run a process'
   end
 
   test "search for hash" do
index 418254457dd41e61f1e39bf698b297fcf59dcc26..4c67839a1006693f3d70f8adaf2823e4fa4f11e5 100644 (file)
@@ -23,7 +23,11 @@ run-build-packages-python-and-ruby.sh    Build Python and Ruby packages suitable
 
 run-build-docker-images.sh               Build arvbox Docker images.
 
-run-build-docker-jobs-image.sh           Build arvados/jobs Docker image.
+run-build-docker-jobs-image.sh           Build arvados/jobs Docker image
+                                         (uses published debian packages)
+
+build-dev-docker-jobs-image.sh           Build developer arvados/jobs Docker image
+                                         (uses local git tree)
 
 run-library.sh                           A library of functions shared by the
                                          various scripts in this
diff --git a/build/build-dev-docker-jobs-image.sh b/build/build-dev-docker-jobs-image.sh
new file mode 100755 (executable)
index 0000000..5a6e777
--- /dev/null
@@ -0,0 +1,63 @@
+#!/bin/bash
+
+read -rd "\000" helpmessage <<EOF
+Build an arvados/jobs Docker image from local git tree.
+
+Intended for use by developers working on arvados-python-client or
+arvados-cwl-runner who need to run a crunch job with a custom package
+version.  Also supports building custom cwltool if CWLTOOL is set.
+
+Syntax:
+        WORKSPACE=/path/to/arvados $(basename $0)
+
+WORKSPACE=path         Path to the Arvados source tree to build packages from
+CWLTOOL=path           (optional) Path to cwltool git repository.
+
+EOF
+
+set -e
+
+if [[ -z "$WORKSPACE" ]] ; then
+    echo "$helpmessage"
+    echo
+    echo "Must set WORKSPACE"
+    exit 1
+fi
+
+if [[ -z "$ARVADOS_API_HOST" || -z "$ARVADOS_API_TOKEN" ]] ; then
+    echo "$helpmessage"
+    echo
+    echo "Must set ARVADOS_API_HOST and ARVADOS_API_TOKEN"
+    exit 1
+fi
+
+cd "$WORKSPACE"
+
+(cd sdk/python && python setup.py sdist)
+sdk=$(cd sdk/python/dist && ls -t arvados-python-client-*.tar.gz | head -n1)
+
+(cd sdk/cwl && python setup.py sdist)
+runner=$(cd sdk/cwl/dist && ls -t arvados-cwl-runner-*.tar.gz | head -n1)
+
+rm -rf sdk/cwl/cwltool_dist
+mkdir -p sdk/cwl/cwltool_dist
+if [[ -n "$CWLTOOL" ]] ; then
+    (cd "$CWLTOOL" && python setup.py sdist)
+    cwltool=$(cd "$CWLTOOL/dist" && ls -t cwltool-*.tar.gz | head -n1)
+    cp "$CWLTOOL/dist/$cwltool" $WORKSPACE/sdk/cwl/cwltool_dist
+fi
+
+. build/run-library.sh
+
+python_sdk_ts=$(cd sdk/python && timestamp_from_git)
+cwl_runner_ts=$(cd sdk/cwl && timestamp_from_git)
+
+if [[ $python_sdk_ts -gt $cwl_runner_ts ]]; then
+    gittag=$(git log --first-parent --max-count=1 --format=format:%H sdk/python)
+else
+    gittag=$(git log --first-parent --max-count=1 --format=format:%H sdk/cwl)
+fi
+
+docker build --build-arg sdk=$sdk --build-arg runner=$runner --build-arg cwltool=$cwltool -f "$WORKSPACE/sdk/dev-jobs.dockerfile" -t arvados/jobs:$gittag "$WORKSPACE/sdk"
+echo arv-keepdocker arvados/jobs $gittag
+arv-keepdocker arvados/jobs $gittag
index adcb87f34d79b6e344317e75fa8fca3897d166cc..16c7129d9b8efc0747da64f394366680f1ce4fc6 100755 (executable)
@@ -14,7 +14,9 @@ Syntax:
     Run package install test script "test-packages-$target.sh"
 --debug
     Output debug information (default: false)
---only-test
+--only-build <package>
+    Build only a specific package
+--only-test <package>
     Test only a specific package
 
 WORKSPACE=path         Path to the Arvados source tree to build packages from
@@ -40,7 +42,7 @@ if ! [[ -d "$WORKSPACE" ]]; then
 fi
 
 PARSEDOPTS=$(getopt --name "$0" --longoptions \
-    help,debug,test-packages,target:,command:,only-test: \
+    help,debug,test-packages,target:,command:,only-test:,only-build: \
     -- "" "$@")
 if [ $? -ne 0 ]; then
     exit 1
@@ -62,8 +64,12 @@ while [ $# -gt 0 ]; do
             TARGET="$2"; shift
             ;;
         --only-test)
+            test_packages=1
             packages="$2"; shift
             ;;
+        --only-build)
+            ONLY_BUILD="$2"; shift
+            ;;
         --debug)
             DEBUG=" --debug"
             ;;
@@ -121,7 +127,6 @@ popd
 
 if test -z "$packages" ; then
     packages="arvados-api-server
-        arvados-data-manager
         arvados-docker-cleaner
         arvados-git-httpd
         arvados-node-manager
@@ -191,6 +196,7 @@ else
     if docker run --rm \
         "${docker_volume_args[@]}" \
         --env ARVADOS_DEBUG=1 \
+        --env "ONLY_BUILD=$ONLY_BUILD" \
         "$IMAGE" $COMMAND
     then
         echo
index 320f9d445c3a052a62bf5b8560b2080c98b06904..0f10d26a9de37049c5a4ef7f1e1aed99e41db037 100755 (executable)
@@ -15,8 +15,10 @@ Options:
     Build api server and workbench packages with vendor/bundle included
 --debug
     Output debug information (default: false)
---target
+--target <target>
     Distribution to build packages for (default: debian7)
+--only-build <package>
+    Build only a specific package (or $ONLY_BUILD from environment)
 --command
     Build command to execute (defaults to the run command defined in the
     Docker image)
@@ -31,7 +33,7 @@ TARGET=debian7
 COMMAND=
 
 PARSEDOPTS=$(getopt --name "$0" --longoptions \
-    help,build-bundle-packages,debug,target: \
+    help,build-bundle-packages,debug,target:,only-build: \
     -- "" "$@")
 if [ $? -ne 0 ]; then
     exit 1
@@ -48,6 +50,9 @@ while [ $# -gt 0 ]; do
         --target)
             TARGET="$2"; shift
             ;;
+        --only-build)
+            ONLY_BUILD="$2"; shift
+            ;;
         --debug)
             DEBUG=1
             ;;
@@ -105,7 +110,7 @@ case "$TARGET" in
             'pycurl<7.21.5' contextlib2 pyyaml 'rdflib>=4.2.0' \
             shellescape mistune typing avro ruamel.ordereddict
             cachecontrol requests)
-        PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client)
+        PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client==0.37.0)
         ;;
     debian8)
         FORMAT=deb
@@ -116,7 +121,7 @@ case "$TARGET" in
             'pycurl<7.21.5' pyyaml 'rdflib>=4.2.0' \
             shellescape mistune typing avro ruamel.ordereddict
             cachecontrol)
-        PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client)
+        PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client==0.37.0)
         ;;
     ubuntu1204)
         FORMAT=deb
@@ -127,7 +132,7 @@ case "$TARGET" in
             contextlib2 'pycurl<7.21.5' pyyaml 'rdflib>=4.2.0' \
             shellescape mistune typing avro isodate ruamel.ordereddict
             cachecontrol requests)
-        PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client)
+        PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client==0.37.0)
         ;;
     ubuntu1404)
         FORMAT=deb
@@ -136,7 +141,7 @@ case "$TARGET" in
             rsa 'pycurl<7.21.5' backports.ssl_match_hostname pyyaml 'rdflib>=4.2.0' \
             shellescape mistune typing avro ruamel.ordereddict
             cachecontrol)
-        PYTHON3_BACKPORTS=(docker-py==1.7.2 requests websocket-client)
+        PYTHON3_BACKPORTS=(docker-py==1.7.2 requests websocket-client==0.37.0)
         ;;
     centos6)
         FORMAT=rpm
@@ -156,7 +161,7 @@ case "$TARGET" in
             'rdflib>=4.2.0' shellescape mistune typing avro requests \
             isodate pyparsing sparqlwrapper html5lib==0.9999999 keepalive \
             ruamel.ordereddict cachecontrol)
-        PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client)
+        PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client==0.37.0)
         export PYCURL_SSL_LIBRARY=nss
         ;;
     centos7)
@@ -176,7 +181,7 @@ case "$TARGET" in
             'rdflib>=4.2.0' shellescape mistune typing avro \
             isodate pyparsing sparqlwrapper html5lib==0.9999999 keepalive \
             ruamel.ordereddict cachecontrol)
-        PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client)
+        PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client==0.37.0)
         export PYCURL_SSL_LIBRARY=nss
         ;;
     *)
@@ -248,6 +253,7 @@ fi
 # Perl packages
 debug_echo -e "\nPerl packages\n"
 
+if [[ -z "$ONLY_BUILD" ]] || [[ "libarvados-perl" = "$ONLY_BUILD" ]] ; then
 cd "$WORKSPACE/sdk/perl"
 
 if [[ -e Makefile ]]; then
@@ -263,6 +269,7 @@ perl Makefile.PL INSTALL_BASE=install >"$STDOUT_IF_DEBUG" && \
     "Curoverse, Inc." dir "$(version_from_git)" install/man/=/usr/share/man \
     "$WORKSPACE/LICENSE-2.0.txt=/usr/share/doc/libarvados-perl/LICENSE-2.0.txt" && \
     mv --no-clobber libarvados-perl*.$FORMAT "$WORKSPACE/packages/$TARGET/"
+fi
 
 # Ruby gems
 debug_echo -e "\nRuby gems\n"
@@ -417,8 +424,6 @@ package_go_binary services/crunch-run crunch-run \
     "Supervise a single Crunch container"
 package_go_binary services/crunchstat crunchstat \
     "Gather cpu/memory/network statistics of running Crunch jobs"
-package_go_binary services/datamanager arvados-data-manager \
-    "Ensure block replication levels, report disk usage, and determine which blocks should be deleted when space is needed"
 package_go_binary services/keep-balance keep-balance \
     "Rebalance and garbage-collect data blocks stored in Arvados Keep"
 package_go_binary services/keepproxy keepproxy \
@@ -443,7 +448,7 @@ package_go_binary tools/keep-exercise keep-exercise \
 # 2014-05-15
 cd $WORKSPACE/packages/$TARGET
 rm -rf "$WORKSPACE/sdk/python/build"
-fpm_build $WORKSPACE/sdk/python "${PYTHON2_PKG_PREFIX}-arvados-python-client" 'Curoverse, Inc.' 'python' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/sdk/python/arvados_python_client.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Arvados Python SDK" --deb-recommends=git
+fpm_build $WORKSPACE/sdk/python "${PYTHON2_PKG_PREFIX}-arvados-python-client" 'Curoverse, Inc.' 'python' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/sdk/python/arvados_python_client.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Arvados Python SDK" --depends "${PYTHON2_PKG_PREFIX}-setuptools" --deb-recommends=git
 
 # cwl-runner
 cd $WORKSPACE/packages/$TARGET
@@ -467,7 +472,7 @@ fpm_build lockfile "" "" python 0.12.2 --epoch 1
 # So we build this thing separately.
 #
 # Ward, 2016-03-17
-fpm_build schema_salad "" "" python 1.18.20161005190847 --depends "${PYTHON2_PKG_PREFIX}-lockfile >= 1:0.12.2-2"
+fpm_build schema_salad "" "" python 1.20.20161122192122 --depends "${PYTHON2_PKG_PREFIX}-lockfile >= 1:0.12.2-2"
 
 # And schema_salad now depends on ruamel-yaml, which apparently has a braindead setup.py that requires special arguments to build (otherwise, it aborts with 'error: you have to install with "pip install ."'). Sigh.
 # Ward, 2016-05-26
@@ -478,7 +483,8 @@ fpm_build ruamel.yaml "" "" python 0.12.4 --python-setup-py-arguments "--single-
 fpm_build cwltest "" "" python 1.0.20160907111242
 
 # And for cwltool we have the same problem as for schema_salad. Ward, 2016-03-17
-fpm_build cwltool "" "" python 1.0.20161107145355
+cwltoolversion=$(cat "$WORKSPACE/sdk/cwl/setup.py" | grep cwltool== | sed "s/.*==\(1\.0\..*\)'.*/\1/")
+fpm_build cwltool "" "" python $cwltoolversion
 
 # FPM eats the trailing .0 in the python-rdflib-jsonld package when built with 'rdflib-jsonld>=0.3.0'. Force the version. Ward, 2016-03-25
 fpm_build rdflib-jsonld "" "" python 0.3.0
@@ -495,17 +501,17 @@ fi
 # not omit the python- prefix first.
 cd $WORKSPACE/packages/$TARGET
 rm -rf "$WORKSPACE/services/fuse/build"
-fpm_build $WORKSPACE/services/fuse "${PYTHON2_PKG_PREFIX}-arvados-fuse" 'Curoverse, Inc.' 'python' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/services/fuse/arvados_fuse.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Keep FUSE driver"
+fpm_build $WORKSPACE/services/fuse "${PYTHON2_PKG_PREFIX}-arvados-fuse" 'Curoverse, Inc.' 'python' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/services/fuse/arvados_fuse.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Keep FUSE driver" --depends "${PYTHON2_PKG_PREFIX}-setuptools"
 
 # The node manager
 cd $WORKSPACE/packages/$TARGET
 rm -rf "$WORKSPACE/services/nodemanager/build"
-fpm_build $WORKSPACE/services/nodemanager arvados-node-manager 'Curoverse, Inc.' 'python' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/services/nodemanager/arvados_node_manager.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Arvados node manager"
+fpm_build $WORKSPACE/services/nodemanager arvados-node-manager 'Curoverse, Inc.' 'python' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/services/nodemanager/arvados_node_manager.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Arvados node manager" --depends "${PYTHON2_PKG_PREFIX}-setuptools"
 
 # The Docker image cleaner
 cd $WORKSPACE/packages/$TARGET
 rm -rf "$WORKSPACE/services/dockercleaner/build"
-fpm_build $WORKSPACE/services/dockercleaner arvados-docker-cleaner 'Curoverse, Inc.' 'python3' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/services/dockercleaner/arvados_docker_cleaner.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Arvados Docker image cleaner"
+fpm_build $WORKSPACE/services/dockercleaner arvados-docker-cleaner 'Curoverse, Inc.' 'python3' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/services/dockercleaner/arvados_docker_cleaner.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Arvados Docker image cleaner" --depends "${PYTHON3_PKG_PREFIX}-websocket-client = 0.37.0" --iteration 3
 
 # The Arvados crunchstat-summary tool
 cd $WORKSPACE/packages/$TARGET
@@ -539,6 +545,11 @@ esac
 
 for deppkg in "${PYTHON_BACKPORTS[@]}"; do
     outname=$(echo "$deppkg" | sed -e 's/^python-//' -e 's/[<=>].*//' -e 's/_/-/g' -e "s/^/${PYTHON2_PKG_PREFIX}-/")
+
+    if [[ -n "$ONLY_BUILD" ]] && [[ "$outname" != "$ONLY_BUILD" ]] ; then
+        continue
+    fi
+
     case "$deppkg" in
         httplib2|google-api-python-client)
             # Work around 0640 permissions on some package files.
@@ -588,6 +599,7 @@ handle_rails_package arvados-api-server "$WORKSPACE/services/api" \
     --license="GNU Affero General Public License, version 3.0"
 
 # Build the workbench server package
+if [[ -z "$ONLY_BUILD" ]] || [[ "arvados-workbench" = "$ONLY_BUILD" ]] ; then
 (
     set -e
     cd "$WORKSPACE/apps/workbench"
@@ -612,6 +624,7 @@ handle_rails_package arvados-api-server "$WORKSPACE/services/api" \
     # Remove generated configuration files so they don't go in the package.
     rm config/application.yml config/environments/production.rb
 )
+fi
 
 if [[ "$?" != "0" ]]; then
   echo "ERROR: Asset precompilation failed"
index f0b120f6bf1e4e011a69f9f811ee67ad55624938..541e684bf3ac202940742bd787a62baa93ffc7b9 100755 (executable)
@@ -69,6 +69,10 @@ handle_ruby_gem() {
     local gem_version="$(nohash_version_from_git)"
     local gem_src_dir="$(pwd)"
 
+    if [[ -n "$ONLY_BUILD" ]] && [[ "$gem_name" != "$ONLY_BUILD" ]] ; then
+        return 0
+    fi
+
     if ! [[ -e "${gem_name}-${gem_version}.gem" ]]; then
         find -maxdepth 1 -name "${gem_name}-*.gem" -delete
 
@@ -84,6 +88,10 @@ package_go_binary() {
     local description="$1"; shift
     local license_file="${1:-agpl-3.0.txt}"; shift
 
+    if [[ -n "$ONLY_BUILD" ]] && [[ "$prog" != "$ONLY_BUILD" ]] ; then
+        return 0
+    fi
+
     debug_echo "package_go_binary $src_path as $prog"
 
     local basename="${src_path##*/}"
@@ -143,6 +151,11 @@ _build_rails_package_scripts() {
 
 handle_rails_package() {
     local pkgname="$1"; shift
+
+    if [[ -n "$ONLY_BUILD" ]] && [[ "$pkgname" != "$ONLY_BUILD" ]] ; then
+        return 0
+    fi
+
     local srcdir="$1"; shift
     local license_path="$1"; shift
     local scripts_dir="$(mktemp --tmpdir -d "$pkgname-XXXXXXXX.scripts")" && \
@@ -208,6 +221,10 @@ fpm_build () {
   VERSION=$1
   shift
 
+  if [[ -n "$ONLY_BUILD" ]] && [[ "$PACKAGE_NAME" != "$ONLY_BUILD" ]] && [[ "$PACKAGE" != "$ONLY_BUILD" ]] ; then
+      return 0
+  fi
+
   local default_iteration_value="$(default_iteration "$PACKAGE" "$VERSION")"
 
   case "$PACKAGE_TYPE" in
index 8959cfbe09c3ea7ac6ded2142b626259787d2121..560a6933e8afe63e48f963dfa8c990a2da2b9897 100755 (executable)
@@ -156,6 +156,12 @@ sanity_checks() {
     echo -n 'virtualenv: '
     virtualenv --version \
         || fatal "No virtualenv. Try: apt-get install virtualenv (on ubuntu: python-virtualenv)"
+    echo -n 'ruby: '
+    ruby -v \
+        || fatal "No ruby. Install >=2.1.9 (using rbenv, rvm, or source)"
+    echo -n 'bundler: '
+    bundle version \
+        || fatal "No bundler. Try: gem install bundler"
     echo -n 'go: '
     go version \
         || fatal "No go binary. See http://golang.org/doc/install"
@@ -758,10 +764,6 @@ gostuff=(
     sdk/go/keepclient
     services/keep-balance
     services/keepproxy
-    services/datamanager/summary
-    services/datamanager/collection
-    services/datamanager/keep
-    services/datamanager
     services/crunch-dispatch-local
     services/crunch-dispatch-slurm
     services/crunch-run
index 96aea34d36173a07c7952480d1fd59dcf3f90409..30df754b7af8394e0472c241aebd59abf37bcbb2 100644 (file)
@@ -28,9 +28,9 @@ navbar:
     - Welcome:
       - user/index.html.textile.liquid
       - user/getting_started/community.html.textile.liquid
-    - Run a pipeline using Workbench:
+    - Run a workflow using Workbench:
       - user/getting_started/workbench.html.textile.liquid
-      - user/tutorials/tutorial-pipeline-workbench.html.textile.liquid
+      - user/tutorials/tutorial-workflow-workbench.html.textile.liquid
     - Access an Arvados virtual machine:
       - user/getting_started/vm-login-with-webshell.html.textile.liquid
       - user/getting_started/ssh-access-unix.html.textile.liquid
@@ -47,13 +47,15 @@ navbar:
       - user/cwl/cwl-runner.html.textile.liquid
       - user/cwl/cwl-style.html.textile.liquid
     - Working on the command line:
+      - user/topics/running-workflow-command-line.html.textile.liquid
       - user/topics/running-pipeline-command-line.html.textile.liquid
       - user/topics/arv-run.html.textile.liquid
     - Working with git repositories:
       - user/tutorials/add-new-repository.html.textile.liquid
       - user/tutorials/git-arvados-guide.html.textile.liquid
-    - Develop an Arvados pipeline:
+    - Develop an Arvados workflow:
       - user/tutorials/intro-crunch.html.textile.liquid
+      - user/tutorials/writing-cwl-workflow.html.textile.liquid
       - user/tutorials/running-external-program.html.textile.liquid
       - user/topics/crunch-tools-overview.html.textile.liquid
       - user/tutorials/tutorial-firstscript.html.textile.liquid
diff --git a/doc/_includes/_arvados_cwl_runner.liquid b/doc/_includes/_arvados_cwl_runner.liquid
new file mode 100644 (file)
index 0000000..9a10bab
--- /dev/null
@@ -0,0 +1,62 @@
+h3. Submit a workflow and wait for results
+
+Use @arvados-cwl-runner@ to submit CWL workflows to Arvados.  After submitting the job, it will wait for the workflow to complete and print out the final result to standard output.
+
+*Note:* Once submitted, the workflow runs entirely on Arvados, so even if you interrupt @arvados-cwl-runner@ or log out, the workflow will continue to run.
+
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner bwa-mem.cwl bwa-mem-input.yml</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
+{
+    "aligned_sam": {
+        "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
+        "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
+        "class": "File",
+        "size": 30738986
+    }
+}
+</code></pre>
+</notextile>
+
+h3. Submit a workflow with no waiting
+
+To submit a workflow and exit immediately, use the @--no-wait@ option.  This will submit the workflow to Arvados, print the UUID of the submitted job to standard output, and exit.
+
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --no-wait bwa-mem.cwl bwa-mem-input.yml</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Upload local files: "bwa-mem.cwl"
+2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Uploaded to qr1hi-4zz18-eqnfwrow8aysa9q
+2016-06-30 15:07:52 arvados.cwl-runner[12480] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
+qr1hi-8i9sb-fm2n3b1w0l6bskg
+</code></pre>
+</notextile>
+
+h3. Run a workflow locally
+
+To run a workflow with local control, use @--local@.  This means that the host where you run @arvados-cwl-runner@ will be responsible for submitting jobs. With @--local@, if you interrupt @arvados-cwl-runner@ or log out, the workflow will be terminated.
+
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --local bwa-mem.cwl bwa-mem-input.yml</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-07-01 10:05:19 arvados.cwl-runner[16290] INFO: Pipeline instance qr1hi-d1hrv-92wcu6ldtio74r4
+2016-07-01 10:05:28 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Queued
+2016-07-01 10:05:29 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Running
+2016-07-01 10:05:45 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Complete
+2016-07-01 10:05:46 arvados.cwl-runner[16290] INFO: Overall process status is success
+{
+    "aligned_sam": {
+        "size": 30738986,
+        "path": "keep:15f56bad0aaa7364819bf14ca2a27c63+88/HWI-ST1027_129_D0THKACXX.1_1.sam",
+        "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
+        "class": "File"
+    }
+}
+</code></pre>
+</notextile>
index 849db42e47827c7a3cc2ddea8a28f36d3434979e..d505bfd9e0ec9981b84a19f046ea8260f34577e4 100644 (file)
@@ -8,4 +8,3 @@ table(table table-bordered table-condensed).
 |vcpus|integer|Number of cores to be used to run this process.|Optional. However, a ContainerRequest that is in "Committed" state must provide this.|
 |keep_cache_ram|integer|Number of keep cache bytes to be used to run this process.|Optional.|
 |API|boolean|When set, ARVADOS_API_HOST and ARVADOS_API_TOKEN will be set, and container will have networking enabled to access the Arvados API server.|Optional.|
-|partition|array of strings|Specify the names of one or more compute partitions that may run this container.  If not provided, the system chooses where to run the container.|Optional.|
diff --git a/doc/_includes/_container_scheduling_parameters.liquid b/doc/_includes/_container_scheduling_parameters.liquid
new file mode 100644 (file)
index 0000000..ee2ca07
--- /dev/null
@@ -0,0 +1,7 @@
+Scheduling parameters
+
+Parameters to be passed to the container scheduler (e.g., SLURM) when running a container.
+
+table(table table-bordered table-condensed).
+|_. Key|_. Type|_. Description|_. Notes|
+|partitions|array of strings|The names of one or more compute partitions that may run this container. If not provided, the system will choose where to run the container.|Optional.|
diff --git a/doc/_includes/_register_cwl_workflow.liquid b/doc/_includes/_register_cwl_workflow.liquid
new file mode 100644 (file)
index 0000000..438115b
--- /dev/null
@@ -0,0 +1,21 @@
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --create-workflow bwa-mem.cwl</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Upload local files: "bwa-mem.cwl"
+2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Uploaded to qr1hi-4zz18-7e0hedrmkuyoei3
+2016-07-01 12:21:01 arvados.cwl-runner[15796] INFO: Created template qr1hi-p5p6p-rjleou1dwr167v5
+qr1hi-p5p6p-rjleou1dwr167v5
+</code></pre>
+</notextile>
+
+You can provide a partial input file to set default values for the workflow input parameters:
+
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --create-workflow bwa-mem.cwl bwa-mem-template.yml</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Upload local files: "bwa-mem.cwl"
+2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Uploaded to qr1hi-4zz18-0f91qkovk4ml18o
+2016-07-01 14:09:50 arvados.cwl-runner[3730] INFO: Created template qr1hi-p5p6p-0deqe6nuuyqns2i
+qr1hi-p5p6p-0deqe6nuuyqns2i
+</code></pre>
+</notextile>
diff --git a/doc/_includes/_what_is_cwl.liquid b/doc/_includes/_what_is_cwl.liquid
new file mode 100644 (file)
index 0000000..d7b890d
--- /dev/null
@@ -0,0 +1 @@
+The "Common Workflow Language (CWL)":http://commonwl.org is a multi-vendor open standard for describing analysis tools and workflows that are portable across a variety of platforms.  CWL is the recommended way to develop and run workflows for Arvados.  Arvados supports the "CWL v1.0":http://commonwl.org/v1.0 specification.
index 304226d5de59af541906b975b2b4cd2313e2290e..05a8cf56a8f48f24ae9eefa6de9ce69e108a76ff 100644 (file)
@@ -35,20 +35,27 @@ table(table table-bordered table-condensed).
   "vcpus":2,
   "API":true
 }</code></pre>See "Runtime constraints":#runtime_constraints for more details.|
+|scheduling_parameters|hash|Parameters to be passed to the container scheduler when running this container.|e.g.,<pre><code>{
+"partitions":["fastcpu","vfastcpu"]
+}</code></pre>See "Scheduling parameters":#scheduling_parameters for more details.|
 |container_image|string|Portable data hash of a collection containing the docker image to run the container.|Required.|
 |environment|hash|Environment variables and values that should be set in the container environment (@docker run --env@). This augments and (when conflicts exist) overrides environment variables given in the image's Dockerfile.||
 |cwd|string|Initial working directory, given as an absolute path (in the container) or a path relative to the WORKDIR given in the image's Dockerfile.|Required.|
 |command|array of strings|Command to execute in the container.|Required. e.g., @["echo","hello"]@|
 |output_path|string|Path to a directory or file inside the container that should be preserved as container's output when it finishes. This path must be, or be inside, one of the mount targets. For best performance, point output_path to a writable collection mount.|Required.|
-|priority|integer|Higher value means spend more resources on this container_request, i.e., go ahead of other queued containers, bring up more nodes etc.|Priority 0 means a container should not be run on behalf of this request. Clients are expected to submit container requests with zero priority in order to prevew the container that will be used to satisfy it. Priority can be null if and only if state!="Committed".|
+|priority|integer|Higher value means spend more resources on this container_request, i.e., go ahead of other queued containers, bring up more nodes etc.|Priority 0 means a container should not be run on behalf of this request. Clients are expected to submit container requests with zero priority in order to preview the container that will be used to satisfy it. Priority can be null if and only if state!="Committed".|
 |expires_at|datetime|After this time, priority is considered to be zero.|Not yet implemented.|
 |use_existing|boolean|If possible, use an existing (non-failed) container to satisfy the request instead of creating a new one.|Default is true|
+|log_uuid|string|Log collection containing log messages provided by the scheduler and crunch processes.|Null if the container has not yet completed.|
+|output_uuid|string|Output collection created when the container finished successfully.|Null if the container has failed or not yet completed.|
 |filters|string|Additional constraints for satisfying the container_request, given in the same form as the filters parameter accepted by the container_requests.list API.|
 
 h2(#mount_types). {% include 'mount_types' %}
 
 h2(#runtime_constraints). {% include 'container_runtime_constraints' %}
 
+h2(#scheduling_parameters). {% include 'container_scheduling_parameters' %}
+
 h2(#container_reuse). Container reuse
 
 When a container request is "Committed", the system will try to find and reuse any preexisting Container with the same exact command, cwd, environment, output_path, container_image, mounts, and runtime_constraints as this container request. The serialized fields environment, mounts and runtime_constraints are sorted to facilitate comparison.
index 221141cebc82537631b2c6b7fd53d18f3af36afa..7eed8b0d30f84aa07dddbc46edf0a35773b63977 100644 (file)
@@ -36,6 +36,9 @@ Generally this will contain additional keys that are not present in any correspo
   "vcpus":2,
   "API":true
 }</code></pre>See "Runtime constraints":#runtime_constraints for more details.|
+|scheduling_parameters|hash|Parameters to be passed to the container scheduler when running this container.|e.g.,<pre><code>{
+"partitions":["fastcpu","vfastcpu"]
+}</code></pre>See "Scheduling parameters":#scheduling_parameters for more details.|
 |output|string|Portable data hash of the output collection.|Null if the container is not yet finished.|
 |container_image|string|Portable data hash of a collection containing the docker image used to run the container.||
 |progress|number|A number between 0.0 and 1.0 describing the fraction of work done.||
@@ -58,6 +61,8 @@ h2(#mount_types). {% include 'mount_types' %}
 
 h2(#runtime_constraints). {% include 'container_runtime_constraints' %}
 
+h2(#scheduling_parameters). {% include 'container_scheduling_parameters' %}
+
 h2. Methods
 
 See "Common resource methods":{{site.baseurl}}/api/methods.html for more information about @create@, @delete@, @get@, @list@, and @update@.
index de8dc9e477f53e1369da23f5705d57cfdc901f0a..3d67577e68412a7e11b8f3307ff162caaee30520 100644 (file)
Binary files a/doc/images/upload-using-workbench.png and b/doc/images/upload-using-workbench.png differ
index 76df32c9e2b27aefdb96e6119b4e1bb216d18174..3cdf1e4a66cfd552fbcf41ce46c1e13687d673a2 100644 (file)
Binary files a/doc/images/workbench-dashboard.png and b/doc/images/workbench-dashboard.png differ
index 5ed1ef53e1b4a29a18cb5cb1394f1750e3963df7..bba1a1c60176748f51fb653bfdb3918a7e485e7b 100644 (file)
Binary files a/doc/images/workbench-move-selected.png and b/doc/images/workbench-move-selected.png differ
index 573e2b9b9eff6bde696312d0632a07821c5e22ef..ecff47f059e8cc9c7a6ff03aaef7e605d2ba45fc 100644 (file)
@@ -4,13 +4,17 @@ navsection: userguide
 title: Using Common Workflow Language
 ...
 
-The "Common Workflow Language (CWL)":http://commonwl.org is a multi-vendor open standard for describing analysis tools and workflows that are portable across a variety of platforms.  CWL is the recommended way to develop and run workflows for Arvados.  Arvados supports the "CWL v1.0":http://commonwl.org/v1.0 specification.
+{% include 'what_is_cwl' %}
 
 {% include 'tutorial_expectations' %}
 
-h2. Setting up
+h2. Preparing to work with Arvados CWL runner
 
-The @arvados-cwl-runner@ client is installed by default on Arvados shell nodes.  However, if you do not have @arvados-cwl-runner@, you may install it using @pip@:
+h3. arvados-cwl-runner
+
+The @arvados-cwl-runner@ client is installed by default on Arvados shell nodes.
+
+However, if you do not have @arvados-cwl-runner@, you may install it using @pip@:
 
 <notextile>
 <pre><code>~$ <span class="userinput">virtualenv ~/venv</span>
@@ -20,9 +24,11 @@ The @arvados-cwl-runner@ client is installed by default on Arvados shell nodes.
 </code></pre>
 </notextile>
 
-h3. Docker
+h3. Check Docker access
 
-Certain features of @arvados-cwl-runner@ require access to Docker.  You can determine if you have access to Docker by running @docker version@:
+Certain features of @arvados-cwl-runner@ require access to Docker.
+
+You can determine if you have access to Docker by running @docker version@:
 
 <notextile>
 <pre><code>~$ <span class="userinput">docker version</span>
@@ -44,9 +50,9 @@ Server:
 </code></pre>
 </notextile>
 
-If this returns an error, contact the sysadmin of your cluster for assistance.  Alternatively, if you have Docker installed on your local workstation, you may follow the instructions above to install @arvados-cwl-runner@.
+If this returns an error, contact the sysadmin of your cluster for assistance.
 
-h3. Getting the example files
+h3. Get the example files
 
 The tutorial files are located in the documentation section of the Arvados source repository:
 
@@ -72,60 +78,7 @@ If you do not wish to create an account on "https://cloud.curoverse.com":https:/
 
 h2. Submitting a workflow to an Arvados cluster
 
-Use @arvados-cwl-runner@ to submit CWL workflows to Arvados.  After submitting the job, it will wait for the workflow to complete and print out the final result to standard output.  Note that once submitted, the workflow runs entirely on Arvados, so even if you interrupt @arvados-cwl-runner@ or log out, the workflow will continue to run.
-
-<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner bwa-mem.cwl bwa-mem-input.yml</span>
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
-2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
-{
-    "aligned_sam": {
-        "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
-        "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
-        "class": "File",
-        "size": 30738986
-    }
-}
-</code></pre>
-</notextile>
-
-To submit a workflow and exit immediately, use the @--no-wait@ option.  This will print out the uuid of the job that was submitted to standard output.
-
-<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --no-wait bwa-mem.cwl bwa-mem-input.yml</span>
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Upload local files: "bwa-mem.cwl"
-2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Uploaded to qr1hi-4zz18-eqnfwrow8aysa9q
-2016-06-30 15:07:52 arvados.cwl-runner[12480] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-qr1hi-8i9sb-fm2n3b1w0l6bskg
-</code></pre>
-</notextile>
-
-To run a workflow with local control, use @--local@.  This means that the host where you run @arvados-cwl-runner@ will be responsible for submitting jobs. With @--local@, if you interrupt @arvados-cwl-runner@ or log out, the workflow will be terminated.
-
-<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --local bwa-mem.cwl bwa-mem-input.yml</span>
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-07-01 10:05:19 arvados.cwl-runner[16290] INFO: Pipeline instance qr1hi-d1hrv-92wcu6ldtio74r4
-2016-07-01 10:05:28 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Queued
-2016-07-01 10:05:29 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Running
-2016-07-01 10:05:45 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Complete
-2016-07-01 10:05:46 arvados.cwl-runner[16290] INFO: Overall process status is success
-{
-    "aligned_sam": {
-        "size": 30738986,
-        "path": "keep:15f56bad0aaa7364819bf14ca2a27c63+88/HWI-ST1027_129_D0THKACXX.1_1.sam",
-        "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
-        "class": "File"
-    }
-}
-</code></pre>
-</notextile>
+{% include 'arvados_cwl_runner' %}
 
 h2. Work reuse
 
@@ -143,29 +96,9 @@ If you reference a local file which is not in @arv-mount@, then @arvados-cwl-run
 
 h2. Registering a workflow to use in Workbench
 
-Use @--create-workflow@ to register a CWL workflow with Arvados.  This enables you to share workflows with other Arvados users, and run them by clicking the <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a pipeline...</span> button on the Workbench Dashboard.
-
-<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --create-workflow bwa-mem.cwl</span>
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Upload local files: "bwa-mem.cwl"
-2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Uploaded to qr1hi-4zz18-7e0hedrmkuyoei3
-2016-07-01 12:21:01 arvados.cwl-runner[15796] INFO: Created template qr1hi-p5p6p-rjleou1dwr167v5
-qr1hi-p5p6p-rjleou1dwr167v5
-</code></pre>
-</notextile>
-
-You can provide a partial input file to set default values for the workflow input parameters:
+Use @--create-workflow@ to register a CWL workflow with Arvados.  This enables you to share workflows with other Arvados users, and run them by clicking the <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a process...</span> button on the Workbench Dashboard.
 
-<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --create-workflow bwa-mem.cwl bwa-mem-template.yml</span>
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Upload local files: "bwa-mem.cwl"
-2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Uploaded to qr1hi-4zz18-0f91qkovk4ml18o
-2016-07-01 14:09:50 arvados.cwl-runner[3730] INFO: Created template qr1hi-p5p6p-0deqe6nuuyqns2i
-qr1hi-p5p6p-0deqe6nuuyqns2i
-</code></pre>
-</notextile>
+{% include 'register_cwl_workflow' %}
 
 h2. Making workflows directly executable
 
@@ -226,7 +159,7 @@ arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107,
 
 h2. Developing workflows
 
-For an introduction and and detailed documentation about writing CWL, see the "User Guide":http://commonwl.org/v1.0/UserGuide.html and the "Specification":http://commonwl.org/v1.0 .
+For an introduction and detailed documentation about writing CWL, see the "CWL User Guide":http://commonwl.org/v1.0/UserGuide.html and the "CWL Specification":http://commonwl.org/v1.0 .
 
 To run on Arvados, a workflow should provide a @DockerRequirement@ in the @hints@ section.
 
index 6e334ba0dd7583ddae43b48149a270d3d92ad458..a64727fed00fc7df4971da4c12058c65c53ac45c 100644 (file)
@@ -14,4 +14,4 @@ You may be asked to log in using a Google account.  Arvados uses only your name
 
 Once your account is active, logging in to the Workbench will present you with the Dashboard. This gives a summary of your projects and recent activity in the Arvados instance.  "You are now ready to run your first pipeline.":{{ site.baseurl }}/user/tutorials/tutorial-pipeline-workbench.html
 
-!{{ site.baseurl }}/images/workbench-dashboard.png!
+!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/workbench-dashboard.png!
index ed0a126a41cf6d07f018ff7fe00cad49d5b32fd8..223f2fe311b82ec76c24c258293d84d8e38c92e9 100644 (file)
@@ -4,6 +4,9 @@ navsection: userguide
 title: "Using arv-copy"
 ...
 
+{% include 'crunch1only_begin' %}
+On those sites, the "copy a pipeline template" feature described below is not available. However, the "copy a workflow" feature is not yet implemented.
+{% include 'crunch1only_end' %}
 
 This tutorial describes how to copy Arvados objects from one cluster to another by using @arv-copy@.
 
diff --git a/doc/user/topics/running-workflow-command-line.html.textile.liquid b/doc/user/topics/running-workflow-command-line.html.textile.liquid
new file mode 100644 (file)
index 0000000..f70d3e8
--- /dev/null
@@ -0,0 +1,17 @@
+---
+layout: default
+navsection: userguide
+title: "Running an Arvados workflow"
+...
+
+{% include 'what_is_cwl' %}
+
+{% include 'tutorial_expectations' %}
+
+h2. arvados-cwl-runner
+
+The arvados-cwl-runner tool can be used to submit workflows to an Arvados cluster using the command prompt.
+
+The following examples assume that you have prepared to run the arvados-cwl-runner tool as explained in the "Using Common Workflow Language":{{site.baseurl}}/user/topics/running-workflow-command-line.html page.
+
+{% include 'arvados_cwl_runner' %}
diff --git a/doc/user/tutorials/tutorial-pipeline-workbench.html.textile.liquid b/doc/user/tutorials/tutorial-pipeline-workbench.html.textile.liquid
deleted file mode 100644 (file)
index 6537fda..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
----
-layout: default
-navsection: userguide
-title: "Running a pipeline using Workbench"
-...
-
-{% include 'crunch1only_begin' %}
-On those sites, the details will be slightly different and the example pipeline might not be available.
-{% include 'crunch1only_end' %}
-
-A "pipeline" (sometimes called a "workflow" in other systems) is a sequence of steps that apply various programs or tools to transform input data to output data.  Pipelines are the principal means of performing computation with Arvados.  This tutorial demonstrates how to run a single-stage pipeline to take a small data set of paired-end reads from a sample "exome":https://en.wikipedia.org/wiki/Exome in "FASTQ":https://en.wikipedia.org/wiki/FASTQ_format format and align them to "Chromosome 19":https://en.wikipedia.org/wiki/Chromosome_19_%28human%29 using the "bwa mem":http://bio-bwa.sourceforge.net/ tool, producing a "Sequence Alignment/Map (SAM)":https://samtools.github.io/ file.  This tutorial will introduce the following Arvados features:
-
-<div>
-* How to create a new pipeline from an existing template.
-* How to browse and select input data for the pipeline and submit the pipeline to run on the Arvados cluster.
-* How to access your pipeline results.
-</div>
-
-notextile. <div class="spaced-out">
-
-h3. Steps
-
-# Start from the *Workbench Dashboard*.  You can access the Dashboard by clicking on *<i class="fa fa-lg fa-fw fa-dashboard"></i> Dashboard* in the upper left corner of any Workbench page.
-# Click on the <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a pipeline...</span> button.  This will open a dialog box titled *Choose a pipeline to run*.
-# In the search box, type in *Tutorial align using bwa mem*.
-# Select *<i class="fa fa-fw fa-gear"></i> Tutorial align using bwa mem* and click the <span class="btn btn-sm btn-primary" >Next: choose inputs <i class="fa fa-fw fa-arrow-circle-right"></i></span> button.  This will create a new pipeline in your *Home* project and will open it. You can now supply the inputs for the pipeline.
-# The first input parameter to the pipeline is *"reference_collection" parameter for run-command script in bwa-mem component*.  Click the <span class="btn btn-sm btn-primary">Choose</span> button beneath that header.  This will open a dialog box titled *Choose a dataset for "reference_collection" parameter for run-command script in bwa-mem component*.
-# Open the *Home <span class="caret"></span>* menu and select *All Projects*. Search for and select *<i class="fa fa-fw fa-archive"></i> Tutorial chromosome 19 reference* and click the <span class="btn btn-sm btn-primary" >OK</span> button.
-# Repeat the previous two steps to set the *"sample" parameter for run-command script in bwa-mem component* parameter to *<i class="fa fa-fw fa-archive"></i> Tutorial sample exome*.
-# Click on the <span class="btn btn-sm btn-primary" >Run <i class="fa fa-fw fa-play"></i></span> button.  The page updates to show you that the pipeline has been submitted to run on the Arvados cluster.
-# After the pipeline starts running, you can track the progress by watching log messages from jobs.  This page refreshes automatically.  You will see a <span class="label label-success">complete</span> label when the pipeline completes successfully.
-# Click on the *Output* link to see the results of the job.  This will load a new page listing the output files from this pipeline.  You'll see the output SAM file from the alignment tool under the *Files* tab.
-# Click on the <span class="btn btn-sm btn-info"><i class="fa fa-download"></i></span> download button to the right of the SAM file to download your results.
-
-notextile. </div>
diff --git a/doc/user/tutorials/tutorial-workflow-workbench.html.textile.liquid b/doc/user/tutorials/tutorial-workflow-workbench.html.textile.liquid
new file mode 100644 (file)
index 0000000..445ce75
--- /dev/null
@@ -0,0 +1,27 @@
+---
+layout: default
+navsection: userguide
+title: "Running a workflow using Workbench"
+...
+
+A "workflow" (sometimes called a "pipeline" in other systems) is a sequence of steps that apply various programs or tools to transform input data to output data.  Workflows are the principal means of performing computation with Arvados.  This tutorial demonstrates how to run a single-stage workflow to take a small data set of paired-end reads from a sample "exome":https://en.wikipedia.org/wiki/Exome in "FASTQ":https://en.wikipedia.org/wiki/FASTQ_format format and align them to "Chromosome 19":https://en.wikipedia.org/wiki/Chromosome_19_%28human%29 using the "bwa mem":http://bio-bwa.sourceforge.net/ tool, producing a "Sequence Alignment/Map (SAM)":https://samtools.github.io/ file.  This tutorial will introduce the following Arvados features:
+
+<div>
+* How to create a new process from an existing workflow.
+* How to browse and select input data for the workflow and submit the process to run on the Arvados cluster.
+* How to access your process results.
+</div>
+
+h3. Steps
+
+# Start from the *Workbench Dashboard*.  You can access the Dashboard by clicking on *<i class="fa fa-lg fa-fw fa-dashboard"></i> Dashboard* in the upper left corner of any Workbench page.
+# Click on the <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a process...</span> button.  This will open a dialog box titled *Choose a pipeline or workflow to run*.
+# In the search box, type in *Tutorial bwa mem cwl*.
+# Select *<i class="fa fa-fw fa-gear"></i> Tutorial bwa mem cwl* and click the <span class="btn btn-sm btn-primary" >Next: choose inputs <i class="fa fa-fw fa-arrow-circle-right"></i></span> button.  This will create a new process in your *Home* project and will open it. You can now supply the inputs for the process. Please note that all required inputs are populated with default values and you can change them if you prefer.
+# For example, let's see how to change *"reference" parameter* for this workflow. Click the <span class="btn btn-sm btn-primary">Choose</span> button beneath the *"reference" parameter* header.  This will open a dialog box titled *Choose a dataset for "reference" parameter for cwl-runner in bwa-mem.cwl component*.
+# Open the *Home <span class="caret"></span>* menu and select *All Projects*. Search for and select *<i class="fa fa-fw fa-archive"></i> Tutorial chromosome 19 reference*. You will then see a list of files. Select *<i class="fa fa-fw fa-file"></i> 19-fasta.bwt* and click the <span class="btn btn-sm btn-primary" >OK</span> button.
+# Repeat the previous two steps to set the *"read_p1" parameter for cwl-runner script in bwa-mem.cwl component* and *"read_p2" parameter for cwl-runner script in bwa-mem.cwl component* parameters.
+# Click on the <span class="btn btn-sm btn-primary" >Run <i class="fa fa-fw fa-play"></i></span> button.  The page updates to show you that the process has been submitted to run on the Arvados cluster.
+# After the process starts running, you can track the progress by watching log messages from the component(s).  This page refreshes automatically.  You will see a <span class="label label-success">complete</span> label when the process completes successfully.
+# Click on the *Output* link to see the results of the process.  This will load a new page listing the output files from this process.  You'll see the output SAM file from the alignment tool under the *Files* tab.
+# Click on the <span class="btn btn-sm btn-info"><i class="fa fa-download"></i></span> download button to the right of the SAM file to download your results.
diff --git a/doc/user/tutorials/writing-cwl-workflow.html.textile.liquid b/doc/user/tutorials/writing-cwl-workflow.html.textile.liquid
new file mode 100644 (file)
index 0000000..cd282c8
--- /dev/null
@@ -0,0 +1,28 @@
+---
+layout: default
+navsection: userguide
+title: "Writing a CWL workflow"
+...
+
+{% include 'what_is_cwl' %}
+
+{% include 'tutorial_expectations' %}
+
+h2. Registering a CWL workflow
+
+Use @--create-workflow@ to register a CWL workflow with Arvados.
+
+The following examples assume that you have prepared to run the arvados-cwl-runner tool as explained in the "Using Common Workflow Language":{{site.baseurl}}/user/topics/running-workflow-command-line.html page.
+
+{% include 'register_cwl_workflow' %}
+
+h2. Running a CWL workflow
+
+h3. Running a workflow at command prompt
+
+Not yet implemented
+
+h3. Running a workflow using Workbench
+
+The workflow can also be executed using Workbench. Go to the Workbench Dashboard and click the <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a process...</span> button and select the desired workflow.
+
index 358743608b1f7f5e796e5d3f3d90e3c9dc6f8cb6..40c9cf325cf11f11d077b6783f0b6fd46b47c74d 100755 (executable)
@@ -864,9 +864,9 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
         ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
         ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP "
         .q{&& declare -a VOLUMES=() }
-        .q{&& if which crunchrunner >/dev/null ; then VOLUMES+=("--volume=$(which crunchrunner):/usr/local/bin/crunchrunner") ; fi }
-        .q{&& if test -f /etc/ssl/certs/ca-certificates.crt ; then VOLUMES+=("--volume=/etc/ssl/certs/ca-certificates.crt:/etc/arvados/ca-certificates.crt") ; }
-        .q{elif test -f /etc/pki/tls/certs/ca-bundle.crt ; then VOLUMES+=("--volume=/etc/pki/tls/certs/ca-bundle.crt:/etc/arvados/ca-certificates.crt") ; fi };
+        .q{&& if which crunchrunner >/dev/null ; then VOLUMES+=("--volume=$(which crunchrunner):/usr/local/bin/crunchrunner:ro") ; fi }
+        .q{&& if test -f /etc/ssl/certs/ca-certificates.crt ; then VOLUMES+=("--volume=/etc/ssl/certs/ca-certificates.crt:/etc/arvados/ca-certificates.crt:ro") ; }
+        .q{elif test -f /etc/pki/tls/certs/ca-bundle.crt ; then VOLUMES+=("--volume=/etc/pki/tls/certs/ca-bundle.crt:/etc/arvados/ca-certificates.crt:ro") ; fi };
 
     $command .= "&& exec arv-mount --read-write --mount-by-pdh=by_pdh --mount-tmp=tmp --crunchstat-interval=10 --allow-other $arv_file_cache \Q$keep_mnt\E --exec ";
     $ENV{TASK_KEEPMOUNT} = "$keep_mnt/by_pdh";
index d0224aedb01e354408f5a710eb88dff344314a90..04f454369cc6541be477fe3f585c637f45c7aee9 100644 (file)
@@ -20,6 +20,14 @@ class TestArvKeepGet < Minitest::Test
     assert_match /^usage:/, err
   end
 
+  def test_get_version
+    out, err = capture_subprocess_io do
+      assert_arv_get '--version'
+    end
+    assert_empty(out, "STDOUT not expected: '#{out}'")
+    assert_match(/[0-9]+\.[0-9]+\.[0-9]+/, err, "Version information incorrect: '#{err}'")
+  end
+
   def test_help
     out, err = capture_subprocess_io do
       assert_arv_get '-h'
index 2b2e15bf6caf31f9016266879ec6e032df2e6380..31272c825ea0f14874a6a13609c9df746ecc8871 100644 (file)
@@ -21,6 +21,7 @@ import schema_salad
 
 import arvados
 import arvados.config
+from arvados.errors import ApiError
 
 from .arvcontainer import ArvadosContainer, RunnerContainer
 from .arvjob import ArvadosJob, RunnerJob, RunnerTemplate
@@ -72,7 +73,9 @@ class ArvCwlRunner(object):
         else:
             self.keep_client = arvados.keep.KeepClient(api_client=self.api, num_retries=self.num_retries)
 
-        for api in ["jobs", "containers"]:
+        self.work_api = None
+        expected_api = ["jobs", "containers"]
+        for api in expected_api:
             try:
                 methods = self.api._rootDesc.get('resources')[api]['methods']
                 if ('httpMethod' in methods['create'] and
@@ -81,11 +84,12 @@ class ArvCwlRunner(object):
                     break
             except KeyError:
                 pass
+
         if not self.work_api:
             if work_api is None:
                 raise Exception("No supported APIs")
             else:
-                raise Exception("Unsupported API '%s'" % work_api)
+                raise Exception("Unsupported API '%s', expected one of %s" % (work_api, expected_api))
 
     def arv_make_tool(self, toolpath_object, **kwargs):
         kwargs["work_api"] = self.work_api
@@ -120,7 +124,7 @@ class ArvCwlRunner(object):
                         logger.info("Job %s (%s) is Running", j.name, uuid)
                         j.running = True
                         j.update_pipeline_component(event["properties"]["new_attributes"])
-                elif event["properties"]["new_attributes"]["state"] in ("Complete", "Failed", "Cancelled"):
+                elif event["properties"]["new_attributes"]["state"] in ("Complete", "Failed", "Cancelled", "Final"):
                     uuid = event["object_uuid"]
                     try:
                         self.cond.acquire()
@@ -150,7 +154,7 @@ class ArvCwlRunner(object):
                     continue
 
                 if self.work_api == "containers":
-                    table = self.poll_api.containers()
+                    table = self.poll_api.container_requests()
                 elif self.work_api == "jobs":
                     table = self.poll_api.jobs()
 
@@ -277,6 +281,12 @@ class ArvCwlRunner(object):
         if self.work_api == "containers":
             try:
                 current = self.api.containers().current().execute(num_retries=self.num_retries)
+            except ApiError as e:
+                # Status code 404 just means we're not running in a container.
+                if e.resp.status != 404:
+                    logger.info("Getting current container: %s", e)
+                return
+            try:
                 self.api.containers().update(uuid=current['uuid'],
                                              body={
                                                  'output': self.final_output_collection.portable_data_hash(),
@@ -308,14 +318,18 @@ class ArvCwlRunner(object):
             if self.work_api == "jobs":
                 tmpl = RunnerTemplate(self, tool, job_order,
                                       kwargs.get("enable_reuse"),
-                                      uuid=existing_uuid)
+                                      uuid=existing_uuid,
+                                      submit_runner_ram=kwargs.get("submit_runner_ram"),
+                                      name=kwargs.get("name"))
                 tmpl.save()
                 # cwltool.main will write our return value to stdout.
                 return tmpl.uuid
             else:
                 return upload_workflow(self, tool, job_order,
                                        self.project_uuid,
-                                       uuid=existing_uuid)
+                                       uuid=existing_uuid,
+                                       submit_runner_ram=kwargs.get("submit_runner_ram"),
+                                       name=kwargs.get("name"))
 
         self.ignore_docker_for_reuse = kwargs.get("ignore_docker_for_reuse")
 
@@ -346,16 +360,20 @@ class ArvCwlRunner(object):
                                          self.output_callback,
                                          **kwargs).next()
                 else:
-                    runnerjob = RunnerContainer(self, tool, job_order, kwargs.get("enable_reuse"), self.output_name, self.output_tags)
+                    runnerjob = RunnerContainer(self, tool, job_order, kwargs.get("enable_reuse"), self.output_name,
+                                                self.output_tags, submit_runner_ram=kwargs.get("submit_runner_ram"),
+                                                name=kwargs.get("name"))
             else:
-                runnerjob = RunnerJob(self, tool, job_order, kwargs.get("enable_reuse"), self.output_name, self.output_tags)
+                runnerjob = RunnerJob(self, tool, job_order, kwargs.get("enable_reuse"), self.output_name,
+                                      self.output_tags, submit_runner_ram=kwargs.get("submit_runner_ram"),
+                                      name=kwargs.get("name"))
 
         if not kwargs.get("submit") and "cwl_runner_job" not in kwargs and not self.work_api == "containers":
             # Create pipeline for local run
             self.pipeline = self.api.pipeline_instances().create(
                 body={
                     "owner_uuid": self.project_uuid,
-                    "name": shortname(tool.tool["id"]),
+                    "name": kwargs["name"] if kwargs.get("name") else shortname(tool.tool["id"]),
                     "components": {},
                     "state": "RunningOnClient"}).execute(num_retries=self.num_retries)
             logger.info("Pipeline instance %s", self.pipeline["uuid"])
@@ -524,6 +542,14 @@ def arg_parser():  # type: () -> argparse.ArgumentParser
                         help="Compute checksum of contents while collecting outputs",
                         dest="compute_checksum")
 
+    parser.add_argument("--submit-runner-ram", type=int,
+                        help="RAM (in MiB) required for the workflow runner job (default 1024)",
+                        default=1024)
+
+    parser.add_argument("--name", type=str,
+                        help="Name to use for workflow execution instance.",
+                        default=None)
+
     parser.add_argument("workflow", type=str, nargs="?", default=None, help="The workflow to execute")
     parser.add_argument("job_order", nargs=argparse.REMAINDER, help="The input object to the workflow.")
 
@@ -547,6 +573,10 @@ def main(args, stdout, stderr, api_client=None, keep_client=None):
     job_order_object = None
     arvargs = parser.parse_args(args)
 
+    if arvargs.version:
+        print versionstring()
+        return
+
     if arvargs.update_workflow:
         if arvargs.update_workflow.find('-7fd4e-') == 5:
             want_api = 'containers'
@@ -586,6 +616,7 @@ def main(args, stdout, stderr, api_client=None, keep_client=None):
 
     arvargs.conformance_test = None
     arvargs.use_container = True
+    arvargs.relax_path_checks = True
 
     return cwltool.main.main(args=arvargs,
                              stdout=stdout,
index 1fda4122172c06f926ec89941868be4914be0b13..08da4ca16efa1185aa40642817329b8b2f18d005 100644 (file)
@@ -62,7 +62,7 @@ class ArvadosContainer(object):
                 }
 
         if self.generatefiles["listing"]:
-            raise UnsupportedRequirement("Generate files not supported")
+            raise UnsupportedRequirement("InitialWorkDirRequirement not supported with --api=containers")
 
         container_request["environment"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
         if self.environment:
@@ -115,24 +115,23 @@ class ArvadosContainer(object):
                 body=container_request
             ).execute(num_retries=self.arvrunner.num_retries)
 
-            self.arvrunner.processes[response["container_uuid"]] = self
+            self.arvrunner.processes[response["uuid"]] = self
 
-            container = self.arvrunner.api.containers().get(
-                uuid=response["container_uuid"]
-            ).execute(num_retries=self.arvrunner.num_retries)
-
-            logger.info("Container request %s (%s) state is %s with container %s %s", self.name, response["uuid"], response["state"], container["uuid"], container["state"])
+            logger.info("Container request %s (%s) state is %s", self.name, response["uuid"], response["state"])
 
-            if container["state"] in ("Complete", "Cancelled"):
-                self.done(container)
+            if response["state"] == "Final":
+                self.done(response)
         except Exception as e:
             logger.error("Got error %s" % str(e))
             self.output_callback({}, "permanentFail")
 
     def done(self, record):
         try:
-            if record["state"] == "Complete":
-                rcode = record["exit_code"]
+            container = self.arvrunner.api.containers().get(
+                uuid=record["container_uuid"]
+            ).execute(num_retries=self.arvrunner.num_retries)
+            if container["state"] == "Complete":
+                rcode = container["exit_code"]
                 if self.successCodes and rcode in self.successCodes:
                     processStatus = "success"
                 elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
@@ -146,24 +145,14 @@ class ArvadosContainer(object):
             else:
                 processStatus = "permanentFail"
 
-            try:
-                outputs = {}
-                if record["output"]:
-                    outputs = done.done(self, record, "/tmp", self.outdir, "/keep")
-            except WorkflowException as e:
-                logger.error("Error while collecting output for container %s:\n%s", self.name, e, exc_info=(e if self.arvrunner.debug else False))
-                processStatus = "permanentFail"
-            except Exception as e:
-                logger.exception("Got unknown exception while collecting output for container %s:", self.name)
-                processStatus = "permanentFail"
-
-            # Note: Currently, on error output_callback is expecting an empty dict,
-            # anything else will fail.
-            if not isinstance(outputs, dict):
-                logger.error("Unexpected output type %s '%s'", type(outputs), outputs)
-                outputs = {}
-                processStatus = "permanentFail"
+            outputs = {}
 
+            if container["output"]:
+                try:
+                    outputs = done.done_outputs(self, container, "/tmp", self.outdir, "/keep")
+                except Exception as e:
+                    logger.error("Got error %s" % str(e))
+                    self.output_callback({}, "permanentFail")
             self.output_callback(outputs, processStatus)
         finally:
             del self.arvrunner.processes[record["uuid"]]
@@ -237,7 +226,7 @@ class RunnerContainer(Runner):
             },
             "runtime_constraints": {
                 "vcpus": 1,
-                "ram": 1024*1024*256,
+                "ram": 1024*1024 * self.submit_runner_ram,
                 "API": True
             }
         }
@@ -252,9 +241,23 @@ class RunnerContainer(Runner):
         ).execute(num_retries=self.arvrunner.num_retries)
 
         self.uuid = response["uuid"]
-        self.arvrunner.processes[response["container_uuid"]] = self
+        self.arvrunner.processes[response["uuid"]] = self
 
         logger.info("Submitted container %s", response["uuid"])
 
-        if response["state"] in ("Complete", "Failed", "Cancelled"):
+        if response["state"] == "Final":
             self.done(response)
+
+    def done(self, record):
+        try:
+            container = self.arvrunner.api.containers().get(
+                uuid=record["container_uuid"]
+            ).execute(num_retries=self.arvrunner.num_retries)
+        except Exception as e:
+            logger.exception("While getting runner container: %s", e)
+            self.arvrunner.output_callback({}, "permanentFail")
+            del self.arvrunner.processes[record["uuid"]]
+        else:
+            super(RunnerContainer, self).done(container)
+        finally:
+            del self.arvrunner.processes[record["uuid"]]
index b9691d215c4d46e071d13740c2d3c90b9f7d1a81..7f6ab587d323a7dc65e39c00e8e1b38d019f009d 100644 (file)
@@ -36,7 +36,10 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid
         cwltool.docker.get_image(dockerRequirement, pull_image)
 
         # Upload image to Arvados
-        args = ["--project-uuid="+project_uuid, image_name]
+        args = []
+        if project_uuid:
+            args.append("--project-uuid="+project_uuid)
+        args.append(image_name)
         if image_tag:
             args.append(image_tag)
         logger.info("Uploading Docker image %s", ":".join(args[1:]))
index 1afb9afc0e4cf4a888ad34b73cb0a5d7713f5ef9..e34145a143558bcedb64bd19b9cf1f5955e21640 100644 (file)
@@ -257,7 +257,8 @@ class RunnerJob(Runner):
             "repository": "arvados",
             "script_parameters": self.job_order,
             "runtime_constraints": {
-                "docker_image": arvados_jobs_image(self.arvrunner)
+                "docker_image": arvados_jobs_image(self.arvrunner),
+                "min_ram_mb_per_node": self.submit_runner_ram
             }
         }
 
@@ -280,7 +281,7 @@ class RunnerJob(Runner):
         self.arvrunner.pipeline = self.arvrunner.api.pipeline_instances().create(
             body={
                 "owner_uuid": self.arvrunner.project_uuid,
-                "name": shortname(self.tool.tool["id"]),
+                "name": self.name,
                 "components": {"cwl-runner": job_spec },
                 "state": "RunningOnServer"}).execute(num_retries=self.arvrunner.num_retries)
         logger.info("Created pipeline %s", self.arvrunner.pipeline["uuid"])
@@ -308,7 +309,8 @@ class RunnerTemplate(object):
         'string': 'text',
     }
 
-    def __init__(self, runner, tool, job_order, enable_reuse, uuid):
+    def __init__(self, runner, tool, job_order, enable_reuse, uuid,
+                 submit_runner_ram=0, name=None):
         self.runner = runner
         self.tool = tool
         self.job = RunnerJob(
@@ -317,7 +319,9 @@ class RunnerTemplate(object):
             job_order=job_order,
             enable_reuse=enable_reuse,
             output_name=None,
-            output_tags=None)
+            output_tags=None,
+            submit_runner_ram=submit_runner_ram,
+            name=name)
         self.uuid = uuid
 
     def pipeline_component_spec(self):
index f425ae8df9d2478a566c3dc47aea7e7e1c678d45..703bb47d8e85cd24b38db6f943f6a1306da0e140 100644 (file)
@@ -18,7 +18,8 @@ from .perf import Perf
 logger = logging.getLogger('arvados.cwl-runner')
 metrics = logging.getLogger('arvados.cwl-runner.metrics')
 
-def upload_workflow(arvRunner, tool, job_order, project_uuid, uuid=None):
+def upload_workflow(arvRunner, tool, job_order, project_uuid, uuid=None,
+                    submit_runner_ram=0, name=None):
     upload_docker(arvRunner, tool)
 
     document_loader, workflowobj, uri = (tool.doc_loader, tool.doc_loader.fetch(tool.tool["id"]), tool.tool["id"])
@@ -33,13 +34,17 @@ def upload_workflow(arvRunner, tool, job_order, project_uuid, uuid=None):
         if sn in job_order:
             inp["default"] = job_order[sn]
 
-    name = os.path.basename(tool.tool["id"])
+    if not name:
+        name = tool.tool.get("label", os.path.basename(tool.tool["id"]))
+
     upload_dependencies(arvRunner, name, document_loader,
                         packed, uri, False)
 
+    # TODO: the workflow record has no field to store submit_runner_ram yet.
+
     body = {
         "workflow": {
-            "name": tool.tool.get("label", name),
+            "name": name,
             "description": tool.tool.get("doc", ""),
             "definition":yaml.safe_dump(packed)
         }}
index 31f353e5d8e0395028d50b86973fd4bb6e197e8f..c755565135fe98ca42e80b1e904ad7d65c3ad6af 100644 (file)
@@ -35,6 +35,9 @@ def done(self, record, tmpdir, outdir, keepdir):
         }, ensure_unique_name=True).execute(
             num_retries=self.arvrunner.num_retries)
 
+    return done_outputs(self, record, tmpdir, outdir, keepdir)
+
+def done_outputs(self, record, tmpdir, outdir, keepdir):
     self.builder.outdir = outdir
     self.builder.pathmapper.keepdir = keepdir
     return self.collect_outputs("keep:" + record["output"])
index 3bbcb8b091763cd565051c23a65ce13d1069f08f..2d13e6640b1c57b3ccfb22c9c3c9bac79b6ecda8 100644 (file)
@@ -161,7 +161,9 @@ def arvados_jobs_image(arvrunner):
     return img
 
 class Runner(object):
-    def __init__(self, runner, tool, job_order, enable_reuse, output_name, output_tags):
+    def __init__(self, runner, tool, job_order, enable_reuse,
+                 output_name, output_tags, submit_runner_ram=0,
+                 name=None):
         self.arvrunner = runner
         self.tool = tool
         self.job_order = job_order
@@ -171,12 +173,22 @@ class Runner(object):
         self.final_output = None
         self.output_name = output_name
         self.output_tags = output_tags
+        self.name = name
+
+        if submit_runner_ram:
+            self.submit_runner_ram = submit_runner_ram
+        else:
+            self.submit_runner_ram = 1024
+
+        if self.submit_runner_ram <= 0:
+            raise Exception("Value of --submit-runner-ram must be greater than zero")
 
     def update_pipeline_component(self, record):
         pass
 
     def arvados_job_spec(self, *args, **kwargs):
-        self.name = os.path.basename(self.tool.tool["id"])
+        if self.name is None:
+            self.name = os.path.basename(self.tool.tool["id"])
         workflowmapper = upload_instance(self.arvrunner, self.name, self.tool, self.job_order)
         adjustDirObjs(self.job_order, trim_listing)
         return workflowmapper
@@ -203,8 +215,10 @@ class Runner(object):
                                                            api_client=self.arvrunner.api,
                                                            keep_client=self.arvrunner.keep_client,
                                                            num_retries=self.arvrunner.num_retries)
-                with outc.open("cwl.output.json") as f:
-                    outputs = json.load(f)
+                if "cwl.output.json" in outc:
+                    with outc.open("cwl.output.json") as f:
+                        if f.size() > 0:
+                            outputs = json.load(f)
                 def keepify(fileobj):
                     path = fileobj["location"]
                     if not path.startswith("keep:"):
@@ -215,4 +229,5 @@ class Runner(object):
                 logger.exception("While getting final output object: %s", e)
             self.arvrunner.output_callback(outputs, processStatus)
         finally:
-            del self.arvrunner.processes[record["uuid"]]
+            if record["uuid"] in self.arvrunner.processes:
+                del self.arvrunner.processes[record["uuid"]]
index 9d9a1e1a7acf99f46d61d96de384681da114925a..7a8ec3a0dd3c51743720076f67c230d6af4706c8 100644 (file)
@@ -48,8 +48,9 @@ setup(name='arvados-cwl-runner',
       # Make sure to update arvados/build/run-build-packages.sh as well
       # when updating the cwltool version pin.
       install_requires=[
-          'cwltool==1.0.20161107145355',
-          'arvados-python-client>=0.1.20160826210445'
+          'cwltool==1.0.20161128202906',
+          'arvados-python-client>=0.1.20160826210445',
+          'setuptools'
       ],
       data_files=[
           ('share/doc/arvados-cwl-runner', ['LICENSE-2.0.txt', 'README.rst']),
index bb4bac31dd1767081cdc12a313496a4bb13b4546..bb661550da45ae3b8fe0be1c3d5f78e69bdaabe6 100644 (file)
@@ -159,54 +159,11 @@ class TestContainer(unittest.TestCase):
         runner.num_retries = 0
         runner.ignore_docker_for_reuse = False
 
-        col().open.return_value = []
-        api.collections().list().execute.side_effect = ({"items": []},
-                                                        {"items": [{"manifest_text": "XYZ"}]})
-
-        arvjob = arvados_cwl.ArvadosContainer(runner)
-        arvjob.name = "testjob"
-        arvjob.builder = mock.MagicMock()
-        arvjob.output_callback = mock.MagicMock()
-        arvjob.collect_outputs = mock.MagicMock()
-        arvjob.successCodes = [0]
-        arvjob.outdir = "/var/spool/cwl"
-
-        arvjob.done({
-            "state": "Complete",
-            "output": "99999999999999999999999999999993+99",
-            "log": "99999999999999999999999999999994+99",
-            "uuid": "zzzzz-8i9sb-zzzzzzzzzzzzzzz",
-            "exit_code": 0
-        })
-
-        api.collections().list.assert_has_calls([
-            mock.call(),
-            mock.call(filters=[['owner_uuid', '=', 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'],
-                          ['portable_data_hash', '=', '99999999999999999999999999999993+99'],
-                          ['name', '=', 'Output 9999999 of testjob']]),
-            mock.call().execute(num_retries=0),
-            mock.call(limit=1, filters=[['portable_data_hash', '=', '99999999999999999999999999999993+99']],
-                 select=['manifest_text']),
-            mock.call().execute(num_retries=0)])
-
-        api.collections().create.assert_called_with(
-            ensure_unique_name=True,
-            body={'portable_data_hash': '99999999999999999999999999999993+99',
-                  'manifest_text': 'XYZ',
-                  'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
-                  'name': 'Output 9999999 of testjob'})
-
-    @mock.patch("arvados.collection.Collection")
-    def test_done_use_existing_collection(self, col):
-        api = mock.MagicMock()
-
-        runner = mock.MagicMock()
-        runner.api = api
-        runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
-        runner.num_retries = 0
+        runner.api.containers().get().execute.return_value = {"state":"Complete",
+                                                              "output": "abc+123",
+                                                              "exit_code": 0}
 
         col().open.return_value = []
-        api.collections().list().execute.side_effect = ({"items": [{"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz2"}]},)
 
         arvjob = arvados_cwl.ArvadosContainer(runner)
         arvjob.name = "testjob"
@@ -216,19 +173,17 @@ class TestContainer(unittest.TestCase):
         arvjob.successCodes = [0]
         arvjob.outdir = "/var/spool/cwl"
 
+        arvjob.collect_outputs.return_value = {"out": "stuff"}
+
         arvjob.done({
-            "state": "Complete",
-            "output": "99999999999999999999999999999993+99",
-            "log": "99999999999999999999999999999994+99",
-            "uuid": "zzzzz-8i9sb-zzzzzzzzzzzzzzz",
-            "exit_code": 0
+            "state": "Final",
+            "log_uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz1",
+            "output_uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz2",
+            "uuid": "zzzzz-xvhdp-zzzzzzzzzzzzzzz",
+            "container_uuid": "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
         })
 
-        api.collections().list.assert_has_calls([
-            mock.call(),
-            mock.call(filters=[['owner_uuid', '=', 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'],
-                               ['portable_data_hash', '=', '99999999999999999999999999999993+99'],
-                               ['name', '=', 'Output 9999999 of testjob']]),
-            mock.call().execute(num_retries=0)])
-
         self.assertFalse(api.collections().create.called)
+
+        arvjob.collect_outputs.assert_called_with("keep:abc+123")
+        arvjob.output_callback.assert_called_with({"out": "stuff"}, "success")
index c4b0ceab0e219856a5f3bdec125d3ce06bbb8921..d917aef57a8ea2378b43193e7ff01f066a73f1d2 100644 (file)
@@ -98,7 +98,8 @@ def stubs(func):
         }
         stubs.expect_job_spec = {
             'runtime_constraints': {
-                'docker_image': 'arvados/jobs:'+arvados_cwl.__version__
+                'docker_image': 'arvados/jobs:'+arvados_cwl.__version__,
+                'min_ram_mb_per_node': 1024
             },
             'script_parameters': {
                 'x': {
@@ -134,7 +135,7 @@ def stubs(func):
             'owner_uuid': None,
             "components": {
                 "cwl-runner": {
-                    'runtime_constraints': {'docker_image': 'arvados/jobs:'+arvados_cwl.__version__},
+                    'runtime_constraints': {'docker_image': 'arvados/jobs:'+arvados_cwl.__version__, 'min_ram_mb_per_node': 1024},
                     'script_parameters': {
                         'y': {"value": {'basename': '99999999999999999999999999999998+99', 'location': 'keep:99999999999999999999999999999998+99', 'class': 'Directory'}},
                         'x': {"value": {'basename': 'blorp.txt', 'class': 'File', 'location': 'keep:99999999999999999999999999999994+99/blorp.txt'}},
@@ -193,7 +194,7 @@ def stubs(func):
             'runtime_constraints': {
                 'API': True,
                 'vcpus': 1,
-                'ram': 268435456
+                'ram': 1024*1024*1024
             }
         }
 
@@ -274,6 +275,36 @@ class TestSubmit(unittest.TestCase):
         self.assertEqual(capture_stdout.getvalue(),
                          stubs.expect_pipeline_uuid + '\n')
 
+
+    @mock.patch("time.sleep")
+    @stubs
+    def test_submit_runner_ram(self, stubs, tm):
+        capture_stdout = cStringIO.StringIO()
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--debug", "--submit-runner-ram=2048",
+             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            capture_stdout, sys.stderr, api_client=stubs.api)
+        self.assertEqual(exited, 0)
+
+        stubs.expect_pipeline_instance["components"]["cwl-runner"]["runtime_constraints"]["min_ram_mb_per_node"] = 2048
+
+        expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
+        stubs.api.pipeline_instances().create.assert_called_with(
+            body=expect_pipeline)
+        self.assertEqual(capture_stdout.getvalue(),
+                         stubs.expect_pipeline_uuid + '\n')
+
+
+    @mock.patch("time.sleep")
+    @stubs
+    def test_submit_invalid_runner_ram(self, stubs, tm):
+        capture_stdout = cStringIO.StringIO()
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--debug", "--submit-runner-ram=-2048",
+             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            capture_stdout, sys.stderr, api_client=stubs.api)
+        self.assertEqual(exited, 1)
+
     @mock.patch("time.sleep")
     @stubs
     def test_submit_output_name(self, stubs, tm):
@@ -294,6 +325,25 @@ class TestSubmit(unittest.TestCase):
         self.assertEqual(capture_stdout.getvalue(),
                          stubs.expect_pipeline_uuid + '\n')
 
+
+    @mock.patch("time.sleep")
+    @stubs
+    def test_submit_pipeline_name(self, stubs, tm):
+        capture_stdout = cStringIO.StringIO()
+        exited = arvados_cwl.main(
+            ["--submit", "--no-wait", "--debug", "--name=hello job 123",
+             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            capture_stdout, sys.stderr, api_client=stubs.api)
+        self.assertEqual(exited, 0)
+
+        stubs.expect_pipeline_instance["name"] = "hello job 123"
+
+        expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
+        stubs.api.pipeline_instances().create.assert_called_with(
+            body=expect_pipeline)
+        self.assertEqual(capture_stdout.getvalue(),
+                         stubs.expect_pipeline_uuid + '\n')
+
     @mock.patch("time.sleep")
     @stubs
     def test_submit_output_tags(self, stubs, tm):
@@ -438,6 +488,46 @@ class TestSubmit(unittest.TestCase):
         self.assertEqual(capture_stdout.getvalue(),
                          stubs.expect_container_request_uuid + '\n')
 
+    @stubs
+    def test_submit_container_runner_ram(self, stubs):
+        capture_stdout = cStringIO.StringIO()
+        try:
+            exited = arvados_cwl.main(
+                ["--submit", "--no-wait", "--api=containers", "--debug", "--submit-runner-ram=2048",
+                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+            self.assertEqual(exited, 0)
+        except:
+            logging.exception("")
+
+        stubs.expect_container_spec["runtime_constraints"]["ram"] = 2048*1024*1024
+
+        expect_container = copy.deepcopy(stubs.expect_container_spec)
+        stubs.api.container_requests().create.assert_called_with(
+            body=expect_container)
+        self.assertEqual(capture_stdout.getvalue(),
+                         stubs.expect_container_request_uuid + '\n')
+
+    @stubs
+    def test_submit_container_name(self, stubs):
+        capture_stdout = cStringIO.StringIO()
+        try:
+            exited = arvados_cwl.main(
+                ["--submit", "--no-wait", "--api=containers", "--debug", "--name=hello container 123",
+                 "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+                capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+            self.assertEqual(exited, 0)
+        except:
+            logging.exception("")
+
+        stubs.expect_container_spec["name"] = "hello container 123"
+
+        expect_container = copy.deepcopy(stubs.expect_container_spec)
+        stubs.api.container_requests().create.assert_called_with(
+            body=expect_container)
+        self.assertEqual(capture_stdout.getvalue(),
+                         stubs.expect_container_request_uuid + '\n')
+
     @mock.patch("arvados.commands.keepdocker.find_one_image_hash")
     @mock.patch("cwltool.docker.get_image")
     @mock.patch("arvados.api")
@@ -482,23 +572,9 @@ class TestSubmit(unittest.TestCase):
         self.assertEqual("arvados/jobs:"+arvados_cwl.__version__, arvados_cwl.runner.arvados_jobs_image(arvrunner))
 
 class TestCreateTemplate(unittest.TestCase):
-    @stubs
-    def test_create(self, stubs):
-        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
-
-        capture_stdout = cStringIO.StringIO()
-
-        exited = arvados_cwl.main(
-            ["--create-workflow", "--debug",
-             "--project-uuid", project_uuid,
-             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
-            capture_stdout, sys.stderr, api_client=stubs.api)
-        self.assertEqual(exited, 0)
-
-        stubs.api.pipeline_instances().create.refute_called()
-        stubs.api.jobs().create.refute_called()
+    existing_template_uuid = "zzzzz-d1hrv-validworkfloyml"
 
-        expect_component = copy.deepcopy(stubs.expect_job_spec)
+    def _adjust_script_params(self, expect_component):
         expect_component['script_parameters']['x'] = {
             'dataclass': 'File',
             'required': True,
@@ -516,6 +592,26 @@ class TestCreateTemplate(unittest.TestCase):
             'required': True,
             'type': 'Directory',
         }
+
+    @stubs
+    def test_create(self, stubs):
+        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+
+        capture_stdout = cStringIO.StringIO()
+
+        exited = arvados_cwl.main(
+            ["--create-workflow", "--debug",
+             "--api=jobs",
+             "--project-uuid", project_uuid,
+             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            capture_stdout, sys.stderr, api_client=stubs.api)
+        self.assertEqual(exited, 0)
+
+        stubs.api.pipeline_instances().create.refute_called()
+        stubs.api.jobs().create.refute_called()
+
+        expect_component = copy.deepcopy(stubs.expect_job_spec)
+        self._adjust_script_params(expect_component)
         expect_template = {
             "components": {
                 "submit_wf.cwl": expect_component,
@@ -530,6 +626,76 @@ class TestCreateTemplate(unittest.TestCase):
                          stubs.expect_pipeline_template_uuid + '\n')
 
 
+    @stubs
+    def test_create_name(self, stubs):
+        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+
+        capture_stdout = cStringIO.StringIO()
+
+        exited = arvados_cwl.main(
+            ["--create-workflow", "--debug",
+             "--project-uuid", project_uuid,
+             "--api=jobs",
+             "--name", "testing 123",
+             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            capture_stdout, sys.stderr, api_client=stubs.api)
+        self.assertEqual(exited, 0)
+
+        stubs.api.pipeline_instances().create.refute_called()
+        stubs.api.jobs().create.refute_called()
+
+        expect_component = copy.deepcopy(stubs.expect_job_spec)
+        self._adjust_script_params(expect_component)
+        expect_template = {
+            "components": {
+                "testing 123": expect_component,
+            },
+            "name": "testing 123",
+            "owner_uuid": project_uuid,
+        }
+        stubs.api.pipeline_templates().create.assert_called_with(
+            body=JsonDiffMatcher(expect_template), ensure_unique_name=True)
+
+        self.assertEqual(capture_stdout.getvalue(),
+                         stubs.expect_pipeline_template_uuid + '\n')
+
+
+    @stubs
+    def test_update_name(self, stubs):
+        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+
+        capture_stdout = cStringIO.StringIO()
+
+        exited = arvados_cwl.main(
+            ["--update-workflow", self.existing_template_uuid,
+             "--debug",
+             "--project-uuid", project_uuid,
+             "--api=jobs",
+             "--name", "testing 123",
+             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            capture_stdout, sys.stderr, api_client=stubs.api)
+        self.assertEqual(exited, 0)
+
+        stubs.api.pipeline_instances().create.refute_called()
+        stubs.api.jobs().create.refute_called()
+
+        expect_component = copy.deepcopy(stubs.expect_job_spec)
+        self._adjust_script_params(expect_component)
+        expect_template = {
+            "components": {
+                "testing 123": expect_component,
+            },
+            "name": "testing 123",
+            "owner_uuid": project_uuid,
+        }
+        stubs.api.pipeline_templates().create.refute_called()
+        stubs.api.pipeline_templates().update.assert_called_with(
+            body=JsonDiffMatcher(expect_template), uuid=self.existing_template_uuid)
+
+        self.assertEqual(capture_stdout.getvalue(),
+                         self.existing_template_uuid + '\n')
+
+
 class TestCreateWorkflow(unittest.TestCase):
     existing_workflow_uuid = "zzzzz-7fd4e-validworkfloyml"
     expect_workflow = open("tests/wf/expect_packed.cwl").read()
@@ -565,6 +731,39 @@ class TestCreateWorkflow(unittest.TestCase):
         self.assertEqual(capture_stdout.getvalue(),
                          stubs.expect_workflow_uuid + '\n')
 
+
+    @stubs
+    def test_create_name(self, stubs):
+        project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+
+        capture_stdout = cStringIO.StringIO()
+
+        exited = arvados_cwl.main(
+            ["--create-workflow", "--debug",
+             "--api=containers",
+             "--project-uuid", project_uuid,
+             "--name", "testing 123",
+             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            capture_stdout, sys.stderr, api_client=stubs.api)
+        self.assertEqual(exited, 0)
+
+        stubs.api.pipeline_templates().create.refute_called()
+        stubs.api.container_requests().create.refute_called()
+
+        body = {
+            "workflow": {
+                "owner_uuid": project_uuid,
+                "name": "testing 123",
+                "description": "",
+                "definition": self.expect_workflow,
+            }
+        }
+        stubs.api.workflows().create.assert_called_with(
+            body=JsonDiffMatcher(body))
+
+        self.assertEqual(capture_stdout.getvalue(),
+                         stubs.expect_workflow_uuid + '\n')
+
     @stubs
     def test_incompatible_api(self, stubs):
         capture_stderr = cStringIO.StringIO()
@@ -607,12 +806,38 @@ class TestCreateWorkflow(unittest.TestCase):
                          self.existing_workflow_uuid + '\n')
 
 
+    @stubs
+    def test_update_name(self, stubs):
+        capture_stdout = cStringIO.StringIO()
+
+        exited = arvados_cwl.main(
+            ["--update-workflow", self.existing_workflow_uuid,
+             "--debug", "--name", "testing 123",
+             "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+            capture_stdout, sys.stderr, api_client=stubs.api)
+        self.assertEqual(exited, 0)
+
+        body = {
+            "workflow": {
+                "name": "testing 123",
+                "description": "",
+                "definition": self.expect_workflow,
+            }
+        }
+        stubs.api.workflows().update.assert_called_with(
+            uuid=self.existing_workflow_uuid,
+            body=JsonDiffMatcher(body))
+        self.assertEqual(capture_stdout.getvalue(),
+                         self.existing_workflow_uuid + '\n')
+
+
 class TestTemplateInputs(unittest.TestCase):
     expect_template = {
         "components": {
             "inputs_test.cwl": {
                 'runtime_constraints': {
                     'docker_image': 'arvados/jobs:'+arvados_cwl.__version__,
+                    'min_ram_mb_per_node': 1024
                 },
                 'script_parameters': {
                     'cwl:tool':
@@ -657,7 +882,7 @@ class TestTemplateInputs(unittest.TestCase):
     @stubs
     def test_inputs_empty(self, stubs):
         exited = arvados_cwl.main(
-            ["--create-template", "--no-wait",
+            ["--create-template",
              "tests/wf/inputs_test.cwl", "tests/order/empty_order.json"],
             cStringIO.StringIO(), sys.stderr, api_client=stubs.api)
         self.assertEqual(exited, 0)
@@ -668,7 +893,7 @@ class TestTemplateInputs(unittest.TestCase):
     @stubs
     def test_inputs(self, stubs):
         exited = arvados_cwl.main(
-            ["--create-template", "--no-wait",
+            ["--create-template",
              "tests/wf/inputs_test.cwl", "tests/order/inputs_test_order.json"],
             cStringIO.StringIO(), sys.stderr, api_client=stubs.api)
         self.assertEqual(exited, 0)
diff --git a/sdk/dev-jobs.dockerfile b/sdk/dev-jobs.dockerfile
new file mode 100644 (file)
index 0000000..bd80a0c
--- /dev/null
@@ -0,0 +1,38 @@
+# Dockerfile for building an arvados/jobs Docker image from local git tree.
+#
+# Intended for use by developers working on arvados-python-client or
+# arvados-cwl-runner who need to run a crunch job with a custom package
+# version.
+#
+# Use arvados/build/build-dev-docker-jobs-image.sh to build.
+#
+# (This Dockerfile must be located in the arvados/sdk/ directory because
+#  of the docker build root.)
+
+FROM debian:jessie
+MAINTAINER Ward Vandewege <ward@curoverse.com>
+
+ENV DEBIAN_FRONTEND noninteractive
+
+RUN apt-get update -q && apt-get install -qy git python-pip python-virtualenv python-dev libcurl4-gnutls-dev libgnutls28-dev nodejs
+
+RUN pip install -U setuptools
+
+ARG sdk
+ARG runner
+ARG cwltool
+
+ADD python/dist/$sdk /tmp/
+ADD cwl/cwltool_dist/$cwltool /tmp/
+ADD cwl/dist/$runner /tmp/
+
+RUN cd /tmp/arvados-python-client-* && python setup.py install
+RUN if test -d /tmp/cwltool-* ; then cd /tmp/cwltool-* && python setup.py install ; fi
+RUN cd /tmp/arvados-cwl-runner-* && python setup.py install
+
+# Install dependencies and set up system.
+RUN /usr/sbin/adduser --disabled-password \
+      --gecos 'Crunch execution user' crunch && \
+    /usr/bin/install --directory --owner=crunch --group=crunch --mode=0700 /keep /tmp/crunch-src /tmp/crunch-job
+
+USER crunch
diff --git a/sdk/go/httpserver/id_generator.go b/sdk/go/httpserver/id_generator.go
new file mode 100644 (file)
index 0000000..c2830f7
--- /dev/null
@@ -0,0 +1,31 @@
+package httpserver
+
+import (
+       "strconv"
+       "sync"
+       "time"
+)
+
+// IDGenerator generates alphanumeric strings suitable for use as
+// unique IDs (a given IDGenerator will never return the same ID
+// twice).
+type IDGenerator struct {
+       // Prefix is prepended to each returned ID.
+       Prefix string
+
+       lastID int64
+       mtx    sync.Mutex
+}
+
+// Next returns a new ID string. It is safe to call Next from multiple
+// goroutines.
+func (g *IDGenerator) Next() string {
+       id := time.Now().UnixNano()
+       g.mtx.Lock()
+       if id <= g.lastID {
+               id = g.lastID + 1
+       }
+       g.lastID = id
+       g.mtx.Unlock()
+       return g.Prefix + strconv.FormatInt(id, 36)
+}
index 178ffb90f4facbebdfd6809bb1448e84904bc82f..ee35f4748b78ecfabac5c431ac5ad73340e4f300 100644 (file)
@@ -4,18 +4,42 @@ import (
        "net/http"
 )
 
+// RequestCounter is an http.Handler that tracks the number of
+// requests in progress.
+type RequestCounter interface {
+       http.Handler
+
+       // Current() returns the number of requests in progress.
+       Current() int
+
+       // Max() returns the maximum number of concurrent requests
+       // that will be accepted.
+       Max() int
+}
+
 type limiterHandler struct {
        requests chan struct{}
        handler  http.Handler
 }
 
-func NewRequestLimiter(maxRequests int, handler http.Handler) http.Handler {
+// NewRequestLimiter returns a RequestCounter that delegates up to
+// maxRequests at a time to the given handler, and responds 503 to all
+// incoming requests beyond that limit.
+func NewRequestLimiter(maxRequests int, handler http.Handler) RequestCounter {
        return &limiterHandler{
                requests: make(chan struct{}, maxRequests),
                handler:  handler,
        }
 }
 
+func (h *limiterHandler) Current() int {
+       return len(h.requests)
+}
+
+func (h *limiterHandler) Max() int {
+       return cap(h.requests)
+}
+
 func (h *limiterHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        select {
        case h.requests <- struct{}{}:
diff --git a/sdk/go/logger/logger.go b/sdk/go/logger/logger.go
deleted file mode 100644 (file)
index 6dd7fb3..0000000
+++ /dev/null
@@ -1,204 +0,0 @@
-// Logger periodically writes a log to the Arvados SDK.
-//
-// This package is useful for maintaining a log object that is updated
-// over time. This log object will be periodically written to the log,
-// as specified by WriteInterval in the Params.
-//
-// This package is safe for concurrent use as long as:
-// The maps passed to a LogMutator are not accessed outside of the
-// LogMutator
-//
-// Usage:
-// arvLogger := logger.NewLogger(params)
-// arvLogger.Update(func(properties map[string]interface{},
-//     entry map[string]interface{}) {
-//   // Modifiy properties and entry however you want
-//   // properties is a shortcut for entry["properties"].(map[string]interface{})
-//   // properties can take any (valid) values you want to give it,
-//   // entry will only take the fields listed at
-//   // http://doc.arvados.org/api/schema/Log.html
-//   // Valid values for properties are anything that can be json
-//   // encoded (i.e. will not error if you call json.Marshal() on it.
-// })
-package logger
-
-import (
-       "fmt"
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       "log"
-       "time"
-)
-
-const (
-       startSuffix              = "-start"
-       partialSuffix            = "-partial"
-       finalSuffix              = "-final"
-       numberNoMoreWorkMessages = 2 // To return from FinalUpdate() & Work().
-)
-
-type LoggerParams struct {
-       Client          *arvadosclient.ArvadosClient // The client we use to write log entries
-       EventTypePrefix string                       // The prefix we use for the event type in the log entry
-       WriteInterval   time.Duration                // Wait at least this long between log writes
-}
-
-// A LogMutator is a function which modifies the log entry.
-// It takes two maps as arguments, properties is the first and entry
-// is the second
-// properties is a shortcut for entry["properties"].(map[string]interface{})
-// properties can take any values you want to give it.
-// entry will only take the fields listed at http://doc.arvados.org/api/schema/Log.html
-// properties and entry are only safe to access inside the LogMutator,
-// they should not be stored anywhere, otherwise you'll risk
-// concurrent access.
-type LogMutator func(map[string]interface{}, map[string]interface{})
-
-// A Logger is used to build up a log entry over time and write every
-// version of it.
-type Logger struct {
-       // The data we write
-       data       map[string]interface{} // The entire map that we give to the api
-       entry      map[string]interface{} // Convenience shortcut into data
-       properties map[string]interface{} // Convenience shortcut into data
-
-       params LoggerParams // Parameters we were given
-
-       // Variables to coordinate updating and writing.
-       modified    bool            // Has this data been modified since the last write?
-       workToDo    chan LogMutator // Work to do in the worker thread.
-       writeTicker *time.Ticker    // On each tick we write the log data to arvados, if it has been modified.
-       hasWritten  bool            // Whether we've written at all yet.
-       noMoreWork  chan bool       // Signals that we're done writing.
-
-       writeHooks []LogMutator // Mutators we call before each write.
-}
-
-// Create a new logger based on the specified parameters.
-func NewLogger(params LoggerParams) (l *Logger, err error) {
-       // sanity check parameters
-       if &params.Client == nil {
-               err = fmt.Errorf("Nil arvados client in LoggerParams passed in to NewLogger()")
-               return
-       }
-       if params.EventTypePrefix == "" {
-               err = fmt.Errorf("Empty event type prefix in LoggerParams passed in to NewLogger()")
-               return
-       }
-
-       l = &Logger{
-               data:        make(map[string]interface{}),
-               entry:       make(map[string]interface{}),
-               properties:  make(map[string]interface{}),
-               params:      params,
-               workToDo:    make(chan LogMutator, 10),
-               writeTicker: time.NewTicker(params.WriteInterval),
-               noMoreWork:  make(chan bool, numberNoMoreWorkMessages)}
-
-       l.data["log"] = l.entry
-       l.entry["properties"] = l.properties
-
-       // Start the worker goroutine.
-       go l.work()
-
-       return l, nil
-}
-
-// Exported functions will be called from other goroutines, therefore
-// all they are allowed to do is enqueue work to be done in the worker
-// goroutine.
-
-// Enqueues an update. This will happen in another goroutine after
-// this method returns.
-func (l *Logger) Update(mutator LogMutator) {
-       l.workToDo <- mutator
-}
-
-// Similar to Update(), but writes the log entry as soon as possible
-// (ignoring MinimumWriteInterval) and blocks until the entry has been
-// written. This is useful if you know that you're about to quit
-// (e.g. if you discovered a fatal error, or you're finished), since
-// go will not wait for timers (including the pending write timer) to
-// go off before exiting.
-func (l *Logger) FinalUpdate(mutator LogMutator) {
-       // TODO(misha): Consider not accepting any future updates somehow,
-       // since they won't get written if they come in after this.
-
-       // Stop the periodic write ticker. We'll perform the final write
-       // before returning from this function.
-       l.workToDo <- func(p map[string]interface{}, e map[string]interface{}) {
-               l.writeTicker.Stop()
-       }
-
-       // Apply the final update
-       l.workToDo <- mutator
-
-       // Perform the final write and signal that we can return.
-       l.workToDo <- func(p map[string]interface{}, e map[string]interface{}) {
-               l.write(true)
-               for i := 0; i < numberNoMoreWorkMessages; {
-                       l.noMoreWork <- true
-               }
-       }
-
-       // Wait until we've performed the write.
-       <-l.noMoreWork
-}
-
-// Adds a hook which will be called every time this logger writes an entry.
-func (l *Logger) AddWriteHook(hook LogMutator) {
-       // We do the work in a LogMutator so that it happens in the worker
-       // goroutine.
-       l.workToDo <- func(p map[string]interface{}, e map[string]interface{}) {
-               l.writeHooks = append(l.writeHooks, hook)
-       }
-}
-
-// The worker loop
-func (l *Logger) work() {
-       for {
-               select {
-               case <-l.writeTicker.C:
-                       if l.modified {
-                               l.write(false)
-                               l.modified = false
-                       }
-               case mutator := <-l.workToDo:
-                       mutator(l.properties, l.entry)
-                       l.modified = true
-               case <-l.noMoreWork:
-                       return
-               }
-       }
-}
-
-// Actually writes the log entry.
-func (l *Logger) write(isFinal bool) {
-
-       // Run all our hooks
-       for _, hook := range l.writeHooks {
-               hook(l.properties, l.entry)
-       }
-
-       // Update the event type.
-       if isFinal {
-               l.entry["event_type"] = l.params.EventTypePrefix + finalSuffix
-       } else if l.hasWritten {
-               l.entry["event_type"] = l.params.EventTypePrefix + partialSuffix
-       } else {
-               l.entry["event_type"] = l.params.EventTypePrefix + startSuffix
-       }
-       l.hasWritten = true
-
-       // Write the log entry.
-       // This is a network write and will take a while, which is bad
-       // because we're blocking all the other work on this goroutine.
-       //
-       // TODO(misha): Consider rewriting this so that we can encode l.data
-       // into a string, and then perform the actual write in another
-       // routine. This will be tricky and will require support in the
-       // client.
-       err := l.params.Client.Create("logs", l.data, nil)
-       if err != nil {
-               log.Printf("Received error writing %v: %v", l.data, err)
-       }
-}
diff --git a/sdk/go/logger/util.go b/sdk/go/logger/util.go
deleted file mode 100644 (file)
index 6425aca..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-// Helper methods for interacting with Logger.
-package logger
-
-// Retrieves the map[string]interface{} stored at parent[key] if it
-// exists, otherwise it makes it and stores it there.
-// This is useful for logger because you may not know if a map you
-// need has already been created.
-func GetOrCreateMap(
-       parent map[string]interface{},
-       key string) (child map[string]interface{}) {
-       read, exists := parent[key]
-       if exists {
-               child = read.(map[string]interface{})
-
-       } else {
-               child = make(map[string]interface{})
-               parent[key] = child
-       }
-       return
-}
diff --git a/sdk/go/util/util.go b/sdk/go/util/util.go
deleted file mode 100644 (file)
index ac510de..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-/* Helper methods for dealing with responses from API Server. */
-
-package util
-
-import (
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-)
-
-func UserIsAdmin(arv *arvadosclient.ArvadosClient) (is_admin bool, err error) {
-       type user struct {
-               IsAdmin bool `json:"is_admin"`
-       }
-       var u user
-       err = arv.Call("GET", "users", "", "current", nil, &u)
-       return u.IsAdmin, err
-}
-
-// Returns the total count of a particular type of resource
-//
-//   resource - the arvados resource to count
-// return
-//   count - the number of items of type resource the api server reports, if no error
-//   err - error accessing the resource, or nil if no error
-func NumberItemsAvailable(client *arvadosclient.ArvadosClient, resource string) (count int, err error) {
-       var response struct {
-               ItemsAvailable int `json:"items_available"`
-       }
-       sdkParams := arvadosclient.Dict{"limit": 0}
-       err = client.List(resource, sdkParams, &response)
-       if err == nil {
-               count = response.ItemsAvailable
-       }
-       return
-}
diff --git a/sdk/python/arvados/_version.py b/sdk/python/arvados/_version.py
new file mode 100644 (file)
index 0000000..d823afc
--- /dev/null
@@ -0,0 +1,3 @@
+import pkg_resources
+
+__version__ = pkg_resources.require('arvados-python-client')[0].version
index eadb3a9bd1638cb9b384c3592b4790fe1f97e3bd..4cc2591ebb25034d0145de40c11f6638e3973864 100644 (file)
@@ -516,7 +516,7 @@ class _BlockManager(object):
                     return
                 self._keep.get(b)
             except Exception:
-                pass
+                _logger.exception("Exception doing block prefetch")
 
     @synchronized
     def start_get_threads(self):
index badbd668d951c46dd882b2468940463a17610728..1f72635406e4ecf62f7fa1245334cce96ae7b215 100755 (executable)
@@ -35,6 +35,7 @@ import arvados.commands._util as arv_cmd
 import arvados.commands.keepdocker
 
 from arvados.api import OrderedJsonModel
+from arvados._version import __version__
 
 COMMIT_HASH_RE = re.compile(r'^[0-9a-f]{1,40}$')
 
@@ -61,6 +62,9 @@ src_owner_uuid = None
 def main():
     copy_opts = argparse.ArgumentParser(add_help=False)
 
+    copy_opts.add_argument(
+        '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
+        help='Print version and exit.')
     copy_opts.add_argument(
         '-v', '--verbose', dest='verbose', action='store_true',
         help='Verbose output.')
index 9310f066219ae3063153e4a4393ecba771b7c6ff..3a0b64c38f4f543d5b52c8f96f4b38b03a4d741a 100644 (file)
@@ -21,6 +21,8 @@ import arvados.commands._util as arv_cmd
 import arvados.commands.put as arv_put
 import ciso8601
 
+from arvados._version import __version__
+
 EARLIEST_DATETIME = datetime.datetime(datetime.MINYEAR, 1, 1, 0, 0, 0)
 STAT_CACHE_ERRORS = (IOError, OSError, ValueError)
 
@@ -28,6 +30,9 @@ DockerImage = collections.namedtuple(
     'DockerImage', ['repo', 'tag', 'hash', 'created', 'vsize'])
 
 keepdocker_parser = argparse.ArgumentParser(add_help=False)
+keepdocker_parser.add_argument(
+    '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
+    help='Print version and exit.')
 keepdocker_parser.add_argument(
     '-f', '--force', action='store_true', default=False,
     help="Re-upload the image even if it already exists on the server")
index e87244d7d12426d060bcb8a724445406448eb160..a2f2e542754f7e2e44edbd5673cf36d2c5d130af 100755 (executable)
@@ -3,10 +3,13 @@
 from __future__ import print_function
 
 import argparse
+import sys
 
 import arvados
 import arvados.commands._util as arv_cmd
 
+from arvados._version import __version__
+
 def parse_args(args):
     parser = argparse.ArgumentParser(
         description='List contents of a manifest',
@@ -16,6 +19,9 @@ def parse_args(args):
                         help="""Collection UUID or locator""")
     parser.add_argument('-s', action='store_true',
                         help="""List file sizes, in KiB.""")
+    parser.add_argument('--version', action='version',
+                        version="%s %s" % (sys.argv[0], __version__),
+                        help='Print version and exit.')
 
     return parser.parse_args(args)
 
index 88956cdce69696f91730fd0eb2adc5430c9b3a08..88d5a79d48ff867573afec933fb5f1fb561ce318 100644 (file)
@@ -24,6 +24,7 @@ import tempfile
 import threading
 import time
 from apiclient import errors as apiclient_errors
+from arvados._version import __version__
 
 import arvados.commands._util as arv_cmd
 
@@ -32,6 +33,9 @@ api_client = None
 
 upload_opts = argparse.ArgumentParser(add_help=False)
 
+upload_opts.add_argument('--version', action='version',
+                         version="%s %s" % (sys.argv[0], __version__),
+                         help='Print version and exit.')
 upload_opts.add_argument('paths', metavar='path', type=str, nargs='*',
                          help="""
 Local file or directory. Default: read from standard input.
index 54df452394e47bc7b44437bf580a3af2dc17b36e..8403327b44342befc23b7e1a3650213378f70b86 100644 (file)
@@ -11,22 +11,38 @@ import put
 import time
 import subprocess
 import logging
+import sys
 import arvados.commands._util as arv_cmd
 
+from arvados._version import __version__
+
 logger = logging.getLogger('arvados.arv-run')
 logger.setLevel(logging.INFO)
 
 arvrun_parser = argparse.ArgumentParser(parents=[arv_cmd.retry_opt])
-arvrun_parser.add_argument('--dry-run', action="store_true", help="Print out the pipeline that would be submitted and exit")
-arvrun_parser.add_argument('--local', action="store_true", help="Run locally using arv-run-pipeline-instance")
-arvrun_parser.add_argument('--docker-image', type=str, help="Docker image to use, otherwise use instance default.")
-arvrun_parser.add_argument('--ignore-rcode', action="store_true", help="Commands that return non-zero return codes should not be considered failed.")
-arvrun_parser.add_argument('--no-reuse', action="store_true", help="Do not reuse past jobs.")
-arvrun_parser.add_argument('--no-wait', action="store_true", help="Do not wait and display logs after submitting command, just exit.")
-arvrun_parser.add_argument('--project-uuid', type=str, help="Parent project of the pipeline")
-arvrun_parser.add_argument('--git-dir', type=str, default="", help="Git repository passed to arv-crunch-job when using --local")
-arvrun_parser.add_argument('--repository', type=str, default="arvados", help="repository field of component, default 'arvados'")
-arvrun_parser.add_argument('--script-version', type=str, default="master", help="script_version field of component, default 'master'")
+arvrun_parser.add_argument('--dry-run', action="store_true",
+                           help="Print out the pipeline that would be submitted and exit")
+arvrun_parser.add_argument('--local', action="store_true",
+                           help="Run locally using arv-run-pipeline-instance")
+arvrun_parser.add_argument('--docker-image', type=str,
+                           help="Docker image to use, otherwise use instance default.")
+arvrun_parser.add_argument('--ignore-rcode', action="store_true",
+                           help="Commands that return non-zero return codes should not be considered failed.")
+arvrun_parser.add_argument('--no-reuse', action="store_true",
+                           help="Do not reuse past jobs.")
+arvrun_parser.add_argument('--no-wait', action="store_true",
+                           help="Do not wait and display logs after submitting command, just exit.")
+arvrun_parser.add_argument('--project-uuid', type=str,
+                           help="Parent project of the pipeline")
+arvrun_parser.add_argument('--git-dir', type=str, default="",
+                           help="Git repository passed to arv-crunch-job when using --local")
+arvrun_parser.add_argument('--repository', type=str, default="arvados",
+                           help="repository field of component, default 'arvados'")
+arvrun_parser.add_argument('--script-version', type=str, default="master",
+                           help="script_version field of component, default 'master'")
+arvrun_parser.add_argument('--version', action='version',
+                           version="%s %s" % (sys.argv[0], __version__),
+                           help='Print version and exit.')
 arvrun_parser.add_argument('args', nargs=argparse.REMAINDER)
 
 class ArvFile(object):
index f6dee177d9a6b1e5a69e44d1edefd396280f0ed7..72ef1befed85ffd4d8b883270ebefa0a3bcd3dac 100644 (file)
@@ -6,12 +6,16 @@ import argparse
 import arvados
 import json
 from arvados.events import subscribe
+from arvados._version import __version__
 import signal
 
 def main(arguments=None):
     logger = logging.getLogger('arvados.arv-ws')
 
     parser = argparse.ArgumentParser()
+    parser.add_argument('--version', action='version',
+                        version="%s %s" % (sys.argv[0], __version__),
+                        help='Print version and exit.')
     parser.add_argument('-u', '--uuid', type=str, default="", help="Filter events on object_uuid")
     parser.add_argument('-f', '--filters', type=str, default="", help="Arvados query filter to apply to log events (JSON encoded)")
     parser.add_argument('-s', '--start-time', type=str, default="", help="Arvados query filter to fetch log events created at or after this time. This will be server time in UTC. Allowed format: YYYY-MM-DD or YYYY-MM-DD hh:mm:ss")
index db7835be3746f8f67eddd61d2aac505356e601f4..c98947945669338384147d9a8a0baf6917c43db9 100644 (file)
@@ -511,8 +511,10 @@ class KeepClient(object):
             with self.successful_copies_lock:
                 self.successful_copies += replicas_nr
                 self.response = response
+            with self.pending_tries_notification:
+                self.pending_tries_notification.notify_all()
         
-        def write_fail(self, ks, status_code):
+        def write_fail(self, ks):
             with self.pending_tries_notification:
                 self.pending_tries += 1
                 self.pending_tries_notification.notify()
@@ -520,8 +522,36 @@ class KeepClient(object):
         def pending_copies(self):
             with self.successful_copies_lock:
                 return self.wanted_copies - self.successful_copies
-    
-    
+
+        def get_next_task(self):
+            with self.pending_tries_notification:
+                while True:
+                    if self.pending_copies() < 1:
+                        # This notify_all() is unnecessary --
+                        # write_success() already called notify_all()
+                        # when pending<1 became true, so it's not
+                        # possible for any other thread to be in
+                        # wait() now -- but it's cheap insurance
+                        # against deadlock so we do it anyway:
+                        self.pending_tries_notification.notify_all()
+                        # Drain the queue and then raise Queue.Empty
+                        while True:
+                            self.get_nowait()
+                            self.task_done()
+                    elif self.pending_tries > 0:
+                        service, service_root = self.get_nowait()
+                        if service.finished():
+                            self.task_done()
+                            continue
+                        self.pending_tries -= 1
+                        return service, service_root
+                    elif self.empty():
+                        self.pending_tries_notification.notify_all()
+                        raise Queue.Empty
+                    else:
+                        self.pending_tries_notification.wait()
+
+
     class KeepWriterThreadPool(object):
         def __init__(self, data, data_hash, copies, max_service_replicas, timeout=None):
             self.total_task_nr = 0
@@ -551,74 +581,64 @@ class KeepClient(object):
                 worker.start()
             # Wait for finished work
             self.queue.join()
-            with self.queue.pending_tries_notification:
-                self.queue.pending_tries_notification.notify_all()
-            for worker in self.workers:
-                worker.join()
         
         def response(self):
             return self.queue.response
     
     
     class KeepWriterThread(threading.Thread):
+        TaskFailed = RuntimeError()
+
         def __init__(self, queue, data, data_hash, timeout=None):
             super(KeepClient.KeepWriterThread, self).__init__()
             self.timeout = timeout
             self.queue = queue
             self.data = data
             self.data_hash = data_hash
-        
+            self.daemon = True
+
         def run(self):
-            while not self.queue.empty():
-                if self.queue.pending_copies() > 0:
-                    # Avoid overreplication, wait for some needed re-attempt
-                    with self.queue.pending_tries_notification:
-                        if self.queue.pending_tries <= 0:
-                            self.queue.pending_tries_notification.wait()
-                            continue # try again when awake
-                        self.queue.pending_tries -= 1
-
-                    # Get to work
-                    try:
-                        service, service_root = self.queue.get_nowait()
-                    except Queue.Empty:
-                        continue
-                    if service.finished():
-                        self.queue.task_done()
-                        continue
-                    success = bool(service.put(self.data_hash,
-                                                self.data,
-                                                timeout=self.timeout))
-                    result = service.last_result()
-                    if success:
-                        _logger.debug("KeepWriterThread %s succeeded %s+%i %s",
-                                      str(threading.current_thread()),
-                                      self.data_hash,
-                                      len(self.data),
-                                      service_root)
-                        try:
-                            replicas_stored = int(result['headers']['x-keep-replicas-stored'])
-                        except (KeyError, ValueError):
-                            replicas_stored = 1
-                        
-                        self.queue.write_success(result['body'].strip(), replicas_stored)
-                    else:
-                        if result.get('status_code', None):
-                            _logger.debug("Request fail: PUT %s => %s %s",
-                                          self.data_hash,
-                                          result['status_code'],
-                                          result['body'])
-                        self.queue.write_fail(service, result.get('status_code', None)) # Schedule a re-attempt with next service
-                    # Mark as done so the queue can be join()ed
-                    self.queue.task_done()
+            while True:
+                try:
+                    service, service_root = self.queue.get_next_task()
+                except Queue.Empty:
+                    return
+                try:
+                    locator, copies = self.do_task(service, service_root)
+                except Exception as e:
+                    if e is not self.TaskFailed:
+                        _logger.exception("Exception in KeepWriterThread")
+                    self.queue.write_fail(service)
                 else:
-                    # Remove the task from the queue anyways
-                    try:
-                        self.queue.get_nowait()
-                        # Mark as done so the queue can be join()ed
-                        self.queue.task_done()
-                    except Queue.Empty:
-                        continue
+                    self.queue.write_success(locator, copies)
+                finally:
+                    self.queue.task_done()
+
+        def do_task(self, service, service_root):
+            success = bool(service.put(self.data_hash,
+                                        self.data,
+                                        timeout=self.timeout))
+            result = service.last_result()
+
+            if not success:
+                if result.get('status_code', None):
+                    _logger.debug("Request fail: PUT %s => %s %s",
+                                  self.data_hash,
+                                  result['status_code'],
+                                  result['body'])
+                raise self.TaskFailed
+
+            _logger.debug("KeepWriterThread %s succeeded %s+%i %s",
+                          str(threading.current_thread()),
+                          self.data_hash,
+                          len(self.data),
+                          service_root)
+            try:
+                replicas_stored = int(result['headers']['x-keep-replicas-stored'])
+            except (KeyError, ValueError):
+                replicas_stored = 1
+
+            return result['body'].strip(), replicas_stored
 
 
     def __init__(self, api_client=None, proxy=None,
index 60d4bec3b95c429643d7df4a600f72754954809a..f91b3977090da7c6f8b30844635174d122e67ba2 100755 (executable)
@@ -11,6 +11,8 @@ import logging
 import arvados
 import arvados.commands._util as arv_cmd
 
+from arvados._version import __version__
+
 logger = logging.getLogger('arvados.arv-get')
 
 def abort(msg, code=1):
@@ -20,6 +22,9 @@ def abort(msg, code=1):
 parser = argparse.ArgumentParser(
     description='Copy data from Keep to a local file or pipe.',
     parents=[arv_cmd.retry_opt])
+parser.add_argument('--version', action='version',
+                    version="%s %s" % (sys.argv[0], __version__),
+                    help='Print version and exit.')
 parser.add_argument('locator', type=str,
                     help="""
 Collection locator, optionally with a file path or prefix.
index b059d79459278e4859cdd7221183f2b863a3e73b..05a055e10855066588a707d539b2f250ea527be6 100755 (executable)
@@ -7,16 +7,22 @@ import re
 import string
 import sys
 
+import arvados
+from arvados._version import __version__
+
 parser = argparse.ArgumentParser(
     description='Read manifest on standard input and put normalized manifest on standard output.')
 
-parser.add_argument('--extract', type=str, help="The file to extract from the input manifest")
-parser.add_argument('--strip', action='store_true', help="Strip authorization tokens")
+parser.add_argument('--extract', type=str,
+                    help="The file to extract from the input manifest")
+parser.add_argument('--strip', action='store_true',
+                    help="Strip authorization tokens")
+parser.add_argument('--version', action='version',
+                    version="%s %s" % (sys.argv[0], __version__),
+                    help='Print version and exit.')
 
 args = parser.parse_args()
 
-import arvados
-
 r = sys.stdin.read()
 
 cr = arvados.CollectionReader(r)
index e0aae9625eb54d82eb4ee983696487079fa0d441..9d7d2481fdbb69c1635f932e7171663be9739e14 100644 (file)
@@ -51,6 +51,7 @@ setup(name='arvados-python-client',
           'httplib2',
           'pycurl >=7.19.5.1, <7.21.5',
           'python-gflags<3.0',
+          'setuptools',
           'ws4py'
       ],
       test_suite='tests',
index 71c9b178e7525808508babf86a383a37b4ab4ba6..dae3dd3b7b19c923ff53381e9f3ebef8c5abae49 100644 (file)
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 import arvados
+import contextlib
 import errno
 import hashlib
 import httplib
@@ -11,6 +12,7 @@ import os
 import pycurl
 import Queue
 import shutil
+import sys
 import tempfile
 import unittest
 
@@ -50,6 +52,17 @@ def mock_api_responses(api_client, body, codes, headers={}):
 def str_keep_locator(s):
     return '{}+{}'.format(hashlib.md5(s).hexdigest(), len(s))
 
+@contextlib.contextmanager
+def redirected_streams(stdout=None, stderr=None):
+    orig_stdout, sys.stdout = sys.stdout, stdout or sys.stdout
+    orig_stderr, sys.stderr = sys.stderr, stderr or sys.stderr
+    try:
+        yield
+    finally:
+        sys.stdout = orig_stdout
+        sys.stderr = orig_stderr
+
+
 class FakeCurl:
     @classmethod
     def make(cls, code, body='', headers={}):
diff --git a/sdk/python/tests/test_arv_copy.py b/sdk/python/tests/test_arv_copy.py
new file mode 100644 (file)
index 0000000..e291ee0
--- /dev/null
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import io
+import os
+import sys
+import tempfile
+import unittest
+
+import arvados.commands.arv_copy as arv_copy
+import arvados_testutil as tutil
+
+class ArvCopyTestCase(unittest.TestCase):
+    def run_copy(self, args):
+        sys.argv = ['arv-copy'] + args
+        return arv_copy.main()
+
+    def test_unsupported_arg(self):
+        with self.assertRaises(SystemExit):
+            self.run_copy(['-x=unknown'])
+
+    def test_version_argument(self):
+        err = io.BytesIO()
+        out = io.BytesIO()
+        with tutil.redirected_streams(stdout=out, stderr=err):
+            with self.assertRaises(SystemExit):
+                self.run_copy(['--version'])
+        self.assertEqual(out.getvalue(), '')
+        self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
diff --git a/sdk/python/tests/test_arv_keepdocker.py b/sdk/python/tests/test_arv_keepdocker.py
new file mode 100644 (file)
index 0000000..bb94db5
--- /dev/null
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import io
+import os
+import sys
+import tempfile
+import unittest
+
+import arvados.commands.keepdocker as arv_keepdocker
+import arvados_testutil as tutil
+
+
+class ArvKeepdockerTestCase(unittest.TestCase):
+    def run_arv_keepdocker(self, args):
+        sys.argv = ['arv-keepdocker'] + args
+        return arv_keepdocker.main()
+
+    def test_unsupported_arg(self):
+        with self.assertRaises(SystemExit):
+            self.run_arv_keepdocker(['-x=unknown'])
+
+    def test_version_argument(self):
+        err = io.BytesIO()
+        out = io.BytesIO()
+        with tutil.redirected_streams(stdout=out, stderr=err):
+            with self.assertRaises(SystemExit):
+                self.run_arv_keepdocker(['--version'])
+        self.assertEqual(out.getvalue(), '')
+        self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
index 664b57fc00a57cef068e352232d55d0dfa548a58..5064f07d722ee77efc0c8a4f733eaf86d02b8b39 100644 (file)
@@ -2,15 +2,17 @@
 # -*- coding: utf-8 -*-
 
 import io
+import os
 import random
-
+import sys
 import mock
+import tempfile
 
 import arvados.errors as arv_error
 import arvados.commands.ls as arv_ls
 import run_test_server
 
-from arvados_testutil import str_keep_locator
+from arvados_testutil import str_keep_locator, redirected_streams
 
 class ArvLsTestCase(run_test_server.TestCaseWithServers):
     FAKE_UUID = 'zzzzz-4zz18-12345abcde12345'
@@ -78,3 +80,12 @@ class ArvLsTestCase(run_test_server.TestCaseWithServers):
             arv_error.NotFoundError)
         self.assertNotEqual(0, self.run_ls([self.FAKE_UUID], api_client))
         self.assertNotEqual('', self.stderr.getvalue())
+
+    def test_version_argument(self):
+        err = io.BytesIO()
+        out = io.BytesIO()
+        with redirected_streams(stdout=out, stderr=err):
+            with self.assertRaises(SystemExit):
+                self.run_ls(['--version'], None)
+        self.assertEqual(out.getvalue(), '')
+        self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
diff --git a/sdk/python/tests/test_arv_normalize.py b/sdk/python/tests/test_arv_normalize.py
new file mode 100644 (file)
index 0000000..8bce7e3
--- /dev/null
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import subprocess
+import sys
+import tempfile
+import unittest
+
+
+class ArvNormalizeTestCase(unittest.TestCase):
+    def run_arv_normalize(self, args=[]):
+        p = subprocess.Popen([sys.executable, 'bin/arv-normalize'] + args,
+                             stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+        (stdout, stderr) = p.communicate()
+        return p.returncode, stdout, stderr
+
+    def test_unsupported_arg(self):
+        returncode, out, err = self.run_arv_normalize(['-x=unknown'])
+        self.assertNotEqual(0, returncode)
+
+    def test_version_argument(self):
+        returncode, out, err = self.run_arv_normalize(['--version'])
+        self.assertEqual(0, returncode)
+        self.assertEqual('', out)
+        self.assertNotEqual('', err)
+        self.assertRegexpMatches(err, "[0-9]+\.[0-9]+\.[0-9]+")
index 0c1d3779fbaae8badd19809a275329e57fdc4a3c..bc933e27f8de55d7611865bf10d92993a54937f1 100644 (file)
@@ -2,6 +2,7 @@
 # -*- coding: utf-8 -*-
 
 import apiclient
+import io
 import mock
 import os
 import pwd
@@ -468,6 +469,15 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase):
                 delattr(self, outbuf)
         super(ArvadosPutTest, self).tearDown()
 
+    def test_version_argument(self):
+        err = io.BytesIO()
+        out = io.BytesIO()
+        with tutil.redirected_streams(stdout=out, stderr=err):
+            with self.assertRaises(SystemExit):
+                self.call_main_with_args(['--version'])
+        self.assertEqual(out.getvalue(), '')
+        self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
+
     def test_simple_file_put(self):
         self.call_main_on_test_file()
 
diff --git a/sdk/python/tests/test_arv_run.py b/sdk/python/tests/test_arv_run.py
new file mode 100644 (file)
index 0000000..3d04d27
--- /dev/null
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import io
+import os
+import sys
+import tempfile
+import unittest
+
+import arvados.commands.run as arv_run
+import arvados_testutil as tutil
+
+class ArvRunTestCase(unittest.TestCase):
+    def run_arv_run(self, args):
+        sys.argv = ['arv-run'] + args
+        return arv_run.main()
+
+    def test_unsupported_arg(self):
+        with self.assertRaises(SystemExit):
+            self.run_arv_run(['-x=unknown'])
+
+    def test_version_argument(self):
+        err = io.BytesIO()
+        out = io.BytesIO()
+        with tutil.redirected_streams(stdout=out, stderr=err):
+            with self.assertRaises(SystemExit):
+                self.run_arv_run(['--version'])
+        self.assertEqual(out.getvalue(), '')
+        self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
index 5a018273a4d0c8aa6b35970cbb151227083e0a47..2a85e04e87c06067bd7d83773295cf049f747852 100644 (file)
@@ -1,8 +1,14 @@
 #!/usr/bin/env python
 
+import io
+import os
+import sys
+import tempfile
 import unittest
+
 import arvados.errors as arv_error
 import arvados.commands.ws as arv_ws
+import arvados_testutil as tutil
 
 class ArvWsTestCase(unittest.TestCase):
     def run_ws(self, args):
@@ -11,3 +17,12 @@ class ArvWsTestCase(unittest.TestCase):
     def test_unsupported_arg(self):
         with self.assertRaises(SystemExit):
             self.run_ws(['-x=unknown'])
+
+    def test_version_argument(self):
+        err = io.BytesIO()
+        out = io.BytesIO()
+        with tutil.redirected_streams(stdout=out, stderr=err):
+            with self.assertRaises(SystemExit):
+                self.run_ws(['--version'])
+        self.assertEqual(out.getvalue(), '')
+        self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
index f2cdba28c775a523bc178052644cb4a76dac2771..0199724339b76f0fb37e5af89c8d62c53f332711 100644 (file)
@@ -17,6 +17,8 @@ class WebsocketTest(run_test_server.TestCaseWithServers):
     TIME_FUTURE = time.time()+3600
     MOCK_WS_URL = 'wss://[{}]/'.format(arvados_testutil.TEST_HOST)
 
+    TEST_TIMEOUT = 10.0
+
     def setUp(self):
         self.ws = None
 
@@ -262,20 +264,16 @@ class WebsocketTest(run_test_server.TestCaseWithServers):
 
     @mock.patch('arvados.events._EventClient')
     def test_run_forever_survives_reconnects(self, websocket_client):
-        connection_cond = threading.Condition()
-        def ws_connect():
-            with connection_cond:
-                connection_cond.notify_all()
-        websocket_client().connect.side_effect = ws_connect
+        connected = threading.Event()
+        websocket_client().connect.side_effect = connected.set
         client = arvados.events.EventClient(
             self.MOCK_WS_URL, [], lambda event: None, None)
-        with connection_cond:
-            forever_thread = threading.Thread(target=client.run_forever)
-            forever_thread.start()
-            # Simulate an unexpected disconnect, and wait for reconnect.
-            close_thread = threading.Thread(target=client.on_closed)
-            close_thread.start()
-            connection_cond.wait()
+        forever_thread = threading.Thread(target=client.run_forever)
+        forever_thread.start()
+        # Simulate an unexpected disconnect, and wait for reconnect.
+        close_thread = threading.Thread(target=client.on_closed)
+        close_thread.start()
+        self.assertTrue(connected.wait(timeout=self.TEST_TIMEOUT))
         close_thread.join()
         run_forever_alive = forever_thread.is_alive()
         client.close()
@@ -285,7 +283,10 @@ class WebsocketTest(run_test_server.TestCaseWithServers):
 
 
 class PollClientTestCase(unittest.TestCase):
+    TEST_TIMEOUT = 10.0
+
     class MockLogs(object):
+
         def __init__(self):
             self.logs = []
             self.lock = threading.Lock()
@@ -300,12 +301,11 @@ class PollClientTestCase(unittest.TestCase):
                 self.logs = []
             return {'items': retval, 'items_available': len(retval)}
 
-
     def setUp(self):
         self.logs = self.MockLogs()
         self.arv = mock.MagicMock(name='arvados.api()')
         self.arv.logs().list().execute.side_effect = self.logs.return_list
-        self.callback_cond = threading.Condition()
+        self.callback_called = threading.Event()
         self.recv_events = []
 
     def tearDown(self):
@@ -313,9 +313,8 @@ class PollClientTestCase(unittest.TestCase):
             self.client.close(timeout=None)
 
     def callback(self, event):
-        with self.callback_cond:
-            self.recv_events.append(event)
-            self.callback_cond.notify_all()
+        self.recv_events.append(event)
+        self.callback_called.set()
 
     def build_client(self, filters=None, callback=None, last_log_id=None, poll_time=99):
         if filters is None:
@@ -333,11 +332,11 @@ class PollClientTestCase(unittest.TestCase):
         test_log = {'id': 12345, 'testkey': 'testtext'}
         self.logs.add({'id': 123})
         self.build_client(poll_time=.01)
-        with self.callback_cond:
-            self.client.start()
-            self.callback_cond.wait()
-            self.logs.add(test_log.copy())
-            self.callback_cond.wait()
+        self.client.start()
+        self.assertTrue(self.callback_called.wait(self.TEST_TIMEOUT))
+        self.callback_called.clear()
+        self.logs.add(test_log.copy())
+        self.assertTrue(self.callback_called.wait(self.TEST_TIMEOUT))
         self.client.close(timeout=None)
         self.assertIn(test_log, self.recv_events)
 
@@ -345,9 +344,8 @@ class PollClientTestCase(unittest.TestCase):
         client_filter = ['kind', '=', 'arvados#test']
         self.build_client()
         self.client.subscribe([client_filter[:]])
-        with self.callback_cond:
-            self.client.start()
-            self.callback_cond.wait()
+        self.client.start()
+        self.assertTrue(self.callback_called.wait(self.TEST_TIMEOUT))
         self.client.close(timeout=None)
         self.assertTrue(self.was_filter_used(client_filter))
 
@@ -362,11 +360,10 @@ class PollClientTestCase(unittest.TestCase):
 
     def test_run_forever(self):
         self.build_client()
-        with self.callback_cond:
-            self.client.start()
-            forever_thread = threading.Thread(target=self.client.run_forever)
-            forever_thread.start()
-            self.callback_cond.wait()
+        self.client.start()
+        forever_thread = threading.Thread(target=self.client.run_forever)
+        forever_thread.start()
+        self.assertTrue(self.callback_called.wait(self.TEST_TIMEOUT))
         self.assertTrue(forever_thread.is_alive())
         self.client.close()
         forever_thread.join()
index 908539b8cae010f1cf0f23046bdcaf1f15f136b0..85b5bc81f00902a2a816d606bbc2cecff06de289 100644 (file)
@@ -1081,58 +1081,74 @@ class KeepClientRetryPutTestCase(KeepClientRetryTestMixin, unittest.TestCase):
             self.check_exception(copies=2, num_retries=3)
 
 
-class KeepClientAvoidClientOverreplicationTestCase(unittest.TestCase, tutil.ApiClientMock):
-    
-    
+class AvoidOverreplication(unittest.TestCase, tutil.ApiClientMock):
+
     class FakeKeepService(object):
-        def __init__(self, delay, will_succeed, replicas=1):
+        def __init__(self, delay, will_succeed=False, will_raise=None, replicas=1):
             self.delay = delay
-            self.success = will_succeed
+            self.will_succeed = will_succeed
+            self.will_raise = will_raise
             self._result = {}
             self._result['headers'] = {}
             self._result['headers']['x-keep-replicas-stored'] = str(replicas)
             self._result['body'] = 'foobar'
-        
+
         def put(self, data_hash, data, timeout):
             time.sleep(self.delay)
-            return self.success
-        
+            if self.will_raise is not None:
+                raise self.will_raise
+            return self.will_succeed
+
         def last_result(self):
-            return self._result
-        
+            if self.will_succeed:
+                return self._result
+
         def finished(self):
             return False
     
-    
-    def test_only_write_enough_on_success(self):
-        copies = 3
-        pool = arvados.KeepClient.KeepWriterThreadPool(
+    def setUp(self):
+        self.copies = 3
+        self.pool = arvados.KeepClient.KeepWriterThreadPool(
             data = 'foo',
             data_hash = 'acbd18db4cc2f85cedef654fccc4a4d8+3',
-            max_service_replicas = copies,
-            copies = copies
+            max_service_replicas = self.copies,
+            copies = self.copies
         )
+
+    def test_only_write_enough_on_success(self):
         for i in range(10):
             ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
-            pool.add_task(ks, None)
-        pool.join()
-        self.assertEqual(pool.done(), copies)
+            self.pool.add_task(ks, None)
+        self.pool.join()
+        self.assertEqual(self.pool.done(), self.copies)
 
     def test_only_write_enough_on_partial_success(self):
-        copies = 3
-        pool = arvados.KeepClient.KeepWriterThreadPool(
-            data = 'foo',
-            data_hash = 'acbd18db4cc2f85cedef654fccc4a4d8+3',
-            max_service_replicas = copies,
-            copies = copies
-        )
         for i in range(5):
             ks = self.FakeKeepService(delay=i/10.0, will_succeed=False)
-            pool.add_task(ks, None)
+            self.pool.add_task(ks, None)
+            ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
+            self.pool.add_task(ks, None)
+        self.pool.join()
+        self.assertEqual(self.pool.done(), self.copies)
+
+    def test_only_write_enough_when_some_crash(self):
+        for i in range(5):
+            ks = self.FakeKeepService(delay=i/10.0, will_raise=Exception())
+            self.pool.add_task(ks, None)
+            ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
+            self.pool.add_task(ks, None)
+        self.pool.join()
+        self.assertEqual(self.pool.done(), self.copies)
+
+    def test_fail_when_too_many_crash(self):
+        for i in range(self.copies+1):
+            ks = self.FakeKeepService(delay=i/10.0, will_raise=Exception())
+            self.pool.add_task(ks, None)
+        for i in range(self.copies-1):
             ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
-            pool.add_task(ks, None)
-        pool.join()
-        self.assertEqual(pool.done(), copies)
+            self.pool.add_task(ks, None)
+        self.pool.join()
+        self.assertEqual(self.pool.done(), self.copies-1)
     
 
 @tutil.skip_sleep
index 5d9b031e0295a62b86a6f0b4d6e9c13cc784da70..88237c9063b906d5df33baea2fda94a0032d3a00 100644 (file)
@@ -1,6 +1,6 @@
 source 'https://rubygems.org'
 
-gem 'rails', '~> 3.2.0'
+gem 'rails', '~> 3.2'
 
 # Bundle edge Rails instead:
 # gem 'rails',     :git => 'git://github.com/rails/rails.git'
@@ -12,14 +12,13 @@ group :test, :development do
   # Note: "require: false" here tells bunder not to automatically
   # 'require' the packages during application startup. Installation is
   # still mandatory.
+  gem 'test-unit', '~> 3.0', require: false
   gem 'simplecov', '~> 0.7.1', require: false
   gem 'simplecov-rcov', require: false
   gem 'mocha', require: false
 end
 
-# This might not be needed in :test and :development, but we load it
-# anyway to make sure it always gets in Gemfile.lock and to help
-# reveal install problems sooner rather than later.
+# pg is the only supported database driver.
 gem 'pg'
 
 # Start using multi_json once we are on Rails 3.2;
@@ -31,13 +30,13 @@ gem 'oj'
 # Gems used only for assets and not required
 # in production environments by default.
 group :assets do
-  gem 'sass-rails',   '>= 3.2.0'
-  gem 'coffee-rails', '~> 3.2.0'
+  gem 'sass-rails',   '~> 3.2'
+  gem 'coffee-rails', '~> 3.2'
 
   # See https://github.com/sstephenson/execjs#readme for more supported runtimes
   gem 'therubyracer'
 
-  gem 'uglifier', '>= 1.0.3'
+  gem 'uglifier', '~> 2.0'
 end
 
 gem 'jquery-rails'
@@ -60,8 +59,8 @@ gem 'acts_as_api'
 
 gem 'passenger'
 
-gem 'omniauth', '1.1.1'
-gem 'omniauth-oauth2', '1.1.1'
+gem 'omniauth', '~> 1.1'
+gem 'omniauth-oauth2', '~> 1.1'
 
 gem 'andand'
 
@@ -78,7 +77,7 @@ gem 'arvados-cli', '>= 0.1.20161017193526'
 # pg_power lets us use partial indexes in schema.rb in Rails 3
 gem 'pg_power'
 
-gem 'puma'
+gem 'puma', '~> 2.0'
 gem 'sshkey'
 gem 'safe_yaml'
 gem 'lograge'
index 6f7875163b63fa6af8462f2999e42bad4902f37d..9c9c4ae9e58b3105c47aa5eb330e052b3747a19f 100644 (file)
@@ -1,12 +1,12 @@
 GEM
   remote: https://rubygems.org/
   specs:
-    actionmailer (3.2.17)
-      actionpack (= 3.2.17)
+    actionmailer (3.2.22.5)
+      actionpack (= 3.2.22.5)
       mail (~> 2.5.4)
-    actionpack (3.2.17)
-      activemodel (= 3.2.17)
-      activesupport (= 3.2.17)
+    actionpack (3.2.22.5)
+      activemodel (= 3.2.22.5)
+      activesupport (= 3.2.22.5)
       builder (~> 3.0.0)
       erubis (~> 2.7.0)
       journey (~> 1.0.4)
@@ -14,31 +14,31 @@ GEM
       rack-cache (~> 1.2)
       rack-test (~> 0.6.1)
       sprockets (~> 2.2.1)
-    activemodel (3.2.17)
-      activesupport (= 3.2.17)
+    activemodel (3.2.22.5)
+      activesupport (= 3.2.22.5)
       builder (~> 3.0.0)
-    activerecord (3.2.17)
-      activemodel (= 3.2.17)
-      activesupport (= 3.2.17)
+    activerecord (3.2.22.5)
+      activemodel (= 3.2.22.5)
+      activesupport (= 3.2.22.5)
       arel (~> 3.0.2)
       tzinfo (~> 0.3.29)
-    activeresource (3.2.17)
-      activemodel (= 3.2.17)
-      activesupport (= 3.2.17)
-    activesupport (3.2.17)
+    activeresource (3.2.22.5)
+      activemodel (= 3.2.22.5)
+      activesupport (= 3.2.22.5)
+    activesupport (3.2.22.5)
       i18n (~> 0.6, >= 0.6.4)
       multi_json (~> 1.0)
-    acts_as_api (0.4.2)
+    acts_as_api (0.4.3)
       activemodel (>= 3.0.0)
       activesupport (>= 3.0.0)
       rack (>= 1.1.0)
     addressable (2.4.0)
     andand (1.3.3)
     arel (3.0.3)
-    arvados (0.1.20160420143004)
+    arvados (0.1.20160513152536)
       activesupport (>= 3, < 4.2.6)
       andand (~> 1.3, >= 1.3.3)
-      google-api-client (>= 0.7, < 0.9)
+      google-api-client (>= 0.7, < 0.8.9)
       i18n (~> 0)
       json (~> 1.7, >= 1.7.7)
       jwt (>= 0.1.5, < 2)
@@ -56,62 +56,71 @@ GEM
       extlib (>= 0.9.15)
       multi_json (>= 1.0.0)
     builder (3.0.4)
-    capistrano (2.15.5)
+    capistrano (2.15.9)
       highline
       net-scp (>= 1.0.0)
       net-sftp (>= 2.0.0)
       net-ssh (>= 2.0.14)
       net-ssh-gateway (>= 1.1.0)
-    coffee-rails (3.2.1)
+    coffee-rails (3.2.2)
       coffee-script (>= 2.2.0)
-      railties (~> 3.2.0.beta)
-    coffee-script (2.2.0)
+      railties (~> 3.2.0)
+    coffee-script (2.4.1)
       coffee-script-source
       execjs
-    coffee-script-source (1.7.0)
+    coffee-script-source (1.10.0)
     curb (0.9.3)
-    daemon_controller (1.2.0)
-    database_cleaner (1.2.0)
+    database_cleaner (1.5.3)
     erubis (2.7.0)
-    eventmachine (1.0.3)
-    execjs (2.0.2)
+    eventmachine (1.2.0.1)
+    execjs (2.7.0)
     extlib (0.9.16)
-    factory_girl (4.4.0)
+    factory_girl (4.7.0)
       activesupport (>= 3.0.0)
-    factory_girl_rails (4.4.1)
-      factory_girl (~> 4.4.0)
+    factory_girl_rails (4.7.0)
+      factory_girl (~> 4.7.0)
       railties (>= 3.0.0)
     faraday (0.9.2)
       multipart-post (>= 1.2, < 3)
-    faye-websocket (0.7.2)
+    faye-websocket (0.10.4)
       eventmachine (>= 0.12.0)
-      websocket-driver (>= 0.3.1)
-    google-api-client (0.7.1)
-      addressable (>= 2.3.2)
-      autoparse (>= 0.3.3)
-      extlib (>= 0.9.15)
-      faraday (>= 0.9.0)
-      jwt (>= 0.1.5)
-      launchy (>= 2.1.1)
-      multi_json (>= 1.0.0)
-      retriable (>= 1.4)
-      signet (>= 0.5.0)
-      uuidtools (>= 2.1.0)
-    hashie (1.2.0)
-    highline (1.6.21)
+      websocket-driver (>= 0.5.1)
+    google-api-client (0.8.7)
+      activesupport (>= 3.2, < 5.0)
+      addressable (~> 2.3)
+      autoparse (~> 0.3)
+      extlib (~> 0.9)
+      faraday (~> 0.9)
+      googleauth (~> 0.3)
+      launchy (~> 2.4)
+      multi_json (~> 1.10)
+      retriable (~> 1.4)
+      signet (~> 0.6)
+    googleauth (0.5.1)
+      faraday (~> 0.9)
+      jwt (~> 1.4)
+      logging (~> 2.0)
+      memoist (~> 0.12)
+      multi_json (~> 1.11)
+      os (~> 0.9)
+      signet (~> 0.7)
+    hashie (3.4.6)
+    highline (1.7.8)
     hike (1.2.3)
-    httpauth (0.2.1)
     i18n (0.7.0)
     journey (1.0.4)
-    jquery-rails (3.1.0)
+    jquery-rails (3.1.4)
       railties (>= 3.0, < 5.0)
       thor (>= 0.14, < 2.0)
     json (1.8.3)
-    jwt (0.1.13)
-      multi_json (>= 1.5)
+    jwt (1.5.6)
     launchy (2.4.3)
       addressable (~> 2.3)
-    libv8 (3.16.14.3)
+    libv8 (3.16.14.15)
+    little-plugger (1.1.4)
+    logging (2.1.0)
+      little-plugger (~> 1.1)
+      multi_json (~> 1.10)
     lograge (0.3.6)
       actionpack (>= 3)
       activesupport (>= 3)
@@ -120,100 +129,105 @@ GEM
     mail (2.5.4)
       mime-types (~> 1.16)
       treetop (~> 1.4.8)
+    memoist (0.15.0)
     metaclass (0.0.4)
     mime-types (1.25.1)
-    mocha (1.1.0)
+    mocha (1.2.0)
       metaclass (~> 0.0.1)
-    multi_json (1.12.0)
+    multi_json (1.12.1)
+    multi_xml (0.5.5)
     multipart-post (2.0.0)
-    net-scp (1.2.0)
+    net-scp (1.2.1)
       net-ssh (>= 2.6.5)
     net-sftp (2.1.2)
       net-ssh (>= 2.6.5)
-    net-ssh (2.8.0)
+    net-ssh (3.2.0)
     net-ssh-gateway (1.2.0)
       net-ssh (>= 2.6.5)
-    oauth2 (0.8.1)
-      faraday (~> 0.8)
-      httpauth (~> 0.1)
-      jwt (~> 0.1.4)
-      multi_json (~> 1.0)
-      rack (~> 1.2)
+    oauth2 (1.2.0)
+      faraday (>= 0.8, < 0.10)
+      jwt (~> 1.0)
+      multi_json (~> 1.3)
+      multi_xml (~> 0.5)
+      rack (>= 1.2, < 3)
     oj (2.15.0)
-    omniauth (1.1.1)
-      hashie (~> 1.2)
-      rack
-    omniauth-oauth2 (1.1.1)
-      oauth2 (~> 0.8.0)
-      omniauth (~> 1.0)
-    passenger (4.0.41)
-      daemon_controller (>= 1.2.0)
+    omniauth (1.3.1)
+      hashie (>= 1.2, < 4)
+      rack (>= 1.0, < 3)
+    omniauth-oauth2 (1.4.0)
+      oauth2 (~> 1.0)
+      omniauth (~> 1.2)
+    os (0.9.6)
+    passenger (5.0.30)
       rack
       rake (>= 0.8.1)
-    pg (0.17.1)
+    pg (0.19.0)
     pg_power (1.6.4)
       pg
       rails (~> 3.1)
-    polyglot (0.3.4)
-    puma (2.8.2)
-      rack (>= 1.1, < 2.0)
-    rack (1.4.5)
-    rack-cache (1.2)
+    polyglot (0.3.5)
+    power_assert (0.3.1)
+    puma (2.16.0)
+    rack (1.4.7)
+    rack-cache (1.6.1)
       rack (>= 0.4)
     rack-ssl (1.3.4)
       rack
-    rack-test (0.6.2)
+    rack-test (0.6.3)
       rack (>= 1.0)
-    rails (3.2.17)
-      actionmailer (= 3.2.17)
-      actionpack (= 3.2.17)
-      activerecord (= 3.2.17)
-      activeresource (= 3.2.17)
-      activesupport (= 3.2.17)
+    rails (3.2.22.5)
+      actionmailer (= 3.2.22.5)
+      actionpack (= 3.2.22.5)
+      activerecord (= 3.2.22.5)
+      activeresource (= 3.2.22.5)
+      activesupport (= 3.2.22.5)
       bundler (~> 1.0)
-      railties (= 3.2.17)
-    railties (3.2.17)
-      actionpack (= 3.2.17)
-      activesupport (= 3.2.17)
+      railties (= 3.2.22.5)
+    railties (3.2.22.5)
+      actionpack (= 3.2.22.5)
+      activesupport (= 3.2.22.5)
       rack-ssl (~> 1.3.2)
       rake (>= 0.8.7)
       rdoc (~> 3.4)
       thor (>= 0.14.6, < 2.0)
-    rake (10.2.2)
+    rake (11.3.0)
     rdoc (3.12.2)
       json (~> 1.4)
-    ref (1.0.5)
-    retriable (2.1.0)
-    ruby-prof (0.15.2)
-    rvm-capistrano (1.5.1)
+    ref (2.0.0)
+    retriable (1.4.1)
+    ruby-prof (0.16.2)
+    rvm-capistrano (1.5.6)
       capistrano (~> 2.15.4)
     safe_yaml (1.0.4)
-    sass (3.3.4)
+    sass (3.4.22)
     sass-rails (3.2.6)
       railties (~> 3.2.0)
       sass (>= 3.1.10)
       tilt (~> 1.3)
-    signet (0.5.1)
-      addressable (>= 2.2.3)
-      faraday (>= 0.9.0.rc5)
-      jwt (>= 0.1.5)
-      multi_json (>= 1.0.0)
+    signet (0.7.3)
+      addressable (~> 2.3)
+      faraday (~> 0.9)
+      jwt (~> 1.5)
+      multi_json (~> 1.10)
     simplecov (0.7.1)
       multi_json (~> 1.0)
       simplecov-html (~> 0.7.1)
     simplecov-html (0.7.1)
     simplecov-rcov (0.2.3)
       simplecov (>= 0.4.1)
-    sprockets (2.2.2)
+    sprockets (2.2.3)
       hike (~> 1.2)
       multi_json (~> 1.0)
       rack (~> 1.0)
       tilt (~> 1.1, != 1.3.0)
-    sshkey (1.6.1)
-    test_after_commit (0.2.3)
+    sshkey (1.8.0)
+    test-unit (3.2.1)
+      power_assert
+    test_after_commit (1.1.0)
+      activerecord (>= 3.2)
     themes_for_rails (0.5.1)
       rails (>= 3.0.0)
-    therubyracer (0.12.1)
+    therubyracer (0.12.2)
       libv8 (~> 3.16.14.0)
       ref
     thor (0.19.1)
@@ -222,12 +236,13 @@ GEM
       polyglot
       polyglot (>= 0.3.1)
     trollop (2.1.2)
-    tzinfo (0.3.39)
-    uglifier (2.5.0)
+    tzinfo (0.3.51)
+    uglifier (2.7.2)
       execjs (>= 0.3.0)
       json (>= 1.8.0)
-    uuidtools (2.1.5)
-    websocket-driver (0.3.2)
+    websocket-driver (0.6.4)
+      websocket-extensions (>= 0.1.0)
+    websocket-extensions (0.1.2)
 
 PLATFORMS
   ruby
@@ -237,7 +252,7 @@ DEPENDENCIES
   andand
   arvados (>= 0.1.20150615153458)
   arvados-cli (>= 0.1.20161017193526)
-  coffee-rails (~> 3.2.0)
+  coffee-rails (~> 3.2)
   database_cleaner
   factory_girl_rails
   faye-websocket
@@ -247,22 +262,26 @@ DEPENDENCIES
   mocha
   multi_json
   oj
-  omniauth (= 1.1.1)
-  omniauth-oauth2 (= 1.1.1)
+  omniauth (~> 1.1)
+  omniauth-oauth2 (~> 1.1)
   passenger
   pg
   pg_power
-  puma
-  rails (~> 3.2.0)
+  puma (~> 2.0)
+  rails (~> 3.2)
   ruby-prof
   rvm-capistrano
   safe_yaml
-  sass-rails (>= 3.2.0)
+  sass-rails (~> 3.2)
   simplecov (~> 0.7.1)
   simplecov-rcov
   sshkey
+  test-unit (~> 3.0)
   test_after_commit
   themes_for_rails
   therubyracer
   trollop
-  uglifier (>= 1.0.3)
+  uglifier (~> 2.0)
+
+BUNDLED WITH
+   1.13.6
index 776f7e190e06ad0a486dad78c04affe84493175a..d58c432b6c3f2ec5b1f17a47e798975b2759166f 100644 (file)
@@ -46,7 +46,7 @@ class ApplicationController < ActionController::Base
 
   theme :select_theme
 
-  attr_accessor :resource_attrs
+  attr_writer :resource_attrs
 
   begin
     rescue_from(Exception,
@@ -59,6 +59,18 @@ class ApplicationController < ActionController::Base
                 :with => :render_not_found)
   end
 
+  def initialize *args
+    super
+    @object = nil
+    @objects = nil
+    @offset = nil
+    @limit = nil
+    @select = nil
+    @distinct = nil
+    @response_resource_name = nil
+    @attrs = nil
+  end
+
   def default_url_options
     if Rails.configuration.host
       {:host => Rails.configuration.host}
@@ -420,7 +432,7 @@ class ApplicationController < ActionController::Base
   end
 
   def find_object_by_uuid
-    if params[:id] and params[:id].match /\D/
+    if params[:id] and params[:id].match(/\D/)
       params[:uuid] = params.delete :id
     end
     @where = { uuid: params[:uuid] }
@@ -567,7 +579,7 @@ class ApplicationController < ActionController::Base
         }
       end
     end
-    super *opts
+    super(*opts)
   end
 
   def select_theme
index 922cf7dac16b87741013c23e4073d4070a6fbe43..017c023db2ad1363fdd8549040bc402e17dce59f 100644 (file)
@@ -125,9 +125,9 @@ class Arvados::V1::CollectionsController < ApplicationController
           visited[uuid] = job.as_api_response
           if direction == :search_up
             # Follow upstream collections referenced in the script parameters
-            find_collections(visited, job) do |hash, uuid|
+            find_collections(visited, job) do |hash, col_uuid|
               search_edges(visited, hash, :search_up) if hash
-              search_edges(visited, uuid, :search_up) if uuid
+              search_edges(visited, col_uuid, :search_up) if col_uuid
             end
           elsif direction == :search_down
             # Follow downstream job output
index d6adbf08516a7c2c199cb240017b5e64ede195be..5d91a81074cdfe9e75df182132af4b17f1ff85e3 100644 (file)
@@ -68,9 +68,14 @@ class Arvados::V1::GroupsController < ApplicationController
      Collection,
      Human, Specimen, Trait]
 
-    table_names = klasses.map(&:table_name)
+    table_names = Hash[klasses.collect { |k| [k, k.table_name] }]
+
+    disabled_methods = Rails.configuration.disable_api_methods
+    avail_klasses = table_names.select{|k, t| !disabled_methods.include?(t+'.index')}
+    klasses = avail_klasses.keys
+
     request_filters.each do |col, op, val|
-      if col.index('.') && !table_names.include?(col.split('.', 2)[0])
+      if col.index('.') && !table_names.values.include?(col.split('.', 2)[0])
         raise ArgumentError.new("Invalid attribute '#{col}' in filter")
       end
     end
index 2c55b15068ca3e8e1a2046b44d20dc4fa86ba32e..5f43ba8af8e2c9146af1fc267bd6888748b05510 100644 (file)
@@ -85,7 +85,7 @@ class Arvados::V1::SchemaController < ApplicationController
       if Rails.application.config.websocket_address
         discovery[:websocketUrl] = Rails.application.config.websocket_address
       elsif ENV['ARVADOS_WEBSOCKETS']
-        discovery[:websocketUrl] = (root_url.sub /^http/, 'ws') + "websocket"
+        discovery[:websocketUrl] = root_url.sub(/^http/, 'ws') + "websocket"
       end
 
       ActiveRecord::Base.descendants.reject(&:abstract_class?).each do |k|
@@ -377,21 +377,21 @@ class Arvados::V1::SchemaController < ApplicationController
               method = d_methods[action.to_sym]
             end
             if ctl_class.respond_to? "_#{action}_requires_parameters".to_sym
-              ctl_class.send("_#{action}_requires_parameters".to_sym).each do |k, v|
+              ctl_class.send("_#{action}_requires_parameters".to_sym).each do |l, v|
                 if v.is_a? Hash
-                  method[:parameters][k] = v
+                  method[:parameters][l] = v
                 else
-                  method[:parameters][k] = {}
+                  method[:parameters][l] = {}
                 end
-                if !method[:parameters][k][:default].nil?
+                if !method[:parameters][l][:default].nil?
                   # The JAVA SDK is sensitive to all values being strings
-                  method[:parameters][k][:default] = method[:parameters][k][:default].to_s
+                  method[:parameters][l][:default] = method[:parameters][l][:default].to_s
                 end
-                method[:parameters][k][:type] ||= 'string'
-                method[:parameters][k][:description] ||= ''
-                method[:parameters][k][:location] = (route.segment_keys.include?(k) ? 'path' : 'query')
-                if method[:parameters][k][:required].nil?
-                  method[:parameters][k][:required] = v != false
+                method[:parameters][l][:type] ||= 'string'
+                method[:parameters][l][:description] ||= ''
+                method[:parameters][l][:location] = (route.segment_keys.include?(l) ? 'path' : 'query')
+                if method[:parameters][l][:required].nil?
+                  method[:parameters][l][:required] = v != false
                 end
               end
             end
index 32adde9507554ee9195bbc812b51cc1d86d753ba..f23cd98c354824b4998373183b2cbaa17b08a715 100644 (file)
@@ -17,7 +17,6 @@ class Arvados::V1::UserAgreementsController < ApplicationController
       # use this installation.
       @objects = []
     else
-      current_user_uuid = current_user.uuid
       act_as_system_user do
         uuids = Link.where("owner_uuid = ? and link_class = ? and name = ? and tail_uuid = ? and head_uuid like ?",
                            system_user_uuid,
@@ -25,7 +24,7 @@ class Arvados::V1::UserAgreementsController < ApplicationController
                            'require',
                            system_user_uuid,
                            Collection.uuid_like_pattern).
-          collect &:head_uuid
+          collect(&:head_uuid)
         @objects = Collection.where('uuid in (?)', uuids)
       end
     end
index 03efed999fcb9791df63d4c6bc8475003f55b4c7..db5e7bd952323f661bbcd11312a937956f4d5044 100644 (file)
@@ -159,7 +159,7 @@ class Arvados::V1::UsersController < ApplicationController
   end
 
   def apply_filters(model_class=nil)
-    return super if @read_users.any? &:is_admin
+    return super if @read_users.any?(&:is_admin)
     if params[:uuid] != current_user.andand.uuid
       # Non-admin index/show returns very basic information about readable users.
       safe_attrs = ["uuid", "is_active", "email", "first_name", "last_name"]
index e6474aa4e0328a6759039921b9962d627b0b374d..99b663da43b8d05fde6db0966fe5da515fdc0d84 100644 (file)
@@ -23,7 +23,7 @@ class Arvados::V1::VirtualMachinesController < ApplicationController
     @users = {}
     User.eager_load(:authorized_keys).
       where('users.uuid in (?)',
-            @vms.map { |vm| vm.login_permissions.map &:tail_uuid }.flatten.uniq).
+            @vms.map { |vm| vm.login_permissions.map(&:tail_uuid) }.flatten.uniq).
       each do |u|
       @users[u.uuid] = u
     end
index 21c8e4710cb5e62dbe224a6034c68ea1bd40b05e..6699f7363b35e9a8b49bb6badc1aba48bc3bd1a0 100644 (file)
@@ -11,7 +11,7 @@ class DatabaseController < ApplicationController
     # we can tell they're not valuable.
     user_uuids = User.
       where('email is null or email not like ?', '%@example.com').
-      collect &:uuid
+      collect(&:uuid)
     fixture_uuids =
       YAML::load_file(File.expand_path('../../../test/fixtures/users.yml',
                                        __FILE__)).
index d8c04a1adbfcd0512bdbf38a4225081709ca2de8..2487f2ecb7db7820dd35d1c5a393fa22dbc0f7cf 100644 (file)
@@ -7,7 +7,7 @@ class ArvadosApiToken
   # Create a new ArvadosApiToken handler
   # +app+  The next layer of the Rack stack.
   def initialize(app = nil, options = nil)
-    @app = app if app.respond_to?(:call)
+    @app = app.respond_to?(:call) ? app : nil
   end
 
   def call env
index 18d5647cc929e760a72ed48ed709a9d18b8da8a3..aed0309591e4ecbfa4c309747daee695417b16c8 100644 (file)
@@ -239,7 +239,7 @@ class ArvadosModel < ActiveRecord::Base
   end
 
   def logged_attributes
-    attributes.except *Rails.configuration.unlogged_attributes
+    attributes.except(*Rails.configuration.unlogged_attributes)
   end
 
   def self.full_text_searchable_columns
@@ -490,7 +490,7 @@ class ArvadosModel < ActiveRecord::Base
   end
 
   def foreign_key_attributes
-    attributes.keys.select { |a| a.match /_uuid$/ }
+    attributes.keys.select { |a| a.match(/_uuid$/) }
   end
 
   def skip_uuid_read_permission_check
@@ -505,7 +505,7 @@ class ArvadosModel < ActiveRecord::Base
     foreign_key_attributes.each do |attr|
       attr_value = send attr
       if attr_value.is_a? String and
-          attr_value.match /^[0-9a-f]{32,}(\+[@\w]+)*$/
+          attr_value.match(/^[0-9a-f]{32,}(\+[@\w]+)*$/)
         begin
           send "#{attr}=", Collection.normalize_uuid(attr_value)
         rescue
@@ -584,13 +584,12 @@ class ArvadosModel < ActiveRecord::Base
     unless uuid.is_a? String
       return nil
     end
-    resource_class = nil
 
     uuid.match HasUuid::UUID_REGEX do |re|
       return uuid_prefixes[re[1]] if uuid_prefixes[re[1]]
     end
 
-    if uuid.match /.+@.+/
+    if uuid.match(/.+@.+/)
       return Email
     end
 
index 41d5b27093c3ab55c296f7a592b9defb7e25d6dc..00c2501865fa6098244b09487376f83514620e34 100644 (file)
@@ -64,9 +64,9 @@ class Blob
   #   Return value: true if the locator has a valid signature, false otherwise
   #   Arguments: signed_blob_locator, opts
   #
-  def self.verify_signature *args
+  def self.verify_signature(*args)
     begin
-      self.verify_signature! *args
+      self.verify_signature!(*args)
       true
     rescue Blob::InvalidSignatureError
       false
index 8579509de70e9eff1c46d25563ca239fcf9dff8d..901084c7636a61496944b4c18616e2d79c3508e5 100644 (file)
@@ -32,6 +32,11 @@ class Collection < ArvadosModel
     t.add :expires_at
   end
 
+  after_initialize do
+    @signatures_checked = false
+    @computed_pdh_for_manifest_text = false
+  end
+
   def self.attributes_required_columns
     super.merge(
                 # If we don't list manifest_text explicitly, the
@@ -61,7 +66,9 @@ class Collection < ArvadosModel
     # subsequent passes without checking any signatures. This is
     # important because the signatures have probably been stripped off
     # by the time we get to a second validation pass!
-    return true if @signatures_checked and @signatures_checked == computed_pdh
+    if @signatures_checked && @signatures_checked == computed_pdh
+      return true
+    end
 
     if self.manifest_text_changed?
       # Check permissions on the collection manifest.
@@ -197,7 +204,7 @@ class Collection < ArvadosModel
         utf8 = manifest_text
         utf8.force_encoding Encoding::UTF_8
         if utf8.valid_encoding? and utf8 == manifest_text.encode(Encoding::UTF_8)
-          manifest_text = utf8
+          self.manifest_text = utf8
           return true
         end
       rescue
@@ -283,10 +290,10 @@ class Collection < ArvadosModel
     hash_part = nil
     size_part = nil
     uuid.split('+').each do |token|
-      if token.match /^[0-9a-f]{32,}$/
+      if token.match(/^[0-9a-f]{32,}$/)
         raise "uuid #{uuid} has multiple hash parts" if hash_part
         hash_part = token
-      elsif token.match /^\d+$/
+      elsif token.match(/^\d+$/)
         raise "uuid #{uuid} has multiple size parts" if size_part
         size_part = token
       end
index 71ea57fb95ce15f1d3c9d95479a0c3ddf145b446..419eca2e01fd3002762124045707b5b8dc38bf8f 100644 (file)
@@ -16,13 +16,13 @@ class CommitAncestor < ActiveRecord::Base
     @gitdirbase = Rails.configuration.git_repositories_dir
     self.is = nil
     Dir.foreach @gitdirbase do |repo|
-      next if repo.match /^\./
+      next if repo.match(/^\./)
       git_dir = repo.match(/\.git$/) ? repo : File.join(repo, '.git')
       repo_name = repo.sub(/\.git$/, '')
       ENV['GIT_DIR'] = File.join(@gitdirbase, git_dir)
-      IO.foreach("|git rev-list --format=oneline '#{self.descendant.gsub /[^0-9a-f]/,""}'") do |line|
+      IO.foreach("|git rev-list --format=oneline '#{self.descendant.gsub(/[^0-9a-f]/,"")}'") do |line|
         self.is = false
-        sha1, message = line.strip.split(" ", 2)
+        sha1, _ = line.strip.split(" ", 2)
         if sha1 == self.ancestor
           self.is = true
           break
index 30ca7f8cb29581b0c65b0a70dd49c02f3b59f753..ef3d0b5e1028b41a2bab4d63aa300efb4743bc0a 100644 (file)
@@ -67,6 +67,10 @@ class Job < ArvadosModel
             (Complete = 'Complete'),
            ]
 
+  after_initialize do
+    @need_crunch_dispatch_trigger = false
+  end
+
   def assert_finished
     update_attributes(finished_at: finished_at || db_current_time,
                       success: success.nil? ? false : success,
@@ -336,7 +340,7 @@ class Job < ArvadosModel
         assign_uuid
         Commit.tag_in_internal_repository repository, script_version, uuid
       rescue
-        uuid = uuid_was
+        self.uuid = uuid_was
         raise
       end
     end
@@ -565,24 +569,6 @@ class Job < ArvadosModel
   end
 
   def ensure_no_collection_uuids_in_script_params
-    # recursive_hash_search searches recursively through hashes and
-    # arrays in 'thing' for string fields matching regular expression
-    # 'pattern'.  Returns true if pattern is found, false otherwise.
-    def recursive_hash_search thing, pattern
-      if thing.is_a? Hash
-        thing.each do |k, v|
-          return true if recursive_hash_search v, pattern
-        end
-      elsif thing.is_a? Array
-        thing.each do |k|
-          return true if recursive_hash_search k, pattern
-        end
-      elsif thing.is_a? String
-        return true if thing.match pattern
-      end
-      false
-    end
-
     # Fail validation if any script_parameters field includes a string containing a
     # collection uuid pattern.
     if self.script_parameters_changed?
@@ -593,4 +579,22 @@ class Job < ArvadosModel
     end
     true
   end
+
+  # recursive_hash_search searches recursively through hashes and
+  # arrays in 'thing' for string fields matching regular expression
+  # 'pattern'.  Returns true if pattern is found, false otherwise.
+  def recursive_hash_search thing, pattern
+    if thing.is_a? Hash
+      thing.each do |k, v|
+        return true if recursive_hash_search v, pattern
+      end
+    elsif thing.is_a? Array
+      thing.each do |k|
+        return true if recursive_hash_search k, pattern
+      end
+    elsif thing.is_a? String
+      return true if thing.match pattern
+    end
+    false
+  end
 end
index 24872b21ec7163852cf86d0c0ceb3d3b41f13608..649a6f80c281fc83f2d6eaf4b0fc80fe82c28ce6 100644 (file)
@@ -8,7 +8,6 @@ class Link < ArvadosModel
   after_update :maybe_invalidate_permissions_cache
   after_create :maybe_invalidate_permissions_cache
   after_destroy :maybe_invalidate_permissions_cache
-  attr_accessor :head_kind, :tail_kind
   validate :name_links_are_obsolete
 
   api_accessible :user, extend: :common do |t|
index 7eab402609b482a238f8a40313bf622ece86c3c0..3207d1f288f2f264c671d6709063d93140ce3fec 100644 (file)
@@ -4,7 +4,6 @@ class Log < ArvadosModel
   include CommonApiTemplate
   serialize :properties, Hash
   before_validation :set_default_event_at
-  attr_accessor :object, :object_kind
   after_save :send_notify
 
   api_accessible :user, extend: :common do |t|
index e470e4c2bd9c47a45b395a4c90f4814edf89a417..18550204669c7cc6353d87cfc863bcbf3c4d876a 100644 (file)
@@ -32,6 +32,10 @@ class Node < ArvadosModel
     t.add lambda { |x| Rails.configuration.compute_node_nameservers }, :as => :nameservers
   end
 
+  after_initialize do
+    @bypass_arvados_authorization = false
+  end
+
   def domain
     super || Rails.configuration.compute_node_domain
   end
@@ -226,7 +230,7 @@ class Node < ArvadosModel
     (0..Rails.configuration.max_compute_nodes-1).each do |slot_number|
       hostname = hostname_for_slot(slot_number)
       hostfile = File.join Rails.configuration.dns_server_conf_dir, "#{hostname}.conf"
-      if !File.exists? hostfile
+      if !File.exist? hostfile
         n = Node.where(:slot_number => slot_number).first
         if n.nil? or n.ip_address.nil?
           dns_server_update(hostname, UNUSED_NODE_IP)
index f361a49db5dcd49b649d7e7f79c255e214eae97a..13b00df544cf1b20b3378d08aba78d264d1f570a 100644 (file)
@@ -86,7 +86,7 @@ class Repository < ArvadosModel
       prefix_match = Regexp.escape(owner.username + "/")
       errmsg_start = "must be the owner's username, then '/', then"
     end
-    if not /^#{prefix_match}[A-Za-z][A-Za-z0-9]*$/.match(name)
+    if not (/^#{prefix_match}[A-Za-z][A-Za-z0-9]*$/.match(name))
       errors.add(:name,
                  "#{errmsg_start} a letter followed by alphanumerics")
       false
index 9363cc4f02aa04d08552b9e343bbda9f8dcda5c1..964de38d0bfbf56a195a2d622d71c763db508753 100644 (file)
@@ -64,7 +64,7 @@ class User < ArvadosModel
   def is_invited
     !!(self.is_active ||
        Rails.configuration.new_users_are_active ||
-       self.groups_i_can(:read).select { |x| x.match /-f+$/ }.first)
+       self.groups_i_can(:read).select { |x| x.match(/-f+$/) }.first)
   end
 
   def groups_i_can(verb)
@@ -242,7 +242,7 @@ class User < ArvadosModel
 
     # delete "All users" group read permissions for this user
     group = Group.where(name: 'All users').select do |g|
-      g[:uuid].match /-f+$/
+      g[:uuid].match(/-f+$/)
     end.first
     Link.destroy_all(tail_uuid: self.uuid,
                      head_uuid: group[:uuid],
index 4489e58688ca642d8e0e9489f6896f49f9b89da6..f2830ae3166dc7fc2849feff72258dccca1e5f97 100644 (file)
@@ -3,4 +3,4 @@ require 'rubygems'
 # Set up gems listed in the Gemfile.
 ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
 
-require 'bundler/setup' if File.exists?(ENV['BUNDLE_GEMFILE'])
+require 'bundler/setup' if File.exist?(ENV['BUNDLE_GEMFILE'])
index 79bca3af389506f6bf63ee594b76399e6164514a..b6dadf7e2e386220fd8c1eaa2c9de0f7ea653a08 100644 (file)
@@ -10,8 +10,8 @@
 # end
 
 ActiveSupport::Inflector.inflections do |inflect|
-  inflect.plural /^([Ss]pecimen)$/i, '\1s'
-  inflect.singular /^([Ss]pecimen)s?/i, '\1'
-  inflect.plural /^([Hh]uman)$/i, '\1s'
-  inflect.singular /^([Hh]uman)s?/i, '\1'
+  inflect.plural(/^([Ss]pecimen)$/i, '\1s')
+  inflect.singular(/^([Ss]pecimen)s?/i, '\1')
+  inflect.plural(/^([Hh]uman)$/i, '\1s')
+  inflect.singular(/^([Hh]uman)s?/i, '\1')
 end
index 76234d3e4b0f6ab148f73cb7a1242af1eacefb6a..fd3c9773933703a58f06da0aaff3ad837cad41af 100644 (file)
@@ -6,7 +6,7 @@ rescue LoadError
   # configured by application.yml (i.e., here!) instead.
 end
 
-if (File.exists?(File.expand_path '../omniauth.rb', __FILE__) and
+if (File.exist?(File.expand_path '../omniauth.rb', __FILE__) and
     not defined? WARNED_OMNIAUTH_CONFIG)
   Rails.logger.warn <<-EOS
 DEPRECATED CONFIGURATION:
@@ -26,7 +26,7 @@ $application_config = {}
 
 %w(application.default application).each do |cfgfile|
   path = "#{::Rails.root.to_s}/config/#{cfgfile}.yml"
-  if File.exists? path
+  if File.exist? path
     yaml = ERB.new(IO.read path).result(binding)
     confs = YAML.load(yaml, deserialize_symbols: true)
     # Ignore empty YAML file:
index 7e2612377434b9e3bfc245a8b4dc6143d6ad00c6..1ae531c169af05428f390953a344b1d24cafa4b0 100644 (file)
@@ -7,6 +7,6 @@ require_relative 'load_config.rb'
 
 if Rails.env == 'development'
   Dir.foreach("#{Rails.root}/app/models") do |model_file|
-    require_dependency model_file if model_file.match /\.rb$/
+    require_dependency model_file if model_file.match(/\.rb$/)
   end
 end
index ce94f737a2467f855a7156ba76873db57cd183ee..48b0eb5983a750aad99e195cd2f08a7b4c01c92e 100644 (file)
@@ -27,7 +27,7 @@ class CrunchDispatch
     @cgroup_root = ENV['CRUNCH_CGROUP_ROOT']
 
     @arvados_internal = Rails.configuration.git_internal_dir
-    if not File.exists? @arvados_internal
+    if not File.exist? @arvados_internal
       $stderr.puts `mkdir -p #{@arvados_internal.shellescape} && git init --bare #{@arvados_internal.shellescape}`
       raise "No internal git repository available" unless ($? == 0)
     end
@@ -73,7 +73,7 @@ class CrunchDispatch
       # into multiple rows with one hostname each.
       `#{cmd} --noheader -o '%N:#{outfmt}'`.each_line do |line|
         tokens = line.chomp.split(":", max_fields)
-        if (re = tokens[0].match /^(.*?)\[([-,\d]+)\]$/)
+        if (re = tokens[0].match(/^(.*?)\[([-,\d]+)\]$/))
           tokens.shift
           re[2].split(",").each do |range|
             range = range.split("-").collect(&:to_i)
@@ -105,7 +105,7 @@ class CrunchDispatch
   end
 
   def update_node_status
-    return unless Server::Application.config.crunch_job_wrapper.to_s.match /^slurm/
+    return unless Server::Application.config.crunch_job_wrapper.to_s.match(/^slurm/)
     slurm_status.each_pair do |hostname, slurmdata|
       next if @node_state[hostname] == slurmdata
       begin
@@ -169,7 +169,7 @@ class CrunchDispatch
       end
       usable_nodes << node
       if usable_nodes.count >= min_node_count
-        return usable_nodes.map { |node| node.hostname }
+        return usable_nodes.map { |n| n.hostname }
       end
     end
     nil
@@ -512,8 +512,6 @@ class CrunchDispatch
 
   def read_pipes
     @running.each do |job_uuid, j|
-      job = j[:job]
-
       now = Time.now
       if now > j[:log_throttle_reset_time]
         # It has been more than throttle_period seconds since the last
index fbd4ef5f0c67933a7cc703d9f532c94fd601fc3d..97348d5fb4b7a9458089016dc280778559365185 100644 (file)
@@ -1,3 +1,11 @@
+$system_user = nil
+$system_group = nil
+$all_users_group = nil
+$anonymous_user = nil
+$anonymous_group = nil
+$anonymous_group_read_permission = nil
+$empty_collection = nil
+
 module CurrentApiClient
   def current_user
     Thread.current[:user]
@@ -83,9 +91,7 @@ module CurrentApiClient
             User.all.collect(&:uuid).each do |user_uuid|
               Link.create!(link_class: 'permission',
                            name: 'can_manage',
-                           tail_kind: 'arvados#group',
                            tail_uuid: system_group_uuid,
-                           head_kind: 'arvados#user',
                            head_uuid: user_uuid)
             end
           end
index 16bb030941c3033ebf32cb972a645eb821a063d3..cb65c7f30c7da92bb54f8ce66fd2eb72e9ddc5c7 100644 (file)
@@ -78,6 +78,10 @@ class EventBus
     @connection_count = 0
   end
 
+  def send_message(ws, obj)
+    ws.send(Oj.dump(obj, mode: :compat))
+  end
+
   # Push out any pending events to the connection +ws+
   # +notify_id+  the id of the most recent row in the log table, may be nil
   #
@@ -146,7 +150,7 @@ class EventBus
         logs.select('logs.id').find_each do |l|
           if not ws.sent_ids.include?(l.id)
             # only send if not a duplicate
-            ws.send(Log.find(l.id).as_api_response.to_json)
+            send_message(ws, Log.find(l.id).as_api_response)
           end
           if not ws.last_log_id.nil?
             # record ids only when sending "catchup" messages, not notifies
@@ -158,12 +162,12 @@ class EventBus
     rescue ArgumentError => e
       # There was some kind of user error.
       Rails.logger.warn "Error publishing event: #{$!}"
-      ws.send ({status: 500, message: $!}.to_json)
+      send_message(ws, {status: 500, message: $!})
       ws.close
     rescue => e
       Rails.logger.warn "Error publishing event: #{$!}"
       Rails.logger.warn "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
-      ws.send ({status: 500, message: $!}.to_json)
+      send_message(ws, {status: 500, message: $!})
       ws.close
       # These exceptions typically indicate serious server trouble:
       # out of memory issues, database connection problems, etc.  Go ahead and
@@ -180,7 +184,7 @@ class EventBus
         p = (Oj.strict_load event.data).symbolize_keys
         filter = Filter.new(p)
       rescue Oj::Error => e
-        ws.send ({status: 400, message: "malformed request"}.to_json)
+        send_message(ws, {status: 400, message: "malformed request"})
         return
       end
 
@@ -200,12 +204,12 @@ class EventBus
           # Add a filter.  This gets the :filters field which is the same
           # format as used for regular index queries.
           ws.filters << filter
-          ws.send ({status: 200, message: 'subscribe ok', filter: p}.to_json)
+          send_message(ws, {status: 200, message: 'subscribe ok', filter: p})
 
           # Send any pending events
           push_events ws, nil
         else
-          ws.send ({status: 403, message: "maximum of #{Rails.configuration.websocket_max_filters} filters allowed per connection"}.to_json)
+          send_message(ws, {status: 403, message: "maximum of #{Rails.configuration.websocket_max_filters} filters allowed per connection"})
         end
 
       elsif p[:method] == 'unsubscribe'
@@ -214,18 +218,18 @@ class EventBus
         len = ws.filters.length
         ws.filters.select! { |f| not ((f.filters == p[:filters]) or (f.filters.empty? and p[:filters].nil?)) }
         if ws.filters.length < len
-          ws.send ({status: 200, message: 'unsubscribe ok'}.to_json)
+          send_message(ws, {status: 200, message: 'unsubscribe ok'})
         else
-          ws.send ({status: 404, message: 'filter not found'}.to_json)
+          send_message(ws, {status: 404, message: 'filter not found'})
         end
 
       else
-        ws.send ({status: 400, message: "missing or unrecognized method"}.to_json)
+        send_message(ws, {status: 400, message: "missing or unrecognized method"})
       end
     rescue => e
       Rails.logger.warn "Error handling message: #{$!}"
       Rails.logger.warn "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
-      ws.send ({status: 500, message: 'error'}.to_json)
+      send_message(ws, {status: 500, message: 'error'})
       ws.close
     end
   end
@@ -241,8 +245,13 @@ class EventBus
     # Disconnect if no valid API token.
     # current_user is included from CurrentApiClient
     if not current_user
-      ws.send ({status: 401, message: "Valid API token required"}.to_json)
-      ws.close
+      send_message(ws, {status: 401, message: "Valid API token required"})
+      # Wait for the handshake to complete before closing the
+      # socket. Otherwise, nginx responds with HTTP 502 Bad gateway,
+      # and the client never sees our real error message.
+      ws.on :open do |event|
+        ws.close
+      end
       return
     end
 
@@ -262,7 +271,7 @@ class EventBus
     # forward them to the thread associated with the connection.
     sub = @channel.subscribe do |msg|
       if ws.queue.length > Rails.configuration.websocket_max_notify_backlog
-        ws.send ({status: 500, message: 'Notify backlog too long'}.to_json)
+        send_message(ws, {status: 500, message: 'Notify backlog too long'})
         ws.close
         @channel.unsubscribe sub
         ws.queue.clear
index 5b22274d07781325276b5df152037a5b2a13dc61..3bab33f9a9d90e5da56d206841d8c05052909b85 100644 (file)
@@ -92,11 +92,11 @@ module LoadParam
         # has used set_table_name to use an alternate table name from the Rails standard.
         # I could not find a perfect way to handle this well, but ActiveRecord::Base.send(:descendants)
         # would be a place to start if this ever becomes necessary.
-        if attr.match /^[a-z][_a-z0-9]+$/ and
+        if attr.match(/^[a-z][_a-z0-9]+$/) and
             model_class.columns.collect(&:name).index(attr) and
             ['asc','desc'].index direction.downcase
           @orders << "#{table_name}.#{attr} #{direction.downcase}"
-        elsif attr.match /^([a-z][_a-z0-9]+)\.([a-z][_a-z0-9]+)$/ and
+        elsif attr.match(/^([a-z][_a-z0-9]+)\.([a-z][_a-z0-9]+)$/) and
             ['asc','desc'].index(direction.downcase) and
             ActiveRecord::Base.connection.tables.include?($1) and
             $1.classify.constantize.columns.collect(&:name).index($2)
index 2011f812d5ccf8e3836394acafd50d14ef58f7ec..c6664b1ec916413fc3d2e431bb12551a303bf1ed 100755 (executable)
@@ -29,7 +29,7 @@ module SalvageCollection
   def salvage_collection_locator_data manifest
     locators = []
     size = 0
-    manifest.scan /(^|[^[:xdigit:]])([[:xdigit:]]{32})((\+\d+)(\+|\b))?/ do |_, hash, _, sizehint, _|
+    manifest.scan(/(^|[^[:xdigit:]])([[:xdigit:]]{32})((\+\d+)(\+|\b))?/) do |_, hash, _, sizehint, _|
       if sizehint
         locators << hash.downcase + sizehint
         size += sizehint.to_i
index 3a8ed2724f236b9966f0f9e64d625e11db36486f..b78553491715bf6aa85ea5615bbbdb39392e0a95 100755 (executable)
@@ -22,7 +22,7 @@ DEBUG = 1
 # load and merge in the environment-specific application config info
 # if present, overriding base config parameters as specified
 path = File.absolute_path('../../config/arvados-clients.yml', __FILE__)
-if File.exists?(path) then
+if File.exist?(path) then
   cp_config = YAML.load_file(path)[ENV['RAILS_ENV']]
 else
   puts "Please create a\n #{path}\n file"
@@ -214,7 +214,7 @@ end
 
 begin
   # Get our local gitolite-admin repo up to snuff
-  if not File.exists?(gitolite_admin) then
+  if not File.exist?(gitolite_admin) then
     ensure_directory(gitolite_tmpdir, 0700)
     Dir.chdir(gitolite_tmpdir)
     `git clone #{gitolite_url}`
index 8db1a0edadf4f8707d630d5dc02021501e0724a3..169509f63b96337f9b129f1b1ffd50305112f3aa 100755 (executable)
@@ -35,7 +35,7 @@ DEBUG = 1
 # load and merge in the environment-specific application config info
 # if present, overriding base config parameters as specified
 path = File.dirname(__FILE__) + '/config/arvados-clients.yml'
-if File.exists?(path) then
+if File.exist?(path) then
   cp_config = YAML.load_file(path)[ENV['RAILS_ENV']]
 else
   puts "Please create a\n " + File.dirname(__FILE__) + "/config/arvados-clients.yml\n file"
@@ -186,7 +186,7 @@ end
 
 begin
   # Get our local gitolite-admin repo up to snuff
-  if not File.exists?(gitolite_admin) then
+  if not File.exist?(gitolite_admin) then
     ensure_directory(gitolite_tmpdir, 0700)
     Dir.chdir(gitolite_tmpdir)
     `git clone #{gitolite_url}`
index 6ec9e9f05d5ad7cdeff29cda76c20abbe7a4eae1..a0e5ad95dc32a0210d28577c2b899d05c53fd3f1 100644 (file)
@@ -4,7 +4,7 @@ end
 
 FactoryGirl.define do
   factory :user do
-    ignore do
+    transient do
       join_groups []
     end
     after :create do |user, evaluator|
index acacf4023829756085caf1feb269845887043d4d..76f59c29f87a3ebbad2672a42a5f1502883c2b13 100644 (file)
@@ -90,6 +90,8 @@ completed:
   output_path: test
   command: ["echo", "hello"]
   container_uuid: zzzzz-dz642-compltcontainer
+  log_uuid: zzzzz-4zz18-y9vne9npefyxh8g
+  output_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
   runtime_constraints:
     vcpus: 1
     ram: 123
index cbd82de9241101a72cc1c263903b9a403a8234fa..49503c971236af12a936085d92084a79e2dfc0ba 100644 (file)
@@ -243,3 +243,25 @@ template_in_asubproject_with_same_name_as_one_in_active_user_home:
         dataclass: Collection
         title: "Foo/bar pair"
         description: "Provide a collection containing at least two files."
+
+workflow_with_input_defaults:
+  uuid: zzzzz-p5p6p-aox0k0ofxrystg2
+  owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
+  created_at: 2014-04-14 12:35:04 -0400
+  updated_at: 2014-04-14 12:35:04 -0400
+  modified_at: 2014-04-14 12:35:04 -0400
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  name: Pipeline with default input specifications
+  components:
+    part-one:
+      script: foo
+      script_version: master
+      script_parameters:
+        ex_string:
+          required: true
+          dataclass: string
+        ex_string_def:
+          required: true
+          dataclass: string
+          default: hello-testing-123
\ No newline at end of file
index 4badf9e175f21c1fb521befa6d6c6bbe8024af5b..f79320e907801cae499cdcbf809a4602194656f4 100644 (file)
@@ -44,3 +44,22 @@ workflow_with_input_specifications:
       inputBinding:
         position: 1
     outputs: []
+
+workflow_with_input_defaults:
+  uuid: zzzzz-7fd4e-validwithinput2
+  owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
+  name: Workflow with default input specifications
+  description: this workflow has inputs specified
+  created_at: <%= 1.minute.ago.to_s(:db) %>
+  definition: |
+    cwlVersion: v1.0
+    class: CommandLineTool
+    baseCommand:
+    - echo
+    inputs:
+    - type: string
+      id: ex_string
+    - type: string
+      id: ex_string_def
+      default: hello-testing-123
+    outputs: []
index 37e690e0b21ccb221454d6a46ac04b4e732c2ce7..6c09d8e9f593a8e7c847c83ceb9bb46dcb913397 100644 (file)
@@ -46,7 +46,7 @@ class Arvados::V1::ApiClientAuthorizationsControllerTest < ActionController::Tes
     get :index, search_params
     assert_response :success
     got_tokens = JSON.parse(@response.body)['items']
-      .map { |auth| auth['api_token'] }
+      .map { |a| a['api_token'] }
     assert_equal(expected_tokens.sort, got_tokens.sort,
                  "wrong results for #{search_params.inspect}")
   end
index c85cc1979f99482ff36ba6dc38ba5790ec7bf591..b96e22ed6583befa82153c623da9bac73014745e 100644 (file)
@@ -794,11 +794,11 @@ EOS
     [2**8, :success],
     [2**18, 422],
   ].each do |description_size, expected_response|
-    test "create collection with description size #{description_size}
+    # Descriptions are not part of search indexes. Skip until
+    # full-text search is implemented, at which point replace with a
+    # search in description.
+    skip "create collection with description size #{description_size}
           and expect response #{expected_response}" do
-      skip "(Descriptions are not part of search indexes. Skip until full-text search
-            is implemented, at which point replace with a search in description.)"
-
       authorize_with :active
 
       description = 'here is a collection with a very large description'
index 9344b0bc75c3be0c9bc878207df52275a1190e88..2b1b675323fa8a05cdd9682a176e3ec561a099ad 100644 (file)
@@ -32,7 +32,7 @@ class Arvados::V1::FiltersTest < ActionController::TestCase
       filters: [['uuid', '@@', 'abcdef']],
     }
     assert_response 422
-    assert_match /not supported/, json_response['errors'].join(' ')
+    assert_match(/not supported/, json_response['errors'].join(' '))
   end
 
   test 'difficult characters in full text search' do
@@ -52,7 +52,7 @@ class Arvados::V1::FiltersTest < ActionController::TestCase
       filters: [['any', '@@', ['abc', 'def']]],
     }
     assert_response 422
-    assert_match /not supported/, json_response['errors'].join(' ')
+    assert_match(/not supported/, json_response['errors'].join(' '))
   end
 
   test 'api responses provide timestamps with nanoseconds' do
@@ -65,7 +65,7 @@ class Arvados::V1::FiltersTest < ActionController::TestCase
       %w(created_at modified_at).each do |attr|
         # Pass fixtures with null timestamps.
         next if item[attr].nil?
-        assert_match /^\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d.\d{9}Z$/, item[attr]
+        assert_match(/^\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d.\d{9}Z$/, item[attr])
       end
     end
   end
index 10534a70610a8188d35863992f2810ac29195937..e9abf9d495bbaf937532e6ad44ebc1449254c66e 100644 (file)
@@ -55,12 +55,12 @@ class Arvados::V1::GroupsControllerTest < ActionController::TestCase
     assert_equal 0, json_response['items_available']
   end
 
-  def check_project_contents_response
+  def check_project_contents_response disabled_kinds=[]
     assert_response :success
     assert_operator 2, :<=, json_response['items_available']
     assert_operator 2, :<=, json_response['items'].count
     kinds = json_response['items'].collect { |i| i['kind'] }.uniq
-    expect_kinds = %w'arvados#group arvados#specimen arvados#pipelineTemplate arvados#job'
+    expect_kinds = %w'arvados#group arvados#specimen arvados#pipelineTemplate arvados#job' - disabled_kinds
     assert_equal expect_kinds, (expect_kinds & kinds)
 
     json_response['items'].each do |i|
@@ -69,6 +69,10 @@ class Arvados::V1::GroupsControllerTest < ActionController::TestCase
                "group#contents returned a non-project group")
       end
     end
+
+    disabled_kinds.each do |d|
+      assert_equal true, !kinds.include?(d)
+    end
   end
 
   test 'get group-owned objects' do
@@ -448,4 +452,15 @@ class Arvados::V1::GroupsControllerTest < ActionController::TestCase
       end
     end
   end
+
+  test 'get contents with jobs and pipeline instances disabled' do
+    Rails.configuration.disable_api_methods = ['jobs.index', 'pipeline_instances.index']
+
+    authorize_with :active
+    get :contents, {
+      id: groups(:aproject).uuid,
+      format: :json,
+    }
+    check_project_contents_response %w'arvados#pipelineInstance arvados#job'
+  end
 end
index 3c11b3e00940fefb644b2ea4b3e64f81ffdfed2b..8808a82c45c92d398df02e497438f137a7bf1e1a 100644 (file)
@@ -97,7 +97,7 @@ class Arvados::V1::JobsControllerTest < ActionController::TestCase
                  'server should correct bogus cancelled_at ' +
                  job['cancelled_at'])
     assert_equal(true,
-                 File.exists?(Rails.configuration.crunch_refresh_trigger),
+                 File.exist?(Rails.configuration.crunch_refresh_trigger),
                  'trigger file should be created when job is cancelled')
   end
 
index 1345701b43e8a7666a7c634d8f73c7762ad59467..6a19bdf4ad0baf8946d54be21c965f9b2d783ebd 100644 (file)
@@ -305,8 +305,8 @@ class Arvados::V1::LinksControllerTest < ActionController::TestCase
     assert_response 404
   end
 
-  test "retrieve all permissions using generic links index api" do
-    skip "(not implemented)"
+  # not implemented
+  skip "retrieve all permissions using generic links index api" do
     # Links.readable_by() does not return the full set of permission
     # links that are visible to a user (i.e., all permission links
     # whose head_uuid references an object for which the user has
index 71b528e72afd9467539f2136d3163481e582b956..56dd57ce7c64dd4697279ce3332ccd86546c99ef 100644 (file)
@@ -43,9 +43,8 @@ class Arvados::V1::RepositoriesControllerTest < ActionController::TestCase
   end
 
   test "get_all_permissions takes into account is_active flag" do
-    r = nil
     act_as_user users(:active) do
-      r = Repository.create! name: 'active/testrepo'
+      Repository.create! name: 'active/testrepo'
     end
     act_as_system_user do
       u = users(:active)
@@ -170,19 +169,19 @@ class Arvados::V1::RepositoriesControllerTest < ActionController::TestCase
         u = User.find_by_uuid(user_uuid)
         if perms['can_read']
           assert u.can? read: repo['uuid']
-          assert_match /R/, perms['gitolite_permissions']
+          assert_match(/R/, perms['gitolite_permissions'])
         else
-          refute_match /R/, perms['gitolite_permissions']
+          refute_match(/R/, perms['gitolite_permissions'])
         end
         if perms['can_write']
           assert u.can? write: repo['uuid']
-          assert_match /RW\+/, perms['gitolite_permissions']
+          assert_match(/RW\+/, perms['gitolite_permissions'])
         else
-          refute_match /W/, perms['gitolite_permissions']
+          refute_match(/W/, perms['gitolite_permissions'])
         end
         if perms['can_manage']
           assert u.can? manage: repo['uuid']
-          assert_match /RW\+/, perms['gitolite_permissions']
+          assert_match(/RW\+/, perms['gitolite_permissions'])
         end
       end
     end
index 2e370ec9cd63db9b61f6e93ab15111d028f469e0..710182174621b0bcf1eaddc8432b4ca824182949 100644 (file)
@@ -32,7 +32,7 @@ class Arvados::V1::SchemaControllerTest < ActionController::TestCase
     get :index
     assert_response :success
     discovery_doc = JSON.parse(@response.body)
-    assert_match /^[0-9a-f]+(-modified)?$/, discovery_doc['source_version']
+    assert_match(/^[0-9a-f]+(-modified)?$/, discovery_doc['source_version'])
   end
 
   test "discovery document overrides source_version with config" do
index 157e487859c927a978baad10c810592e36be9e77..579b8cc6d05256a88086cd0a50592e3d8a1afaa7 100644 (file)
@@ -603,7 +603,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
 
     active_user = User.find_by_uuid(users(:active).uuid)
     readable_groups = active_user.groups_i_can(:read)
-    all_users_group = Group.all.collect(&:uuid).select { |g| g.match /-f+$/ }
+    all_users_group = Group.all.collect(&:uuid).select { |g| g.match(/-f+$/) }
     refute_includes(readable_groups, all_users_group,
                     "active user can read All Users group after being deactivated")
     assert_equal(false, active_user.is_invited,
@@ -842,14 +842,12 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
   end
 
   def verify_num_links (original_links, expected_additional_links)
-    links_now = Link.all
     assert_equal expected_additional_links, Link.all.size-original_links.size,
         "Expected #{expected_additional_links.inspect} more links"
   end
 
   def find_obj_in_resp (response_items, object_type, head_kind=nil)
     return_obj = nil
-    response_items
     response_items.each { |x|
       if !x
         next
index 329bc1589afc6a2298472d739524fc094e7f0723..9b805af8e38b7095cdddafd67a5f18c63a7e5237 100644 (file)
@@ -33,7 +33,6 @@ class Arvados::V1::VirtualMachinesControllerTest < ActionController::TestCase
   test "groups is an empty list by default" do
     get_logins_for(:testvm2)
     active_login = find_login(:active)
-    perm = links(:active_can_login_to_testvm2)
     assert_equal([], active_login["groups"])
   end
 
index 2a618204c649bace8e6d271059ccee0413807e7d..4cf70cfbc61360aa9661bee1666d921a90137e8e 100644 (file)
@@ -74,7 +74,7 @@ module UsersTestHelper
     end
 
     group = Group.where(name: 'All users').select do |g|
-      g[:uuid].match /-f+$/
+      g[:uuid].match(/-f+$/)
     end.first
     group_read_perms = Link.where(tail_uuid: uuid,
                                   head_uuid: group[:uuid],
index 0bedc0726a08549711f3455f509778b1f9901de3..5f55f5eaecf8a56fa71c59a1f78cb9d6d065ec0c 100644 (file)
@@ -21,7 +21,7 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest
       :filters => ['uuid', '=', 'ad02e37b6a7f45bbe2ead3c29a109b8a+54'].to_json
     }, auth(:active)
     assert_response 422
-    assert_match /nvalid element.*not an array/, json_response['errors'].join(' ')
+    assert_match(/nvalid element.*not an array/, json_response['errors'].join(' '))
   end
 
   test "get index with invalid filters (unsearchable column) responds 422" do
@@ -30,7 +30,7 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest
       :filters => [['this_column_does_not_exist', '=', 'bogus']].to_json
     }, auth(:active)
     assert_response 422
-    assert_match /nvalid attribute/, json_response['errors'].join(' ')
+    assert_match(/nvalid attribute/, json_response['errors'].join(' '))
   end
 
   test "get index with invalid filters (invalid operator) responds 422" do
@@ -39,7 +39,7 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest
       :filters => [['uuid', ':-(', 'displeased']].to_json
     }, auth(:active)
     assert_response 422
-    assert_match /nvalid operator/, json_response['errors'].join(' ')
+    assert_match(/nvalid operator/, json_response['errors'].join(' '))
   end
 
   test "get index with invalid filters (invalid operand type) responds 422" do
@@ -48,7 +48,7 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest
       :filters => [['uuid', '=', {foo: 'bar'}]].to_json
     }, auth(:active)
     assert_response 422
-    assert_match /nvalid operand type/, json_response['errors'].join(' ')
+    assert_match(/nvalid operand type/, json_response['errors'].join(' '))
   end
 
   test "get index with where= (empty string)" do
@@ -73,7 +73,7 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest
           :select => ['bogus'].to_json
         }, auth(:active)
     assert_response 422
-    assert_match /Invalid attribute.*bogus/, json_response['errors'].join(' ')
+    assert_match(/Invalid attribute.*bogus/, json_response['errors'].join(' '))
   end
 
   test "get index with select= (invalid attribute type) responds 422" do
@@ -82,7 +82,7 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest
           :select => [['bogus']].to_json
         }, auth(:active)
     assert_response 422
-    assert_match /Invalid attribute.*bogus/, json_response['errors'].join(' ')
+    assert_match(/Invalid attribute.*bogus/, json_response['errors'].join(' '))
   end
 
   test "controller 404 response is json" do
@@ -243,8 +243,6 @@ class CollectionsApiTest < ActionDispatch::IntegrationTest
     assert_response :success
     assert_equal true, json_response['manifest_text'].include?('file4_in_subdir4.txt')
 
-    created = json_response
-
     # search using the filename
     search_using_full_text_search 'subdir2', 0
     search_using_full_text_search 'subdir2:*', 1
index a952c202cb7dbadf73fae734ca0141d000ac5cde..f6f39fe526edff84cc1679887bf400ecd2c3bc00 100644 (file)
@@ -5,8 +5,7 @@ require 'helpers/time_block'
 class CollectionsApiPerformanceTest < ActionDispatch::IntegrationTest
   include ManifestExamples
 
-  test "crud cycle for a collection with a big manifest" do
-    slow_test
+  slow_test "crud cycle for a collection with a big manifest" do
     bigmanifest = time_block 'make example' do
       make_manifest(streams: 100,
                     files_per_stream: 100,
@@ -39,8 +38,7 @@ class CollectionsApiPerformanceTest < ActionDispatch::IntegrationTest
     end
   end
 
-  test "memory usage" do
-    slow_test
+  slow_test "memory usage" do
     hugemanifest = make_manifest(streams: 1,
                                  files_per_stream: 2000,
                                  blocks_per_file: 200,
index ebe7ce7a6705b0d99d2e4b439b9339c7061a6531..28c1b81dabcc8621707044e1f7d225e6e1a624b7 100644 (file)
@@ -70,7 +70,7 @@ class CrossOriginTest < ActionDispatch::IntegrationTest
 
   def assert_no_cors_headers
     response.headers.keys.each do |h|
-      assert_no_match /^Access-Control-/i, h
+      assert_no_match(/^Access-Control-/i, h)
     end
   end
 end
index ecb2f2a05831a44a7798fd98d048a821878fd11a..029e37cbbfed18075a73785ce1b565d5907202cd 100644 (file)
@@ -3,8 +3,7 @@ require 'test_helper'
 class DatabaseResetTest < ActionDispatch::IntegrationTest
   self.use_transactional_fixtures = false
 
-  test "reset fails when Rails.env != 'test'" do
-    slow_test
+  slow_test "reset fails when Rails.env != 'test'" do
     rails_env_was = Rails.env
     begin
       Rails.env = 'production'
@@ -22,8 +21,7 @@ class DatabaseResetTest < ActionDispatch::IntegrationTest
     assert_response 403
   end
 
-  test "database reset doesn't break basic CRUD operations" do
-    slow_test
+  slow_test "database reset doesn't break basic CRUD operations" do
     active_auth = auth(:active)
     admin_auth = auth(:admin)
 
@@ -49,8 +47,7 @@ class DatabaseResetTest < ActionDispatch::IntegrationTest
     assert_response 404
   end
 
-  test "roll back database change" do
-    slow_test
+  slow_test "roll back database change" do
     active_auth = auth(:active)
     admin_auth = auth(:admin)
 
index 7a9f9176d335c02c1a7dc425cf4212dfb85e0ea5..a46a4d1bc29c0e25992f6424bb4f50b919020229 100644 (file)
@@ -110,7 +110,7 @@ class UserSessionsApiTest < ActionDispatch::IntegrationTest
         (repos.collect(&:name) +
          vm_links.collect { |link| link.properties['username'] }
          ).each do |name|
-          r = name.match /^(.{#{prefix.length}})(\d+)$/
+          r = name.match(/^(.{#{prefix.length}})(\d+)$/)
           assert_not_nil r, "#{name.inspect} does not match {prefix}\\d+"
           assert_equal(prefix, r[1],
                        "#{name.inspect} was not {#{prefix.inspect} plus digits}")
index 99ca7ac960b3dac2fc4e0f9b82d89949afd6e76c..a9993b2fc318b4abe6ca0669f64a614250608733 100644 (file)
@@ -1,5 +1,4 @@
 require 'test_helper'
-require 'websocket_runner'
 require 'oj'
 require 'database_cleaner'
 
@@ -16,35 +15,92 @@ class WebsocketTest < ActionDispatch::IntegrationTest
     DatabaseCleaner.clean
   end
 
-  def ws_helper (token = nil, timeout = true)
+  def self.startup
+    s = TCPServer.new('0.0.0.0', 0)
+    @@port = s.addr[1]
+    s.close
+    @@pidfile = "tmp/pids/passenger.#{@@port}.pid"
+    DatabaseCleaner.start
+    Dir.chdir(Rails.root) do |apidir|
+      # Only passenger seems to be able to run the websockets server
+      # successfully.
+      _system('passenger', 'start', '-d',
+              "-p#{@@port}",
+              "--log-file", "/dev/stderr",
+              "--pid-file", @@pidfile)
+      timeout = Time.now.tv_sec + 10
+      begin
+        sleep 0.2
+        begin
+          server_pid = IO.read(@@pidfile).to_i
+          good_pid = (server_pid > 0) && (Process.kill(0, server_pid) rescue false)
+        rescue Errno::ENOENT
+          good_pid = false
+        end
+      end while (not good_pid) and (Time.now.tv_sec < timeout)
+      if not good_pid
+        raise RuntimeError, "could not find API server Rails pid"
+      end
+      STDERR.puts "Started websocket server on port #{@@port} with pid #{server_pid}"
+    end
+  end
+
+  def self.shutdown
+    Dir.chdir(Rails.root) do
+      _system('passenger', 'stop', "-p#{@@port}",
+              "--pid-file", @@pidfile)
+    end
+    # DatabaseCleaner leaves the database empty. Prefer to leave it full.
+    dc = DatabaseController.new
+    dc.define_singleton_method :render do |*args| end
+    dc.reset
+  end
+
+  def self._system(*cmd)
+    Bundler.with_clean_env do
+      env = {
+        'ARVADOS_WEBSOCKETS' => 'ws-only',
+        'RAILS_ENV' => 'test',
+      }
+      if not system(env, *cmd)
+        raise RuntimeError, "Command exited #{$?}: #{cmd.inspect}"
+      end
+    end
+  end
+
+  def ws_helper(token: nil, timeout: 8)
     opened = false
     close_status = nil
     too_long = false
 
-    EM.run {
+    EM.run do
       if token
-        ws = Faye::WebSocket::Client.new("ws://localhost:#{WEBSOCKET_PORT}/websocket?api_token=#{api_client_authorizations(token).api_token}")
+        ws = Faye::WebSocket::Client.new("ws://localhost:#{@@port}/websocket?api_token=#{api_client_authorizations(token).api_token}")
       else
-        ws = Faye::WebSocket::Client.new("ws://localhost:#{WEBSOCKET_PORT}/websocket")
+        ws = Faye::WebSocket::Client.new("ws://localhost:#{@@port}/websocket")
       end
 
       ws.on :open do |event|
         opened = true
         if timeout
-          EM::Timer.new 8 do
+          EM::Timer.new(timeout) do
             too_long = true if close_status.nil?
             EM.stop_event_loop
           end
         end
       end
 
+      ws.on :error do |event|
+        STDERR.puts "websocket client error: #{event.inspect}"
+      end
+
       ws.on :close do |event|
         close_status = [:close, event.code, event.reason]
         EM.stop_event_loop
       end
 
       yield ws
-    }
+    end
 
     assert opened, "Should have opened web socket"
     assert (not too_long), "Test took too long"
@@ -65,11 +121,10 @@ class WebsocketTest < ActionDispatch::IntegrationTest
     assert_equal 401, status
   end
 
-
   test "connect, subscribe and get response" do
     status = nil
 
-    ws_helper :active do |ws|
+    ws_helper(token: :active) do |ws|
       ws.on :open do |event|
         ws.send ({method: 'subscribe'}.to_json)
       end
@@ -91,7 +146,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
 
     authorize_with :active
 
-    ws_helper :active do |ws|
+    ws_helper(token: :active) do |ws|
       ws.on :open do |event|
         ws.send ({method: 'subscribe'}.to_json)
       end
@@ -128,7 +183,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
 
     authorize_with :active
 
-    ws_helper :active do |ws|
+    ws_helper(token: :active) do |ws|
       ws.on :open do |event|
         ws.send ({method: 'subscribe'}.to_json)
       end
@@ -168,7 +223,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
 
     authorize_with :active
 
-    ws_helper :active do |ws|
+    ws_helper(token: :active) do |ws|
       ws.on :open do |event|
         ws.send ({method: 'subscribe', filters: [['object_uuid', 'is_a', 'arvados#human']]}.to_json)
       end
@@ -206,7 +261,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
 
     authorize_with :active
 
-    ws_helper :active do |ws|
+    ws_helper(token: :active) do |ws|
       ws.on :open do |event|
         ws.send ({method: 'subscribe', filters: [['object_uuid', 'is_a', 'arvados#human']]}.to_json)
         ws.send ({method: 'subscribe', filters: [['object_uuid', 'is_a', 'arvados#specimen']]}.to_json)
@@ -251,7 +306,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
 
     authorize_with :active
 
-    ws_helper :active do |ws|
+    ws_helper(token: :active) do |ws|
       ws.on :open do |event|
         ws.send ({method: 'subscribe', filters: [['object_uuid', 'is_a', 'arvados#trait'], ['event_type', '=', 'update']]}.to_json)
       end
@@ -282,8 +337,6 @@ class WebsocketTest < ActionDispatch::IntegrationTest
 
   test "connect, subscribe, ask events starting at seq num" do
     state = 1
-    human = nil
-    human_ev_uuid = nil
 
     authorize_with :active
 
@@ -291,7 +344,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
     l1 = nil
     l2 = nil
 
-    ws_helper :active do |ws|
+    ws_helper(token: :active) do |ws|
       ws.on :open do |event|
         ws.send ({method: 'subscribe', last_log_id: lastid}.to_json)
       end
@@ -322,16 +375,14 @@ class WebsocketTest < ActionDispatch::IntegrationTest
     assert_equal expect_next_logs[1].object_uuid, l2
   end
 
-  test "connect, subscribe, get event, unsubscribe" do
-    slow_test
+  slow_test "connect, subscribe, get event, unsubscribe" do
     state = 1
     spec = nil
     spec_ev_uuid = nil
-    filter_id = nil
 
     authorize_with :active
 
-    ws_helper :active, false do |ws|
+    ws_helper(token: :active, timeout: false) do |ws|
       ws.on :open do |event|
         ws.send ({method: 'subscribe'}.to_json)
         EM::Timer.new 3 do
@@ -372,15 +423,14 @@ class WebsocketTest < ActionDispatch::IntegrationTest
     assert_equal spec.uuid, spec_ev_uuid
   end
 
-  test "connect, subscribe, get event, unsubscribe with filter" do
-    slow_test
+  slow_test "connect, subscribe, get event, unsubscribe with filter" do
     state = 1
     spec = nil
     spec_ev_uuid = nil
 
     authorize_with :active
 
-    ws_helper :active, false do |ws|
+    ws_helper(token: :active, timeout: false) do |ws|
       ws.on :open do |event|
         ws.send ({method: 'subscribe', filters: [['object_uuid', 'is_a', 'arvados#human']]}.to_json)
         EM::Timer.new 6 do
@@ -422,8 +472,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
   end
 
 
-  test "connect, subscribe, get event, try to unsubscribe with bogus filter" do
-    slow_test
+  slow_test "connect, subscribe, get event, try to unsubscribe with bogus filter" do
     state = 1
     spec = nil
     spec_ev_uuid = nil
@@ -432,7 +481,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
 
     authorize_with :active
 
-    ws_helper :active do |ws|
+    ws_helper(token: :active) do |ws|
       ws.on :open do |event|
         ws.send ({method: 'subscribe'}.to_json)
       end
@@ -473,13 +522,10 @@ class WebsocketTest < ActionDispatch::IntegrationTest
     assert_equal human.uuid, human_ev_uuid
   end
 
-
-
-  test "connected, not subscribed, no event" do
-    slow_test
+  slow_test "connected, not subscribed, no event" do
     authorize_with :active
 
-    ws_helper :active, false do |ws|
+    ws_helper(token: :active, timeout: false) do |ws|
       ws.on :open do |event|
         EM::Timer.new 1 do
           Specimen.create
@@ -496,13 +542,12 @@ class WebsocketTest < ActionDispatch::IntegrationTest
     end
   end
 
-  test "connected, not authorized to see event" do
-    slow_test
+  slow_test "connected, not authorized to see event" do
     state = 1
 
     authorize_with :admin
 
-    ws_helper :active, false do |ws|
+    ws_helper(token: :active, timeout: false) do |ws|
       ws.on :open do |event|
         ws.send ({method: 'subscribe'}.to_json)
 
@@ -530,7 +575,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
   test "connect, try bogus method" do
     status = nil
 
-    ws_helper :active do |ws|
+    ws_helper(token: :active) do |ws|
       ws.on :open do |event|
         ws.send ({method: 'frobnabble'}.to_json)
       end
@@ -548,7 +593,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
   test "connect, missing method" do
     status = nil
 
-    ws_helper :active do |ws|
+    ws_helper(token: :active) do |ws|
       ws.on :open do |event|
         ws.send ({fizzbuzz: 'frobnabble'}.to_json)
       end
@@ -566,7 +611,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
   test "connect, send malformed request" do
     status = nil
 
-    ws_helper :active do |ws|
+    ws_helper(token: :active) do |ws|
       ws.on :open do |event|
         ws.send '<XML4EVER></XML4EVER>'
       end
@@ -587,7 +632,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
 
     authorize_with :active
 
-    ws_helper :active do |ws|
+    ws_helper(token: :active) do |ws|
       ws.on :open do |event|
         (1..17).each do |i|
           ws.send ({method: 'subscribe', filters: [['object_uuid', '=', i]]}.to_json)
@@ -612,15 +657,14 @@ class WebsocketTest < ActionDispatch::IntegrationTest
 
   end
 
-  test "connect, subscribe, lots of events" do
-    slow_test
+  slow_test "connect, subscribe, lots of events" do
     state = 1
     event_count = 0
     log_start = Log.order(:id).last.id
 
     authorize_with :active
 
-    ws_helper :active, false do |ws|
+    ws_helper(token: :active, timeout: false) do |ws|
       EM::Timer.new 45 do
         # Needs a longer timeout than the default
         ws.close
@@ -637,7 +681,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
           assert_equal 200, d["status"]
           ActiveRecord::Base.transaction do
             (1..202).each do
-              spec = Specimen.create
+              Specimen.create
             end
           end
           state = 2
@@ -658,12 +702,10 @@ class WebsocketTest < ActionDispatch::IntegrationTest
 
   test "connect, subscribe with invalid filter" do
     state = 1
-    human = nil
-    human_ev_uuid = nil
 
     authorize_with :active
 
-    ws_helper :active do |ws|
+    ws_helper(token: :active) do |ws|
       ws.on :open do |event|
         # test that #6451 is fixed (invalid filter crashes websockets)
         ws.send ({method: 'subscribe', filters: [['object_blarg', 'is_a', 'arvados#human']]}.to_json)
@@ -675,7 +717,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
         when 1
           assert_equal 200, d["status"]
           Specimen.create
-          human = Human.create
+          Human.create
           state = 2
         when 2
           assert_equal 500, d["status"]
index 417ddf6bee8eeee96d8e960099ccc227cee4950a..86bc2397c5309e98310f659d4077d7d0f8a33184 100644 (file)
@@ -22,7 +22,7 @@ end
 
 require File.expand_path('../../config/environment', __FILE__)
 require 'rails/test_help'
-require 'mocha/mini_test'
+require 'mocha'
 
 module ArvadosTestSupport
   def json_response
@@ -84,7 +84,7 @@ class ActiveSupport::TestCase
   def restore_configuration
     # Restore configuration settings changed during tests
     $application_config.each do |k,v|
-      if k.match /^[^.]*$/
+      if k.match(/^[^.]*$/)
         Rails.configuration.send (k + '='), v
       end
     end
@@ -112,9 +112,18 @@ class ActiveSupport::TestCase
                              "HTTP_AUTHORIZATION" => "OAuth2 #{t}")
   end
 
-  def slow_test
-    skip "RAILS_TEST_SHORT is set" unless (ENV['RAILS_TEST_SHORT'] || '').empty?
+  def self.skip_slow_tests?
+    !(ENV['RAILS_TEST_SHORT'] || '').empty?
   end
+
+  def self.skip(*args, &block)
+  end
+
+  def self.slow_test(name, &block)
+    define_method(name, block) unless skip_slow_tests?
+  end
+
+  alias_method :skip, :omit
 end
 
 class ActionController::TestCase
@@ -135,6 +144,21 @@ class ActionController::TestCase
       super action, *args
     end
   end
+
+  def self.suite
+    s = super
+    def s.run(*args)
+      @test_case.startup()
+      begin
+        super
+      ensure
+        @test_case.shutdown()
+      end
+    end
+    s
+  end
+  def self.startup; end
+  def self.shutdown; end
 end
 
 class ActionDispatch::IntegrationTest
index 3e9b16757dbf8af4750ea21402b78a752202c54e..2e585051ad56ce0c2f6ecaf8fe6f53d5c3e9795d 100644 (file)
@@ -20,16 +20,19 @@ class AppVersionTest < ActiveSupport::TestCase
     end
   end
 
-  test 'override with configuration' do
+  test 'override with configuration "foobar"' do
     Rails.configuration.source_version = 'foobar'
     assert_equal 'foobar', AppVersion.hash
+  end
+
+  test 'override with configuration false' do
     Rails.configuration.source_version = false
     assert_not_equal 'foobar', AppVersion.hash
   end
 
   test 'override with file' do
     path = Rails.root.join 'git-commit.version'
-    assert(!File.exists?(path),
+    assert(!File.exist?(path),
            "Packaged version file found in source tree: #{path}")
     begin
       File.open(path, 'w') do |f|
index 5a661785bd7bef903747b5890bb135b8bacaebf1..25801bb9b6f6d38b65bd4947ba9b62a045c5c913 100644 (file)
@@ -25,7 +25,7 @@ class AuthorizedKeyTest < ActiveSupport::TestCase
       ak2 = AuthorizedKey.new(name: "bar", public_key: TEST_KEY, authorized_user_uuid: u2.uuid)
       refute ak2.valid?
       refute ak2.save
-      assert_match /already exists/, ak2.errors.full_messages.to_s
+      assert_match(/already exists/, ak2.errors.full_messages.to_s)
     end
   end
 
index 1c6e4f2db2c0dfafcde3d7fba519fe8e4431cf6b..57beddbe6d6247bf9f96d21103e6b2640c42732a 100644 (file)
@@ -17,8 +17,7 @@ class CollectionModelPerformanceTest < ActiveSupport::TestCase
   end
 
   # "crrud" == "create read render update delete", not a typo
-  test "crrud cycle for a collection with a big manifest)" do
-    slow_test
+  slow_test "crrud cycle for a collection with a big manifest)" do
     bigmanifest = time_block 'make example' do
       make_manifest(streams: 100,
                     files_per_stream: 100,
@@ -44,7 +43,7 @@ class CollectionModelPerformanceTest < ActiveSupport::TestCase
         c.signed_manifest_text
       end
       time_block 'sign + render' do
-        resp = c.as_api_response(nil)
+        c.as_api_response(nil)
       end
       loc = Blob.sign_locator(Digest::MD5.hexdigest('foo') + '+3',
                               api_token: api_token(:active))
index 91568927ae37654117da4dec7c811882818d0add..1c85a716e3bd34bf269448a455d5d6bef0938408 100644 (file)
@@ -28,7 +28,7 @@ class CollectionTest < ActiveSupport::TestCase
       c = create_collection "f\xc8o", Encoding::UTF_8
       assert !c.valid?
       assert_equal [:manifest_text], c.errors.messages.keys
-      assert_match /UTF-8/, c.errors.messages[:manifest_text].first
+      assert_match(/UTF-8/, c.errors.messages[:manifest_text].first)
     end
   end
 
@@ -37,7 +37,7 @@ class CollectionTest < ActiveSupport::TestCase
       c = create_collection "f\xc8o", Encoding::ASCII_8BIT
       assert !c.valid?
       assert_equal [:manifest_text], c.errors.messages.keys
-      assert_match /UTF-8/, c.errors.messages[:manifest_text].first
+      assert_match(/UTF-8/, c.errors.messages[:manifest_text].first)
     end
   end
 
@@ -107,11 +107,11 @@ class CollectionTest < ActiveSupport::TestCase
       assert c.valid?
       created_file_names = c.file_names
       assert created_file_names
-      assert_match /foo.txt/, c.file_names
+      assert_match(/foo.txt/, c.file_names)
 
       c.update_attribute 'manifest_text', ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo2.txt\n"
       assert_not_equal created_file_names, c.file_names
-      assert_match /foo2.txt/, c.file_names
+      assert_match(/foo2.txt/, c.file_names)
     end
   end
 
@@ -134,11 +134,11 @@ class CollectionTest < ActiveSupport::TestCase
 
         assert c.valid?
         assert c.file_names
-        assert_match /veryverylongfilename0000000000001.txt/, c.file_names
-        assert_match /veryverylongfilename0000000000002.txt/, c.file_names
+        assert_match(/veryverylongfilename0000000000001.txt/, c.file_names)
+        assert_match(/veryverylongfilename0000000000002.txt/, c.file_names)
         if not allow_truncate
-          assert_match /veryverylastfilename/, c.file_names
-          assert_match /laststreamname/, c.file_names
+          assert_match(/veryverylastfilename/, c.file_names)
+          assert_match(/laststreamname/, c.file_names)
         end
       end
     end
index b57c23b4538dee4339a0a27630a1ad36e7e575a6..a8594169fb3c7567aa95a00922be6350ac41ac52 100644 (file)
@@ -18,7 +18,7 @@ class CommitTest < ActiveSupport::TestCase
   test 'find_commit_range does not bypass permissions' do
     authorize_with :inactive
     assert_raises ArgumentError do
-      c = Commit.find_commit_range 'foo', nil, 'master', []
+      Commit.find_commit_range 'foo', nil, 'master', []
     end
   end
 
@@ -68,10 +68,10 @@ class CommitTest < ActiveSupport::TestCase
     authorize_with :active
     gitint = "git --git-dir #{Rails.configuration.git_internal_dir}"
     IO.read("|#{gitint} tag -d testtag 2>/dev/null") # "no such tag", fine
-    assert_match /^fatal: /, IO.read("|#{gitint} show testtag 2>&1")
+    assert_match(/^fatal: /, IO.read("|#{gitint} show testtag 2>&1"))
     refute $?.success?
     Commit.tag_in_internal_repository 'active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', 'testtag'
-    assert_match /^commit 31ce37f/, IO.read("|#{gitint} show testtag")
+    assert_match(/^commit 31ce37f/, IO.read("|#{gitint} show testtag"))
     assert $?.success?
   end
 
@@ -183,34 +183,34 @@ class CommitTest < ActiveSupport::TestCase
     Dir.mktmpdir do |touchdir|
       # invalid input to maximum
       a = Commit.find_commit_range('active/foo', nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
+      assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
       assert_equal [], a
 
       # invalid input to maximum
       a = Commit.find_commit_range('active/foo', nil, "$(uname>#{touchdir}/uh_oh)", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
+      assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
       assert_equal [], a
 
       # invalid input to minimum
       a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
+      assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
       assert_equal [], a
 
       # invalid input to minimum
       a = Commit.find_commit_range('active/foo', "$(uname>#{touchdir}/uh_oh)", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
+      assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
       assert_equal [], a
 
       # invalid input to 'excludes'
       # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
       a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["4fe459abe02d9b365932b8f5dc419439ab4e2577 ; touch #{touchdir}/uh_oh"])
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
+      assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
       assert_equal [], a
 
       # invalid input to 'excludes'
       # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
       a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["$(uname>#{touchdir}/uh_oh)"])
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
+      assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
       assert_equal [], a
     end
   end
index 4fd9f8e75931eff3d3f66e8fcddd4e65253cec7f..a1755332853e227acee600d990b1866a9d22b443 100644 (file)
@@ -102,8 +102,9 @@ class ContainerTest < ActiveSupport::TestCase
   test "find_reusable method should select higher priority queued container" do
     set_user_from_auth :active
     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment:{"var" => "queued"}})
-    c_low_priority, _ = minimal_new(common_attrs.merge({priority:1}))
-    c_high_priority, _ = minimal_new(common_attrs.merge({priority:2}))
+    c_low_priority, _ = minimal_new(common_attrs.merge({use_existing:false, priority:1}))
+    c_high_priority, _ = minimal_new(common_attrs.merge({use_existing:false, priority:2}))
+    assert_not_equal c_low_priority.uuid, c_high_priority.uuid
     assert_equal Container::Queued, c_low_priority.state
     assert_equal Container::Queued, c_high_priority.state
     reused = Container.find_reusable(common_attrs)
@@ -121,8 +122,9 @@ class ContainerTest < ActiveSupport::TestCase
       output: '1f4b0bc7583c2a7f9102c395f4ffc5e3+45'
     }
 
-    c_older, _ = minimal_new(common_attrs)
-    c_recent, _ = minimal_new(common_attrs)
+    c_older, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    c_recent, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    assert_not_equal c_older.uuid, c_recent.uuid
 
     set_user_from_auth :dispatch1
     c_older.update_attributes!({state: Container::Locked})
@@ -151,6 +153,7 @@ class ContainerTest < ActiveSupport::TestCase
 
     c_output1 = Container.create common_attrs
     c_output2 = Container.create common_attrs
+    assert_not_equal c_output1.uuid, c_output2.uuid
 
     cr = ContainerRequest.new common_attrs
     cr.state = ContainerRequest::Committed
@@ -177,9 +180,11 @@ class ContainerTest < ActiveSupport::TestCase
   test "find_reusable method should select running container by start date" do
     set_user_from_auth :active
     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "running"}})
-    c_slower, _ = minimal_new(common_attrs)
-    c_faster_started_first, _ = minimal_new(common_attrs)
-    c_faster_started_second, _ = minimal_new(common_attrs)
+    c_slower, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    c_faster_started_first, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    c_faster_started_second, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    # Confirm the 3 container UUIDs are different.
+    assert_equal 3, [c_slower.uuid, c_faster_started_first.uuid, c_faster_started_second.uuid].uniq.length
     set_user_from_auth :dispatch1
     c_slower.update_attributes!({state: Container::Locked})
     c_slower.update_attributes!({state: Container::Running,
@@ -199,9 +204,11 @@ class ContainerTest < ActiveSupport::TestCase
   test "find_reusable method should select running container by progress" do
     set_user_from_auth :active
     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "running2"}})
-    c_slower, _ = minimal_new(common_attrs)
-    c_faster_started_first, _ = minimal_new(common_attrs)
-    c_faster_started_second, _ = minimal_new(common_attrs)
+    c_slower, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    c_faster_started_first, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    c_faster_started_second, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    # Confirm the 3 container UUIDs are different.
+    assert_equal 3, [c_slower.uuid, c_faster_started_first.uuid, c_faster_started_second.uuid].uniq.length
     set_user_from_auth :dispatch1
     c_slower.update_attributes!({state: Container::Locked})
     c_slower.update_attributes!({state: Container::Running,
@@ -221,9 +228,11 @@ class ContainerTest < ActiveSupport::TestCase
   test "find_reusable method should select locked container most likely to start sooner" do
     set_user_from_auth :active
     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "locked"}})
-    c_low_priority, _ = minimal_new(common_attrs)
-    c_high_priority_older, _ = minimal_new(common_attrs)
-    c_high_priority_newer, _ = minimal_new(common_attrs)
+    c_low_priority, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    c_high_priority_older, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    c_high_priority_newer, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    # Confirm the 3 container UUIDs are different.
+    assert_equal 3, [c_low_priority.uuid, c_high_priority_older.uuid, c_high_priority_newer.uuid].uniq.length
     set_user_from_auth :dispatch1
     c_low_priority.update_attributes!({state: Container::Locked,
                                        priority: 1})
@@ -239,8 +248,9 @@ class ContainerTest < ActiveSupport::TestCase
   test "find_reusable method should select running over failed container" do
     set_user_from_auth :active
     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "failed_vs_running"}})
-    c_failed, _ = minimal_new(common_attrs)
-    c_running, _ = minimal_new(common_attrs)
+    c_failed, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    c_running, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    assert_not_equal c_failed.uuid, c_running.uuid
     set_user_from_auth :dispatch1
     c_failed.update_attributes!({state: Container::Locked})
     c_failed.update_attributes!({state: Container::Running})
@@ -259,8 +269,9 @@ class ContainerTest < ActiveSupport::TestCase
   test "find_reusable method should select complete over running container" do
     set_user_from_auth :active
     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "completed_vs_running"}})
-    c_completed, _ = minimal_new(common_attrs)
-    c_running, _ = minimal_new(common_attrs)
+    c_completed, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    c_running, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    assert_not_equal c_completed.uuid, c_running.uuid
     set_user_from_auth :dispatch1
     c_completed.update_attributes!({state: Container::Locked})
     c_completed.update_attributes!({state: Container::Running})
@@ -279,8 +290,9 @@ class ContainerTest < ActiveSupport::TestCase
   test "find_reusable method should select running over locked container" do
     set_user_from_auth :active
     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "running_vs_locked"}})
-    c_locked, _ = minimal_new(common_attrs)
-    c_running, _ = minimal_new(common_attrs)
+    c_locked, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    c_running, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    assert_not_equal c_running.uuid, c_locked.uuid
     set_user_from_auth :dispatch1
     c_locked.update_attributes!({state: Container::Locked})
     c_running.update_attributes!({state: Container::Locked})
@@ -294,8 +306,9 @@ class ContainerTest < ActiveSupport::TestCase
   test "find_reusable method should select locked over queued container" do
     set_user_from_auth :active
     common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "running_vs_locked"}})
-    c_locked, _ = minimal_new(common_attrs)
-    c_queued, _ = minimal_new(common_attrs)
+    c_locked, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    c_queued, _ = minimal_new(common_attrs.merge({use_existing: false}))
+    assert_not_equal c_queued.uuid, c_locked.uuid
     set_user_from_auth :dispatch1
     c_locked.update_attributes!({state: Container::Locked})
     reused = Container.find_reusable(common_attrs)
index c390b3213e94120eea370fdaaeec25500c7f5f3f..1f5847aea6e22b5bea9fe2e51d7f9354861145dd 100644 (file)
@@ -60,13 +60,13 @@ class FailJobsTest < ActiveSupport::TestCase
 
   test 'command line help' do
     cmd = Rails.root.join('script/fail-jobs.rb').to_s
-    assert_match /Options:.*--before=/m, File.popen([cmd, '--help']).read
+    assert_match(/Options:.*--before=/m, File.popen([cmd, '--help']).read)
   end
 
   protected
 
   def assert_end_states
-    @job.values.map &:reload
+    @job.values.map(&:reload)
     assert_equal 'Failed', @job[:before_reboot].state
     assert_equal false, @job[:before_reboot].running
     assert_equal false, @job[:before_reboot].success
index 3da2c836ed61579fe1d5058e4c367db2c9dd3eab..1f80ea50f2742167446917aea2fb0b0e6d8f2052 100644 (file)
@@ -185,7 +185,7 @@ class JobTest < ActiveSupport::TestCase
       # Ensure valid_attrs doesn't produce errors -- otherwise we will
       # not know whether errors reported below are actually caused by
       # invalid_attrs.
-      dummy = Job.create! job_attrs
+      Job.create! job_attrs
 
       job = Job.create job_attrs(invalid_attrs)
       assert_raises(ActiveRecord::RecordInvalid, ArgumentError,
@@ -223,7 +223,7 @@ class JobTest < ActiveSupport::TestCase
 
       parameters.each do |parameter|
         expectations = parameter[2]
-        if parameter[1] == 'use_current_user_uuid'
+        if 'use_current_user_uuid' == parameter[1]
           parameter[1] = Thread.current[:user].uuid
         end
 
@@ -411,7 +411,7 @@ class JobTest < ActiveSupport::TestCase
     }
     assert_raises(ActiveRecord::RecordInvalid,
                   "created job with a collection uuid in script_parameters") do
-      job = Job.create!(job_attrs(bad_params))
+      Job.create!(job_attrs(bad_params))
     end
   end
 
index efbb189c9f8f0e50f1db262a8cd3bac7342a0b03..92976e0053580eafa45707febfe753fd08f43c9b 100644 (file)
@@ -263,7 +263,7 @@ class LogTest < ActiveSupport::TestCase
     # appear too, but only if they are _not_ listed in known_logs
     # (i.e., we do not make any assertions about logs not mentioned in
     # either "known" or "expected".)
-    result_ids = result.collect &:id
+    result_ids = result.collect(&:id)
     expected_logs.each do |want|
       assert_includes result_ids, logs(want).id
     end
index 6eb1df56d129f0279c2e86323b865d13fd09817c..df8c22baf4ad04a6a20b97c0f9c551a335a9fb96 100644 (file)
@@ -33,7 +33,7 @@ class NodeTest < ActiveSupport::TestCase
     conffile = Rails.root.join 'tmp', 'compute65535.conf'
     File.unlink conffile rescue nil
     assert Node.dns_server_update 'compute65535', '127.0.0.1'
-    assert_match /\"1\.0\.0\.127\.in-addr\.arpa\. IN PTR compute65535\.zzzzz\.arvadosapi\.com\"/, IO.read(conffile)
+    assert_match(/\"1\.0\.0\.127\.in-addr\.arpa\. IN PTR compute65535\.zzzzz\.arvadosapi\.com\"/, IO.read(conffile))
     File.unlink conffile
   end
 
index c7f9776ac6a98c36f8ab8a3e002773c03603ea6a..6fcc3165289acbccc2884b52887146fae71317b5 100644 (file)
@@ -27,7 +27,7 @@ class OwnerTest < ActiveSupport::TestCase
     test "create object with non-existent #{o_class} owner" do
       assert_raises(ActiveRecord::RecordInvalid,
                     "create should fail with random owner_uuid") do
-        i = Specimen.create!(owner_uuid: o_class.generate_uuid)
+        Specimen.create!(owner_uuid: o_class.generate_uuid)
       end
 
       i = Specimen.create(owner_uuid: o_class.generate_uuid)
@@ -89,7 +89,6 @@ class OwnerTest < ActiveSupport::TestCase
       o = eval ofixt
       assert_equal(true, Specimen.where(owner_uuid: o.uuid).any?,
                    "need something to be owned by #{o.uuid} for this test")
-      old_uuid = o.uuid
       new_uuid = o.uuid.sub(/..........$/, rand(2**256).to_s(36)[0..9])
       assert(!o.update_attributes(uuid: new_uuid),
              "should not change uuid of #{ofixt} that owns objects")
index 79fc1f29c7bf46a2f1efb3ae8f9dd298f0222015..df110549989c124f1f92f2aa46fd2a3baca5c459 100644 (file)
@@ -125,10 +125,10 @@ class PermissionTest < ActiveSupport::TestCase
     sp_grp = Group.create!
     sp = Specimen.create!(owner_uuid: sp_grp.uuid)
 
-    manage_perm = Link.create!(link_class: 'permission',
-                               name: 'can_manage',
-                               tail_uuid: owner_grp.uuid,
-                               head_uuid: sp_grp.uuid)
+    Link.create!(link_class: 'permission',
+                 name: 'can_manage',
+                 tail_uuid: owner_grp.uuid,
+                 head_uuid: sp_grp.uuid)
 
     # active user owns owner_grp, which has can_manage permission on sp_grp
     # user should be able to add permissions on sp.
@@ -137,14 +137,12 @@ class PermissionTest < ActiveSupport::TestCase
                             head_uuid: sp.uuid,
                             link_class: 'permission',
                             name: 'can_write')
-    test_uuid = test_perm.uuid
     assert test_perm.save, "could not save new permission on target object"
     assert test_perm.destroy, "could not delete new permission on target object"
   end
 
-  # TODO(twp): fix bug #3091, which should fix this test.
-  test "can_manage permission on a non-group object" do
-    skip
+  # bug #3091
+  skip "can_manage permission on a non-group object" do
     set_user_from_auth :admin
 
     ob = Specimen.create!
index 93354f8b1edd31d2b332a71cd6d7de28bcc490e6..fc40d06b2cb1b4c6000fbab44ed92d8232ca014f 100644 (file)
@@ -91,7 +91,6 @@ class PipelineInstanceTest < ActiveSupport::TestCase
     component2 = {'script_parameters' => {"something_else" => "xxxad4b39ca5a924e481008009d94e32+210", "input_missing" => {"required" => true}}}
     pi.components['first'] = component1
     pi.components['second'] = component2
-    components = pi.components
 
     Thread.current[:user] = users(:admin)
     pi.update_attribute 'components', pi.components
index a269078b736b92fcebf8a3bb1fbca4c52516c166..1381c8f538ff3bbf9de5596c2dce784123318f1c 100644 (file)
@@ -60,7 +60,7 @@ class SalvageCollectionTest < ActiveSupport::TestCase
     updated_name = updated_src_collection.name
     assert_equal true, updated_name.include?(src_collection.name)
 
-    match = updated_name.match /^test collection.*salvaged data at (.*)\)$/
+    match = updated_name.match(/^test collection.*salvaged data at (.*)\)$/)
     assert_not_nil match
     assert_not_nil match[1]
     assert_empty updated_src_collection.manifest_text
@@ -68,7 +68,7 @@ class SalvageCollectionTest < ActiveSupport::TestCase
     # match[1] is the uuid of the new collection created from src_collection's salvaged data
     # use this to get the new collection and verify
     new_collection = Collection.find_by_uuid match[1]
-    match = new_collection.name.match /^salvaged from (.*),.*/
+    match = new_collection.name.match(/^salvaged from (.*),.*/)
     assert_not_nil match
     assert_equal src_collection.uuid, match[1]
 
@@ -80,7 +80,7 @@ class SalvageCollectionTest < ActiveSupport::TestCase
   end
 
   test "salvage collection with no uuid required argument" do
-    e = assert_raises RuntimeError do
+    assert_raises RuntimeError do
       salvage_collection nil
     end
   end
@@ -107,7 +107,7 @@ class SalvageCollectionTest < ActiveSupport::TestCase
     e = assert_raises RuntimeError do
       salvage_collection collections('user_agreement').uuid
     end
-    assert_match /Error during arv-put: pid \d+ exit \d+ \(cmd was \"arv-put .*\"\)/, e.message
+    assert_match(/Error during arv-put: pid \d+ exit \d+ \(cmd was \"arv-put .*\"\)/, e.message)
   end
 
   # This test uses BAD_MANIFEST, which has the following flaws:
@@ -146,7 +146,7 @@ class SalvageCollectionTest < ActiveSupport::TestCase
     updated_name = updated_src_collection.name
     assert_equal true, updated_name.include?(src_collection.name)
 
-    match = updated_name.match /^test collection.*salvaged data at (.*)\)$/
+    match = updated_name.match(/^test collection.*salvaged data at (.*)\)$/)
     assert_not_nil match
     assert_not_nil match[1]
     assert_empty updated_src_collection.manifest_text
@@ -154,7 +154,7 @@ class SalvageCollectionTest < ActiveSupport::TestCase
     # match[1] is the uuid of the new collection created from src_collection's salvaged data
     # use this to get the new collection and verify
     new_collection = Collection.find_by_uuid match[1]
-    match = new_collection.name.match /^salvaged from (.*),.*/
+    match = new_collection.name.match(/^salvaged from (.*),.*/)
     assert_not_nil match
     assert_equal src_collection.uuid, match[1]
     # verify the new collection's manifest includes the bad locators
index 4df6cc0b369a67cd99907cb128979a4cf1c21956..0d66a4b384fe6916e82fec7b13fcc7b0ac2f8ee9 100644 (file)
@@ -136,7 +136,6 @@ class UserTest < ActiveSupport::TestCase
   test "admin can't clear username when user owns repositories" do
     set_user_from_auth :admin
     user = users(:active)
-    start_username = user.username
     user.username = nil
     assert_not_allowed { user.save }
     refute_empty(user.errors[:username])
diff --git a/services/api/test/websocket_runner.rb b/services/api/test/websocket_runner.rb
deleted file mode 100644 (file)
index be32a0f..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-require 'bundler'
-require 'socket'
-
-$ARV_API_SERVER_DIR = File.expand_path('../..', __FILE__)
-
-s = TCPServer.new('0.0.0.0', 0)
-WEBSOCKET_PORT = s.addr[1]
-s.close
-SERVER_PID_PATH = "tmp/pids/passenger.#{WEBSOCKET_PORT}.pid"
-
-class WebsocketTestRunner < MiniTest::Unit
-  def _system(*cmd)
-    Bundler.with_clean_env do
-      if not system({'ARVADOS_WEBSOCKETS' => 'ws-only', 'RAILS_ENV' => 'test'}, *cmd)
-        raise RuntimeError, "Command failed with exit status #{$?}: #{cmd.inspect}"
-      end
-    end
-  end
-
-  def _run(args=[])
-    server_pid = Dir.chdir($ARV_API_SERVER_DIR) do |apidir|
-      # Only passenger seems to be able to run the websockets server successfully.
-      _system('passenger', 'start', '-d', "-p#{WEBSOCKET_PORT}")
-      timeout = Time.now.tv_sec + 10
-      begin
-        sleep 0.2
-        begin
-          server_pid = IO.read(SERVER_PID_PATH).to_i
-          good_pid = (server_pid > 0) and (Process.kill(0, pid) rescue false)
-        rescue Errno::ENOENT
-          good_pid = false
-        end
-      end while (not good_pid) and (Time.now.tv_sec < timeout)
-      if not good_pid
-        raise RuntimeError, "could not find API server Rails pid"
-      end
-      server_pid
-    end
-    begin
-      super(args)
-    ensure
-      Dir.chdir($ARV_API_SERVER_DIR) do
-        _system('passenger', 'stop', "-p#{WEBSOCKET_PORT}")
-      end
-      # DatabaseCleaner leaves the database empty. Prefer to leave it full.
-      dc = DatabaseController.new
-      dc.define_singleton_method :render do |*args| end
-      dc.reset
-    end
-  end
-end
-
-MiniTest::Unit.runner = WebsocketTestRunner.new
index 3c4f281912842a0ceedb6df409aa61e80fa38fa2..e768b509cd6f2c69bb529d9e9a90e2d923e422ce 100644 (file)
@@ -195,6 +195,7 @@ func submit(dispatcher *dispatch.Dispatcher,
                b, _ := ioutil.ReadAll(stdoutReader)
                stdoutReader.Close()
                stdoutChan <- b
+               close(stdoutChan)
        }()
 
        stderrChan := make(chan []byte)
@@ -202,6 +203,7 @@ func submit(dispatcher *dispatch.Dispatcher,
                b, _ := ioutil.ReadAll(stderrReader)
                stderrReader.Close()
                stderrChan <- b
+               close(stderrChan)
        }()
 
        // Send a tiny script on stdin to execute the crunch-run command
@@ -209,13 +211,10 @@ func submit(dispatcher *dispatch.Dispatcher,
        io.WriteString(stdinWriter, execScript(append(crunchRunCommand, container.UUID)))
        stdinWriter.Close()
 
-       err = cmd.Wait()
-
        stdoutMsg := <-stdoutChan
        stderrmsg := <-stderrChan
 
-       close(stdoutChan)
-       close(stderrChan)
+       err = cmd.Wait()
 
        if err != nil {
                submitErr = fmt.Errorf("Container submission failed: %v: %v (stderr: %q)", cmd.Args, err, stderrmsg)
@@ -302,12 +301,13 @@ func run(dispatcher *dispatch.Dispatcher,
 
                                // Mutex between squeue sync and running sbatch or scancel.
                                squeueUpdater.SlurmLock.Lock()
-                               err := scancelCmd(container).Run()
+                               cmd := scancelCmd(container)
+                               msg, err := cmd.CombinedOutput()
                                squeueUpdater.SlurmLock.Unlock()
 
                                if err != nil {
-                                       log.Printf("Error stopping container %s with scancel: %v",
-                                               container.UUID, err)
+                                       log.Printf("Error stopping container %s with %v %v: %v %v",
+                                               container.UUID, cmd.Path, cmd.Args, err, string(msg))
                                        if squeueUpdater.CheckSqueue(container.UUID) {
                                                log.Printf("Container %s is still in squeue after scancel.",
                                                        container.UUID)
index fbea48e548a59f78718cb0afa419b5a84a1cd89b..40461031e214486f1dbed9feeda6aae0d97fb76c 100644 (file)
@@ -81,7 +81,8 @@ func (s *TestSuite) TestIntegrationCancel(c *C) {
                return exec.Command("echo")
        }
 
-       container := s.integrationTest(c, func() *exec.Cmd { return exec.Command("echo", "zzzzz-dz642-queuedcontainer") },
+       container := s.integrationTest(c,
+               func() *exec.Cmd { return exec.Command("echo", "zzzzz-dz642-queuedcontainer") },
                []string(nil),
                func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
                        dispatcher.UpdateState(container.UUID, dispatch.Running)
@@ -134,7 +135,7 @@ func (s *TestSuite) integrationTest(c *C,
        }(squeueCmd)
        squeueCmd = newSqueueCmd
 
-       // There should be no queued containers now
+       // There should be one queued container
        params := arvadosclient.Dict{
                "filters": [][]string{{"state", "=", "Queued"}},
        }
index 61decde61c4bd61d0a92e96bde20ff0c82780f57..45d06c8c1e27f12f2bc6e83ca262ab2ff7f08a53 100644 (file)
@@ -2,6 +2,8 @@ package main
 
 import (
        "bufio"
+       "io"
+       "io/ioutil"
        "log"
        "os/exec"
        "sync"
@@ -45,31 +47,49 @@ func (squeue *Squeue) RunSqueue() {
                log.Printf("Error creating stdout pipe for squeue: %v", err)
                return
        }
+
+       stderrReader, err := cmd.StderrPipe()
+       if err != nil {
+               log.Printf("Error creating stderr pipe for squeue: %v", err)
+               return
+       }
+
        err = cmd.Start()
        if err != nil {
                log.Printf("Error running squeue: %v", err)
                return
        }
+
+       stderrChan := make(chan []byte)
+       go func() {
+               b, _ := ioutil.ReadAll(stderrReader)
+               stderrChan <- b
+               close(stderrChan)
+       }()
+
        scanner := bufio.NewScanner(sq)
        for scanner.Scan() {
                newSqueueContents = append(newSqueueContents, scanner.Text())
        }
-       if err := scanner.Err(); err != nil {
-               cmd.Wait()
-               log.Printf("Error reading from squeue pipe: %v", err)
-               return
-       }
+       io.Copy(ioutil.Discard, sq)
+
+       stderrmsg := <-stderrChan
 
        err = cmd.Wait()
+
+       if scanner.Err() != nil {
+               log.Printf("Error reading from squeue pipe: %v", err)
+       }
        if err != nil {
-               log.Printf("Error running squeue: %v", err)
-               return
+               log.Printf("Error running %v %v: %v %q", cmd.Path, cmd.Args, err, string(stderrmsg))
        }
 
-       squeue.squeueCond.L.Lock()
-       squeue.squeueContents = newSqueueContents
-       squeue.squeueCond.Broadcast()
-       squeue.squeueCond.L.Unlock()
+       if scanner.Err() == nil && err == nil {
+               squeue.squeueCond.L.Lock()
+               squeue.squeueContents = newSqueueContents
+               squeue.squeueCond.Broadcast()
+               squeue.squeueCond.L.Unlock()
+       }
 }
 
 // CheckSqueue checks if a given container UUID is in the slurm queue.  This
index ade40c6b03a4d4a98812172aab31da5173453c4e..2e475c72e64842b15aa6c7dee88446bc0056b802 100644 (file)
@@ -800,6 +800,7 @@ func (runner *ContainerRunner) Run() (err error) {
                checkErr(err)
 
                if runner.finalState == "Queued" {
+                       runner.CrunchLog.Close()
                        runner.UpdateContainerFinal()
                        return
                }
@@ -832,6 +833,7 @@ func (runner *ContainerRunner) Run() (err error) {
        // check for and/or load image
        err = runner.LoadImage()
        if err != nil {
+               runner.finalState = "Cancelled"
                err = fmt.Errorf("While loading container image: %v", err)
                return
        }
@@ -839,6 +841,7 @@ func (runner *ContainerRunner) Run() (err error) {
        // set up FUSE mount and binds
        err = runner.SetupMounts()
        if err != nil {
+               runner.finalState = "Cancelled"
                err = fmt.Errorf("While setting up mounts: %v", err)
                return
        }
diff --git a/services/datamanager/collection/collection.go b/services/datamanager/collection/collection.go
deleted file mode 100644 (file)
index 05e7a5f..0000000
+++ /dev/null
@@ -1,408 +0,0 @@
-// Deals with parsing Collection responses from API Server.
-
-package collection
-
-import (
-       "flag"
-       "fmt"
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
-       "git.curoverse.com/arvados.git/sdk/go/logger"
-       "git.curoverse.com/arvados.git/sdk/go/manifest"
-       "git.curoverse.com/arvados.git/sdk/go/util"
-       "log"
-       "os"
-       "runtime/pprof"
-       "time"
-)
-
-var (
-       HeapProfileFilename string
-)
-
-// Collection representation
-type Collection struct {
-       UUID              string
-       OwnerUUID         string
-       ReplicationLevel  int
-       BlockDigestToSize map[blockdigest.BlockDigest]int
-       TotalSize         int
-}
-
-// ReadCollections holds information about collections from API server
-type ReadCollections struct {
-       ReadAllCollections        bool
-       UUIDToCollection          map[string]Collection
-       OwnerToCollectionSize     map[string]int
-       BlockToDesiredReplication map[blockdigest.DigestWithSize]int
-       CollectionUUIDToIndex     map[string]int
-       CollectionIndexToUUID     []string
-       BlockToCollectionIndices  map[blockdigest.DigestWithSize][]int
-}
-
-// GetCollectionsParams params
-type GetCollectionsParams struct {
-       Client    *arvadosclient.ArvadosClient
-       Logger    *logger.Logger
-       BatchSize int
-}
-
-// SdkCollectionInfo holds collection info from api
-type SdkCollectionInfo struct {
-       UUID               string    `json:"uuid"`
-       OwnerUUID          string    `json:"owner_uuid"`
-       ReplicationDesired int       `json:"replication_desired"`
-       ModifiedAt         time.Time `json:"modified_at"`
-       ManifestText       string    `json:"manifest_text"`
-}
-
-// SdkCollectionList lists collections from api
-type SdkCollectionList struct {
-       ItemsAvailable int                 `json:"items_available"`
-       Items          []SdkCollectionInfo `json:"items"`
-}
-
-func init() {
-       flag.StringVar(&HeapProfileFilename,
-               "heap-profile",
-               "",
-               "File to write the heap profiles to. Leave blank to skip profiling.")
-}
-
-// WriteHeapProfile writes the heap profile to a file for later review.
-// Since a file is expected to only contain a single heap profile this
-// function overwrites the previously written profile, so it is safe
-// to call multiple times in a single run.
-// Otherwise we would see cumulative numbers as explained here:
-// https://groups.google.com/d/msg/golang-nuts/ZyHciRglQYc/2nh4Ndu2fZcJ
-func WriteHeapProfile() error {
-       if HeapProfileFilename != "" {
-               heapProfile, err := os.Create(HeapProfileFilename)
-               if err != nil {
-                       return err
-               }
-
-               defer heapProfile.Close()
-
-               err = pprof.WriteHeapProfile(heapProfile)
-               return err
-       }
-
-       return nil
-}
-
-// GetCollectionsAndSummarize gets collections from api and summarizes
-func GetCollectionsAndSummarize(params GetCollectionsParams) (results ReadCollections, err error) {
-       results, err = GetCollections(params)
-       if err != nil {
-               return
-       }
-
-       results.Summarize(params.Logger)
-
-       log.Printf("Uuid to Size used: %v", results.OwnerToCollectionSize)
-       log.Printf("Read and processed %d collections",
-               len(results.UUIDToCollection))
-
-       // TODO(misha): Add a "readonly" flag. If we're in readonly mode,
-       // lots of behaviors can become warnings (and obviously we can't
-       // write anything).
-       // if !readCollections.ReadAllCollections {
-       //      log.Fatalf("Did not read all collections")
-       // }
-
-       return
-}
-
-// GetCollections gets collections from api
-func GetCollections(params GetCollectionsParams) (results ReadCollections, err error) {
-       if &params.Client == nil {
-               err = fmt.Errorf("params.Client passed to GetCollections() should " +
-                       "contain a valid ArvadosClient, but instead it is nil.")
-               return
-       }
-
-       fieldsWanted := []string{"manifest_text",
-               "owner_uuid",
-               "uuid",
-               "replication_desired",
-               "modified_at"}
-
-       sdkParams := arvadosclient.Dict{
-               "select":  fieldsWanted,
-               "order":   []string{"modified_at ASC", "uuid ASC"},
-               "filters": [][]string{{"modified_at", ">=", "1900-01-01T00:00:00Z"}},
-               "offset":  0}
-
-       if params.BatchSize > 0 {
-               sdkParams["limit"] = params.BatchSize
-       }
-
-       var defaultReplicationLevel int
-       {
-               var value interface{}
-               value, err = params.Client.Discovery("defaultCollectionReplication")
-               if err != nil {
-                       return
-               }
-
-               defaultReplicationLevel = int(value.(float64))
-               if defaultReplicationLevel <= 0 {
-                       err = fmt.Errorf("Default collection replication returned by arvados SDK "+
-                               "should be a positive integer but instead it was %d.",
-                               defaultReplicationLevel)
-                       return
-               }
-       }
-
-       initialNumberOfCollectionsAvailable, err :=
-               util.NumberItemsAvailable(params.Client, "collections")
-       if err != nil {
-               return
-       }
-       // Include a 1% margin for collections added while we're reading so
-       // that we don't have to grow the map in most cases.
-       maxExpectedCollections := int(
-               float64(initialNumberOfCollectionsAvailable) * 1.01)
-       results.UUIDToCollection = make(map[string]Collection, maxExpectedCollections)
-
-       if params.Logger != nil {
-               params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       collectionInfo := logger.GetOrCreateMap(p, "collection_info")
-                       collectionInfo["num_collections_at_start"] = initialNumberOfCollectionsAvailable
-                       collectionInfo["batch_size"] = params.BatchSize
-                       collectionInfo["default_replication_level"] = defaultReplicationLevel
-               })
-       }
-
-       // These values are just for getting the loop to run the first time,
-       // afterwards they'll be set to real values.
-       remainingCollections := 1
-       var totalCollections int
-       var previousTotalCollections int
-       for remainingCollections > 0 {
-               // We're still finding new collections
-
-               // Write the heap profile for examining memory usage
-               err = WriteHeapProfile()
-               if err != nil {
-                       return
-               }
-
-               // Get next batch of collections.
-               var collections SdkCollectionList
-               err = params.Client.List("collections", sdkParams, &collections)
-               if err != nil {
-                       return
-               }
-               batchCollections := len(collections.Items)
-
-               // We must always have at least one collection in the batch
-               if batchCollections < 1 {
-                       err = fmt.Errorf("API query returned no collections for %+v", sdkParams)
-                       return
-               }
-
-               // Update count of remaining collections
-               remainingCollections = collections.ItemsAvailable - sdkParams["offset"].(int) - batchCollections
-
-               // Process collection and update our date filter.
-               latestModificationDate, maxManifestSize, totalManifestSize, err := ProcessCollections(params.Logger,
-                       collections.Items,
-                       defaultReplicationLevel,
-                       results.UUIDToCollection)
-               if err != nil {
-                       return results, err
-               }
-               if sdkParams["filters"].([][]string)[0][2] != latestModificationDate.Format(time.RFC3339) {
-                       sdkParams["filters"].([][]string)[0][2] = latestModificationDate.Format(time.RFC3339)
-                       sdkParams["offset"] = 0
-               } else {
-                       sdkParams["offset"] = sdkParams["offset"].(int) + batchCollections
-               }
-
-               // update counts
-               previousTotalCollections = totalCollections
-               totalCollections = len(results.UUIDToCollection)
-
-               log.Printf("%d collections read, %d (%d new) in last batch, "+
-                       "%d remaining, "+
-                       "%s latest modified date, %.0f %d %d avg,max,total manifest size",
-                       totalCollections,
-                       batchCollections,
-                       totalCollections-previousTotalCollections,
-                       remainingCollections,
-                       sdkParams["filters"].([][]string)[0][2],
-                       float32(totalManifestSize)/float32(totalCollections),
-                       maxManifestSize, totalManifestSize)
-
-               if params.Logger != nil {
-                       params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                               collectionInfo := logger.GetOrCreateMap(p, "collection_info")
-                               collectionInfo["collections_read"] = totalCollections
-                               collectionInfo["latest_modified_date_seen"] = sdkParams["filters"].([][]string)[0][2]
-                               collectionInfo["total_manifest_size"] = totalManifestSize
-                               collectionInfo["max_manifest_size"] = maxManifestSize
-                       })
-               }
-       }
-
-       // Make one final API request to verify that we have processed all collections available up to the latest modification date
-       var collections SdkCollectionList
-       sdkParams["filters"].([][]string)[0][1] = "<="
-       sdkParams["limit"] = 0
-       err = params.Client.List("collections", sdkParams, &collections)
-       if err != nil {
-               return
-       }
-       finalNumberOfCollectionsAvailable, err :=
-               util.NumberItemsAvailable(params.Client, "collections")
-       if err != nil {
-               return
-       }
-       if totalCollections < finalNumberOfCollectionsAvailable {
-               err = fmt.Errorf("API server indicates a total of %d collections "+
-                       "available up to %v, but we only retrieved %d. "+
-                       "Refusing to continue as this could indicate an "+
-                       "otherwise undetected failure.",
-                       finalNumberOfCollectionsAvailable,
-                       sdkParams["filters"].([][]string)[0][2],
-                       totalCollections)
-               return
-       }
-
-       // Write the heap profile for examining memory usage
-       err = WriteHeapProfile()
-
-       return
-}
-
-// StrCopy returns a newly allocated string.
-// It is useful to copy slices so that the garbage collector can reuse
-// the memory of the longer strings they came from.
-func StrCopy(s string) string {
-       return string([]byte(s))
-}
-
-// ProcessCollections read from api server
-func ProcessCollections(arvLogger *logger.Logger,
-       receivedCollections []SdkCollectionInfo,
-       defaultReplicationLevel int,
-       UUIDToCollection map[string]Collection,
-) (
-       latestModificationDate time.Time,
-       maxManifestSize, totalManifestSize uint64,
-       err error,
-) {
-       for _, sdkCollection := range receivedCollections {
-               collection := Collection{UUID: StrCopy(sdkCollection.UUID),
-                       OwnerUUID:         StrCopy(sdkCollection.OwnerUUID),
-                       ReplicationLevel:  sdkCollection.ReplicationDesired,
-                       BlockDigestToSize: make(map[blockdigest.BlockDigest]int)}
-
-               if sdkCollection.ModifiedAt.IsZero() {
-                       err = fmt.Errorf(
-                               "Arvados SDK collection returned with unexpected zero "+
-                                       "modification date. This probably means that either we failed to "+
-                                       "parse the modification date or the API server has changed how "+
-                                       "it returns modification dates: %+v",
-                               collection)
-                       return
-               }
-
-               if sdkCollection.ModifiedAt.After(latestModificationDate) {
-                       latestModificationDate = sdkCollection.ModifiedAt
-               }
-
-               if collection.ReplicationLevel == 0 {
-                       collection.ReplicationLevel = defaultReplicationLevel
-               }
-
-               manifest := manifest.Manifest{Text: sdkCollection.ManifestText}
-               manifestSize := uint64(len(sdkCollection.ManifestText))
-
-               if _, alreadySeen := UUIDToCollection[collection.UUID]; !alreadySeen {
-                       totalManifestSize += manifestSize
-               }
-               if manifestSize > maxManifestSize {
-                       maxManifestSize = manifestSize
-               }
-
-               blockChannel := manifest.BlockIterWithDuplicates()
-               for block := range blockChannel {
-                       if storedSize, stored := collection.BlockDigestToSize[block.Digest]; stored && storedSize != block.Size {
-                               log.Printf(
-                                       "Collection %s contains multiple sizes (%d and %d) for block %s",
-                                       collection.UUID,
-                                       storedSize,
-                                       block.Size,
-                                       block.Digest)
-                       }
-                       collection.BlockDigestToSize[block.Digest] = block.Size
-               }
-               if manifest.Err != nil {
-                       err = manifest.Err
-                       return
-               }
-
-               collection.TotalSize = 0
-               for _, size := range collection.BlockDigestToSize {
-                       collection.TotalSize += size
-               }
-               UUIDToCollection[collection.UUID] = collection
-
-               // Clear out all the manifest strings that we don't need anymore.
-               // These hopefully form the bulk of our memory usage.
-               manifest.Text = ""
-               sdkCollection.ManifestText = ""
-       }
-
-       return
-}
-
-// Summarize the collections read
-func (readCollections *ReadCollections) Summarize(arvLogger *logger.Logger) {
-       readCollections.OwnerToCollectionSize = make(map[string]int)
-       readCollections.BlockToDesiredReplication = make(map[blockdigest.DigestWithSize]int)
-       numCollections := len(readCollections.UUIDToCollection)
-       readCollections.CollectionUUIDToIndex = make(map[string]int, numCollections)
-       readCollections.CollectionIndexToUUID = make([]string, 0, numCollections)
-       readCollections.BlockToCollectionIndices = make(map[blockdigest.DigestWithSize][]int)
-
-       for _, coll := range readCollections.UUIDToCollection {
-               collectionIndex := len(readCollections.CollectionIndexToUUID)
-               readCollections.CollectionIndexToUUID =
-                       append(readCollections.CollectionIndexToUUID, coll.UUID)
-               readCollections.CollectionUUIDToIndex[coll.UUID] = collectionIndex
-
-               readCollections.OwnerToCollectionSize[coll.OwnerUUID] =
-                       readCollections.OwnerToCollectionSize[coll.OwnerUUID] + coll.TotalSize
-
-               for block, size := range coll.BlockDigestToSize {
-                       locator := blockdigest.DigestWithSize{Digest: block, Size: uint32(size)}
-                       readCollections.BlockToCollectionIndices[locator] =
-                               append(readCollections.BlockToCollectionIndices[locator],
-                                       collectionIndex)
-                       storedReplication := readCollections.BlockToDesiredReplication[locator]
-                       if coll.ReplicationLevel > storedReplication {
-                               readCollections.BlockToDesiredReplication[locator] =
-                                       coll.ReplicationLevel
-                       }
-               }
-       }
-
-       if arvLogger != nil {
-               arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       collectionInfo := logger.GetOrCreateMap(p, "collection_info")
-                       // Since maps are shallow copied, we run a risk of concurrent
-                       // updates here. By copying results.OwnerToCollectionSize into
-                       // the log, we're assuming that it won't be updated.
-                       collectionInfo["owner_to_collection_size"] =
-                               readCollections.OwnerToCollectionSize
-                       collectionInfo["distinct_blocks_named"] =
-                               len(readCollections.BlockToDesiredReplication)
-               })
-       }
-
-       return
-}
diff --git a/services/datamanager/collection/collection_test.go b/services/datamanager/collection/collection_test.go
deleted file mode 100644 (file)
index 1bf6a89..0000000
+++ /dev/null
@@ -1,202 +0,0 @@
-package collection
-
-import (
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
-       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
-       . "gopkg.in/check.v1"
-       "net/http"
-       "net/http/httptest"
-       "testing"
-)
-
-// Gocheck boilerplate
-func Test(t *testing.T) {
-       TestingT(t)
-}
-
-type MySuite struct{}
-
-var _ = Suite(&MySuite{})
-
-// This captures the result we expect from
-// ReadCollections.Summarize().  Because CollectionUUIDToIndex is
-// indeterminate, we replace BlockToCollectionIndices with
-// BlockToCollectionUuids.
-type ExpectedSummary struct {
-       OwnerToCollectionSize     map[string]int
-       BlockToDesiredReplication map[blockdigest.DigestWithSize]int
-       BlockToCollectionUuids    map[blockdigest.DigestWithSize][]string
-}
-
-func CompareSummarizedReadCollections(c *C,
-       summarized ReadCollections,
-       expected ExpectedSummary) {
-
-       c.Assert(summarized.OwnerToCollectionSize, DeepEquals,
-               expected.OwnerToCollectionSize)
-
-       c.Assert(summarized.BlockToDesiredReplication, DeepEquals,
-               expected.BlockToDesiredReplication)
-
-       summarizedBlockToCollectionUuids :=
-               make(map[blockdigest.DigestWithSize]map[string]struct{})
-       for digest, indices := range summarized.BlockToCollectionIndices {
-               uuidSet := make(map[string]struct{})
-               summarizedBlockToCollectionUuids[digest] = uuidSet
-               for _, index := range indices {
-                       uuidSet[summarized.CollectionIndexToUUID[index]] = struct{}{}
-               }
-       }
-
-       expectedBlockToCollectionUuids :=
-               make(map[blockdigest.DigestWithSize]map[string]struct{})
-       for digest, uuidSlice := range expected.BlockToCollectionUuids {
-               uuidSet := make(map[string]struct{})
-               expectedBlockToCollectionUuids[digest] = uuidSet
-               for _, uuid := range uuidSlice {
-                       uuidSet[uuid] = struct{}{}
-               }
-       }
-
-       c.Assert(summarizedBlockToCollectionUuids, DeepEquals,
-               expectedBlockToCollectionUuids)
-}
-
-func (s *MySuite) TestSummarizeSimple(checker *C) {
-       rc := MakeTestReadCollections([]TestCollectionSpec{{
-               ReplicationLevel: 5,
-               Blocks:           []int{1, 2},
-       }})
-
-       rc.Summarize(nil)
-
-       c := rc.UUIDToCollection["col0"]
-
-       blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
-       blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
-
-       expected := ExpectedSummary{
-               OwnerToCollectionSize:     map[string]int{c.OwnerUUID: c.TotalSize},
-               BlockToDesiredReplication: map[blockdigest.DigestWithSize]int{blockDigest1: 5, blockDigest2: 5},
-               BlockToCollectionUuids:    map[blockdigest.DigestWithSize][]string{blockDigest1: {c.UUID}, blockDigest2: {c.UUID}},
-       }
-
-       CompareSummarizedReadCollections(checker, rc, expected)
-}
-
-func (s *MySuite) TestSummarizeOverlapping(checker *C) {
-       rc := MakeTestReadCollections([]TestCollectionSpec{
-               {
-                       ReplicationLevel: 5,
-                       Blocks:           []int{1, 2},
-               },
-               {
-                       ReplicationLevel: 8,
-                       Blocks:           []int{2, 3},
-               },
-       })
-
-       rc.Summarize(nil)
-
-       c0 := rc.UUIDToCollection["col0"]
-       c1 := rc.UUIDToCollection["col1"]
-
-       blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
-       blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
-       blockDigest3 := blockdigest.MakeTestDigestWithSize(3)
-
-       expected := ExpectedSummary{
-               OwnerToCollectionSize: map[string]int{
-                       c0.OwnerUUID: c0.TotalSize,
-                       c1.OwnerUUID: c1.TotalSize,
-               },
-               BlockToDesiredReplication: map[blockdigest.DigestWithSize]int{
-                       blockDigest1: 5,
-                       blockDigest2: 8,
-                       blockDigest3: 8,
-               },
-               BlockToCollectionUuids: map[blockdigest.DigestWithSize][]string{
-                       blockDigest1: {c0.UUID},
-                       blockDigest2: {c0.UUID, c1.UUID},
-                       blockDigest3: {c1.UUID},
-               },
-       }
-
-       CompareSummarizedReadCollections(checker, rc, expected)
-}
-
-type APITestData struct {
-       // path and response map
-       responses map[string]arvadostest.StubResponse
-
-       // expected error, if any
-       expectedError string
-}
-
-func (s *MySuite) TestGetCollectionsAndSummarize_DiscoveryError(c *C) {
-       testGetCollectionsAndSummarize(c,
-               APITestData{
-                       responses:     make(map[string]arvadostest.StubResponse),
-                       expectedError: "arvados API server error: 500.*",
-               })
-}
-
-func (s *MySuite) TestGetCollectionsAndSummarize_ApiErrorGetCollections(c *C) {
-       respMap := make(map[string]arvadostest.StubResponse)
-       respMap["/discovery/v1/apis/arvados/v1/rest"] = arvadostest.StubResponse{200, `{"defaultCollectionReplication":2}`}
-       respMap["/arvados/v1/collections"] = arvadostest.StubResponse{-1, ``}
-
-       testGetCollectionsAndSummarize(c,
-               APITestData{
-                       responses:     respMap,
-                       expectedError: "arvados API server error: 302.*",
-               })
-}
-
-func (s *MySuite) TestGetCollectionsAndSummarize_GetCollectionsBadStreamName(c *C) {
-       respMap := make(map[string]arvadostest.StubResponse)
-       respMap["/discovery/v1/apis/arvados/v1/rest"] = arvadostest.StubResponse{200, `{"defaultCollectionReplication":2}`}
-       respMap["/arvados/v1/collections"] = arvadostest.StubResponse{200, `{"items_available":1,"items":[{"modified_at":"2015-11-24T15:04:05Z","manifest_text":"badstreamname"}]}`}
-
-       testGetCollectionsAndSummarize(c,
-               APITestData{
-                       responses:     respMap,
-                       expectedError: "Invalid stream name: badstreamname",
-               })
-}
-
-func (s *MySuite) TestGetCollectionsAndSummarize_GetCollectionsBadFileToken(c *C) {
-       respMap := make(map[string]arvadostest.StubResponse)
-       respMap["/discovery/v1/apis/arvados/v1/rest"] = arvadostest.StubResponse{200, `{"defaultCollectionReplication":2}`}
-       respMap["/arvados/v1/collections"] = arvadostest.StubResponse{200, `{"items_available":1,"items":[{"modified_at":"2015-11-24T15:04:05Z","manifest_text":"./goodstream acbd18db4cc2f85cedef654fccc4a4d8+3 0:1:file1.txt file2.txt"}]}`}
-
-       testGetCollectionsAndSummarize(c,
-               APITestData{
-                       responses:     respMap,
-                       expectedError: "Invalid file token: file2.txt",
-               })
-}
-
-func testGetCollectionsAndSummarize(c *C, testData APITestData) {
-       apiStub := arvadostest.ServerStub{testData.responses}
-
-       api := httptest.NewServer(&apiStub)
-       defer api.Close()
-
-       arv := &arvadosclient.ArvadosClient{
-               Scheme:    "http",
-               ApiServer: api.URL[7:],
-               ApiToken:  "abc123",
-               Client:    &http.Client{Transport: &http.Transport{}},
-       }
-
-       // GetCollectionsAndSummarize
-       _, err := GetCollectionsAndSummarize(GetCollectionsParams{arv, nil, 10})
-
-       if testData.expectedError == "" {
-               c.Assert(err, IsNil)
-       } else {
-               c.Assert(err, ErrorMatches, testData.expectedError)
-       }
-}
diff --git a/services/datamanager/collection/testing.go b/services/datamanager/collection/testing.go
deleted file mode 100644 (file)
index 2238433..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-// Code used for testing only.
-
-package collection
-
-import (
-       "fmt"
-       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
-)
-
-// TestCollectionSpec with test blocks and desired replication level
-type TestCollectionSpec struct {
-       // The desired replication level
-       ReplicationLevel int
-       // Blocks this contains, represented by ints. Ints repeated will
-       // still only represent one block
-       Blocks []int
-}
-
-// MakeTestReadCollections creates a ReadCollections object for testing
-// based on the give specs. Only the ReadAllCollections and UUIDToCollection
-// fields are populated. To populate other fields call rc.Summarize().
-func MakeTestReadCollections(specs []TestCollectionSpec) (rc ReadCollections) {
-       rc = ReadCollections{
-               ReadAllCollections: true,
-               UUIDToCollection:   map[string]Collection{},
-       }
-
-       for i, spec := range specs {
-               c := Collection{
-                       UUID:              fmt.Sprintf("col%d", i),
-                       OwnerUUID:         fmt.Sprintf("owner%d", i),
-                       ReplicationLevel:  spec.ReplicationLevel,
-                       BlockDigestToSize: map[blockdigest.BlockDigest]int{},
-               }
-               rc.UUIDToCollection[c.UUID] = c
-               for _, j := range spec.Blocks {
-                       c.BlockDigestToSize[blockdigest.MakeTestBlockDigest(j)] = j
-               }
-               // We compute the size in a separate loop because the value
-               // computed in the above loop would be invalid if c.Blocks
-               // contained duplicates.
-               for _, size := range c.BlockDigestToSize {
-                       c.TotalSize += size
-               }
-       }
-       return
-}
-
-// CollectionIndicesForTesting returns a slice giving the collection
-// index of each collection that was passed in to MakeTestReadCollections.
-// rc.Summarize() must be called before this method, since Summarize()
-// assigns an index to each collection.
-func (rc ReadCollections) CollectionIndicesForTesting() (indices []int) {
-       // TODO(misha): Assert that rc.Summarize() has been called.
-       numCollections := len(rc.CollectionIndexToUUID)
-       indices = make([]int, numCollections)
-       for i := 0; i < numCollections; i++ {
-               indices[i] = rc.CollectionUUIDToIndex[fmt.Sprintf("col%d", i)]
-       }
-       return
-}
diff --git a/services/datamanager/datamanager.go b/services/datamanager/datamanager.go
deleted file mode 100644 (file)
index 5250d17..0000000
+++ /dev/null
@@ -1,220 +0,0 @@
-/* Keep Datamanager. Responsible for checking on and reporting on Keep Storage */
-
-package main
-
-import (
-       "errors"
-       "flag"
-       "fmt"
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
-       "git.curoverse.com/arvados.git/sdk/go/logger"
-       "git.curoverse.com/arvados.git/sdk/go/util"
-       "git.curoverse.com/arvados.git/services/datamanager/collection"
-       "git.curoverse.com/arvados.git/services/datamanager/keep"
-       "git.curoverse.com/arvados.git/services/datamanager/loggerutil"
-       "git.curoverse.com/arvados.git/services/datamanager/summary"
-       "log"
-       "time"
-)
-
-var (
-       logEventTypePrefix  string
-       logFrequencySeconds int
-       minutesBetweenRuns  int
-       collectionBatchSize int
-       dryRun              bool
-)
-
-func init() {
-       flag.StringVar(&logEventTypePrefix,
-               "log-event-type-prefix",
-               "experimental-data-manager",
-               "Prefix to use in the event_type of our arvados log entries. Set to empty to turn off logging")
-       flag.IntVar(&logFrequencySeconds,
-               "log-frequency-seconds",
-               20,
-               "How frequently we'll write log entries in seconds.")
-       flag.IntVar(&minutesBetweenRuns,
-               "minutes-between-runs",
-               0,
-               "How many minutes we wait between data manager runs. 0 means run once and exit.")
-       flag.IntVar(&collectionBatchSize,
-               "collection-batch-size",
-               1000,
-               "How many collections to request in each batch.")
-       flag.BoolVar(&dryRun,
-               "dry-run",
-               false,
-               "Perform a dry run. Log how many blocks would be deleted/moved, but do not issue any changes to keepstore.")
-}
-
-func main() {
-       flag.Parse()
-
-       if minutesBetweenRuns == 0 {
-               arv, err := arvadosclient.MakeArvadosClient()
-               if err != nil {
-                       loggerutil.FatalWithMessage(arvLogger, fmt.Sprintf("Error making arvados client: %v", err))
-               }
-               err = singlerun(arv)
-               if err != nil {
-                       loggerutil.FatalWithMessage(arvLogger, fmt.Sprintf("singlerun: %v", err))
-               }
-       } else {
-               waitTime := time.Minute * time.Duration(minutesBetweenRuns)
-               for {
-                       log.Println("Beginning Run")
-                       arv, err := arvadosclient.MakeArvadosClient()
-                       if err != nil {
-                               loggerutil.FatalWithMessage(arvLogger, fmt.Sprintf("Error making arvados client: %v", err))
-                       }
-                       err = singlerun(arv)
-                       if err != nil {
-                               log.Printf("singlerun: %v", err)
-                       }
-                       log.Printf("Sleeping for %d minutes", minutesBetweenRuns)
-                       time.Sleep(waitTime)
-               }
-       }
-}
-
-var arvLogger *logger.Logger
-
-func singlerun(arv *arvadosclient.ArvadosClient) error {
-       var err error
-       if isAdmin, err := util.UserIsAdmin(arv); err != nil {
-               return errors.New("Error verifying admin token: " + err.Error())
-       } else if !isAdmin {
-               return errors.New("Current user is not an admin. Datamanager requires a privileged token.")
-       }
-
-       if logEventTypePrefix != "" {
-               arvLogger, err = logger.NewLogger(logger.LoggerParams{
-                       Client:          arv,
-                       EventTypePrefix: logEventTypePrefix,
-                       WriteInterval:   time.Second * time.Duration(logFrequencySeconds)})
-       }
-
-       loggerutil.LogRunInfo(arvLogger)
-       if arvLogger != nil {
-               arvLogger.AddWriteHook(loggerutil.LogMemoryAlloc)
-       }
-
-       var (
-               dataFetcher     summary.DataFetcher
-               readCollections collection.ReadCollections
-               keepServerInfo  keep.ReadServers
-       )
-
-       if summary.ShouldReadData() {
-               dataFetcher = summary.ReadData
-       } else {
-               dataFetcher = BuildDataFetcher(arv)
-       }
-
-       err = dataFetcher(arvLogger, &readCollections, &keepServerInfo)
-       if err != nil {
-               return err
-       }
-
-       err = summary.MaybeWriteData(arvLogger, readCollections, keepServerInfo)
-       if err != nil {
-               return err
-       }
-
-       buckets := summary.BucketReplication(readCollections, keepServerInfo)
-       bucketCounts := buckets.Counts()
-
-       replicationSummary := buckets.SummarizeBuckets(readCollections)
-       replicationCounts := replicationSummary.ComputeCounts()
-
-       log.Printf("Blocks In Collections: %d, "+
-               "\nBlocks In Keep: %d.",
-               len(readCollections.BlockToDesiredReplication),
-               len(keepServerInfo.BlockToServers))
-       log.Println(replicationCounts.PrettyPrint())
-
-       log.Printf("Blocks Histogram:")
-       for _, rlbss := range bucketCounts {
-               log.Printf("%+v: %10d",
-                       rlbss.Levels,
-                       rlbss.Count)
-       }
-
-       kc, err := keepclient.MakeKeepClient(arv)
-       if err != nil {
-               return fmt.Errorf("Error setting up keep client %v", err.Error())
-       }
-
-       // Log that we're finished. We force the recording, since go will
-       // not wait for the write timer before exiting.
-       if arvLogger != nil {
-               defer arvLogger.FinalUpdate(func(p map[string]interface{}, e map[string]interface{}) {
-                       summaryInfo := logger.GetOrCreateMap(p, "summary_info")
-                       summaryInfo["block_replication_counts"] = bucketCounts
-                       summaryInfo["replication_summary"] = replicationCounts
-                       p["summary_info"] = summaryInfo
-
-                       p["run_info"].(map[string]interface{})["finished_at"] = time.Now()
-               })
-       }
-
-       pullServers := summary.ComputePullServers(kc,
-               &keepServerInfo,
-               readCollections.BlockToDesiredReplication,
-               replicationSummary.UnderReplicatedBlocks)
-
-       pullLists := summary.BuildPullLists(pullServers)
-
-       trashLists, trashErr := summary.BuildTrashLists(kc,
-               &keepServerInfo,
-               replicationSummary.KeepBlocksNotInCollections)
-
-       err = summary.WritePullLists(arvLogger, pullLists, dryRun)
-       if err != nil {
-               return err
-       }
-
-       if trashErr != nil {
-               return err
-       }
-       keep.SendTrashLists(arvLogger, kc, trashLists, dryRun)
-
-       return nil
-}
-
-// BuildDataFetcher returns a data fetcher that fetches data from remote servers.
-func BuildDataFetcher(arv *arvadosclient.ArvadosClient) summary.DataFetcher {
-       return func(
-               arvLogger *logger.Logger,
-               readCollections *collection.ReadCollections,
-               keepServerInfo *keep.ReadServers,
-       ) error {
-               collDone := make(chan struct{})
-               var collErr error
-               go func() {
-                       *readCollections, collErr = collection.GetCollectionsAndSummarize(
-                               collection.GetCollectionsParams{
-                                       Client:    arv,
-                                       Logger:    arvLogger,
-                                       BatchSize: collectionBatchSize})
-                       collDone <- struct{}{}
-               }()
-
-               var keepErr error
-               *keepServerInfo, keepErr = keep.GetKeepServersAndSummarize(
-                       keep.GetKeepServersParams{
-                               Client: arv,
-                               Logger: arvLogger,
-                               Limit:  1000})
-
-               <-collDone
-
-               // Return a nil error only if both parts succeeded.
-               if collErr != nil {
-                       return collErr
-               }
-               return keepErr
-       }
-}
diff --git a/services/datamanager/datamanager_test.go b/services/datamanager/datamanager_test.go
deleted file mode 100644 (file)
index 7a8fff5..0000000
+++ /dev/null
@@ -1,732 +0,0 @@
-package main
-
-import (
-       "encoding/json"
-       "fmt"
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
-       "git.curoverse.com/arvados.git/services/datamanager/collection"
-       "git.curoverse.com/arvados.git/services/datamanager/summary"
-       "io/ioutil"
-       "net/http"
-       "os"
-       "os/exec"
-       "path"
-       "regexp"
-       "strings"
-       "testing"
-       "time"
-)
-
-var arv *arvadosclient.ArvadosClient
-var keepClient *keepclient.KeepClient
-var keepServers []string
-
-func SetupDataManagerTest(t *testing.T) {
-       os.Setenv("ARVADOS_API_HOST_INSECURE", "true")
-
-       // start api and keep servers
-       arvadostest.ResetEnv()
-       arvadostest.StartAPI()
-       arvadostest.StartKeep(2, false)
-
-       var err error
-       arv, err = arvadosclient.MakeArvadosClient()
-       if err != nil {
-               t.Fatalf("Error making arvados client: %s", err)
-       }
-       arv.ApiToken = arvadostest.DataManagerToken
-
-       // keep client
-       keepClient = &keepclient.KeepClient{
-               Arvados:       arv,
-               Want_replicas: 2,
-               Client:        &http.Client{},
-       }
-
-       // discover keep services
-       if err = keepClient.DiscoverKeepServers(); err != nil {
-               t.Fatalf("Error discovering keep services: %s", err)
-       }
-       keepServers = []string{}
-       for _, host := range keepClient.LocalRoots() {
-               keepServers = append(keepServers, host)
-       }
-}
-
-func TearDownDataManagerTest(t *testing.T) {
-       arvadostest.StopKeep(2)
-       arvadostest.StopAPI()
-       summary.WriteDataTo = ""
-       collection.HeapProfileFilename = ""
-}
-
-func putBlock(t *testing.T, data string) string {
-       locator, _, err := keepClient.PutB([]byte(data))
-       if err != nil {
-               t.Fatalf("Error putting test data for %s %s %v", data, locator, err)
-       }
-       if locator == "" {
-               t.Fatalf("No locator found after putting test data")
-       }
-
-       splits := strings.Split(locator, "+")
-       return splits[0] + "+" + splits[1]
-}
-
-func getBlock(t *testing.T, locator string, data string) {
-       reader, blocklen, _, err := keepClient.Get(locator)
-       if err != nil {
-               t.Fatalf("Error getting test data in setup for %s %s %v", data, locator, err)
-       }
-       if reader == nil {
-               t.Fatalf("No reader found after putting test data")
-       }
-       if blocklen != int64(len(data)) {
-               t.Fatalf("blocklen %d did not match data len %d", blocklen, len(data))
-       }
-
-       all, err := ioutil.ReadAll(reader)
-       if string(all) != data {
-               t.Fatalf("Data read %s did not match expected data %s", string(all), data)
-       }
-}
-
-// Create a collection using arv-put
-func createCollection(t *testing.T, data string) string {
-       tempfile, err := ioutil.TempFile(os.TempDir(), "temp-test-file")
-       defer os.Remove(tempfile.Name())
-
-       _, err = tempfile.Write([]byte(data))
-       if err != nil {
-               t.Fatalf("Error writing to tempfile %v", err)
-       }
-
-       // arv-put
-       output, err := exec.Command("arv-put", "--use-filename", "test.txt", tempfile.Name()).Output()
-       if err != nil {
-               t.Fatalf("Error running arv-put %s", err)
-       }
-
-       uuid := string(output[0:27]) // trim terminating char
-       return uuid
-}
-
-// Get collection locator
-var locatorMatcher = regexp.MustCompile(`^([0-9a-f]{32})\+(\d*)(.*)$`)
-
-func getFirstLocatorFromCollection(t *testing.T, uuid string) string {
-       manifest := getCollection(t, uuid)["manifest_text"].(string)
-
-       locator := strings.Split(manifest, " ")[1]
-       match := locatorMatcher.FindStringSubmatch(locator)
-       if match == nil {
-               t.Fatalf("No locator found in collection manifest %s", manifest)
-       }
-
-       return match[1] + "+" + match[2]
-}
-
-func switchToken(t string) func() {
-       orig := arv.ApiToken
-       restore := func() {
-               arv.ApiToken = orig
-       }
-       arv.ApiToken = t
-       return restore
-}
-
-func getCollection(t *testing.T, uuid string) Dict {
-       defer switchToken(arvadostest.AdminToken)()
-
-       getback := make(Dict)
-       err := arv.Get("collections", uuid, nil, &getback)
-       if err != nil {
-               t.Fatalf("Error getting collection %s", err)
-       }
-       if getback["uuid"] != uuid {
-               t.Fatalf("Get collection uuid did not match original: $s, result: $s", uuid, getback["uuid"])
-       }
-
-       return getback
-}
-
-func updateCollection(t *testing.T, uuid string, paramName string, paramValue string) {
-       defer switchToken(arvadostest.AdminToken)()
-
-       err := arv.Update("collections", uuid, arvadosclient.Dict{
-               "collection": arvadosclient.Dict{
-                       paramName: paramValue,
-               },
-       }, &arvadosclient.Dict{})
-
-       if err != nil {
-               t.Fatalf("Error updating collection %s", err)
-       }
-}
-
-type Dict map[string]interface{}
-
-func deleteCollection(t *testing.T, uuid string) {
-       defer switchToken(arvadostest.AdminToken)()
-
-       getback := make(Dict)
-       err := arv.Delete("collections", uuid, nil, &getback)
-       if err != nil {
-               t.Fatalf("Error deleting collection %s", err)
-       }
-       if getback["uuid"] != uuid {
-               t.Fatalf("Delete collection uuid did not match original: $s, result: $s", uuid, getback["uuid"])
-       }
-}
-
-func dataManagerSingleRun(t *testing.T) {
-       err := singlerun(arv)
-       if err != nil {
-               t.Fatalf("Error during singlerun %s", err)
-       }
-}
-
-func getBlockIndexesForServer(t *testing.T, i int) []string {
-       var indexes []string
-
-       path := keepServers[i] + "/index"
-       client := http.Client{}
-       req, err := http.NewRequest("GET", path, nil)
-       req.Header.Add("Authorization", "OAuth2 "+arvadostest.DataManagerToken)
-       req.Header.Add("Content-Type", "application/octet-stream")
-       resp, err := client.Do(req)
-       defer resp.Body.Close()
-
-       if err != nil {
-               t.Fatalf("Error during %s %s", path, err)
-       }
-
-       body, err := ioutil.ReadAll(resp.Body)
-       if err != nil {
-               t.Fatalf("Error reading response from %s %s", path, err)
-       }
-
-       lines := strings.Split(string(body), "\n")
-       for _, line := range lines {
-               indexes = append(indexes, strings.Split(line, " ")...)
-       }
-
-       return indexes
-}
-
-func getBlockIndexes(t *testing.T) [][]string {
-       var indexes [][]string
-
-       for i := 0; i < len(keepServers); i++ {
-               indexes = append(indexes, getBlockIndexesForServer(t, i))
-       }
-       return indexes
-}
-
-func verifyBlocks(t *testing.T, notExpected []string, expected []string, minReplication int) {
-       blocks := getBlockIndexes(t)
-
-       for _, block := range notExpected {
-               for _, idx := range blocks {
-                       if valueInArray(block, idx) {
-                               t.Fatalf("Found unexpected block %s", block)
-                       }
-               }
-       }
-
-       for _, block := range expected {
-               nFound := 0
-               for _, idx := range blocks {
-                       if valueInArray(block, idx) {
-                               nFound++
-                       }
-               }
-               if nFound < minReplication {
-                       t.Fatalf("Found %d replicas of block %s, expected >= %d", nFound, block, minReplication)
-               }
-       }
-}
-
-func valueInArray(value string, list []string) bool {
-       for _, v := range list {
-               if value == v {
-                       return true
-               }
-       }
-       return false
-}
-
-// Test env uses two keep volumes. The volume names can be found by reading the files
-// ARVADOS_HOME/tmp/keep0.volume and ARVADOS_HOME/tmp/keep1.volume
-//
-// The keep volumes are of the dir structure: volumeN/subdir/locator
-func backdateBlocks(t *testing.T, oldUnusedBlockLocators []string) {
-       // First get rid of any size hints in the locators
-       var trimmedBlockLocators []string
-       for _, block := range oldUnusedBlockLocators {
-               trimmedBlockLocators = append(trimmedBlockLocators, strings.Split(block, "+")[0])
-       }
-
-       // Get the working dir so that we can read keep{n}.volume files
-       wd, err := os.Getwd()
-       if err != nil {
-               t.Fatalf("Error getting working dir %s", err)
-       }
-
-       // Now cycle through the two keep volumes
-       oldTime := time.Now().AddDate(0, -2, 0)
-       for i := 0; i < 2; i++ {
-               filename := fmt.Sprintf("%s/../../tmp/keep%d.volume", wd, i)
-               volumeDir, err := ioutil.ReadFile(filename)
-               if err != nil {
-                       t.Fatalf("Error reading keep volume file %s %s", filename, err)
-               }
-
-               // Read the keep volume dir structure
-               volumeContents, err := ioutil.ReadDir(string(volumeDir))
-               if err != nil {
-                       t.Fatalf("Error reading keep dir %s %s", string(volumeDir), err)
-               }
-
-               // Read each subdir for each of the keep volume dir
-               for _, subdir := range volumeContents {
-                       subdirName := fmt.Sprintf("%s/%s", volumeDir, subdir.Name())
-                       subdirContents, err := ioutil.ReadDir(string(subdirName))
-                       if err != nil {
-                               t.Fatalf("Error reading keep dir %s %s", string(subdirName), err)
-                       }
-
-                       // Now we got to the files. The files are names are the block locators
-                       for _, fileInfo := range subdirContents {
-                               blockName := fileInfo.Name()
-                               myname := fmt.Sprintf("%s/%s", subdirName, blockName)
-                               if valueInArray(blockName, trimmedBlockLocators) {
-                                       err = os.Chtimes(myname, oldTime, oldTime)
-                               }
-                       }
-               }
-       }
-}
-
-func getStatus(t *testing.T, path string) interface{} {
-       client := http.Client{}
-       req, err := http.NewRequest("GET", path, nil)
-       req.Header.Add("Authorization", "OAuth2 "+arvadostest.DataManagerToken)
-       req.Header.Add("Content-Type", "application/octet-stream")
-       resp, err := client.Do(req)
-       if err != nil {
-               t.Fatalf("Error during %s %s", path, err)
-       }
-       defer resp.Body.Close()
-
-       var s interface{}
-       json.NewDecoder(resp.Body).Decode(&s)
-
-       return s
-}
-
-// Wait until PullQueue and TrashQueue are empty on all keepServers.
-func waitUntilQueuesFinishWork(t *testing.T) {
-       for _, ks := range keepServers {
-               for done := false; !done; {
-                       time.Sleep(100 * time.Millisecond)
-                       s := getStatus(t, ks+"/status.json")
-                       for _, qName := range []string{"PullQueue", "TrashQueue"} {
-                               qStatus := s.(map[string]interface{})[qName].(map[string]interface{})
-                               if qStatus["Queued"].(float64)+qStatus["InProgress"].(float64) == 0 {
-                                       done = true
-                               }
-                       }
-               }
-       }
-}
-
-// Create some blocks and backdate some of them.
-// Also create some collections and delete some of them.
-// Verify block indexes.
-func TestPutAndGetBlocks(t *testing.T) {
-       defer TearDownDataManagerTest(t)
-       SetupDataManagerTest(t)
-
-       // Put some blocks which will be backdated later on
-       // The first one will also be used in a collection and hence should not be deleted when datamanager runs.
-       // The rest will be old and unreferenced and hence should be deleted when datamanager runs.
-       var oldUnusedBlockLocators []string
-       oldUnusedBlockData := "this block will have older mtime"
-       for i := 0; i < 5; i++ {
-               oldUnusedBlockLocators = append(oldUnusedBlockLocators, putBlock(t, fmt.Sprintf("%s%d", oldUnusedBlockData, i)))
-       }
-       for i := 0; i < 5; i++ {
-               getBlock(t, oldUnusedBlockLocators[i], fmt.Sprintf("%s%d", oldUnusedBlockData, i))
-       }
-
-       // The rest will be old and unreferenced and hence should be deleted when datamanager runs.
-       oldUsedBlockData := "this collection block will have older mtime"
-       oldUsedBlockLocator := putBlock(t, oldUsedBlockData)
-       getBlock(t, oldUsedBlockLocator, oldUsedBlockData)
-
-       // Put some more blocks which will not be backdated; hence they are still new, but not in any collection.
-       // Hence, even though unreferenced, these should not be deleted when datamanager runs.
-       var newBlockLocators []string
-       newBlockData := "this block is newer"
-       for i := 0; i < 5; i++ {
-               newBlockLocators = append(newBlockLocators, putBlock(t, fmt.Sprintf("%s%d", newBlockData, i)))
-       }
-       for i := 0; i < 5; i++ {
-               getBlock(t, newBlockLocators[i], fmt.Sprintf("%s%d", newBlockData, i))
-       }
-
-       // Create a collection that would be deleted later on
-       toBeDeletedCollectionUUID := createCollection(t, "some data for collection creation")
-       toBeDeletedCollectionLocator := getFirstLocatorFromCollection(t, toBeDeletedCollectionUUID)
-
-       // Create another collection that has the same data as the one of the old blocks
-       oldUsedBlockCollectionUUID := createCollection(t, oldUsedBlockData)
-       oldUsedBlockCollectionLocator := getFirstLocatorFromCollection(t, oldUsedBlockCollectionUUID)
-       if oldUsedBlockCollectionLocator != oldUsedBlockLocator {
-               t.Fatalf("Locator of the collection with the same data as old block is different %s", oldUsedBlockCollectionLocator)
-       }
-
-       // Create another collection whose replication level will be changed
-       replicationCollectionUUID := createCollection(t, "replication level on this collection will be reduced")
-       replicationCollectionLocator := getFirstLocatorFromCollection(t, replicationCollectionUUID)
-
-       // Create two collections with same data; one will be deleted later on
-       dataForTwoCollections := "one of these collections will be deleted"
-       oneOfTwoWithSameDataUUID := createCollection(t, dataForTwoCollections)
-       oneOfTwoWithSameDataLocator := getFirstLocatorFromCollection(t, oneOfTwoWithSameDataUUID)
-       secondOfTwoWithSameDataUUID := createCollection(t, dataForTwoCollections)
-       secondOfTwoWithSameDataLocator := getFirstLocatorFromCollection(t, secondOfTwoWithSameDataUUID)
-       if oneOfTwoWithSameDataLocator != secondOfTwoWithSameDataLocator {
-               t.Fatalf("Locators for both these collections expected to be same: %s %s", oneOfTwoWithSameDataLocator, secondOfTwoWithSameDataLocator)
-       }
-
-       // create collection with empty manifest text
-       emptyBlockLocator := putBlock(t, "")
-       emptyCollection := createCollection(t, "")
-
-       // Verify blocks before doing any backdating / deleting.
-       var expected []string
-       expected = append(expected, oldUnusedBlockLocators...)
-       expected = append(expected, newBlockLocators...)
-       expected = append(expected, toBeDeletedCollectionLocator)
-       expected = append(expected, replicationCollectionLocator)
-       expected = append(expected, oneOfTwoWithSameDataLocator)
-       expected = append(expected, secondOfTwoWithSameDataLocator)
-       expected = append(expected, emptyBlockLocator)
-
-       verifyBlocks(t, nil, expected, 2)
-
-       // Run datamanager in singlerun mode
-       dataManagerSingleRun(t)
-       waitUntilQueuesFinishWork(t)
-
-       verifyBlocks(t, nil, expected, 2)
-
-       // Backdate the to-be old blocks and delete the collections
-       backdateBlocks(t, oldUnusedBlockLocators)
-       deleteCollection(t, toBeDeletedCollectionUUID)
-       deleteCollection(t, secondOfTwoWithSameDataUUID)
-       backdateBlocks(t, []string{emptyBlockLocator})
-       deleteCollection(t, emptyCollection)
-
-       // Run data manager again
-       dataManagerSingleRun(t)
-       waitUntilQueuesFinishWork(t)
-
-       // Get block indexes and verify that all backdated blocks except the first one used in collection are not included.
-       expected = expected[:0]
-       expected = append(expected, oldUsedBlockLocator)
-       expected = append(expected, newBlockLocators...)
-       expected = append(expected, toBeDeletedCollectionLocator)
-       expected = append(expected, oneOfTwoWithSameDataLocator)
-       expected = append(expected, secondOfTwoWithSameDataLocator)
-       expected = append(expected, emptyBlockLocator) // even when unreferenced, this remains
-
-       verifyBlocks(t, oldUnusedBlockLocators, expected, 2)
-
-       // Reduce desired replication on replicationCollectionUUID
-       // collection, and verify that Data Manager does not reduce
-       // actual replication any further than that. (It might not
-       // reduce actual replication at all; that's OK for this test.)
-
-       // Reduce desired replication level.
-       updateCollection(t, replicationCollectionUUID, "replication_desired", "1")
-       collection := getCollection(t, replicationCollectionUUID)
-       if collection["replication_desired"].(interface{}) != float64(1) {
-               t.Fatalf("After update replication_desired is not 1; instead it is %v", collection["replication_desired"])
-       }
-
-       // Verify data is currently overreplicated.
-       verifyBlocks(t, nil, []string{replicationCollectionLocator}, 2)
-
-       // Run data manager again
-       dataManagerSingleRun(t)
-       waitUntilQueuesFinishWork(t)
-
-       // Verify data is not underreplicated.
-       verifyBlocks(t, nil, []string{replicationCollectionLocator}, 1)
-
-       // Verify *other* collections' data is not underreplicated.
-       verifyBlocks(t, oldUnusedBlockLocators, expected, 2)
-}
-
-func TestDatamanagerSingleRunRepeatedly(t *testing.T) {
-       defer TearDownDataManagerTest(t)
-       SetupDataManagerTest(t)
-
-       for i := 0; i < 10; i++ {
-               err := singlerun(arv)
-               if err != nil {
-                       t.Fatalf("Got an error during datamanager singlerun: %v", err)
-               }
-       }
-}
-
-func TestGetStatusRepeatedly(t *testing.T) {
-       defer TearDownDataManagerTest(t)
-       SetupDataManagerTest(t)
-
-       for i := 0; i < 10; i++ {
-               for j := 0; j < 2; j++ {
-                       s := getStatus(t, keepServers[j]+"/status.json")
-
-                       var pullQueueStatus interface{}
-                       pullQueueStatus = s.(map[string]interface{})["PullQueue"]
-                       var trashQueueStatus interface{}
-                       trashQueueStatus = s.(map[string]interface{})["TrashQueue"]
-
-                       if pullQueueStatus.(map[string]interface{})["Queued"] == nil ||
-                               pullQueueStatus.(map[string]interface{})["InProgress"] == nil ||
-                               trashQueueStatus.(map[string]interface{})["Queued"] == nil ||
-                               trashQueueStatus.(map[string]interface{})["InProgress"] == nil {
-                               t.Fatalf("PullQueue and TrashQueue status not found")
-                       }
-
-                       time.Sleep(100 * time.Millisecond)
-               }
-       }
-}
-
-func TestRunDatamanagerWithBogusServer(t *testing.T) {
-       defer TearDownDataManagerTest(t)
-       SetupDataManagerTest(t)
-
-       arv.ApiServer = "bogus-server"
-
-       err := singlerun(arv)
-       if err == nil {
-               t.Fatalf("Expected error during singlerun with bogus server")
-       }
-}
-
-func TestRunDatamanagerAsNonAdminUser(t *testing.T) {
-       defer TearDownDataManagerTest(t)
-       SetupDataManagerTest(t)
-
-       arv.ApiToken = arvadostest.ActiveToken
-
-       err := singlerun(arv)
-       if err == nil {
-               t.Fatalf("Expected error during singlerun as non-admin user")
-       }
-}
-
-func TestPutAndGetBlocks_NoErrorDuringSingleRun(t *testing.T) {
-       testOldBlocksNotDeletedOnDataManagerError(t, "", "", false, false)
-}
-
-func TestPutAndGetBlocks_ErrorDuringGetCollectionsBadWriteTo(t *testing.T) {
-       badpath, err := arvadostest.CreateBadPath()
-       if err != nil {
-               t.Fatalf(err.Error())
-       }
-       defer func() {
-               err = arvadostest.DestroyBadPath(badpath)
-               if err != nil {
-                       t.Fatalf(err.Error())
-               }
-       }()
-       testOldBlocksNotDeletedOnDataManagerError(t, path.Join(badpath, "writetofile"), "", true, true)
-}
-
-func TestPutAndGetBlocks_ErrorDuringGetCollectionsBadHeapProfileFilename(t *testing.T) {
-       badpath, err := arvadostest.CreateBadPath()
-       if err != nil {
-               t.Fatalf(err.Error())
-       }
-       defer func() {
-               err = arvadostest.DestroyBadPath(badpath)
-               if err != nil {
-                       t.Fatalf(err.Error())
-               }
-       }()
-       testOldBlocksNotDeletedOnDataManagerError(t, "", path.Join(badpath, "heapprofilefile"), true, true)
-}
-
-// Create some blocks and backdate some of them.
-// Run datamanager while producing an error condition.
-// Verify that the blocks are hence not deleted.
-func testOldBlocksNotDeletedOnDataManagerError(t *testing.T, writeDataTo string, heapProfileFile string, expectError bool, expectOldBlocks bool) {
-       defer TearDownDataManagerTest(t)
-       SetupDataManagerTest(t)
-
-       // Put some blocks and backdate them.
-       var oldUnusedBlockLocators []string
-       oldUnusedBlockData := "this block will have older mtime"
-       for i := 0; i < 5; i++ {
-               oldUnusedBlockLocators = append(oldUnusedBlockLocators, putBlock(t, fmt.Sprintf("%s%d", oldUnusedBlockData, i)))
-       }
-       backdateBlocks(t, oldUnusedBlockLocators)
-
-       // Run data manager
-       summary.WriteDataTo = writeDataTo
-       collection.HeapProfileFilename = heapProfileFile
-
-       err := singlerun(arv)
-       if !expectError {
-               if err != nil {
-                       t.Fatalf("Got an error during datamanager singlerun: %v", err)
-               }
-       } else {
-               if err == nil {
-                       t.Fatalf("Expected error during datamanager singlerun")
-               }
-       }
-       waitUntilQueuesFinishWork(t)
-
-       // Get block indexes and verify that all backdated blocks are not/deleted as expected
-       if expectOldBlocks {
-               verifyBlocks(t, nil, oldUnusedBlockLocators, 2)
-       } else {
-               verifyBlocks(t, oldUnusedBlockLocators, nil, 2)
-       }
-}
-
-// Create a collection with multiple streams and blocks
-func createMultiStreamBlockCollection(t *testing.T, data string, numStreams, numBlocks int) (string, []string) {
-       defer switchToken(arvadostest.AdminToken)()
-
-       manifest := ""
-       locators := make(map[string]bool)
-       for s := 0; s < numStreams; s++ {
-               manifest += fmt.Sprintf("./stream%d ", s)
-               for b := 0; b < numBlocks; b++ {
-                       locator, _, err := keepClient.PutB([]byte(fmt.Sprintf("%s in stream %d and block %d", data, s, b)))
-                       if err != nil {
-                               t.Fatalf("Error creating block %d in stream %d: %v", b, s, err)
-                       }
-                       locators[strings.Split(locator, "+A")[0]] = true
-                       manifest += locator + " "
-               }
-               manifest += "0:1:dummyfile.txt\n"
-       }
-
-       collection := make(Dict)
-       err := arv.Create("collections",
-               arvadosclient.Dict{"collection": arvadosclient.Dict{"manifest_text": manifest}},
-               &collection)
-
-       if err != nil {
-               t.Fatalf("Error creating collection %v", err)
-       }
-
-       var locs []string
-       for k := range locators {
-               locs = append(locs, k)
-       }
-
-       return collection["uuid"].(string), locs
-}
-
-// Create collection with multiple streams and blocks; backdate the blocks and but do not delete the collection.
-// Also, create stray block and backdate it.
-// After datamanager run: expect blocks from the collection, but not the stray block.
-func TestManifestWithMultipleStreamsAndBlocks(t *testing.T) {
-       testManifestWithMultipleStreamsAndBlocks(t, 100, 10, "", false)
-}
-
-// Same test as TestManifestWithMultipleStreamsAndBlocks with an additional
-// keepstore of a service type other than "disk". Only the "disk" type services
-// will be indexed by datamanager and hence should work the same way.
-func TestManifestWithMultipleStreamsAndBlocks_WithOneUnsupportedKeepServer(t *testing.T) {
-       testManifestWithMultipleStreamsAndBlocks(t, 2, 2, "testblobstore", false)
-}
-
-// Test datamanager with dry-run. Expect no block to be deleted.
-func TestManifestWithMultipleStreamsAndBlocks_DryRun(t *testing.T) {
-       testManifestWithMultipleStreamsAndBlocks(t, 2, 2, "", true)
-}
-
-func testManifestWithMultipleStreamsAndBlocks(t *testing.T, numStreams, numBlocks int, createExtraKeepServerWithType string, isDryRun bool) {
-       defer TearDownDataManagerTest(t)
-       SetupDataManagerTest(t)
-
-       // create collection whose blocks will be backdated
-       collectionWithOldBlocks, oldBlocks := createMultiStreamBlockCollection(t, "old block", numStreams, numBlocks)
-       if collectionWithOldBlocks == "" {
-               t.Fatalf("Failed to create collection with %d blocks", numStreams*numBlocks)
-       }
-       if len(oldBlocks) != numStreams*numBlocks {
-               t.Fatalf("Not all blocks are created: expected %v, found %v", 1000, len(oldBlocks))
-       }
-
-       // create a stray block that will be backdated
-       strayOldBlock := putBlock(t, "this stray block is old")
-
-       expected := []string{strayOldBlock}
-       expected = append(expected, oldBlocks...)
-       verifyBlocks(t, nil, expected, 2)
-
-       // Backdate old blocks; but the collection still references these blocks
-       backdateBlocks(t, oldBlocks)
-
-       // also backdate the stray old block
-       backdateBlocks(t, []string{strayOldBlock})
-
-       // If requested, create an extra keepserver with the given type
-       // This should be ignored during indexing and hence not change the datamanager outcome
-       var extraKeepServerUUID string
-       if createExtraKeepServerWithType != "" {
-               extraKeepServerUUID = addExtraKeepServer(t, createExtraKeepServerWithType)
-               defer deleteExtraKeepServer(extraKeepServerUUID)
-       }
-
-       // run datamanager
-       dryRun = isDryRun
-       dataManagerSingleRun(t)
-
-       if dryRun {
-               // verify that all blocks, including strayOldBlock, are still to be found
-               verifyBlocks(t, nil, expected, 2)
-       } else {
-               // verify that strayOldBlock is not to be found, but the collections blocks are still there
-               verifyBlocks(t, []string{strayOldBlock}, oldBlocks, 2)
-       }
-}
-
-// Add one more keepstore with the given service type
-func addExtraKeepServer(t *testing.T, serviceType string) string {
-       defer switchToken(arvadostest.AdminToken)()
-
-       extraKeepService := make(arvadosclient.Dict)
-       err := arv.Create("keep_services",
-               arvadosclient.Dict{"keep_service": arvadosclient.Dict{
-                       "service_host":     "localhost",
-                       "service_port":     "21321",
-                       "service_ssl_flag": false,
-                       "service_type":     serviceType}},
-               &extraKeepService)
-       if err != nil {
-               t.Fatal(err)
-       }
-
-       return extraKeepService["uuid"].(string)
-}
-
-func deleteExtraKeepServer(uuid string) {
-       defer switchToken(arvadostest.AdminToken)()
-       arv.Delete("keep_services", uuid, nil, nil)
-}
diff --git a/services/datamanager/experimental/datamanager.py b/services/datamanager/experimental/datamanager.py
deleted file mode 100755 (executable)
index 8207bdc..0000000
+++ /dev/null
@@ -1,887 +0,0 @@
-#! /usr/bin/env python
-
-import arvados
-
-import argparse
-import cgi
-import csv
-import json
-import logging
-import math
-import pprint
-import re
-import threading
-import urllib2
-
-from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
-from collections import defaultdict, Counter
-from functools import partial
-from operator import itemgetter
-from SocketServer import ThreadingMixIn
-
-arv = arvados.api('v1')
-
-# Adapted from http://stackoverflow.com/questions/4180980/formatting-data-quantity-capacity-as-string
-byteunits = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
-def fileSizeFormat(value):
-  exponent = 0 if value == 0 else int(math.log(value, 1024))
-  return "%7.2f %-3s" % (float(value) / pow(1024, exponent),
-                         byteunits[exponent])
-
-def percentageFloor(x):
-  """ Returns a float which is the input rounded down to the neared 0.01.
-
-e.g. precentageFloor(0.941354) = 0.94
-"""
-  return math.floor(x*100) / 100.0
-
-
-def byteSizeFromValidUuid(valid_uuid):
-  return int(valid_uuid.split('+')[1])
-
-class maxdict(dict):
-  """A dictionary that holds the largest value entered for each key."""
-  def addValue(self, key, value):
-    dict.__setitem__(self, key, max(dict.get(self, key), value))
-  def addValues(self, kv_pairs):
-    for key,value in kv_pairs:
-      self.addValue(key, value)
-  def addDict(self, d):
-    self.addValues(d.items())
-
-class CollectionInfo:
-  DEFAULT_PERSISTER_REPLICATION_LEVEL=2
-  all_by_uuid = {}
-
-  def __init__(self, uuid):
-    if CollectionInfo.all_by_uuid.has_key(uuid):
-      raise ValueError('Collection for uuid "%s" already exists.' % uuid)
-    self.uuid = uuid
-    self.block_uuids = set()  # uuids of keep blocks in this collection
-    self.reader_uuids = set()  # uuids of users who can read this collection
-    self.persister_uuids = set()  # uuids of users who want this collection saved
-    # map from user uuid to replication level they desire
-    self.persister_replication = maxdict()
-
-    # The whole api response in case we need anything else later.
-    self.api_response = []
-    CollectionInfo.all_by_uuid[uuid] = self
-
-  def byteSize(self):
-    return sum(map(byteSizeFromValidUuid, self.block_uuids))
-
-  def __str__(self):
-    return ('CollectionInfo uuid: %s\n'
-            '               %d block(s) containing %s\n'
-            '               reader_uuids: %s\n'
-            '               persister_replication: %s' %
-            (self.uuid,
-             len(self.block_uuids),
-             fileSizeFormat(self.byteSize()),
-             pprint.pformat(self.reader_uuids, indent = 15),
-             pprint.pformat(self.persister_replication, indent = 15)))
-
-  @staticmethod
-  def get(uuid):
-    if not CollectionInfo.all_by_uuid.has_key(uuid):
-      CollectionInfo(uuid)
-    return CollectionInfo.all_by_uuid[uuid]
-
-
-def extractUuid(candidate):
-  """ Returns a canonical (hash+size) uuid from a valid uuid, or None if candidate is not a valid uuid."""
-  match = re.match('([0-9a-fA-F]{32}\+[0-9]+)(\+[^+]+)*$', candidate)
-  return match and match.group(1)
-
-def checkUserIsAdmin():
-  current_user = arv.users().current().execute()
-
-  if not current_user['is_admin']:
-    log.warning('Current user %s (%s - %s) does not have '
-                'admin access and will not see much of the data.',
-                current_user['full_name'],
-                current_user['email'],
-                current_user['uuid'])
-    if args.require_admin_user:
-      log.critical('Exiting, rerun with --no-require-admin-user '
-                   'if you wish to continue.')
-      exit(1)
-
-def buildCollectionsList():
-  if args.uuid:
-    return [args.uuid,]
-  else:
-    collections_list_response = arv.collections().list(limit=args.max_api_results).execute()
-
-    print ('Returned %d of %d collections.' %
-           (len(collections_list_response['items']),
-            collections_list_response['items_available']))
-
-    return [item['uuid'] for item in collections_list_response['items']]
-
-
-def readCollections(collection_uuids):
-  for collection_uuid in collection_uuids:
-    collection_block_uuids = set()
-    collection_response = arv.collections().get(uuid=collection_uuid).execute()
-    collection_info = CollectionInfo.get(collection_uuid)
-    collection_info.api_response = collection_response
-    manifest_lines = collection_response['manifest_text'].split('\n')
-
-    if args.verbose:
-      print 'Manifest text for %s:' % collection_uuid
-      pprint.pprint(manifest_lines)
-
-    for manifest_line in manifest_lines:
-      if manifest_line:
-        manifest_tokens = manifest_line.split(' ')
-        if args.verbose:
-          print 'manifest tokens: ' + pprint.pformat(manifest_tokens)
-        stream_name = manifest_tokens[0]
-
-        line_block_uuids = set(filter(None,
-                                      [extractUuid(candidate)
-                                       for candidate in manifest_tokens[1:]]))
-        collection_info.block_uuids.update(line_block_uuids)
-
-        # file_tokens = [token
-        #                for token in manifest_tokens[1:]
-        #                if extractUuid(token) is None]
-
-        # # Sort file tokens by start position in case they aren't already
-        # file_tokens.sort(key=lambda file_token: int(file_token.split(':')[0]))
-
-        # if args.verbose:
-        #   print 'line_block_uuids: ' + pprint.pformat(line_block_uuids)
-        #   print 'file_tokens: ' + pprint.pformat(file_tokens)
-
-
-def readLinks():
-  link_classes = set()
-
-  for collection_uuid,collection_info in CollectionInfo.all_by_uuid.items():
-    # TODO(misha): We may not be seing all the links, but since items
-    # available does not return an accurate number, I don't knos how
-    # to confirm that we saw all of them.
-    collection_links_response = arv.links().list(where={'head_uuid':collection_uuid}).execute()
-    link_classes.update([link['link_class'] for link in collection_links_response['items']])
-    for link in collection_links_response['items']:
-      if link['link_class'] == 'permission':
-        collection_info.reader_uuids.add(link['tail_uuid'])
-      elif link['link_class'] == 'resources':
-        replication_level = link['properties'].get(
-          'replication',
-          CollectionInfo.DEFAULT_PERSISTER_REPLICATION_LEVEL)
-        collection_info.persister_replication.addValue(
-          link['tail_uuid'],
-          replication_level)
-        collection_info.persister_uuids.add(link['tail_uuid'])
-
-  print 'Found the following link classes:'
-  pprint.pprint(link_classes)
-
-def reportMostPopularCollections():
-  most_popular_collections = sorted(
-    CollectionInfo.all_by_uuid.values(),
-    key=lambda info: len(info.reader_uuids) + 10 * len(info.persister_replication),
-    reverse=True)[:10]
-
-  print 'Most popular Collections:'
-  for collection_info in most_popular_collections:
-    print collection_info
-
-
-def buildMaps():
-  for collection_uuid,collection_info in CollectionInfo.all_by_uuid.items():
-    # Add the block holding the manifest itself for all calculations
-    block_uuids = collection_info.block_uuids.union([collection_uuid,])
-    for block_uuid in block_uuids:
-      block_to_collections[block_uuid].add(collection_uuid)
-      block_to_readers[block_uuid].update(collection_info.reader_uuids)
-      block_to_persisters[block_uuid].update(collection_info.persister_uuids)
-      block_to_persister_replication[block_uuid].addDict(
-        collection_info.persister_replication)
-    for reader_uuid in collection_info.reader_uuids:
-      reader_to_collections[reader_uuid].add(collection_uuid)
-      reader_to_blocks[reader_uuid].update(block_uuids)
-    for persister_uuid in collection_info.persister_uuids:
-      persister_to_collections[persister_uuid].add(collection_uuid)
-      persister_to_blocks[persister_uuid].update(block_uuids)
-
-
-def itemsByValueLength(original):
-  return sorted(original.items(),
-                key=lambda item:len(item[1]),
-                reverse=True)
-
-
-def reportBusiestUsers():
-  busiest_readers = itemsByValueLength(reader_to_collections)
-  print 'The busiest readers are:'
-  for reader,collections in busiest_readers:
-    print '%s reading %d collections.' % (reader, len(collections))
-  busiest_persisters = itemsByValueLength(persister_to_collections)
-  print 'The busiest persisters are:'
-  for persister,collections in busiest_persisters:
-    print '%s reading %d collections.' % (persister, len(collections))
-
-
-def blockDiskUsage(block_uuid):
-  """Returns the disk usage of a block given its uuid.
-
-  Will return 0 before reading the contents of the keep servers.
-  """
-  return byteSizeFromValidUuid(block_uuid) * block_to_replication[block_uuid]
-
-def blockPersistedUsage(user_uuid, block_uuid):
-  return (byteSizeFromValidUuid(block_uuid) *
-          block_to_persister_replication[block_uuid].get(user_uuid, 0))
-
-memo_computeWeightedReplicationCosts = {}
-def computeWeightedReplicationCosts(replication_levels):
-  """Computes the relative cost of varied replication levels.
-
-  replication_levels: a tuple of integers representing the desired
-  replication level. If n users want a replication level of x then x
-  should appear n times in replication_levels.
-
-  Returns a dictionary from replication level to cost.
-
-  The basic thinking is that the cost of replicating at level x should
-  be shared by everyone who wants replication of level x or higher.
-
-  For example, if we have two users who want 1 copy, one user who
-  wants 3 copies and two users who want 6 copies:
-  the input would be [1, 1, 3, 6, 6] (or any permutation)
-
-  The cost of the first copy is shared by all 5 users, so they each
-  pay 1 copy / 5 users = 0.2.
-  The cost of the second and third copies shared by 3 users, so they
-  each pay 2 copies / 3 users = 0.67 (plus the above costs)
-  The cost of the fourth, fifth and sixth copies is shared by two
-  users, so they each pay 3 copies / 2 users = 1.5 (plus the above costs)
-
-  Here are some other examples:
-  computeWeightedReplicationCosts([1,]) -> {1:1.0}
-  computeWeightedReplicationCosts([2,]) -> {2:2.0}
-  computeWeightedReplicationCosts([1,1]) -> {1:0.5}
-  computeWeightedReplicationCosts([2,2]) -> {1:1.0}
-  computeWeightedReplicationCosts([1,2]) -> {1:0.5,2:1.5}
-  computeWeightedReplicationCosts([1,3]) -> {1:0.5,2:2.5}
-  computeWeightedReplicationCosts([1,3,6,6,10]) -> {1:0.2,3:0.7,6:1.7,10:5.7}
-  """
-  replication_level_counts = sorted(Counter(replication_levels).items())
-
-  memo_key = str(replication_level_counts)
-
-  if not memo_key in memo_computeWeightedReplicationCosts:
-    last_level = 0
-    current_cost = 0
-    total_interested = float(sum(map(itemgetter(1), replication_level_counts)))
-    cost_for_level = {}
-    for replication_level, count in replication_level_counts:
-      copies_added = replication_level - last_level
-      # compute marginal cost from last level and add it to the last cost
-      current_cost += copies_added / total_interested
-      cost_for_level[replication_level] = current_cost
-      # update invariants
-      last_level = replication_level
-      total_interested -= count
-    memo_computeWeightedReplicationCosts[memo_key] = cost_for_level
-
-  return memo_computeWeightedReplicationCosts[memo_key]
-
-def blockPersistedWeightedUsage(user_uuid, block_uuid):
-  persister_replication_for_block = block_to_persister_replication[block_uuid]
-  user_replication = persister_replication_for_block[user_uuid]
-  return (
-    byteSizeFromValidUuid(block_uuid) *
-    computeWeightedReplicationCosts(
-      persister_replication_for_block.values())[user_replication])
-
-
-def computeUserStorageUsage():
-  for user, blocks in reader_to_blocks.items():
-    user_to_usage[user][UNWEIGHTED_READ_SIZE_COL] = sum(map(
-        byteSizeFromValidUuid,
-        blocks))
-    user_to_usage[user][WEIGHTED_READ_SIZE_COL] = sum(map(
-        lambda block_uuid:(float(byteSizeFromValidUuid(block_uuid))/
-                                 len(block_to_readers[block_uuid])),
-        blocks))
-  for user, blocks in persister_to_blocks.items():
-    user_to_usage[user][UNWEIGHTED_PERSIST_SIZE_COL] = sum(map(
-        partial(blockPersistedUsage, user),
-        blocks))
-    user_to_usage[user][WEIGHTED_PERSIST_SIZE_COL] = sum(map(
-        partial(blockPersistedWeightedUsage, user),
-        blocks))
-
-def printUserStorageUsage():
-  print ('user: unweighted readable block size, weighted readable block size, '
-         'unweighted persisted block size, weighted persisted block size:')
-  for user, usage in user_to_usage.items():
-    print ('%s: %s %s %s %s' %
-           (user,
-            fileSizeFormat(usage[UNWEIGHTED_READ_SIZE_COL]),
-            fileSizeFormat(usage[WEIGHTED_READ_SIZE_COL]),
-            fileSizeFormat(usage[UNWEIGHTED_PERSIST_SIZE_COL]),
-            fileSizeFormat(usage[WEIGHTED_PERSIST_SIZE_COL])))
-
-def logUserStorageUsage():
-  for user, usage in user_to_usage.items():
-    body = {}
-    # user could actually represent a user or a group. We don't set
-    # the object_type field since we don't know which we have.
-    body['object_uuid'] = user
-    body['event_type'] = args.user_storage_log_event_type
-    properties = {}
-    properties['read_collections_total_bytes'] = usage[UNWEIGHTED_READ_SIZE_COL]
-    properties['read_collections_weighted_bytes'] = (
-      usage[WEIGHTED_READ_SIZE_COL])
-    properties['persisted_collections_total_bytes'] = (
-      usage[UNWEIGHTED_PERSIST_SIZE_COL])
-    properties['persisted_collections_weighted_bytes'] = (
-      usage[WEIGHTED_PERSIST_SIZE_COL])
-    body['properties'] = properties
-    # TODO(misha): Confirm that this will throw an exception if it
-    # fails to create the log entry.
-    arv.logs().create(body=body).execute()
-
-def getKeepServers():
-  response = arv.keep_disks().list().execute()
-  return [[keep_server['service_host'], keep_server['service_port']]
-          for keep_server in response['items']]
-
-
-def getKeepBlocks(keep_servers):
-  blocks = []
-  for host,port in keep_servers:
-    response = urllib2.urlopen('http://%s:%d/index' % (host, port))
-    server_blocks = [line.split(' ')
-                     for line in response.read().split('\n')
-                     if line]
-    server_blocks = [(block_id, int(mtime))
-                     for block_id, mtime in server_blocks]
-    blocks.append(server_blocks)
-  return blocks
-
-def getKeepStats(keep_servers):
-  MOUNT_COLUMN = 5
-  TOTAL_COLUMN = 1
-  FREE_COLUMN = 3
-  DISK_BLOCK_SIZE = 1024
-  stats = []
-  for host,port in keep_servers:
-    response = urllib2.urlopen('http://%s:%d/status.json' % (host, port))
-
-    parsed_json = json.load(response)
-    df_entries = [line.split()
-                  for line in parsed_json['df'].split('\n')
-                  if line]
-    keep_volumes = [columns
-                    for columns in df_entries
-                    if 'keep' in columns[MOUNT_COLUMN]]
-    total_space = DISK_BLOCK_SIZE*sum(map(int,map(itemgetter(TOTAL_COLUMN),
-                                                  keep_volumes)))
-    free_space =  DISK_BLOCK_SIZE*sum(map(int,map(itemgetter(FREE_COLUMN),
-                                                  keep_volumes)))
-    stats.append([total_space, free_space])
-  return stats
-
-
-def computeReplication(keep_blocks):
-  for server_blocks in keep_blocks:
-    for block_uuid, _ in server_blocks:
-      block_to_replication[block_uuid] += 1
-  log.debug('Seeing the following replication levels among blocks: %s',
-            str(set(block_to_replication.values())))
-
-
-def computeGarbageCollectionCandidates():
-  for server_blocks in keep_blocks:
-    block_to_latest_mtime.addValues(server_blocks)
-  empty_set = set()
-  garbage_collection_priority = sorted(
-    [(block,mtime)
-     for block,mtime in block_to_latest_mtime.items()
-     if len(block_to_persisters.get(block,empty_set)) == 0],
-    key = itemgetter(1))
-  global garbage_collection_report
-  garbage_collection_report = []
-  cumulative_disk_size = 0
-  for block,mtime in garbage_collection_priority:
-    disk_size = blockDiskUsage(block)
-    cumulative_disk_size += disk_size
-    garbage_collection_report.append(
-      (block,
-       mtime,
-       disk_size,
-       cumulative_disk_size,
-       float(free_keep_space + cumulative_disk_size)/total_keep_space))
-
-  print 'The oldest Garbage Collection Candidates: '
-  pprint.pprint(garbage_collection_report[:20])
-
-
-def outputGarbageCollectionReport(filename):
-  with open(filename, 'wb') as csvfile:
-    gcwriter = csv.writer(csvfile)
-    gcwriter.writerow(['block uuid', 'latest mtime', 'disk size',
-                       'cumulative size', 'disk free'])
-    for line in garbage_collection_report:
-      gcwriter.writerow(line)
-
-def computeGarbageCollectionHistogram():
-  # TODO(misha): Modify this to allow users to specify the number of
-  # histogram buckets through a flag.
-  histogram = []
-  last_percentage = -1
-  for _,mtime,_,_,disk_free in garbage_collection_report:
-    curr_percentage = percentageFloor(disk_free)
-    if curr_percentage > last_percentage:
-      histogram.append( (mtime, curr_percentage) )
-    last_percentage = curr_percentage
-
-  log.info('Garbage collection histogram is: %s', histogram)
-
-  return histogram
-
-
-def logGarbageCollectionHistogram():
-  body = {}
-  # TODO(misha): Decide whether we should specify an object_uuid in
-  # the body and if so, which uuid to use.
-  body['event_type'] = args.block_age_free_space_histogram_log_event_type
-  properties = {}
-  properties['histogram'] = garbage_collection_histogram
-  body['properties'] = properties
-  # TODO(misha): Confirm that this will throw an exception if it
-  # fails to create the log entry.
-  arv.logs().create(body=body).execute()
-
-
-def detectReplicationProblems():
-  blocks_not_in_any_collections.update(
-    set(block_to_replication.keys()).difference(block_to_collections.keys()))
-  underreplicated_persisted_blocks.update(
-    [uuid
-     for uuid, persister_replication in block_to_persister_replication.items()
-     if len(persister_replication) > 0 and
-     block_to_replication[uuid] < max(persister_replication.values())])
-  overreplicated_persisted_blocks.update(
-    [uuid
-     for uuid, persister_replication in block_to_persister_replication.items()
-     if len(persister_replication) > 0 and
-     block_to_replication[uuid] > max(persister_replication.values())])
-
-  log.info('Found %d blocks not in any collections, e.g. %s...',
-           len(blocks_not_in_any_collections),
-           ','.join(list(blocks_not_in_any_collections)[:5]))
-  log.info('Found %d underreplicated blocks, e.g. %s...',
-           len(underreplicated_persisted_blocks),
-           ','.join(list(underreplicated_persisted_blocks)[:5]))
-  log.info('Found %d overreplicated blocks, e.g. %s...',
-           len(overreplicated_persisted_blocks),
-           ','.join(list(overreplicated_persisted_blocks)[:5]))
-
-  # TODO:
-  #  Read blocks sorted by mtime
-  #  Cache window vs % free space
-  #  Collections which candidates will appear in
-  #  Youngest underreplicated read blocks that appear in collections.
-  #  Report Collections that have blocks which are missing from (or
-  #   underreplicated in) keep.
-
-
-# This is the main flow here
-
-parser = argparse.ArgumentParser(description='Report on keep disks.')
-"""The command line argument parser we use.
-
-We only use it in the __main__ block, but leave it outside the block
-in case another package wants to use it or customize it by specifying
-it as a parent to their commandline parser.
-"""
-parser.add_argument('-m',
-                    '--max-api-results',
-                    type=int,
-                    default=5000,
-                    help=('The max results to get at once.'))
-parser.add_argument('-p',
-                    '--port',
-                    type=int,
-                    default=9090,
-                    help=('The port number to serve on. 0 means no server.'))
-parser.add_argument('-v',
-                    '--verbose',
-                    help='increase output verbosity',
-                    action='store_true')
-parser.add_argument('-u',
-                    '--uuid',
-                    help='uuid of specific collection to process')
-parser.add_argument('--require-admin-user',
-                    action='store_true',
-                    default=True,
-                    help='Fail if the user is not an admin [default]')
-parser.add_argument('--no-require-admin-user',
-                    dest='require_admin_user',
-                    action='store_false',
-                    help=('Allow users without admin permissions with '
-                          'only a warning.'))
-parser.add_argument('--log-to-workbench',
-                    action='store_true',
-                    default=False,
-                    help='Log findings to workbench')
-parser.add_argument('--no-log-to-workbench',
-                    dest='log_to_workbench',
-                    action='store_false',
-                    help='Don\'t log findings to workbench [default]')
-parser.add_argument('--user-storage-log-event-type',
-                    default='user-storage-report',
-                    help=('The event type to set when logging user '
-                          'storage usage to workbench.'))
-parser.add_argument('--block-age-free-space-histogram-log-event-type',
-                    default='block-age-free-space-histogram',
-                    help=('The event type to set when logging user '
-                          'storage usage to workbench.'))
-parser.add_argument('--garbage-collection-file',
-                    default='',
-                    help=('The file to write a garbage collection report, or '
-                          'leave empty for no report.'))
-
-args = None
-
-# TODO(misha): Think about moving some of this to the __main__ block.
-log = logging.getLogger('arvados.services.datamanager')
-stderr_handler = logging.StreamHandler()
-log.setLevel(logging.INFO)
-stderr_handler.setFormatter(
-  logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
-log.addHandler(stderr_handler)
-
-# Global Data - don't try this at home
-collection_uuids = []
-
-# These maps all map from uuids to a set of uuids
-block_to_collections = defaultdict(set)  # keep blocks
-reader_to_collections = defaultdict(set)  # collection(s) for which the user has read access
-persister_to_collections = defaultdict(set)  # collection(s) which the user has persisted
-block_to_readers = defaultdict(set)
-block_to_persisters = defaultdict(set)
-block_to_persister_replication = defaultdict(maxdict)
-reader_to_blocks = defaultdict(set)
-persister_to_blocks = defaultdict(set)
-
-UNWEIGHTED_READ_SIZE_COL = 0
-WEIGHTED_READ_SIZE_COL = 1
-UNWEIGHTED_PERSIST_SIZE_COL = 2
-WEIGHTED_PERSIST_SIZE_COL = 3
-NUM_COLS = 4
-user_to_usage = defaultdict(lambda : [0,]*NUM_COLS)
-
-keep_servers = []
-keep_blocks = []
-keep_stats = []
-total_keep_space = 0
-free_keep_space =  0
-
-block_to_replication = defaultdict(lambda: 0)
-block_to_latest_mtime = maxdict()
-
-garbage_collection_report = []
-"""A list of non-persisted blocks, sorted by increasing mtime
-
-Each entry is of the form (block uuid, latest mtime, disk size,
-cumulative size)
-
-* block uuid: The id of the block we want to delete
-* latest mtime: The latest mtime of the block across all keep servers.
-* disk size: The total disk space used by this block (block size
-multiplied by current replication level)
-* cumulative disk size: The sum of this block's disk size and all the
-blocks listed above it
-* disk free: The proportion of our disk space that would be free if we
-deleted this block and all the above. So this is (free disk space +
-cumulative disk size) / total disk capacity
-"""
-
-garbage_collection_histogram = []
-""" Shows the tradeoff of keep block age vs keep disk free space.
-
-Each entry is of the form (mtime, Disk Proportion).
-
-An entry of the form (1388747781, 0.52) means that if we deleted the
-oldest non-presisted blocks until we had 52% of the disk free, then
-all blocks with an mtime greater than 1388747781 would be preserved.
-"""
-
-# Stuff to report on
-blocks_not_in_any_collections = set()
-underreplicated_persisted_blocks = set()
-overreplicated_persisted_blocks = set()
-
-all_data_loaded = False
-
-def loadAllData():
-  checkUserIsAdmin()
-
-  log.info('Building Collection List')
-  global collection_uuids
-  collection_uuids = filter(None, [extractUuid(candidate)
-                                   for candidate in buildCollectionsList()])
-
-  log.info('Reading Collections')
-  readCollections(collection_uuids)
-
-  if args.verbose:
-    pprint.pprint(CollectionInfo.all_by_uuid)
-
-  log.info('Reading Links')
-  readLinks()
-
-  reportMostPopularCollections()
-
-  log.info('Building Maps')
-  buildMaps()
-
-  reportBusiestUsers()
-
-  log.info('Getting Keep Servers')
-  global keep_servers
-  keep_servers = getKeepServers()
-
-  print keep_servers
-
-  log.info('Getting Blocks from each Keep Server.')
-  global keep_blocks
-  keep_blocks = getKeepBlocks(keep_servers)
-
-  log.info('Getting Stats from each Keep Server.')
-  global keep_stats, total_keep_space, free_keep_space
-  keep_stats = getKeepStats(keep_servers)
-
-  total_keep_space = sum(map(itemgetter(0), keep_stats))
-  free_keep_space = sum(map(itemgetter(1), keep_stats))
-
-  # TODO(misha): Delete this hack when the keep servers are fixed!
-  # This hack deals with the fact that keep servers report each other's disks.
-  total_keep_space /= len(keep_stats)
-  free_keep_space /= len(keep_stats)
-
-  log.info('Total disk space: %s, Free disk space: %s (%d%%).' %
-           (fileSizeFormat(total_keep_space),
-            fileSizeFormat(free_keep_space),
-            100*free_keep_space/total_keep_space))
-
-  computeReplication(keep_blocks)
-
-  log.info('average replication level is %f',
-           (float(sum(block_to_replication.values())) /
-            len(block_to_replication)))
-
-  computeGarbageCollectionCandidates()
-
-  if args.garbage_collection_file:
-    log.info('Writing garbage Collection report to %s',
-             args.garbage_collection_file)
-    outputGarbageCollectionReport(args.garbage_collection_file)
-
-  global garbage_collection_histogram
-  garbage_collection_histogram = computeGarbageCollectionHistogram()
-
-  if args.log_to_workbench:
-    logGarbageCollectionHistogram()
-
-  detectReplicationProblems()
-
-  computeUserStorageUsage()
-  printUserStorageUsage()
-  if args.log_to_workbench:
-    logUserStorageUsage()
-
-  global all_data_loaded
-  all_data_loaded = True
-
-
-class DataManagerHandler(BaseHTTPRequestHandler):
-  USER_PATH = 'user'
-  COLLECTION_PATH = 'collection'
-  BLOCK_PATH = 'block'
-
-  def userLink(self, uuid):
-    return ('<A HREF="/%(path)s/%(uuid)s">%(uuid)s</A>' %
-            {'uuid': uuid,
-             'path': DataManagerHandler.USER_PATH})
-
-  def collectionLink(self, uuid):
-    return ('<A HREF="/%(path)s/%(uuid)s">%(uuid)s</A>' %
-            {'uuid': uuid,
-             'path': DataManagerHandler.COLLECTION_PATH})
-
-  def blockLink(self, uuid):
-    return ('<A HREF="/%(path)s/%(uuid)s">%(uuid)s</A>' %
-            {'uuid': uuid,
-             'path': DataManagerHandler.BLOCK_PATH})
-
-  def writeTop(self, title):
-    self.wfile.write('<HTML><HEAD><TITLE>%s</TITLE></HEAD>\n<BODY>' % title)
-
-  def writeBottom(self):
-    self.wfile.write('</BODY></HTML>\n')
-
-  def writeHomePage(self):
-    self.send_response(200)
-    self.end_headers()
-    self.writeTop('Home')
-    self.wfile.write('<TABLE>')
-    self.wfile.write('<TR><TH>user'
-                     '<TH>unweighted readable block size'
-                     '<TH>weighted readable block size'
-                     '<TH>unweighted persisted block size'
-                     '<TH>weighted persisted block size</TR>\n')
-    for user, usage in user_to_usage.items():
-      self.wfile.write('<TR><TD>%s<TD>%s<TD>%s<TD>%s<TD>%s</TR>\n' %
-                       (self.userLink(user),
-                        fileSizeFormat(usage[UNWEIGHTED_READ_SIZE_COL]),
-                        fileSizeFormat(usage[WEIGHTED_READ_SIZE_COL]),
-                        fileSizeFormat(usage[UNWEIGHTED_PERSIST_SIZE_COL]),
-                        fileSizeFormat(usage[WEIGHTED_PERSIST_SIZE_COL])))
-    self.wfile.write('</TABLE>\n')
-    self.writeBottom()
-
-  def userExists(self, uuid):
-    # Currently this will return false for a user who exists but
-    # doesn't appear on any manifests.
-    # TODO(misha): Figure out if we need to fix this.
-    return user_to_usage.has_key(uuid)
-
-  def writeUserPage(self, uuid):
-    if not self.userExists(uuid):
-      self.send_error(404,
-                      'User (%s) Not Found.' % cgi.escape(uuid, quote=False))
-    else:
-      # Here we assume that since a user exists, they don't need to be
-      # html escaped.
-      self.send_response(200)
-      self.end_headers()
-      self.writeTop('User %s' % uuid)
-      self.wfile.write('<TABLE>')
-      self.wfile.write('<TR><TH>user'
-                       '<TH>unweighted readable block size'
-                       '<TH>weighted readable block size'
-                       '<TH>unweighted persisted block size'
-                       '<TH>weighted persisted block size</TR>\n')
-      usage = user_to_usage[uuid]
-      self.wfile.write('<TR><TD>%s<TD>%s<TD>%s<TD>%s<TD>%s</TR>\n' %
-                       (self.userLink(uuid),
-                        fileSizeFormat(usage[UNWEIGHTED_READ_SIZE_COL]),
-                        fileSizeFormat(usage[WEIGHTED_READ_SIZE_COL]),
-                        fileSizeFormat(usage[UNWEIGHTED_PERSIST_SIZE_COL]),
-                        fileSizeFormat(usage[WEIGHTED_PERSIST_SIZE_COL])))
-      self.wfile.write('</TABLE>\n')
-      self.wfile.write('<P>Persisting Collections: %s\n' %
-                       ', '.join(map(self.collectionLink,
-                                     persister_to_collections[uuid])))
-      self.wfile.write('<P>Reading Collections: %s\n' %
-                       ', '.join(map(self.collectionLink,
-                                     reader_to_collections[uuid])))
-      self.writeBottom()
-
-  def collectionExists(self, uuid):
-    return CollectionInfo.all_by_uuid.has_key(uuid)
-
-  def writeCollectionPage(self, uuid):
-    if not self.collectionExists(uuid):
-      self.send_error(404,
-                      'Collection (%s) Not Found.' % cgi.escape(uuid, quote=False))
-    else:
-      collection = CollectionInfo.get(uuid)
-      # Here we assume that since a collection exists, its id doesn't
-      # need to be html escaped.
-      self.send_response(200)
-      self.end_headers()
-      self.writeTop('Collection %s' % uuid)
-      self.wfile.write('<H1>Collection %s</H1>\n' % uuid)
-      self.wfile.write('<P>Total size %s (not factoring in replication).\n' %
-                       fileSizeFormat(collection.byteSize()))
-      self.wfile.write('<P>Readers: %s\n' %
-                       ', '.join(map(self.userLink, collection.reader_uuids)))
-
-      if len(collection.persister_replication) == 0:
-        self.wfile.write('<P>No persisters\n')
-      else:
-        replication_to_users = defaultdict(set)
-        for user,replication in collection.persister_replication.items():
-          replication_to_users[replication].add(user)
-        replication_levels = sorted(replication_to_users.keys())
-
-        self.wfile.write('<P>%d persisters in %d replication level(s) maxing '
-                         'out at %dx replication:\n' %
-                         (len(collection.persister_replication),
-                          len(replication_levels),
-                          replication_levels[-1]))
-
-        # TODO(misha): This code is used twice, let's move it to a method.
-        self.wfile.write('<TABLE><TR><TH>%s</TR>\n' %
-                         '<TH>'.join(['Replication Level ' + str(x)
-                                      for x in replication_levels]))
-        self.wfile.write('<TR>\n')
-        for replication_level in replication_levels:
-          users = replication_to_users[replication_level]
-          self.wfile.write('<TD valign="top">%s\n' % '<BR>\n'.join(
-              map(self.userLink, users)))
-        self.wfile.write('</TR></TABLE>\n')
-
-      replication_to_blocks = defaultdict(set)
-      for block in collection.block_uuids:
-        replication_to_blocks[block_to_replication[block]].add(block)
-      replication_levels = sorted(replication_to_blocks.keys())
-      self.wfile.write('<P>%d blocks in %d replication level(s):\n' %
-                       (len(collection.block_uuids), len(replication_levels)))
-      self.wfile.write('<TABLE><TR><TH>%s</TR>\n' %
-                       '<TH>'.join(['Replication Level ' + str(x)
-                                    for x in replication_levels]))
-      self.wfile.write('<TR>\n')
-      for replication_level in replication_levels:
-        blocks = replication_to_blocks[replication_level]
-        self.wfile.write('<TD valign="top">%s\n' % '<BR>\n'.join(blocks))
-      self.wfile.write('</TR></TABLE>\n')
-
-
-  def do_GET(self):
-    if not all_data_loaded:
-      self.send_error(503,
-                      'Sorry, but I am still loading all the data I need.')
-    else:
-      # Removing leading '/' and process request path
-      split_path = self.path[1:].split('/')
-      request_type = split_path[0]
-      log.debug('path (%s) split as %s with request_type %s' % (self.path,
-                                                                split_path,
-                                                                request_type))
-      if request_type == '':
-        self.writeHomePage()
-      elif request_type == DataManagerHandler.USER_PATH:
-        self.writeUserPage(split_path[1])
-      elif request_type == DataManagerHandler.COLLECTION_PATH:
-        self.writeCollectionPage(split_path[1])
-      else:
-        self.send_error(404, 'Unrecognized request path.')
-    return
-
-class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
-  """Handle requests in a separate thread."""
-
-
-if __name__ == '__main__':
-  args = parser.parse_args()
-
-  if args.port == 0:
-    loadAllData()
-  else:
-    loader = threading.Thread(target = loadAllData, name = 'loader')
-    loader.start()
-
-    server = ThreadedHTTPServer(('localhost', args.port), DataManagerHandler)
-    server.serve_forever()
diff --git a/services/datamanager/experimental/datamanager_test.py b/services/datamanager/experimental/datamanager_test.py
deleted file mode 100755 (executable)
index 0842c16..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-#! /usr/bin/env python
-
-import datamanager
-import unittest
-
-class TestComputeWeightedReplicationCosts(unittest.TestCase):
-  def test_obvious(self):
-    self.assertEqual(datamanager.computeWeightedReplicationCosts([1,]),
-                     {1:1.0})
-
-  def test_simple(self):
-    self.assertEqual(datamanager.computeWeightedReplicationCosts([2,]),
-                     {2:2.0})
-
-  def test_even_split(self):
-    self.assertEqual(datamanager.computeWeightedReplicationCosts([1,1]),
-                     {1:0.5})
-
-  def test_even_split_bigger(self):
-    self.assertEqual(datamanager.computeWeightedReplicationCosts([2,2]),
-                     {2:1.0})
-
-  def test_uneven_split(self):
-    self.assertEqual(datamanager.computeWeightedReplicationCosts([1,2]),
-                     {1:0.5, 2:1.5})
-
-  def test_uneven_split_bigger(self):
-    self.assertEqual(datamanager.computeWeightedReplicationCosts([1,3]),
-                     {1:0.5, 3:2.5})
-
-  def test_uneven_split_jumble(self):
-    self.assertEqual(datamanager.computeWeightedReplicationCosts([1,3,6,6,10]),
-                     {1:0.2, 3:0.7, 6:1.7, 10:5.7})
-
-  def test_documentation_example(self):
-    self.assertEqual(datamanager.computeWeightedReplicationCosts([1,1,3,6,6]),
-                     {1:0.2, 3: 0.2 + 2.0 / 3, 6: 0.2 + 2.0 / 3 + 1.5})
-
-
-if __name__ == '__main__':
-  unittest.main()
diff --git a/services/datamanager/keep/keep.go b/services/datamanager/keep/keep.go
deleted file mode 100644 (file)
index 39d2d5b..0000000
+++ /dev/null
@@ -1,551 +0,0 @@
-/* Deals with getting Keep Server blocks from API Server and Keep Servers. */
-
-package keep
-
-import (
-       "bufio"
-       "encoding/json"
-       "errors"
-       "flag"
-       "fmt"
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
-       "git.curoverse.com/arvados.git/sdk/go/logger"
-       "io"
-       "io/ioutil"
-       "log"
-       "net/http"
-       "strconv"
-       "strings"
-       "time"
-)
-
-// ServerAddress struct
-type ServerAddress struct {
-       SSL         bool   `json:"service_ssl_flag"`
-       Host        string `json:"service_host"`
-       Port        int    `json:"service_port"`
-       UUID        string `json:"uuid"`
-       ServiceType string `json:"service_type"`
-}
-
-// BlockInfo is info about a particular block returned by the server
-type BlockInfo struct {
-       Digest blockdigest.DigestWithSize
-       Mtime  int64 // TODO(misha): Replace this with a timestamp.
-}
-
-// BlockServerInfo is info about a specified block given by a server
-type BlockServerInfo struct {
-       ServerIndex int
-       Mtime       int64 // TODO(misha): Replace this with a timestamp.
-}
-
-// ServerContents struct
-type ServerContents struct {
-       BlockDigestToInfo map[blockdigest.DigestWithSize]BlockInfo
-}
-
-// ServerResponse struct
-type ServerResponse struct {
-       Address  ServerAddress
-       Contents ServerContents
-       Err      error
-}
-
-// ReadServers struct
-type ReadServers struct {
-       ReadAllServers           bool
-       KeepServerIndexToAddress []ServerAddress
-       KeepServerAddressToIndex map[ServerAddress]int
-       ServerToContents         map[ServerAddress]ServerContents
-       BlockToServers           map[blockdigest.DigestWithSize][]BlockServerInfo
-       BlockReplicationCounts   map[int]int
-}
-
-// GetKeepServersParams struct
-type GetKeepServersParams struct {
-       Client *arvadosclient.ArvadosClient
-       Logger *logger.Logger
-       Limit  int
-}
-
-// ServiceList consists of the addresses of all the available kee servers
-type ServiceList struct {
-       ItemsAvailable int             `json:"items_available"`
-       KeepServers    []ServerAddress `json:"items"`
-}
-
-var serviceType string
-
-func init() {
-       flag.StringVar(&serviceType,
-               "service-type",
-               "disk",
-               "Operate only on keep_services with the specified service_type, ignoring all others.")
-}
-
-// String
-// TODO(misha): Change this to include the UUID as well.
-func (s ServerAddress) String() string {
-       return s.URL()
-}
-
-// URL of the keep server
-func (s ServerAddress) URL() string {
-       if s.SSL {
-               return fmt.Sprintf("https://%s:%d", s.Host, s.Port)
-       }
-       return fmt.Sprintf("http://%s:%d", s.Host, s.Port)
-}
-
-// GetKeepServersAndSummarize gets keep servers from api
-func GetKeepServersAndSummarize(params GetKeepServersParams) (results ReadServers, err error) {
-       results, err = GetKeepServers(params)
-       if err != nil {
-               return
-       }
-       log.Printf("Returned %d keep disks", len(results.ServerToContents))
-
-       results.Summarize(params.Logger)
-       log.Printf("Replication level distribution: %v",
-               results.BlockReplicationCounts)
-
-       return
-}
-
-// GetKeepServers from api server
-func GetKeepServers(params GetKeepServersParams) (results ReadServers, err error) {
-       sdkParams := arvadosclient.Dict{
-               "filters": [][]string{{"service_type", "!=", "proxy"}},
-       }
-       if params.Limit > 0 {
-               sdkParams["limit"] = params.Limit
-       }
-
-       var sdkResponse ServiceList
-       err = params.Client.List("keep_services", sdkParams, &sdkResponse)
-
-       if err != nil {
-               return
-       }
-
-       var keepServers []ServerAddress
-       for _, server := range sdkResponse.KeepServers {
-               if server.ServiceType == serviceType {
-                       keepServers = append(keepServers, server)
-               } else {
-                       log.Printf("Skipping keep_service %q because its service_type %q does not match -service-type=%q", server, server.ServiceType, serviceType)
-               }
-       }
-
-       if len(keepServers) == 0 {
-               return results, fmt.Errorf("Found no keepservices with the service type %v", serviceType)
-       }
-
-       if params.Logger != nil {
-               params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       keepInfo := logger.GetOrCreateMap(p, "keep_info")
-                       keepInfo["num_keep_servers_available"] = sdkResponse.ItemsAvailable
-                       keepInfo["num_keep_servers_received"] = len(sdkResponse.KeepServers)
-                       keepInfo["keep_servers"] = sdkResponse.KeepServers
-                       keepInfo["indexable_keep_servers"] = keepServers
-               })
-       }
-
-       log.Printf("Received keep services list: %+v", sdkResponse)
-
-       if len(sdkResponse.KeepServers) < sdkResponse.ItemsAvailable {
-               return results, fmt.Errorf("Did not receive all available keep servers: %+v", sdkResponse)
-       }
-
-       results.KeepServerIndexToAddress = keepServers
-       results.KeepServerAddressToIndex = make(map[ServerAddress]int)
-       for i, address := range results.KeepServerIndexToAddress {
-               results.KeepServerAddressToIndex[address] = i
-       }
-
-       log.Printf("Got Server Addresses: %v", results)
-
-       // Send off all the index requests concurrently
-       responseChan := make(chan ServerResponse)
-       for _, keepServer := range results.KeepServerIndexToAddress {
-               // The above keepsServer variable is reused for each iteration, so
-               // it would be shared across all goroutines. This would result in
-               // us querying one server n times instead of n different servers
-               // as we intended. To avoid this we add it as an explicit
-               // parameter which gets copied. This bug and solution is described
-               // in https://golang.org/doc/effective_go.html#channels
-               go func(keepServer ServerAddress) {
-                       responseChan <- GetServerContents(params.Logger,
-                               keepServer,
-                               params.Client)
-               }(keepServer)
-       }
-
-       results.ServerToContents = make(map[ServerAddress]ServerContents)
-       results.BlockToServers = make(map[blockdigest.DigestWithSize][]BlockServerInfo)
-
-       // Read all the responses
-       for i := range results.KeepServerIndexToAddress {
-               _ = i // Here to prevent go from complaining.
-               response := <-responseChan
-
-               // Check if there were any errors during GetServerContents
-               if response.Err != nil {
-                       return results, response.Err
-               }
-
-               log.Printf("Received channel response from %v containing %d files",
-                       response.Address,
-                       len(response.Contents.BlockDigestToInfo))
-               results.ServerToContents[response.Address] = response.Contents
-               serverIndex := results.KeepServerAddressToIndex[response.Address]
-               for _, blockInfo := range response.Contents.BlockDigestToInfo {
-                       results.BlockToServers[blockInfo.Digest] = append(
-                               results.BlockToServers[blockInfo.Digest],
-                               BlockServerInfo{ServerIndex: serverIndex,
-                                       Mtime: blockInfo.Mtime})
-               }
-       }
-       return
-}
-
-// GetServerContents of the keep server
-func GetServerContents(arvLogger *logger.Logger,
-       keepServer ServerAddress,
-       arv *arvadosclient.ArvadosClient) (response ServerResponse) {
-
-       err := GetServerStatus(arvLogger, keepServer, arv)
-       if err != nil {
-               response.Err = err
-               return
-       }
-
-       req, err := CreateIndexRequest(arvLogger, keepServer, arv)
-       if err != nil {
-               response.Err = err
-               return
-       }
-
-       resp, err := arv.Client.Do(req)
-       if err != nil {
-               response.Err = err
-               return
-       }
-
-       response, err = ReadServerResponse(arvLogger, keepServer, resp)
-       if err != nil {
-               response.Err = err
-               return
-       }
-
-       return
-}
-
-// GetServerStatus get keep server status by invoking /status.json
-func GetServerStatus(arvLogger *logger.Logger,
-       keepServer ServerAddress,
-       arv *arvadosclient.ArvadosClient) error {
-       url := fmt.Sprintf("http://%s:%d/status.json",
-               keepServer.Host,
-               keepServer.Port)
-
-       if arvLogger != nil {
-               now := time.Now()
-               arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       keepInfo := logger.GetOrCreateMap(p, "keep_info")
-                       serverInfo := make(map[string]interface{})
-                       serverInfo["status_request_sent_at"] = now
-                       serverInfo["host"] = keepServer.Host
-                       serverInfo["port"] = keepServer.Port
-
-                       keepInfo[keepServer.UUID] = serverInfo
-               })
-       }
-
-       resp, err := arv.Client.Get(url)
-       if err != nil {
-               return fmt.Errorf("Error getting keep status from %s: %v", url, err)
-       } else if resp.StatusCode != 200 {
-               return fmt.Errorf("Received error code %d in response to request "+
-                       "for %s status: %s",
-                       resp.StatusCode, url, resp.Status)
-       }
-
-       var keepStatus map[string]interface{}
-       decoder := json.NewDecoder(resp.Body)
-       decoder.UseNumber()
-       err = decoder.Decode(&keepStatus)
-       if err != nil {
-               return fmt.Errorf("Error decoding keep status from %s: %v", url, err)
-       }
-
-       if arvLogger != nil {
-               now := time.Now()
-               arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       keepInfo := logger.GetOrCreateMap(p, "keep_info")
-                       serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
-                       serverInfo["status_response_processed_at"] = now
-                       serverInfo["status"] = keepStatus
-               })
-       }
-
-       return nil
-}
-
-// CreateIndexRequest to the keep server
-func CreateIndexRequest(arvLogger *logger.Logger,
-       keepServer ServerAddress,
-       arv *arvadosclient.ArvadosClient) (req *http.Request, err error) {
-       url := fmt.Sprintf("http://%s:%d/index", keepServer.Host, keepServer.Port)
-       log.Println("About to fetch keep server contents from " + url)
-
-       if arvLogger != nil {
-               now := time.Now()
-               arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       keepInfo := logger.GetOrCreateMap(p, "keep_info")
-                       serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
-                       serverInfo["index_request_sent_at"] = now
-               })
-       }
-
-       req, err = http.NewRequest("GET", url, nil)
-       if err != nil {
-               return req, fmt.Errorf("Error building http request for %s: %v", url, err)
-       }
-
-       req.Header.Add("Authorization", "OAuth2 "+arv.ApiToken)
-       return req, err
-}
-
-// ReadServerResponse reads reasponse from keep server
-func ReadServerResponse(arvLogger *logger.Logger,
-       keepServer ServerAddress,
-       resp *http.Response) (response ServerResponse, err error) {
-
-       if resp.StatusCode != 200 {
-               return response, fmt.Errorf("Received error code %d in response to index request for %s: %s",
-                       resp.StatusCode, keepServer.String(), resp.Status)
-       }
-
-       if arvLogger != nil {
-               now := time.Now()
-               arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       keepInfo := logger.GetOrCreateMap(p, "keep_info")
-                       serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
-                       serverInfo["index_response_received_at"] = now
-               })
-       }
-
-       response.Address = keepServer
-       response.Contents.BlockDigestToInfo =
-               make(map[blockdigest.DigestWithSize]BlockInfo)
-       reader := bufio.NewReader(resp.Body)
-       numLines, numDuplicates, numSizeDisagreements := 0, 0, 0
-       for {
-               numLines++
-               line, err := reader.ReadString('\n')
-               if err == io.EOF {
-                       return response, fmt.Errorf("Index from %s truncated at line %d",
-                               keepServer.String(), numLines)
-               } else if err != nil {
-                       return response, fmt.Errorf("Error reading index response from %s at line %d: %v",
-                               keepServer.String(), numLines, err)
-               }
-               if line == "\n" {
-                       if _, err := reader.Peek(1); err == nil {
-                               extra, _ := reader.ReadString('\n')
-                               return response, fmt.Errorf("Index from %s had trailing data at line %d after EOF marker: %s",
-                                       keepServer.String(), numLines+1, extra)
-                       } else if err != io.EOF {
-                               return response, fmt.Errorf("Index from %s had read error after EOF marker at line %d: %v",
-                                       keepServer.String(), numLines, err)
-                       }
-                       numLines--
-                       break
-               }
-               blockInfo, err := parseBlockInfoFromIndexLine(line)
-               if err != nil {
-                       return response, fmt.Errorf("Error parsing BlockInfo from index line "+
-                               "received from %s: %v",
-                               keepServer.String(),
-                               err)
-               }
-
-               if storedBlock, ok := response.Contents.BlockDigestToInfo[blockInfo.Digest]; ok {
-                       // This server returned multiple lines containing the same block digest.
-                       numDuplicates++
-                       // Keep the block that's newer.
-                       if storedBlock.Mtime < blockInfo.Mtime {
-                               response.Contents.BlockDigestToInfo[blockInfo.Digest] = blockInfo
-                       }
-               } else {
-                       response.Contents.BlockDigestToInfo[blockInfo.Digest] = blockInfo
-               }
-       }
-
-       log.Printf("%s index contained %d lines with %d duplicates with "+
-               "%d size disagreements",
-               keepServer.String(),
-               numLines,
-               numDuplicates,
-               numSizeDisagreements)
-
-       if arvLogger != nil {
-               now := time.Now()
-               arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       keepInfo := logger.GetOrCreateMap(p, "keep_info")
-                       serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
-
-                       serverInfo["processing_finished_at"] = now
-                       serverInfo["lines_received"] = numLines
-                       serverInfo["duplicates_seen"] = numDuplicates
-                       serverInfo["size_disagreements_seen"] = numSizeDisagreements
-               })
-       }
-       resp.Body.Close()
-       return
-}
-
-func parseBlockInfoFromIndexLine(indexLine string) (blockInfo BlockInfo, err error) {
-       tokens := strings.Fields(indexLine)
-       if len(tokens) != 2 {
-               err = fmt.Errorf("Expected 2 tokens per line but received a "+
-                       "line containing %#q instead.",
-                       tokens)
-       }
-
-       var locator blockdigest.BlockLocator
-       if locator, err = blockdigest.ParseBlockLocator(tokens[0]); err != nil {
-               err = fmt.Errorf("%v Received error while parsing line \"%#q\"",
-                       err, indexLine)
-               return
-       }
-       if len(locator.Hints) > 0 {
-               err = fmt.Errorf("Block locator in index line should not contain hints "+
-                       "but it does: %#q",
-                       locator)
-               return
-       }
-
-       var ns int64
-       ns, err = strconv.ParseInt(tokens[1], 10, 64)
-       if err != nil {
-               return
-       }
-       if ns < 1e12 {
-               // An old version of keepstore is giving us timestamps
-               // in seconds instead of nanoseconds. (This threshold
-               // correctly handles all times between 1970-01-02 and
-               // 33658-09-27.)
-               ns = ns * 1e9
-       }
-       blockInfo.Mtime = ns
-       blockInfo.Digest = blockdigest.DigestWithSize{
-               Digest: locator.Digest,
-               Size:   uint32(locator.Size),
-       }
-       return
-}
-
-// Summarize results from keep server
-func (readServers *ReadServers) Summarize(arvLogger *logger.Logger) {
-       readServers.BlockReplicationCounts = make(map[int]int)
-       for _, infos := range readServers.BlockToServers {
-               replication := len(infos)
-               readServers.BlockReplicationCounts[replication]++
-       }
-
-       if arvLogger != nil {
-               arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       keepInfo := logger.GetOrCreateMap(p, "keep_info")
-                       keepInfo["distinct_blocks_stored"] = len(readServers.BlockToServers)
-               })
-       }
-}
-
-// TrashRequest struct
-type TrashRequest struct {
-       Locator    string `json:"locator"`
-       BlockMtime int64  `json:"block_mtime"`
-}
-
-// TrashList is an array of TrashRequest objects
-type TrashList []TrashRequest
-
-// SendTrashLists to trash queue
-func SendTrashLists(arvLogger *logger.Logger, kc *keepclient.KeepClient, spl map[string]TrashList, dryRun bool) (errs []error) {
-       count := 0
-       barrier := make(chan error)
-
-       client := kc.Client
-
-       for url, v := range spl {
-               if arvLogger != nil {
-                       // We need a local variable because Update doesn't call our mutator func until later,
-                       // when our list variable might have been reused by the next loop iteration.
-                       url := url
-                       trashLen := len(v)
-                       arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                               trashListInfo := logger.GetOrCreateMap(p, "trash_list_len")
-                               trashListInfo[url] = trashLen
-                       })
-               }
-
-               if dryRun {
-                       log.Printf("dry run, not sending trash list to service %s with %d blocks", url, len(v))
-                       continue
-               }
-
-               count++
-               log.Printf("Sending trash list to %v", url)
-
-               go (func(url string, v TrashList) {
-                       pipeReader, pipeWriter := io.Pipe()
-                       go (func() {
-                               enc := json.NewEncoder(pipeWriter)
-                               enc.Encode(v)
-                               pipeWriter.Close()
-                       })()
-
-                       req, err := http.NewRequest("PUT", fmt.Sprintf("%s/trash", url), pipeReader)
-                       if err != nil {
-                               log.Printf("Error creating trash list request for %v error: %v", url, err.Error())
-                               barrier <- err
-                               return
-                       }
-
-                       req.Header.Add("Authorization", "OAuth2 "+kc.Arvados.ApiToken)
-
-                       // Make the request
-                       var resp *http.Response
-                       if resp, err = client.Do(req); err != nil {
-                               log.Printf("Error sending trash list to %v error: %v", url, err.Error())
-                               barrier <- err
-                               return
-                       }
-
-                       log.Printf("Sent trash list to %v: response was HTTP %v", url, resp.Status)
-
-                       io.Copy(ioutil.Discard, resp.Body)
-                       resp.Body.Close()
-
-                       if resp.StatusCode != 200 {
-                               barrier <- errors.New(fmt.Sprintf("Got HTTP code %v", resp.StatusCode))
-                       } else {
-                               barrier <- nil
-                       }
-               })(url, v)
-       }
-
-       for i := 0; i < count; i++ {
-               b := <-barrier
-               if b != nil {
-                       errs = append(errs, b)
-               }
-       }
-
-       return errs
-}
diff --git a/services/datamanager/keep/keep_test.go b/services/datamanager/keep/keep_test.go
deleted file mode 100644 (file)
index ca8797e..0000000
+++ /dev/null
@@ -1,278 +0,0 @@
-package keep
-
-import (
-       "encoding/json"
-       "fmt"
-       "net"
-       "net/http"
-       "net/http/httptest"
-       "net/url"
-       "strconv"
-       "strings"
-       "testing"
-
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
-       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
-
-       . "gopkg.in/check.v1"
-)
-
-// Gocheck boilerplate
-func Test(t *testing.T) {
-       TestingT(t)
-}
-
-type KeepSuite struct{}
-
-var _ = Suite(&KeepSuite{})
-
-type TestHandler struct {
-       request TrashList
-}
-
-func (ts *TestHandler) ServeHTTP(writer http.ResponseWriter, req *http.Request) {
-       r := json.NewDecoder(req.Body)
-       r.Decode(&ts.request)
-}
-
-func (s *KeepSuite) TestSendTrashLists(c *C) {
-       th := TestHandler{}
-       server := httptest.NewServer(&th)
-       defer server.Close()
-
-       tl := map[string]TrashList{
-               server.URL: {TrashRequest{"000000000000000000000000deadbeef", 99}}}
-
-       arv := &arvadosclient.ArvadosClient{ApiToken: "abc123"}
-       kc := keepclient.KeepClient{Arvados: arv, Client: &http.Client{}}
-       kc.SetServiceRoots(map[string]string{"xxxx": server.URL},
-               map[string]string{"xxxx": server.URL},
-               map[string]string{})
-
-       err := SendTrashLists(nil, &kc, tl, false)
-
-       c.Check(err, IsNil)
-
-       c.Check(th.request,
-               DeepEquals,
-               tl[server.URL])
-
-}
-
-type TestHandlerError struct {
-}
-
-func (tse *TestHandlerError) ServeHTTP(writer http.ResponseWriter, req *http.Request) {
-       http.Error(writer, "I'm a teapot", 418)
-}
-
-func sendTrashListError(c *C, server *httptest.Server) {
-       tl := map[string]TrashList{
-               server.URL: {TrashRequest{"000000000000000000000000deadbeef", 99}}}
-
-       arv := &arvadosclient.ArvadosClient{ApiToken: "abc123"}
-       kc := keepclient.KeepClient{Arvados: arv, Client: &http.Client{}}
-       kc.SetServiceRoots(map[string]string{"xxxx": server.URL},
-               map[string]string{"xxxx": server.URL},
-               map[string]string{})
-
-       err := SendTrashLists(nil, &kc, tl, false)
-
-       c.Check(err, NotNil)
-       c.Check(err[0], NotNil)
-}
-
-func (s *KeepSuite) TestSendTrashListErrorResponse(c *C) {
-       server := httptest.NewServer(&TestHandlerError{})
-       sendTrashListError(c, server)
-       defer server.Close()
-}
-
-func (s *KeepSuite) TestSendTrashListUnreachable(c *C) {
-       sendTrashListError(c, httptest.NewUnstartedServer(&TestHandler{}))
-}
-
-type APITestData struct {
-       numServers int
-       serverType string
-       statusCode int
-}
-
-func (s *KeepSuite) TestGetKeepServers_UnsupportedServiceType(c *C) {
-       testGetKeepServersFromAPI(c, APITestData{1, "notadisk", 200}, "Found no keepservices with the service type disk")
-}
-
-func (s *KeepSuite) TestGetKeepServers_ReceivedTooFewServers(c *C) {
-       testGetKeepServersFromAPI(c, APITestData{2, "disk", 200}, "Did not receive all available keep servers")
-}
-
-func (s *KeepSuite) TestGetKeepServers_ServerError(c *C) {
-       testGetKeepServersFromAPI(c, APITestData{-1, "disk", -1}, "arvados API server error")
-}
-
-func testGetKeepServersFromAPI(c *C, testData APITestData, expectedError string) {
-       keepServers := ServiceList{
-               ItemsAvailable: testData.numServers,
-               KeepServers: []ServerAddress{{
-                       SSL:         false,
-                       Host:        "example.com",
-                       Port:        12345,
-                       UUID:        "abcdefg",
-                       ServiceType: testData.serverType,
-               }},
-       }
-
-       ksJSON, _ := json.Marshal(keepServers)
-       apiStubResponses := make(map[string]arvadostest.StubResponse)
-       apiStubResponses["/arvados/v1/keep_services"] = arvadostest.StubResponse{testData.statusCode, string(ksJSON)}
-       apiStub := arvadostest.ServerStub{apiStubResponses}
-
-       api := httptest.NewServer(&apiStub)
-       defer api.Close()
-
-       arv := &arvadosclient.ArvadosClient{
-               Scheme:    "http",
-               ApiServer: api.URL[7:],
-               ApiToken:  "abc123",
-               Client:    &http.Client{Transport: &http.Transport{}},
-       }
-
-       kc := keepclient.KeepClient{Arvados: arv, Client: &http.Client{}}
-       kc.SetServiceRoots(map[string]string{"xxxx": "http://example.com:23456"},
-               map[string]string{"xxxx": "http://example.com:23456"},
-               map[string]string{})
-
-       params := GetKeepServersParams{
-               Client: arv,
-               Logger: nil,
-               Limit:  10,
-       }
-
-       _, err := GetKeepServersAndSummarize(params)
-       c.Assert(err, NotNil)
-       c.Assert(err, ErrorMatches, fmt.Sprintf(".*%s.*", expectedError))
-}
-
-type KeepServerTestData struct {
-       // handle /status.json
-       statusStatusCode int
-
-       // handle /index
-       indexStatusCode   int
-       indexResponseBody string
-
-       // expected error, if any
-       expectedError string
-}
-
-func (s *KeepSuite) TestGetKeepServers_ErrorGettingKeepServerStatus(c *C) {
-       testGetKeepServersAndSummarize(c, KeepServerTestData{500, 200, "ok",
-               ".*http://.* 500 Internal Server Error"})
-}
-
-func (s *KeepSuite) TestGetKeepServers_GettingIndex(c *C) {
-       testGetKeepServersAndSummarize(c, KeepServerTestData{200, -1, "notok",
-               ".*redirect-loop.*"})
-}
-
-func (s *KeepSuite) TestGetKeepServers_ErrorReadServerResponse(c *C) {
-       testGetKeepServersAndSummarize(c, KeepServerTestData{200, 500, "notok",
-               ".*http://.* 500 Internal Server Error"})
-}
-
-func (s *KeepSuite) TestGetKeepServers_ReadServerResponseTuncatedAtLineOne(c *C) {
-       testGetKeepServersAndSummarize(c, KeepServerTestData{200, 200,
-               "notterminatedwithnewline", "Index from http://.* truncated at line 1"})
-}
-
-func (s *KeepSuite) TestGetKeepServers_InvalidBlockLocatorPattern(c *C) {
-       testGetKeepServersAndSummarize(c, KeepServerTestData{200, 200, "testing\n",
-               "Error parsing BlockInfo from index line.*"})
-}
-
-func (s *KeepSuite) TestGetKeepServers_ReadServerResponseEmpty(c *C) {
-       testGetKeepServersAndSummarize(c, KeepServerTestData{200, 200, "\n", ""})
-}
-
-func (s *KeepSuite) TestGetKeepServers_ReadServerResponseWithTwoBlocks(c *C) {
-       testGetKeepServersAndSummarize(c, KeepServerTestData{200, 200,
-               "51752ba076e461ec9ec1d27400a08548+20 1447526361\na048cc05c02ba1ee43ad071274b9e547+52 1447526362\n\n", ""})
-}
-
-func testGetKeepServersAndSummarize(c *C, testData KeepServerTestData) {
-       ksStubResponses := make(map[string]arvadostest.StubResponse)
-       ksStubResponses["/status.json"] = arvadostest.StubResponse{testData.statusStatusCode, string(`{}`)}
-       ksStubResponses["/index"] = arvadostest.StubResponse{testData.indexStatusCode, testData.indexResponseBody}
-       ksStub := arvadostest.ServerStub{ksStubResponses}
-       ks := httptest.NewServer(&ksStub)
-       defer ks.Close()
-
-       ksURL, err := url.Parse(ks.URL)
-       c.Check(err, IsNil)
-       ksHost, port, err := net.SplitHostPort(ksURL.Host)
-       ksPort, err := strconv.Atoi(port)
-       c.Check(err, IsNil)
-
-       servers_list := ServiceList{
-               ItemsAvailable: 1,
-               KeepServers: []ServerAddress{{
-                       SSL:         false,
-                       Host:        ksHost,
-                       Port:        ksPort,
-                       UUID:        "abcdefg",
-                       ServiceType: "disk",
-               }},
-       }
-       ksJSON, _ := json.Marshal(servers_list)
-       apiStubResponses := make(map[string]arvadostest.StubResponse)
-       apiStubResponses["/arvados/v1/keep_services"] = arvadostest.StubResponse{200, string(ksJSON)}
-       apiStub := arvadostest.ServerStub{apiStubResponses}
-
-       api := httptest.NewServer(&apiStub)
-       defer api.Close()
-
-       arv := &arvadosclient.ArvadosClient{
-               Scheme:    "http",
-               ApiServer: api.URL[7:],
-               ApiToken:  "abc123",
-               Client:    &http.Client{Transport: &http.Transport{}},
-       }
-
-       kc := keepclient.KeepClient{Arvados: arv, Client: &http.Client{}}
-       kc.SetServiceRoots(map[string]string{"xxxx": ks.URL},
-               map[string]string{"xxxx": ks.URL},
-               map[string]string{})
-
-       params := GetKeepServersParams{
-               Client: arv,
-               Logger: nil,
-               Limit:  10,
-       }
-
-       // GetKeepServersAndSummarize
-       results, err := GetKeepServersAndSummarize(params)
-
-       if testData.expectedError == "" {
-               c.Assert(err, IsNil)
-               c.Assert(results, NotNil)
-
-               blockToServers := results.BlockToServers
-
-               blockLocators := strings.Split(testData.indexResponseBody, "\n")
-               for _, loc := range blockLocators {
-                       locator := strings.Split(loc, " ")[0]
-                       if locator != "" {
-                               blockLocator, err := blockdigest.ParseBlockLocator(locator)
-                               c.Assert(err, IsNil)
-
-                               blockDigestWithSize := blockdigest.DigestWithSize{blockLocator.Digest, uint32(blockLocator.Size)}
-                               blockServerInfo := blockToServers[blockDigestWithSize]
-                               c.Assert(blockServerInfo[0].Mtime, NotNil)
-                       }
-               }
-       } else {
-               c.Assert(err, ErrorMatches, testData.expectedError)
-       }
-}
diff --git a/services/datamanager/loggerutil/loggerutil.go b/services/datamanager/loggerutil/loggerutil.go
deleted file mode 100644 (file)
index 8111425..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-/* Datamanager-specific logging methods. */
-
-package loggerutil
-
-import (
-       "git.curoverse.com/arvados.git/sdk/go/logger"
-       "log"
-       "os"
-       "runtime"
-       "time"
-)
-
-// Useful to call at the beginning of execution to log info about the
-// current run.
-func LogRunInfo(arvLogger *logger.Logger) {
-       if arvLogger != nil {
-               now := time.Now()
-               arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       runInfo := logger.GetOrCreateMap(p, "run_info")
-                       runInfo["started_at"] = now
-                       runInfo["args"] = os.Args
-                       hostname, err := os.Hostname()
-                       if err != nil {
-                               runInfo["hostname_error"] = err.Error()
-                       } else {
-                               runInfo["hostname"] = hostname
-                       }
-                       runInfo["pid"] = os.Getpid()
-               })
-       }
-}
-
-// A LogMutator that records the current memory usage. This is most useful as a logger write hook.
-func LogMemoryAlloc(p map[string]interface{}, e map[string]interface{}) {
-       runInfo := logger.GetOrCreateMap(p, "run_info")
-       var memStats runtime.MemStats
-       runtime.ReadMemStats(&memStats)
-       runInfo["memory_bytes_in_use"] = memStats.Alloc
-       runInfo["memory_bytes_reserved"] = memStats.Sys
-}
-
-func FatalWithMessage(arvLogger *logger.Logger, message string) {
-       if arvLogger != nil {
-               arvLogger.FinalUpdate(func(p map[string]interface{}, e map[string]interface{}) {
-                       p["FATAL"] = message
-                       runInfo := logger.GetOrCreateMap(p, "run_info")
-                       runInfo["finished_at"] = time.Now()
-               })
-       }
-
-       log.Fatalf(message)
-}
diff --git a/services/datamanager/summary/canonical_string.go b/services/datamanager/summary/canonical_string.go
deleted file mode 100644 (file)
index 152314c..0000000
+++ /dev/null
@@ -1,30 +0,0 @@
-/* Ensures that we only have one copy of each unique string. This is
-/* not designed for concurrent access. */
-
-package summary
-
-// This code should probably be moved somewhere more universal.
-
-// CanonicalString struct
-type CanonicalString struct {
-       m map[string]string
-}
-
-// Get a CanonicalString
-func (cs *CanonicalString) Get(s string) (r string) {
-       if cs.m == nil {
-               cs.m = make(map[string]string)
-       }
-       value, found := cs.m[s]
-       if found {
-               return value
-       }
-
-       // s may be a substring of a much larger string.
-       // If we store s, it will prevent that larger string from getting
-       // garbage collected.
-       // If this is something you worry about you should change this code
-       // to make an explict copy of s using a byte array.
-       cs.m[s] = s
-       return s
-}
diff --git a/services/datamanager/summary/file.go b/services/datamanager/summary/file.go
deleted file mode 100644 (file)
index 6e463d7..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-// Handles writing data to and reading data from disk to speed up development.
-
-package summary
-
-import (
-       "encoding/gob"
-       "flag"
-       "fmt"
-       "git.curoverse.com/arvados.git/sdk/go/logger"
-       "git.curoverse.com/arvados.git/services/datamanager/collection"
-       "git.curoverse.com/arvados.git/services/datamanager/keep"
-       "log"
-       "os"
-)
-
-// Used to locally cache data read from servers to reduce execution
-// time when developing. Not for use in production.
-type serializedData struct {
-       ReadCollections collection.ReadCollections
-       KeepServerInfo  keep.ReadServers
-}
-
-var (
-       WriteDataTo  string
-       readDataFrom string
-)
-
-// DataFetcher to fetch data from keep servers
-type DataFetcher func(arvLogger *logger.Logger,
-       readCollections *collection.ReadCollections,
-       keepServerInfo *keep.ReadServers) error
-
-func init() {
-       flag.StringVar(&WriteDataTo,
-               "write-data-to",
-               "",
-               "Write summary of data received to this file. Used for development only.")
-       flag.StringVar(&readDataFrom,
-               "read-data-from",
-               "",
-               "Avoid network i/o and read summary data from this file instead. Used for development only.")
-}
-
-// MaybeWriteData writes data we've read to a file.
-//
-// This is useful for development, so that we don't need to read all
-// our data from the network every time we tweak something.
-//
-// This should not be used outside of development, since you'll be
-// working with stale data.
-func MaybeWriteData(arvLogger *logger.Logger,
-       readCollections collection.ReadCollections,
-       keepServerInfo keep.ReadServers) error {
-       if WriteDataTo == "" {
-               return nil
-       }
-       summaryFile, err := os.Create(WriteDataTo)
-       if err != nil {
-               return err
-       }
-       defer summaryFile.Close()
-
-       enc := gob.NewEncoder(summaryFile)
-       data := serializedData{
-               ReadCollections: readCollections,
-               KeepServerInfo:  keepServerInfo}
-       err = enc.Encode(data)
-       if err != nil {
-               return err
-       }
-       log.Printf("Wrote summary data to: %s", WriteDataTo)
-       return nil
-}
-
-// ShouldReadData should not be used outside of development
-func ShouldReadData() bool {
-       return readDataFrom != ""
-}
-
-// ReadData reads data that we've written to a file.
-//
-// This is useful for development, so that we don't need to read all
-// our data from the network every time we tweak something.
-//
-// This should not be used outside of development, since you'll be
-// working with stale data.
-func ReadData(arvLogger *logger.Logger,
-       readCollections *collection.ReadCollections,
-       keepServerInfo *keep.ReadServers) error {
-       if readDataFrom == "" {
-               return fmt.Errorf("ReadData() called with empty filename.")
-       }
-       summaryFile, err := os.Open(readDataFrom)
-       if err != nil {
-               return err
-       }
-       defer summaryFile.Close()
-
-       dec := gob.NewDecoder(summaryFile)
-       data := serializedData{}
-       err = dec.Decode(&data)
-       if err != nil {
-               return err
-       }
-
-       // re-summarize data, so that we can update our summarizing
-       // functions without needing to do all our network i/o
-       data.ReadCollections.Summarize(arvLogger)
-       data.KeepServerInfo.Summarize(arvLogger)
-
-       *readCollections = data.ReadCollections
-       *keepServerInfo = data.KeepServerInfo
-       log.Printf("Read summary data from: %s", readDataFrom)
-       return nil
-}
diff --git a/services/datamanager/summary/pull_list.go b/services/datamanager/summary/pull_list.go
deleted file mode 100644 (file)
index d7fb3eb..0000000
+++ /dev/null
@@ -1,215 +0,0 @@
-// Code for generating pull lists as described in https://arvados.org/projects/arvados/wiki/Keep_Design_Doc#Pull-List
-
-package summary
-
-import (
-       "encoding/json"
-       "fmt"
-       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
-       "git.curoverse.com/arvados.git/sdk/go/logger"
-       "git.curoverse.com/arvados.git/services/datamanager/keep"
-       "log"
-       "os"
-       "strings"
-)
-
-// Locator is a block digest
-type Locator blockdigest.DigestWithSize
-
-// MarshalJSON encoding
-func (l Locator) MarshalJSON() ([]byte, error) {
-       return []byte("\"" + blockdigest.DigestWithSize(l).String() + "\""), nil
-}
-
-// PullRequest represents one entry in the Pull List
-type PullRequest struct {
-       Locator Locator  `json:"locator"`
-       Servers []string `json:"servers"`
-}
-
-// PullList for a particular server
-type PullList []PullRequest
-
-// PullListByLocator implements sort.Interface for PullList based on
-// the Digest.
-type PullListByLocator PullList
-
-func (a PullListByLocator) Len() int      { return len(a) }
-func (a PullListByLocator) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
-func (a PullListByLocator) Less(i, j int) bool {
-       di, dj := a[i].Locator.Digest, a[j].Locator.Digest
-       if di.H < dj.H {
-               return true
-       } else if di.H == dj.H {
-               if di.L < dj.L {
-                       return true
-               } else if di.L == dj.L {
-                       return a[i].Locator.Size < a[j].Locator.Size
-               }
-       }
-       return false
-}
-
-// PullServers struct
-// For a given under-replicated block, this structure represents which
-// servers should pull the specified block and which servers they can
-// pull it from.
-type PullServers struct {
-       To   []string // Servers that should pull the specified block
-       From []string // Servers that already contain the specified block
-}
-
-// ComputePullServers creates a map from block locator to PullServers
-// with one entry for each under-replicated block.
-//
-// This method ignores zero-replica blocks since there are no servers
-// to pull them from, so callers should feel free to omit them, but
-// this function will ignore them if they are provided.
-func ComputePullServers(kc *keepclient.KeepClient,
-       keepServerInfo *keep.ReadServers,
-       blockToDesiredReplication map[blockdigest.DigestWithSize]int,
-       underReplicated BlockSet) (m map[Locator]PullServers) {
-       m = map[Locator]PullServers{}
-       // We use CanonicalString to avoid filling memory with duplicate
-       // copies of the same string.
-       var cs CanonicalString
-
-       // Servers that are writeable
-       writableServers := map[string]struct{}{}
-       for _, url := range kc.WritableLocalRoots() {
-               writableServers[cs.Get(url)] = struct{}{}
-       }
-
-       for block := range underReplicated {
-               serversStoringBlock := keepServerInfo.BlockToServers[block]
-               numCopies := len(serversStoringBlock)
-               numCopiesMissing := blockToDesiredReplication[block] - numCopies
-               if numCopiesMissing > 0 {
-                       // We expect this to always be true, since the block was listed
-                       // in underReplicated.
-
-                       if numCopies > 0 {
-                               // Not much we can do with blocks with no copies.
-
-                               // A server's host-port string appears as a key in this map
-                               // iff it contains the block.
-                               serverHasBlock := map[string]struct{}{}
-                               for _, info := range serversStoringBlock {
-                                       sa := keepServerInfo.KeepServerIndexToAddress[info.ServerIndex]
-                                       serverHasBlock[cs.Get(sa.URL())] = struct{}{}
-                               }
-
-                               roots := keepclient.NewRootSorter(kc.LocalRoots(),
-                                       block.String()).GetSortedRoots()
-
-                               l := Locator(block)
-                               m[l] = CreatePullServers(cs, serverHasBlock, writableServers,
-                                       roots, numCopiesMissing)
-                       }
-               }
-       }
-       return m
-}
-
-// CreatePullServers creates a pull list in which the To and From
-// fields preserve the ordering of sorted servers and the contents
-// are all canonical strings.
-func CreatePullServers(cs CanonicalString,
-       serverHasBlock map[string]struct{},
-       writableServers map[string]struct{},
-       sortedServers []string,
-       maxToFields int) (ps PullServers) {
-
-       ps = PullServers{
-               To:   make([]string, 0, maxToFields),
-               From: make([]string, 0, len(serverHasBlock)),
-       }
-
-       for _, host := range sortedServers {
-               // Strip the protocol portion of the url.
-               // Use the canonical copy of the string to avoid memory waste.
-               server := cs.Get(host)
-               _, hasBlock := serverHasBlock[server]
-               if hasBlock {
-                       // The from field should include the protocol.
-                       ps.From = append(ps.From, cs.Get(host))
-               } else if len(ps.To) < maxToFields {
-                       _, writable := writableServers[host]
-                       if writable {
-                               ps.To = append(ps.To, server)
-                       }
-               }
-       }
-
-       return
-}
-
-// RemoveProtocolPrefix strips the protocol prefix from a url.
-func RemoveProtocolPrefix(url string) string {
-       return url[(strings.LastIndex(url, "/") + 1):]
-}
-
-// BuildPullLists produces a PullList for each keep server.
-func BuildPullLists(lps map[Locator]PullServers) (spl map[string]PullList) {
-       spl = map[string]PullList{}
-       // We don't worry about canonicalizing our strings here, because we
-       // assume lps was created by ComputePullServers() which already
-       // canonicalized the strings for us.
-       for locator, pullServers := range lps {
-               for _, destination := range pullServers.To {
-                       pullList, pullListExists := spl[destination]
-                       if !pullListExists {
-                               pullList = PullList{}
-                       }
-                       spl[destination] = append(pullList,
-                               PullRequest{Locator: locator, Servers: pullServers.From})
-               }
-       }
-       return
-}
-
-// WritePullLists writes each pull list to a file.
-// The filename is based on the hostname.
-//
-// This is just a hack for prototyping, it is not expected to be used
-// in production.
-func WritePullLists(arvLogger *logger.Logger,
-       pullLists map[string]PullList,
-       dryRun bool) error {
-       r := strings.NewReplacer(":", ".")
-
-       for host, list := range pullLists {
-               if arvLogger != nil {
-                       // We need a local variable because Update doesn't call our mutator func until later,
-                       // when our list variable might have been reused by the next loop iteration.
-                       host := host
-                       listLen := len(list)
-                       arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                               pullListInfo := logger.GetOrCreateMap(p, "pull_list_len")
-                               pullListInfo[host] = listLen
-                       })
-               }
-
-               if dryRun {
-                       log.Print("dry run, not sending pull list to service %s with %d blocks", host, len(list))
-                       continue
-               }
-
-               filename := fmt.Sprintf("pull_list.%s", r.Replace(RemoveProtocolPrefix(host)))
-               pullListFile, err := os.Create(filename)
-               if err != nil {
-                       return err
-               }
-               defer pullListFile.Close()
-
-               enc := json.NewEncoder(pullListFile)
-               err = enc.Encode(list)
-               if err != nil {
-                       return err
-               }
-               log.Printf("Wrote pull list to %s.", filename)
-       }
-
-       return nil
-}
diff --git a/services/datamanager/summary/pull_list_test.go b/services/datamanager/summary/pull_list_test.go
deleted file mode 100644 (file)
index 60b495c..0000000
+++ /dev/null
@@ -1,272 +0,0 @@
-package summary
-
-import (
-       "encoding/json"
-       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
-       . "gopkg.in/check.v1"
-       "sort"
-       "testing"
-)
-
-// Gocheck boilerplate
-func TestPullLists(t *testing.T) {
-       TestingT(t)
-}
-
-type PullSuite struct{}
-
-var _ = Suite(&PullSuite{})
-
-// Helper method to declare string sets more succinctly
-// Could be placed somewhere more general.
-func stringSet(slice ...string) (m map[string]struct{}) {
-       m = map[string]struct{}{}
-       for _, s := range slice {
-               m[s] = struct{}{}
-       }
-       return
-}
-
-func (s *PullSuite) TestPullListPrintsJSONCorrectly(c *C) {
-       pl := PullList{PullRequest{
-               Locator: Locator(blockdigest.MakeTestDigestSpecifySize(0xBadBeef, 56789)),
-               Servers: []string{"keep0.qr1hi.arvadosapi.com:25107",
-                       "keep1.qr1hi.arvadosapi.com:25108"}}}
-
-       b, err := json.Marshal(pl)
-       c.Assert(err, IsNil)
-       expectedOutput := `[{"locator":"0000000000000000000000000badbeef+56789",` +
-               `"servers":["keep0.qr1hi.arvadosapi.com:25107",` +
-               `"keep1.qr1hi.arvadosapi.com:25108"]}]`
-       c.Check(string(b), Equals, expectedOutput)
-}
-
-func (s *PullSuite) TestCreatePullServers(c *C) {
-       var cs CanonicalString
-       c.Check(
-               CreatePullServers(cs,
-                       stringSet(),
-                       stringSet(),
-                       []string{},
-                       5),
-               DeepEquals,
-               PullServers{To: []string{}, From: []string{}})
-
-       c.Check(
-               CreatePullServers(cs,
-                       stringSet("https://keep0:25107", "https://keep1:25108"),
-                       stringSet(),
-                       []string{},
-                       5),
-               DeepEquals,
-               PullServers{To: []string{}, From: []string{}})
-
-       c.Check(
-               CreatePullServers(cs,
-                       stringSet("https://keep0:25107", "https://keep1:25108"),
-                       stringSet("https://keep0:25107"),
-                       []string{"https://keep0:25107"},
-                       5),
-               DeepEquals,
-               PullServers{To: []string{}, From: []string{"https://keep0:25107"}})
-
-       c.Check(
-               CreatePullServers(cs,
-                       stringSet("https://keep0:25107", "https://keep1:25108"),
-                       stringSet("https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"),
-                       []string{"https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"},
-                       5),
-               DeepEquals,
-               PullServers{To: []string{"https://keep3:25110", "https://keep2:25109"},
-                       From: []string{"https://keep1:25108", "https://keep0:25107"}})
-
-       c.Check(
-               CreatePullServers(cs,
-                       stringSet("https://keep0:25107", "https://keep1:25108"),
-                       stringSet("https://keep3:25110", "https://keep1:25108", "https://keep0:25107"),
-                       []string{"https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"},
-                       5),
-               DeepEquals,
-               PullServers{To: []string{"https://keep3:25110"},
-                       From: []string{"https://keep1:25108", "https://keep0:25107"}})
-
-       c.Check(
-               CreatePullServers(cs,
-                       stringSet("https://keep0:25107", "https://keep1:25108"),
-                       stringSet("https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"),
-                       []string{"https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"},
-                       1),
-               DeepEquals,
-               PullServers{To: []string{"https://keep3:25110"},
-                       From: []string{"https://keep1:25108", "https://keep0:25107"}})
-
-       c.Check(
-               CreatePullServers(cs,
-                       stringSet("https://keep0:25107", "https://keep1:25108"),
-                       stringSet("https://keep3:25110", "https://keep2:25109",
-                               "https://keep1:25108", "https://keep0:25107"),
-                       []string{"https://keep3:25110", "https://keep2:25109",
-                               "https://keep1:25108", "https://keep0:25107"},
-                       1),
-               DeepEquals,
-               PullServers{To: []string{"https://keep3:25110"},
-                       From: []string{"https://keep1:25108", "https://keep0:25107"}})
-
-       c.Check(
-               CreatePullServers(cs,
-                       stringSet("https://keep0:25107", "https://keep1:25108"),
-                       stringSet("https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"),
-                       []string{"https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"},
-                       0),
-               DeepEquals,
-               PullServers{To: []string{},
-                       From: []string{"https://keep1:25108", "https://keep0:25107"}})
-}
-
-// Checks whether two pull list maps are equal. Since pull lists are
-// ordered arbitrarily, we need to sort them by digest before
-// comparing them for deep equality.
-type pullListMapEqualsChecker struct {
-       *CheckerInfo
-}
-
-func (c *pullListMapEqualsChecker) Check(params []interface{}, names []string) (result bool, error string) {
-       obtained, ok := params[0].(map[string]PullList)
-       if !ok {
-               return false, "First parameter is not a PullList map"
-       }
-       expected, ok := params[1].(map[string]PullList)
-       if !ok {
-               return false, "Second parameter is not a PullList map"
-       }
-
-       for _, v := range obtained {
-               sort.Sort(PullListByLocator(v))
-       }
-       for _, v := range expected {
-               sort.Sort(PullListByLocator(v))
-       }
-
-       return DeepEquals.Check(params, names)
-}
-
-var PullListMapEquals Checker = &pullListMapEqualsChecker{&CheckerInfo{
-       Name:   "PullListMapEquals",
-       Params: []string{"obtained", "expected"},
-}}
-
-func (s *PullSuite) TestBuildPullLists(c *C) {
-       c.Check(
-               BuildPullLists(map[Locator]PullServers{}),
-               PullListMapEquals,
-               map[string]PullList{})
-
-       locator1 := Locator{Digest: blockdigest.MakeTestBlockDigest(0xBadBeef)}
-       c.Check(
-               BuildPullLists(map[Locator]PullServers{
-                       locator1: {To: []string{}, From: []string{}}}),
-               PullListMapEquals,
-               map[string]PullList{})
-
-       c.Check(
-               BuildPullLists(map[Locator]PullServers{
-                       locator1: {To: []string{}, From: []string{"f1", "f2"}}}),
-               PullListMapEquals,
-               map[string]PullList{})
-
-       c.Check(
-               BuildPullLists(map[Locator]PullServers{
-                       locator1: {To: []string{"t1"}, From: []string{"f1", "f2"}}}),
-               PullListMapEquals,
-               map[string]PullList{
-                       "t1": {PullRequest{locator1, []string{"f1", "f2"}}}})
-
-       c.Check(
-               BuildPullLists(map[Locator]PullServers{
-                       locator1: {To: []string{"t1"}, From: []string{}}}),
-               PullListMapEquals,
-               map[string]PullList{"t1": {
-                       PullRequest{locator1, []string{}}}})
-
-       c.Check(
-               BuildPullLists(map[Locator]PullServers{
-                       locator1: {
-                               To:   []string{"t1", "t2"},
-                               From: []string{"f1", "f2"},
-                       }}),
-               PullListMapEquals,
-               map[string]PullList{
-                       "t1": {PullRequest{locator1, []string{"f1", "f2"}}},
-                       "t2": {PullRequest{locator1, []string{"f1", "f2"}}},
-               })
-
-       locator2 := Locator{Digest: blockdigest.MakeTestBlockDigest(0xCabbed)}
-       c.Check(
-               BuildPullLists(map[Locator]PullServers{
-                       locator1: {To: []string{"t1"}, From: []string{"f1", "f2"}},
-                       locator2: {To: []string{"t2"}, From: []string{"f3", "f4"}}}),
-               PullListMapEquals,
-               map[string]PullList{
-                       "t1": {PullRequest{locator1, []string{"f1", "f2"}}},
-                       "t2": {PullRequest{locator2, []string{"f3", "f4"}}},
-               })
-
-       c.Check(
-               BuildPullLists(map[Locator]PullServers{
-                       locator1: {
-                               To:   []string{"t1"},
-                               From: []string{"f1", "f2"}},
-                       locator2: {
-                               To:   []string{"t2", "t1"},
-                               From: []string{"f3", "f4"}},
-               }),
-               PullListMapEquals,
-               map[string]PullList{
-                       "t1": {
-                               PullRequest{locator1, []string{"f1", "f2"}},
-                               PullRequest{locator2, []string{"f3", "f4"}},
-                       },
-                       "t2": {
-                               PullRequest{locator2, []string{"f3", "f4"}},
-                       },
-               })
-
-       locator3 := Locator{Digest: blockdigest.MakeTestBlockDigest(0xDeadBeef)}
-       locator4 := Locator{Digest: blockdigest.MakeTestBlockDigest(0xFedBeef)}
-       c.Check(
-               BuildPullLists(map[Locator]PullServers{
-                       locator1: {
-                               To:   []string{"t1"},
-                               From: []string{"f1", "f2"}},
-                       locator2: {
-                               To:   []string{"t2", "t1"},
-                               From: []string{"f3", "f4"}},
-                       locator3: {
-                               To:   []string{"t3", "t2", "t1"},
-                               From: []string{"f4", "f5"}},
-                       locator4: {
-                               To:   []string{"t4", "t3", "t2", "t1"},
-                               From: []string{"f1", "f5"}},
-               }),
-               PullListMapEquals,
-               map[string]PullList{
-                       "t1": {
-                               PullRequest{locator1, []string{"f1", "f2"}},
-                               PullRequest{locator2, []string{"f3", "f4"}},
-                               PullRequest{locator3, []string{"f4", "f5"}},
-                               PullRequest{locator4, []string{"f1", "f5"}},
-                       },
-                       "t2": {
-                               PullRequest{locator2, []string{"f3", "f4"}},
-                               PullRequest{locator3, []string{"f4", "f5"}},
-                               PullRequest{locator4, []string{"f1", "f5"}},
-                       },
-                       "t3": {
-                               PullRequest{locator3, []string{"f4", "f5"}},
-                               PullRequest{locator4, []string{"f1", "f5"}},
-                       },
-                       "t4": {
-                               PullRequest{locator4, []string{"f1", "f5"}},
-                       },
-               })
-}
diff --git a/services/datamanager/summary/summary.go b/services/datamanager/summary/summary.go
deleted file mode 100644 (file)
index 9fb0316..0000000
+++ /dev/null
@@ -1,277 +0,0 @@
-// Summarizes Collection Data and Keep Server Contents.
-
-package summary
-
-// TODO(misha): Check size of blocks as well as their digest.
-
-import (
-       "fmt"
-       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
-       "git.curoverse.com/arvados.git/services/datamanager/collection"
-       "git.curoverse.com/arvados.git/services/datamanager/keep"
-       "sort"
-)
-
-// BlockSet is a map of blocks
-type BlockSet map[blockdigest.DigestWithSize]struct{}
-
-// Insert adds a single block to the set.
-func (bs BlockSet) Insert(digest blockdigest.DigestWithSize) {
-       bs[digest] = struct{}{}
-}
-
-// Union adds a set of blocks to the set.
-func (bs BlockSet) Union(obs BlockSet) {
-       for k, v := range obs {
-               bs[k] = v
-       }
-}
-
-// CollectionIndexSet is used to save space. To convert to and from
-// the uuid, use collection.ReadCollections' fields
-// CollectionIndexToUUID and CollectionUUIDToIndex.
-type CollectionIndexSet map[int]struct{}
-
-// Insert adds a single collection to the set. The collection is specified by
-// its index.
-func (cis CollectionIndexSet) Insert(collectionIndex int) {
-       cis[collectionIndex] = struct{}{}
-}
-
-// ToCollectionIndexSet gets block to collection indices
-func (bs BlockSet) ToCollectionIndexSet(
-       readCollections collection.ReadCollections,
-       collectionIndexSet *CollectionIndexSet) {
-       for block := range bs {
-               for _, collectionIndex := range readCollections.BlockToCollectionIndices[block] {
-                       collectionIndexSet.Insert(collectionIndex)
-               }
-       }
-}
-
-// ReplicationLevels struct
-// Keeps track of the requested and actual replication levels.
-// Currently this is only used for blocks but could easily be used for
-// collections as well.
-type ReplicationLevels struct {
-       // The requested replication level.
-       // For Blocks this is the maximum replication level among all the
-       // collections this block belongs to.
-       Requested int
-
-       // The actual number of keep servers this is on.
-       Actual int
-}
-
-// ReplicationLevelBlockSetMap maps from replication levels to their blocks.
-type ReplicationLevelBlockSetMap map[ReplicationLevels]BlockSet
-
-// ReplicationLevelBlockCount is an individual entry from ReplicationLevelBlockSetMap
-// which only reports the number of blocks, not which blocks.
-type ReplicationLevelBlockCount struct {
-       Levels ReplicationLevels
-       Count  int
-}
-
-// ReplicationLevelBlockSetSlice is an ordered list of ReplicationLevelBlockCount useful for reporting.
-type ReplicationLevelBlockSetSlice []ReplicationLevelBlockCount
-
-// ReplicationSummary sturct
-type ReplicationSummary struct {
-       CollectionBlocksNotInKeep  BlockSet
-       UnderReplicatedBlocks      BlockSet
-       OverReplicatedBlocks       BlockSet
-       CorrectlyReplicatedBlocks  BlockSet
-       KeepBlocksNotInCollections BlockSet
-
-       CollectionsNotFullyInKeep      CollectionIndexSet
-       UnderReplicatedCollections     CollectionIndexSet
-       OverReplicatedCollections      CollectionIndexSet
-       CorrectlyReplicatedCollections CollectionIndexSet
-}
-
-// ReplicationSummaryCounts struct counts the elements in each set in ReplicationSummary.
-type ReplicationSummaryCounts struct {
-       CollectionBlocksNotInKeep      int
-       UnderReplicatedBlocks          int
-       OverReplicatedBlocks           int
-       CorrectlyReplicatedBlocks      int
-       KeepBlocksNotInCollections     int
-       CollectionsNotFullyInKeep      int
-       UnderReplicatedCollections     int
-       OverReplicatedCollections      int
-       CorrectlyReplicatedCollections int
-}
-
-// GetOrCreate gets the BlockSet for a given set of ReplicationLevels,
-// creating it if it doesn't already exist.
-func (rlbs ReplicationLevelBlockSetMap) GetOrCreate(
-       repLevels ReplicationLevels) (bs BlockSet) {
-       bs, exists := rlbs[repLevels]
-       if !exists {
-               bs = make(BlockSet)
-               rlbs[repLevels] = bs
-       }
-       return
-}
-
-// Insert adds a block to the set for a given replication level.
-func (rlbs ReplicationLevelBlockSetMap) Insert(
-       repLevels ReplicationLevels,
-       block blockdigest.DigestWithSize) {
-       rlbs.GetOrCreate(repLevels).Insert(block)
-}
-
-// Union adds a set of blocks to the set for a given replication level.
-func (rlbs ReplicationLevelBlockSetMap) Union(
-       repLevels ReplicationLevels,
-       bs BlockSet) {
-       rlbs.GetOrCreate(repLevels).Union(bs)
-}
-
-// Counts outputs a sorted list of ReplicationLevelBlockCounts.
-func (rlbs ReplicationLevelBlockSetMap) Counts() (
-       sorted ReplicationLevelBlockSetSlice) {
-       sorted = make(ReplicationLevelBlockSetSlice, len(rlbs))
-       i := 0
-       for levels, set := range rlbs {
-               sorted[i] = ReplicationLevelBlockCount{Levels: levels, Count: len(set)}
-               i++
-       }
-       sort.Sort(sorted)
-       return
-}
-
-// Implemented to meet sort.Interface
-func (rlbss ReplicationLevelBlockSetSlice) Len() int {
-       return len(rlbss)
-}
-
-// Implemented to meet sort.Interface
-func (rlbss ReplicationLevelBlockSetSlice) Less(i, j int) bool {
-       return rlbss[i].Levels.Requested < rlbss[j].Levels.Requested ||
-               (rlbss[i].Levels.Requested == rlbss[j].Levels.Requested &&
-                       rlbss[i].Levels.Actual < rlbss[j].Levels.Actual)
-}
-
-// Implemented to meet sort.Interface
-func (rlbss ReplicationLevelBlockSetSlice) Swap(i, j int) {
-       rlbss[i], rlbss[j] = rlbss[j], rlbss[i]
-}
-
-// ComputeCounts returns ReplicationSummaryCounts
-func (rs ReplicationSummary) ComputeCounts() (rsc ReplicationSummaryCounts) {
-       // TODO(misha): Consider rewriting this method to iterate through
-       // the fields using reflection, instead of explictily listing the
-       // fields as we do now.
-       rsc.CollectionBlocksNotInKeep = len(rs.CollectionBlocksNotInKeep)
-       rsc.UnderReplicatedBlocks = len(rs.UnderReplicatedBlocks)
-       rsc.OverReplicatedBlocks = len(rs.OverReplicatedBlocks)
-       rsc.CorrectlyReplicatedBlocks = len(rs.CorrectlyReplicatedBlocks)
-       rsc.KeepBlocksNotInCollections = len(rs.KeepBlocksNotInCollections)
-       rsc.CollectionsNotFullyInKeep = len(rs.CollectionsNotFullyInKeep)
-       rsc.UnderReplicatedCollections = len(rs.UnderReplicatedCollections)
-       rsc.OverReplicatedCollections = len(rs.OverReplicatedCollections)
-       rsc.CorrectlyReplicatedCollections = len(rs.CorrectlyReplicatedCollections)
-       return rsc
-}
-
-// PrettyPrint ReplicationSummaryCounts
-func (rsc ReplicationSummaryCounts) PrettyPrint() string {
-       return fmt.Sprintf("Replication Block Counts:"+
-               "\n Missing From Keep: %d, "+
-               "\n Under Replicated: %d, "+
-               "\n Over Replicated: %d, "+
-               "\n Replicated Just Right: %d, "+
-               "\n Not In Any Collection: %d. "+
-               "\nReplication Collection Counts:"+
-               "\n Missing From Keep: %d, "+
-               "\n Under Replicated: %d, "+
-               "\n Over Replicated: %d, "+
-               "\n Replicated Just Right: %d.",
-               rsc.CollectionBlocksNotInKeep,
-               rsc.UnderReplicatedBlocks,
-               rsc.OverReplicatedBlocks,
-               rsc.CorrectlyReplicatedBlocks,
-               rsc.KeepBlocksNotInCollections,
-               rsc.CollectionsNotFullyInKeep,
-               rsc.UnderReplicatedCollections,
-               rsc.OverReplicatedCollections,
-               rsc.CorrectlyReplicatedCollections)
-}
-
-// BucketReplication returns ReplicationLevelBlockSetMap
-func BucketReplication(readCollections collection.ReadCollections,
-       keepServerInfo keep.ReadServers) (rlbs ReplicationLevelBlockSetMap) {
-       rlbs = make(ReplicationLevelBlockSetMap)
-
-       for block, requestedReplication := range readCollections.BlockToDesiredReplication {
-               rlbs.Insert(
-                       ReplicationLevels{
-                               Requested: requestedReplication,
-                               Actual:    len(keepServerInfo.BlockToServers[block])},
-                       block)
-       }
-
-       for block, servers := range keepServerInfo.BlockToServers {
-               if 0 == readCollections.BlockToDesiredReplication[block] {
-                       rlbs.Insert(
-                               ReplicationLevels{Requested: 0, Actual: len(servers)},
-                               block)
-               }
-       }
-       return
-}
-
-// SummarizeBuckets reads collections and summarizes
-func (rlbs ReplicationLevelBlockSetMap) SummarizeBuckets(
-       readCollections collection.ReadCollections) (
-       rs ReplicationSummary) {
-       rs.CollectionBlocksNotInKeep = make(BlockSet)
-       rs.UnderReplicatedBlocks = make(BlockSet)
-       rs.OverReplicatedBlocks = make(BlockSet)
-       rs.CorrectlyReplicatedBlocks = make(BlockSet)
-       rs.KeepBlocksNotInCollections = make(BlockSet)
-
-       rs.CollectionsNotFullyInKeep = make(CollectionIndexSet)
-       rs.UnderReplicatedCollections = make(CollectionIndexSet)
-       rs.OverReplicatedCollections = make(CollectionIndexSet)
-       rs.CorrectlyReplicatedCollections = make(CollectionIndexSet)
-
-       for levels, bs := range rlbs {
-               if levels.Actual == 0 {
-                       rs.CollectionBlocksNotInKeep.Union(bs)
-               } else if levels.Requested == 0 {
-                       rs.KeepBlocksNotInCollections.Union(bs)
-               } else if levels.Actual < levels.Requested {
-                       rs.UnderReplicatedBlocks.Union(bs)
-               } else if levels.Actual > levels.Requested {
-                       rs.OverReplicatedBlocks.Union(bs)
-               } else {
-                       rs.CorrectlyReplicatedBlocks.Union(bs)
-               }
-       }
-
-       rs.CollectionBlocksNotInKeep.ToCollectionIndexSet(readCollections,
-               &rs.CollectionsNotFullyInKeep)
-       // Since different collections can specify different replication
-       // levels, the fact that a block is under-replicated does not imply
-       // that all collections that it belongs to are under-replicated, but
-       // we'll ignore that for now.
-       // TODO(misha): Fix this and report the correct set of collections.
-       rs.UnderReplicatedBlocks.ToCollectionIndexSet(readCollections,
-               &rs.UnderReplicatedCollections)
-       rs.OverReplicatedBlocks.ToCollectionIndexSet(readCollections,
-               &rs.OverReplicatedCollections)
-
-       for i := range readCollections.CollectionIndexToUUID {
-               if _, notInKeep := rs.CollectionsNotFullyInKeep[i]; notInKeep {
-               } else if _, underReplicated := rs.UnderReplicatedCollections[i]; underReplicated {
-               } else if _, overReplicated := rs.OverReplicatedCollections[i]; overReplicated {
-               } else {
-                       rs.CorrectlyReplicatedCollections.Insert(i)
-               }
-       }
-
-       return
-}
diff --git a/services/datamanager/summary/summary_test.go b/services/datamanager/summary/summary_test.go
deleted file mode 100644 (file)
index 8268404..0000000
+++ /dev/null
@@ -1,220 +0,0 @@
-package summary
-
-import (
-       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
-       "git.curoverse.com/arvados.git/services/datamanager/collection"
-       "git.curoverse.com/arvados.git/services/datamanager/keep"
-       "reflect"
-       "sort"
-       "testing"
-)
-
-func BlockSetFromSlice(digests []int) (bs BlockSet) {
-       bs = make(BlockSet)
-       for _, digest := range digests {
-               bs.Insert(blockdigest.MakeTestDigestWithSize(digest))
-       }
-       return
-}
-
-func CollectionIndexSetFromSlice(indices []int) (cis CollectionIndexSet) {
-       cis = make(CollectionIndexSet)
-       for _, index := range indices {
-               cis.Insert(index)
-       }
-       return
-}
-
-func (cis CollectionIndexSet) ToSlice() (ints []int) {
-       ints = make([]int, len(cis))
-       i := 0
-       for collectionIndex := range cis {
-               ints[i] = collectionIndex
-               i++
-       }
-       sort.Ints(ints)
-       return
-}
-
-// Helper method to meet interface expected by older tests.
-func SummarizeReplication(readCollections collection.ReadCollections,
-       keepServerInfo keep.ReadServers) (rs ReplicationSummary) {
-       return BucketReplication(readCollections, keepServerInfo).
-               SummarizeBuckets(readCollections)
-}
-
-// Takes a map from block digest to replication level and represents
-// it in a keep.ReadServers structure.
-func SpecifyReplication(digestToReplication map[int]int) (rs keep.ReadServers) {
-       rs.BlockToServers = make(map[blockdigest.DigestWithSize][]keep.BlockServerInfo)
-       for digest, replication := range digestToReplication {
-               rs.BlockToServers[blockdigest.MakeTestDigestWithSize(digest)] =
-                       make([]keep.BlockServerInfo, replication)
-       }
-       return
-}
-
-// Verifies that
-// blocks.ToCollectionIndexSet(rc.BlockToCollectionIndices) returns
-// expectedCollections.
-func VerifyToCollectionIndexSet(
-       t *testing.T,
-       blocks []int,
-       blockToCollectionIndices map[int][]int,
-       expectedCollections []int) {
-
-       expected := CollectionIndexSetFromSlice(expectedCollections)
-
-       rc := collection.ReadCollections{
-               BlockToCollectionIndices: map[blockdigest.DigestWithSize][]int{},
-       }
-       for digest, indices := range blockToCollectionIndices {
-               rc.BlockToCollectionIndices[blockdigest.MakeTestDigestWithSize(digest)] = indices
-       }
-
-       returned := make(CollectionIndexSet)
-       BlockSetFromSlice(blocks).ToCollectionIndexSet(rc, &returned)
-
-       if !reflect.DeepEqual(returned, expected) {
-               t.Errorf("Expected %v.ToCollectionIndexSet(%v) to return \n %v \n but instead received \n %v",
-                       blocks,
-                       blockToCollectionIndices,
-                       expectedCollections,
-                       returned.ToSlice())
-       }
-}
-
-func TestToCollectionIndexSet(t *testing.T) {
-       VerifyToCollectionIndexSet(t, []int{6}, map[int][]int{6: {0}}, []int{0})
-       VerifyToCollectionIndexSet(t, []int{4}, map[int][]int{4: {1}}, []int{1})
-       VerifyToCollectionIndexSet(t, []int{4}, map[int][]int{4: {1, 9}}, []int{1, 9})
-       VerifyToCollectionIndexSet(t, []int{5, 6},
-               map[int][]int{5: {2, 3}, 6: {3, 4}},
-               []int{2, 3, 4})
-       VerifyToCollectionIndexSet(t, []int{5, 6},
-               map[int][]int{5: {8}, 6: {4}},
-               []int{4, 8})
-       VerifyToCollectionIndexSet(t, []int{6}, map[int][]int{5: {0}}, []int{})
-}
-
-func TestSimpleSummary(t *testing.T) {
-       rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
-               {ReplicationLevel: 1, Blocks: []int{1, 2}},
-       })
-       rc.Summarize(nil)
-       cIndex := rc.CollectionIndicesForTesting()
-
-       keepInfo := SpecifyReplication(map[int]int{1: 1, 2: 1})
-
-       expectedSummary := ReplicationSummary{
-               CollectionBlocksNotInKeep:  BlockSet{},
-               UnderReplicatedBlocks:      BlockSet{},
-               OverReplicatedBlocks:       BlockSet{},
-               CorrectlyReplicatedBlocks:  BlockSetFromSlice([]int{1, 2}),
-               KeepBlocksNotInCollections: BlockSet{},
-
-               CollectionsNotFullyInKeep:      CollectionIndexSet{},
-               UnderReplicatedCollections:     CollectionIndexSet{},
-               OverReplicatedCollections:      CollectionIndexSet{},
-               CorrectlyReplicatedCollections: CollectionIndexSetFromSlice([]int{cIndex[0]}),
-       }
-
-       returnedSummary := SummarizeReplication(rc, keepInfo)
-
-       if !reflect.DeepEqual(returnedSummary, expectedSummary) {
-               t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v", expectedSummary, returnedSummary)
-       }
-}
-
-func TestMissingBlock(t *testing.T) {
-       rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
-               {ReplicationLevel: 1, Blocks: []int{1, 2}},
-       })
-       rc.Summarize(nil)
-       cIndex := rc.CollectionIndicesForTesting()
-
-       keepInfo := SpecifyReplication(map[int]int{1: 1})
-
-       expectedSummary := ReplicationSummary{
-               CollectionBlocksNotInKeep:  BlockSetFromSlice([]int{2}),
-               UnderReplicatedBlocks:      BlockSet{},
-               OverReplicatedBlocks:       BlockSet{},
-               CorrectlyReplicatedBlocks:  BlockSetFromSlice([]int{1}),
-               KeepBlocksNotInCollections: BlockSet{},
-
-               CollectionsNotFullyInKeep:      CollectionIndexSetFromSlice([]int{cIndex[0]}),
-               UnderReplicatedCollections:     CollectionIndexSet{},
-               OverReplicatedCollections:      CollectionIndexSet{},
-               CorrectlyReplicatedCollections: CollectionIndexSet{},
-       }
-
-       returnedSummary := SummarizeReplication(rc, keepInfo)
-
-       if !reflect.DeepEqual(returnedSummary, expectedSummary) {
-               t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v",
-                       expectedSummary,
-                       returnedSummary)
-       }
-}
-
-func TestUnderAndOverReplicatedBlocks(t *testing.T) {
-       rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
-               {ReplicationLevel: 2, Blocks: []int{1, 2}},
-       })
-       rc.Summarize(nil)
-       cIndex := rc.CollectionIndicesForTesting()
-
-       keepInfo := SpecifyReplication(map[int]int{1: 1, 2: 3})
-
-       expectedSummary := ReplicationSummary{
-               CollectionBlocksNotInKeep:  BlockSet{},
-               UnderReplicatedBlocks:      BlockSetFromSlice([]int{1}),
-               OverReplicatedBlocks:       BlockSetFromSlice([]int{2}),
-               CorrectlyReplicatedBlocks:  BlockSet{},
-               KeepBlocksNotInCollections: BlockSet{},
-
-               CollectionsNotFullyInKeep:      CollectionIndexSet{},
-               UnderReplicatedCollections:     CollectionIndexSetFromSlice([]int{cIndex[0]}),
-               OverReplicatedCollections:      CollectionIndexSetFromSlice([]int{cIndex[0]}),
-               CorrectlyReplicatedCollections: CollectionIndexSet{},
-       }
-
-       returnedSummary := SummarizeReplication(rc, keepInfo)
-
-       if !reflect.DeepEqual(returnedSummary, expectedSummary) {
-               t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v",
-                       expectedSummary,
-                       returnedSummary)
-       }
-}
-
-func TestMixedReplication(t *testing.T) {
-       rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
-               {ReplicationLevel: 1, Blocks: []int{1, 2}},
-               {ReplicationLevel: 1, Blocks: []int{3, 4}},
-               {ReplicationLevel: 2, Blocks: []int{5, 6}},
-       })
-       rc.Summarize(nil)
-       cIndex := rc.CollectionIndicesForTesting()
-
-       keepInfo := SpecifyReplication(map[int]int{1: 1, 2: 1, 3: 1, 5: 1, 6: 3, 7: 2})
-
-       expectedSummary := ReplicationSummary{
-               CollectionBlocksNotInKeep:  BlockSetFromSlice([]int{4}),
-               UnderReplicatedBlocks:      BlockSetFromSlice([]int{5}),
-               OverReplicatedBlocks:       BlockSetFromSlice([]int{6}),
-               CorrectlyReplicatedBlocks:  BlockSetFromSlice([]int{1, 2, 3}),
-               KeepBlocksNotInCollections: BlockSetFromSlice([]int{7}),
-
-               CollectionsNotFullyInKeep:      CollectionIndexSetFromSlice([]int{cIndex[1]}),
-               UnderReplicatedCollections:     CollectionIndexSetFromSlice([]int{cIndex[2]}),
-               OverReplicatedCollections:      CollectionIndexSetFromSlice([]int{cIndex[2]}),
-               CorrectlyReplicatedCollections: CollectionIndexSetFromSlice([]int{cIndex[0]}),
-       }
-
-       returnedSummary := SummarizeReplication(rc, keepInfo)
-
-       if !reflect.DeepEqual(returnedSummary, expectedSummary) {
-               t.Fatalf("Expected returnedSummary to look like: \n%+v but instead it is: \n%+v. Index to UUID is %v. BlockToCollectionIndices is %v.", expectedSummary, returnedSummary, rc.CollectionIndexToUUID, rc.BlockToCollectionIndices)
-       }
-}
diff --git a/services/datamanager/summary/trash_list.go b/services/datamanager/summary/trash_list.go
deleted file mode 100644 (file)
index 3e4d387..0000000
+++ /dev/null
@@ -1,62 +0,0 @@
-// Code for generating trash lists
-
-package summary
-
-import (
-       "errors"
-       "fmt"
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
-       "git.curoverse.com/arvados.git/services/datamanager/keep"
-       "time"
-)
-
-// BuildTrashLists builds list of blocks to be sent to trash queue
-func BuildTrashLists(kc *keepclient.KeepClient,
-       keepServerInfo *keep.ReadServers,
-       keepBlocksNotInCollections BlockSet) (m map[string]keep.TrashList, err error) {
-
-       // Servers that are writeable
-       writableServers := map[string]struct{}{}
-       for _, url := range kc.WritableLocalRoots() {
-               writableServers[url] = struct{}{}
-       }
-
-       _ttl, err := kc.Arvados.Discovery("blobSignatureTtl")
-       if err != nil {
-               return nil, errors.New(fmt.Sprintf("Failed to get blobSignatureTtl, can't build trash lists: %v", err))
-       }
-
-       ttl := int64(_ttl.(float64))
-
-       // expire unreferenced blocks more than "ttl" seconds old.
-       expiry := time.Now().UTC().UnixNano() - ttl*1e9
-
-       return buildTrashListsInternal(writableServers, keepServerInfo, expiry, keepBlocksNotInCollections), nil
-}
-
-func buildTrashListsInternal(writableServers map[string]struct{},
-       keepServerInfo *keep.ReadServers,
-       expiry int64,
-       keepBlocksNotInCollections BlockSet) (m map[string]keep.TrashList) {
-
-       m = make(map[string]keep.TrashList)
-
-       for block := range keepBlocksNotInCollections {
-               for _, blockOnServer := range keepServerInfo.BlockToServers[block] {
-                       if blockOnServer.Mtime >= expiry {
-                               continue
-                       }
-
-                       // block is older than expire cutoff
-                       srv := keepServerInfo.KeepServerIndexToAddress[blockOnServer.ServerIndex].String()
-
-                       if _, writable := writableServers[srv]; !writable {
-                               continue
-                       }
-
-                       m[srv] = append(m[srv], keep.TrashRequest{Locator: block.Digest.String(), BlockMtime: blockOnServer.Mtime})
-               }
-       }
-       return
-
-}
diff --git a/services/datamanager/summary/trash_list_test.go b/services/datamanager/summary/trash_list_test.go
deleted file mode 100644 (file)
index 3626904..0000000
+++ /dev/null
@@ -1,76 +0,0 @@
-package summary
-
-import (
-       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
-       "git.curoverse.com/arvados.git/services/datamanager/keep"
-       . "gopkg.in/check.v1"
-       "testing"
-)
-
-// Gocheck boilerplate
-func TestTrash(t *testing.T) {
-       TestingT(t)
-}
-
-type TrashSuite struct{}
-
-var _ = Suite(&TrashSuite{})
-
-func (s *TrashSuite) TestBuildTrashLists(c *C) {
-       var sv0 = keep.ServerAddress{Host: "keep0.example.com", Port: 80}
-       var sv1 = keep.ServerAddress{Host: "keep1.example.com", Port: 80}
-
-       var block0 = blockdigest.MakeTestDigestWithSize(0xdeadbeef)
-       var block1 = blockdigest.MakeTestDigestWithSize(0xfedbeef)
-
-       var keepServerInfo = keep.ReadServers{
-               KeepServerIndexToAddress: []keep.ServerAddress{sv0, sv1},
-               BlockToServers: map[blockdigest.DigestWithSize][]keep.BlockServerInfo{
-                       block0: {
-                               {0, 99},
-                               {1, 101}},
-                       block1: {
-                               {0, 99},
-                               {1, 101}}}}
-
-       // only block0 is in delete set
-       var bs = make(BlockSet)
-       bs[block0] = struct{}{}
-
-       // Test trash list where only sv0 is on writable list.
-       c.Check(buildTrashListsInternal(
-               map[string]struct{}{
-                       sv0.URL(): {}},
-               &keepServerInfo,
-               110,
-               bs),
-               DeepEquals,
-               map[string]keep.TrashList{
-                       "http://keep0.example.com:80": {keep.TrashRequest{"000000000000000000000000deadbeef", 99}}})
-
-       // Test trash list where both sv0 and sv1 are on writable list.
-       c.Check(buildTrashListsInternal(
-               map[string]struct{}{
-                       sv0.URL(): {},
-                       sv1.URL(): {}},
-               &keepServerInfo,
-               110,
-               bs),
-               DeepEquals,
-               map[string]keep.TrashList{
-                       "http://keep0.example.com:80": {keep.TrashRequest{"000000000000000000000000deadbeef", 99}},
-                       "http://keep1.example.com:80": {keep.TrashRequest{"000000000000000000000000deadbeef", 101}}})
-
-       // Test trash list where only block on sv0 is expired
-       c.Check(buildTrashListsInternal(
-               map[string]struct{}{
-                       sv0.URL(): {},
-                       sv1.URL(): {}},
-               &keepServerInfo,
-               100,
-               bs),
-               DeepEquals,
-               map[string]keep.TrashList{
-                       "http://keep0.example.com:80": {keep.TrashRequest{"000000000000000000000000deadbeef", 99}}})
-
-}
diff --git a/services/fuse/arvados_fuse/_version.py b/services/fuse/arvados_fuse/_version.py
new file mode 100644 (file)
index 0000000..837d4b9
--- /dev/null
@@ -0,0 +1,3 @@
+import pkg_resources
+
+__version__ = pkg_resources.require('arvados_fuse')[0].version
index 3f89732bea25dcd1ca546fbef126227e9e0a9256..f2948f9e45f295b43544615ad75c764c35856053 100644 (file)
@@ -13,6 +13,7 @@ import time
 import arvados.commands._util as arv_cmd
 from arvados_fuse import crunchstat
 from arvados_fuse import *
+from arvados_fuse._version import __version__
 
 class ArgumentParser(argparse.ArgumentParser):
     def __init__(self):
@@ -24,6 +25,9 @@ class ArgumentParser(argparse.ArgumentParser):
     mountpoint before --exec, or mark the end of your --exec arguments
     with "--".
             """)
+        self.add_argument('--version', action='version',
+                          version="%s %s" % (sys.argv[0], __version__),
+                          help='Print version and exit.')
         self.add_argument('mountpoint', type=str, help="""Mount point.""")
         self.add_argument('--allow-other', action='store_true',
                             help="""Let other users read the mount""")
index d7e1a8afb302b26ae582bc5a3a5aaecc9514ae7c..9e282caf49919972b3fefe60001c603ae8176305 100644 (file)
@@ -40,7 +40,8 @@ setup(name='arvados_fuse',
         'arvados-python-client >= 0.1.20151118035730',
         'llfuse==0.41.1',
         'python-daemon',
-        'ciso8601'
+        'ciso8601',
+        'setuptools'
         ],
       test_suite='tests',
       tests_require=['pbr<1.7.0', 'mock>=1.0', 'PyYAML'],
index e8488d7ff967179423f3732c8e6e56b05194ed58..57b4a37826d6c4b4a73a22c6c51716021367f22b 100644 (file)
@@ -3,6 +3,7 @@ import arvados_fuse
 import arvados_fuse.command
 import contextlib
 import functools
+import io
 import json
 import llfuse
 import logging
@@ -48,6 +49,14 @@ class MountArgsTest(unittest.TestCase):
             ent = ent[p]
         return ent
 
+    @contextlib.contextmanager
+    def stderrMatches(self, stderr):
+        orig, sys.stderr = sys.stderr, stderr
+        try:
+            yield
+        finally:
+            sys.stderr = orig
+
     def check_ent_type(self, cls, *path):
         ent = self.lookup(self.mnt, *path)
         self.assertEqual(ent.__class__, cls)
@@ -170,6 +179,13 @@ class MountArgsTest(unittest.TestCase):
                          run_test_server.fixture('users')['active']['uuid'])
         self.assertEqual(True, self.mnt.listen_for_events)
 
+    def test_version_argument(self):
+        orig, sys.stderr = sys.stderr, io.BytesIO()
+        with self.assertRaises(SystemExit):
+            args = arvados_fuse.command.ArgumentParser().parse_args(['--version'])
+        self.assertRegexpMatches(sys.stderr.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
+        sys.stderr = orig
+
     @noexit
     @mock.patch('arvados.events.subscribe')
     def test_disable_event_listening(self, mock_subscribe):
index 8fc06c3534b76054cecbfdb1116007579952bcb1..9389f19ed801cf1ee840642d2078b70de8aa9e50 100644 (file)
@@ -246,7 +246,7 @@ func (bal *Balancer) GetCurrentState(c *arvados.Client, pageSize, bufs int) erro
                        }
                        if len(errs) > 0 {
                                // Some other goroutine encountered an
-                               // error -- any futher effort here
+                               // error -- any further effort here
                                // will be wasted.
                                return
                        }
index 6ca31c38329ec7347631a03c802b4216bd05f167..43cf83a07ead3db94b2620be74375c738d4e5d08 100644 (file)
@@ -8,7 +8,6 @@ import (
        "fmt"
        "io"
        "io/ioutil"
-       "log"
        "net/http"
        "os"
        "regexp"
@@ -18,6 +17,7 @@ import (
        "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       log "github.com/Sirupsen/logrus"
        "github.com/curoverse/azure-sdk-for-go/storage"
 )
 
index d636a5ee86887806372a14e2f291e5c4f2c11b33..c5dbc8f5831402aa3e223391c3ad0ece918de0a3 100644 (file)
@@ -9,7 +9,6 @@ import (
        "flag"
        "fmt"
        "io/ioutil"
-       "log"
        "math/rand"
        "net"
        "net/http"
@@ -22,6 +21,7 @@ import (
        "testing"
        "time"
 
+       log "github.com/Sirupsen/logrus"
        "github.com/curoverse/azure-sdk-for-go/storage"
 )
 
index 9a3509424a3b10a1b8361c06be8475ddaa31f832..38f97aff1183d0da0fdaa05b3022765277c9a954 100644 (file)
@@ -1,10 +1,11 @@
 package main
 
 import (
-       "log"
        "sync"
        "sync/atomic"
        "time"
+
+       log "github.com/Sirupsen/logrus"
 )
 
 type bufferPool struct {
index dc06ef549877ba0316294e4a0e7767393ef4436d..83dd84ecc09d3ebf41c4407344b2387b3dc5fd4b 100644 (file)
@@ -5,17 +5,19 @@ import (
        "encoding/json"
        "fmt"
        "io/ioutil"
-       "log"
        "strings"
        "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       log "github.com/Sirupsen/logrus"
 )
 
 type Config struct {
        Debug  bool
        Listen string
 
+       LogFormat string
+
        PIDFile string
 
        MaxBuffers  int
@@ -38,10 +40,13 @@ type Config struct {
 
 var theConfig = DefaultConfig()
 
+const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
+
 // DefaultConfig returns the default configuration.
 func DefaultConfig() *Config {
        return &Config{
                Listen:             ":25107",
+               LogFormat:          "json",
                MaxBuffers:         128,
                RequireSignatures:  true,
                BlobSignatureTTL:   arvados.Duration(14 * 24 * time.Hour),
@@ -55,12 +60,27 @@ func DefaultConfig() *Config {
 // fields, and before using the config.
 func (cfg *Config) Start() error {
        if cfg.Debug {
+               log.SetLevel(log.DebugLevel)
                cfg.debugLogf = log.Printf
                cfg.debugLogf("debugging enabled")
        } else {
                cfg.debugLogf = func(string, ...interface{}) {}
        }
 
+       switch strings.ToLower(cfg.LogFormat) {
+       case "text":
+               log.SetFormatter(&log.TextFormatter{
+                       FullTimestamp:   true,
+                       TimestampFormat: rfc3339NanoFixed,
+               })
+       case "json":
+               log.SetFormatter(&log.JSONFormatter{
+                       TimestampFormat: rfc3339NanoFixed,
+               })
+       default:
+               return fmt.Errorf(`unsupported log format %q (try "text" or "json")`, cfg.LogFormat)
+       }
+
        if cfg.MaxBuffers < 0 {
                return fmt.Errorf("MaxBuffers must be greater than zero")
        }
index eaa09042484388dd8185b2d8c79679034c6951f7..a6d46e5e4a1166967f5d3be9c22581d3c1401ef9 100644 (file)
@@ -1,7 +1,7 @@
 package main
 
 import (
-       "log"
+       log "github.com/Sirupsen/logrus"
 )
 
 func init() {
index 9708b4e6be32f96645d500dfcd4319972f213d47..40b4839e06cc96fc05cd8eb5a2be4e73707ac03d 100644 (file)
@@ -958,7 +958,7 @@ func TestGetHandlerClientDisconnect(t *testing.T) {
        ok := make(chan struct{})
        go func() {
                req, _ := http.NewRequest("GET", fmt.Sprintf("/%s+%d", TestHash, len(TestBlock)), nil)
-               (&LoggingRESTRouter{MakeRESTRouter()}).ServeHTTP(resp, req)
+               (&LoggingRESTRouter{router: MakeRESTRouter()}).ServeHTTP(resp, req)
                ok <- struct{}{}
        }()
 
index b51009ea4d78348fe53bab01c13070d950e6aa42..adaaa361e96177080a9df4e2b2f1d77aac98424d 100644 (file)
@@ -15,7 +15,6 @@ import (
        "fmt"
        "github.com/gorilla/mux"
        "io"
-       "log"
        "net/http"
        "os"
        "regexp"
@@ -24,13 +23,21 @@ import (
        "strings"
        "sync"
        "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/httpserver"
+       log "github.com/Sirupsen/logrus"
 )
 
-// MakeRESTRouter returns a new mux.Router that forwards all Keep
-// requests to the appropriate handlers.
-//
-func MakeRESTRouter() *mux.Router {
+type router struct {
+       *mux.Router
+       limiter httpserver.RequestCounter
+}
+
+// MakeRESTRouter returns a new router that forwards all Keep requests
+// to the appropriate handlers.
+func MakeRESTRouter() *router {
        rest := mux.NewRouter()
+       rtr := &router{Router: rest}
 
        rest.HandleFunc(
                `/{hash:[0-9a-f]{32}}`, GetBlockHandler).Methods("GET", "HEAD")
@@ -47,10 +54,10 @@ func MakeRESTRouter() *mux.Router {
        rest.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, IndexHandler).Methods("GET", "HEAD")
 
        // Internals/debugging info (runtime.MemStats)
-       rest.HandleFunc(`/debug.json`, DebugHandler).Methods("GET", "HEAD")
+       rest.HandleFunc(`/debug.json`, rtr.DebugHandler).Methods("GET", "HEAD")
 
        // List volumes: path, device number, bytes used/avail.
-       rest.HandleFunc(`/status.json`, StatusHandler).Methods("GET", "HEAD")
+       rest.HandleFunc(`/status.json`, rtr.StatusHandler).Methods("GET", "HEAD")
 
        // Replace the current pull queue.
        rest.HandleFunc(`/pull`, PullHandler).Methods("PUT")
@@ -65,7 +72,7 @@ func MakeRESTRouter() *mux.Router {
        // 400 Bad Request.
        rest.NotFoundHandler = http.HandlerFunc(BadRequestHandler)
 
-       return rest
+       return rtr
 }
 
 // BadRequestHandler is a HandleFunc to address bad requests.
@@ -258,17 +265,19 @@ type volumeStatusEnt struct {
 
 // NodeStatus struct
 type NodeStatus struct {
-       Volumes    []*volumeStatusEnt
-       BufferPool PoolStatus
-       PullQueue  WorkQueueStatus
-       TrashQueue WorkQueueStatus
+       Volumes         []*volumeStatusEnt
+       BufferPool      PoolStatus
+       PullQueue       WorkQueueStatus
+       TrashQueue      WorkQueueStatus
+       RequestsCurrent int
+       RequestsMax     int
 }
 
 var st NodeStatus
 var stLock sync.Mutex
 
 // DebugHandler addresses /debug.json requests.
-func DebugHandler(resp http.ResponseWriter, req *http.Request) {
+func (rtr *router) DebugHandler(resp http.ResponseWriter, req *http.Request) {
        type debugStats struct {
                MemStats runtime.MemStats
        }
@@ -281,9 +290,9 @@ func DebugHandler(resp http.ResponseWriter, req *http.Request) {
 }
 
 // StatusHandler addresses /status.json requests.
-func StatusHandler(resp http.ResponseWriter, req *http.Request) {
+func (rtr *router) StatusHandler(resp http.ResponseWriter, req *http.Request) {
        stLock.Lock()
-       readNodeStatus(&st)
+       rtr.readNodeStatus(&st)
        jstat, err := json.Marshal(&st)
        stLock.Unlock()
        if err == nil {
@@ -296,7 +305,7 @@ func StatusHandler(resp http.ResponseWriter, req *http.Request) {
 }
 
 // populate the given NodeStatus struct with current values.
-func readNodeStatus(st *NodeStatus) {
+func (rtr *router) readNodeStatus(st *NodeStatus) {
        vols := KeepVM.AllReadable()
        if cap(st.Volumes) < len(vols) {
                st.Volumes = make([]*volumeStatusEnt, len(vols))
@@ -319,6 +328,10 @@ func readNodeStatus(st *NodeStatus) {
        st.BufferPool.Len = bufs.Len()
        st.PullQueue = getWorkQueueStatus(pullq)
        st.TrashQueue = getWorkQueueStatus(trashq)
+       if rtr.limiter != nil {
+               st.RequestsCurrent = rtr.limiter.Current()
+               st.RequestsMax = rtr.limiter.Max()
+       }
 }
 
 // return a WorkQueueStatus for the given queue. If q is nil (which
index 4eaaea8ea3695ac0f9bd69f71bef4fa2fd5b9278..54147959719183141a8e3137d5d1363ec9667e6b 100644 (file)
@@ -3,7 +3,6 @@ package main
 import (
        "flag"
        "fmt"
-       "log"
        "net"
        "net/http"
        "os"
@@ -15,6 +14,7 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/config"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       log "github.com/Sirupsen/logrus"
        "github.com/coreos/go-systemd/daemon"
        "github.com/ghodss/yaml"
 )
@@ -150,10 +150,10 @@ func main() {
        KeepVM = MakeRRVolumeManager(theConfig.Volumes)
 
        // Middleware stack: logger, MaxRequests limiter, method handlers
-       http.Handle("/", &LoggingRESTRouter{
-               httpserver.NewRequestLimiter(theConfig.MaxRequests,
-                       MakeRESTRouter()),
-       })
+       router := MakeRESTRouter()
+       limiter := httpserver.NewRequestLimiter(theConfig.MaxRequests, router)
+       router.limiter = limiter
+       http.Handle("/", &LoggingRESTRouter{router: limiter})
 
        // Set up a TCP listener.
        listener, err := net.Listen("tcp", theConfig.Listen)
index 0f556b538ac7ae15b1939f61bad13be3ed0404e5..bfd006ee8d2f3576332b8a3be4c6b040cce14214 100644 (file)
@@ -4,10 +4,14 @@ package main
 // LoggingResponseWriter
 
 import (
-       "log"
+       "context"
+       "fmt"
        "net/http"
        "strings"
        "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/httpserver"
+       log "github.com/Sirupsen/logrus"
 )
 
 // LoggingResponseWriter has anonymous fields ResponseWriter and ResponseBody
@@ -57,21 +61,61 @@ func (resp *LoggingResponseWriter) Write(data []byte) (int, error) {
 
 // LoggingRESTRouter is used to add logging capabilities to mux.Router
 type LoggingRESTRouter struct {
-       router http.Handler
+       router      http.Handler
+       idGenerator httpserver.IDGenerator
 }
 
 func (loggingRouter *LoggingRESTRouter) ServeHTTP(wrappedResp http.ResponseWriter, req *http.Request) {
-       t0 := time.Now()
+       tStart := time.Now()
+
+       // Attach a requestID-aware logger to the request context.
+       lgr := log.WithField("RequestID", loggingRouter.idGenerator.Next())
+       ctx := context.WithValue(req.Context(), "logger", lgr)
+       req = req.WithContext(ctx)
+
+       lgr = lgr.WithFields(log.Fields{
+               "remoteAddr":      req.RemoteAddr,
+               "reqForwardedFor": req.Header.Get("X-Forwarded-For"),
+               "reqMethod":       req.Method,
+               "reqPath":         req.URL.Path[1:],
+               "reqBytes":        req.ContentLength,
+       })
+       lgr.Debug("request")
+
        resp := LoggingResponseWriter{http.StatusOK, 0, wrappedResp, "", zeroTime}
        loggingRouter.router.ServeHTTP(&resp, req)
+       tDone := time.Now()
+
        statusText := http.StatusText(resp.Status)
        if resp.Status >= 400 {
                statusText = strings.Replace(resp.ResponseBody, "\n", "", -1)
        }
-       now := time.Now()
-       tTotal := now.Sub(t0)
-       tLatency := resp.sentHdr.Sub(t0)
-       tResponse := now.Sub(resp.sentHdr)
-       log.Printf("[%s] %s %s %d %.6fs %.6fs %.6fs %d %d \"%s\"", req.RemoteAddr, req.Method, req.URL.Path[1:], req.ContentLength, tTotal.Seconds(), tLatency.Seconds(), tResponse.Seconds(), resp.Status, resp.Length, statusText)
+       if resp.sentHdr == zeroTime {
+               // Nobody changed status or wrote any data, i.e., we
+               // returned a 200 response with no body.
+               resp.sentHdr = tDone
+       }
+
+       lgr.WithFields(log.Fields{
+               "timeTotal":      loggedDuration(tDone.Sub(tStart)),
+               "timeToStatus":   loggedDuration(resp.sentHdr.Sub(tStart)),
+               "timeWriteBody":  loggedDuration(tDone.Sub(resp.sentHdr)),
+               "respStatusCode": resp.Status,
+               "respStatus":     statusText,
+               "respBytes":      resp.Length,
+       }).Info("response")
+}
+
+type loggedDuration time.Duration
+
+// MarshalJSON formats a duration as a number of seconds, using
+// fixed-point notation with no more than 6 decimal places.
+func (d loggedDuration) MarshalJSON() ([]byte, error) {
+       return []byte(d.String()), nil
+}
 
+// String formats a duration as a number of seconds, using
+// fixed-point notation with no more than 6 decimal places.
+func (d loggedDuration) String() string {
+       return fmt.Sprintf("%.6f", time.Duration(d).Seconds())
 }
index 12860bb662d91a1e31191fed2bccace97bc2ac30..3c6278d478d3d897982b2b6f8c9a166c505e6433 100644 (file)
@@ -7,8 +7,9 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
        "io"
        "io/ioutil"
-       "log"
        "time"
+
+       log "github.com/Sirupsen/logrus"
 )
 
 // RunPullWorker is used by Keepstore to initiate pull worker channel goroutine.
index 27ac0d9c74992f57fb9091c96dca8dfa59fb6ffc..ca5b1a2eb945cb2ae940c9599c955fb59d9e489a 100644 (file)
@@ -9,7 +9,6 @@ import (
        "fmt"
        "io"
        "io/ioutil"
-       "log"
        "net/http"
        "os"
        "regexp"
@@ -21,6 +20,7 @@ import (
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "github.com/AdRoll/goamz/aws"
        "github.com/AdRoll/goamz/s3"
+       log "github.com/Sirupsen/logrus"
 )
 
 const (
index 10e915852ca75e31cfbbe629198de52821e5ab3a..c43b85b1c588700ca41378432363ea41545e5dd0 100644 (file)
@@ -7,13 +7,15 @@ import (
        "encoding/json"
        "fmt"
        "io/ioutil"
-       "log"
+       "net/http"
+       "net/http/httptest"
        "os"
        "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "github.com/AdRoll/goamz/s3"
        "github.com/AdRoll/goamz/s3/s3test"
+       log "github.com/Sirupsen/logrus"
        check "gopkg.in/check.v1"
 )
 
@@ -112,6 +114,94 @@ func (s *StubbedS3Suite) TestStats(c *check.C) {
        c.Check(stats(), check.Matches, `.*"InBytes":6,.*`)
 }
 
+type blockingHandler struct {
+       requested chan *http.Request
+       unblock   chan struct{}
+}
+
+func (h *blockingHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+       if h.requested != nil {
+               h.requested <- r
+       }
+       if h.unblock != nil {
+               <-h.unblock
+       }
+       http.Error(w, "nothing here", http.StatusNotFound)
+}
+
+func (s *StubbedS3Suite) TestGetContextCancel(c *check.C) {
+       loc := "acbd18db4cc2f85cedef654fccc4a4d8"
+       buf := make([]byte, 3)
+
+       s.testContextCancel(c, func(ctx context.Context, v *TestableS3Volume) error {
+               _, err := v.Get(ctx, loc, buf)
+               return err
+       })
+}
+
+func (s *StubbedS3Suite) TestCompareContextCancel(c *check.C) {
+       loc := "acbd18db4cc2f85cedef654fccc4a4d8"
+       buf := []byte("bar")
+
+       s.testContextCancel(c, func(ctx context.Context, v *TestableS3Volume) error {
+               return v.Compare(ctx, loc, buf)
+       })
+}
+
+func (s *StubbedS3Suite) TestPutContextCancel(c *check.C) {
+       loc := "acbd18db4cc2f85cedef654fccc4a4d8"
+       buf := []byte("foo")
+
+       s.testContextCancel(c, func(ctx context.Context, v *TestableS3Volume) error {
+               return v.Put(ctx, loc, buf)
+       })
+}
+
+func (s *StubbedS3Suite) testContextCancel(c *check.C, testFunc func(context.Context, *TestableS3Volume) error) {
+       handler := &blockingHandler{}
+       srv := httptest.NewServer(handler)
+       defer srv.Close()
+
+       v := s.newTestableVolume(c, 5*time.Minute, false, 2)
+       vol := *v.S3Volume
+       vol.Endpoint = srv.URL
+       v = &TestableS3Volume{S3Volume: &vol}
+       v.Start()
+
+       ctx, cancel := context.WithCancel(context.Background())
+
+       handler.requested = make(chan *http.Request)
+       handler.unblock = make(chan struct{})
+       defer close(handler.unblock)
+
+       doneFunc := make(chan struct{})
+       go func() {
+               err := testFunc(ctx, v)
+               c.Check(err, check.Equals, context.Canceled)
+               close(doneFunc)
+       }()
+
+       timeout := time.After(10 * time.Second)
+
+       // Wait for the stub server to receive a request, meaning
+       // Get() is waiting for an s3 operation.
+       select {
+       case <-timeout:
+               c.Fatal("timed out waiting for test func to call our handler")
+       case <-doneFunc:
+               c.Fatal("test func finished without even calling our handler!")
+       case <-handler.requested:
+       }
+
+       cancel()
+
+       select {
+       case <-timeout:
+               c.Fatal("timed out")
+       case <-doneFunc:
+       }
+}
+
 func (s *StubbedS3Suite) TestBackendStates(c *check.C) {
        defer func(tl, bs arvados.Duration) {
                theConfig.TrashLifetime = tl
@@ -320,18 +410,9 @@ func (s *StubbedS3Suite) newTestableVolume(c *check.C, raceWindow time.Duration,
        srv, err := s3test.NewServer(&s3test.Config{Clock: clock})
        c.Assert(err, check.IsNil)
 
-       tmp, err := ioutil.TempFile("", "keepstore")
-       c.Assert(err, check.IsNil)
-       defer os.Remove(tmp.Name())
-       _, err = tmp.Write([]byte("xxx\n"))
-       c.Assert(err, check.IsNil)
-       c.Assert(tmp.Close(), check.IsNil)
-
        v := &TestableS3Volume{
                S3Volume: &S3Volume{
                        Bucket:             TestBucketName,
-                       AccessKeyFile:      tmp.Name(),
-                       SecretKeyFile:      tmp.Name(),
                        Endpoint:           srv.URL(),
                        Region:             "test-region-1",
                        LocationConstraint: true,
@@ -341,15 +422,31 @@ func (s *StubbedS3Suite) newTestableVolume(c *check.C, raceWindow time.Duration,
                        ReadOnly:           readonly,
                        IndexPageSize:      1000,
                },
+               c:           c,
                server:      srv,
                serverClock: clock,
        }
-       c.Assert(v.Start(), check.IsNil)
+       v.Start()
        err = v.bucket.PutBucket(s3.ACL("private"))
        c.Assert(err, check.IsNil)
        return v
 }
 
+func (v *TestableS3Volume) Start() error {
+       tmp, err := ioutil.TempFile("", "keepstore")
+       v.c.Assert(err, check.IsNil)
+       defer os.Remove(tmp.Name())
+       _, err = tmp.Write([]byte("xxx\n"))
+       v.c.Assert(err, check.IsNil)
+       v.c.Assert(tmp.Close(), check.IsNil)
+
+       v.S3Volume.AccessKeyFile = tmp.Name()
+       v.S3Volume.SecretKeyFile = tmp.Name()
+
+       v.c.Assert(v.S3Volume.Start(), check.IsNil)
+       return nil
+}
+
 // PutRaw skips the ContentMD5 test
 func (v *TestableS3Volume) PutRaw(loc string, block []byte) {
        err := v.bucket.Put(loc, block, "application/octet-stream", s3ACL, s3.Options{})
index 27d6216d01633feca360de94f0a8febaabfb475a..696c3e53a60abbd352efc035f5a0bb1afaec737f 100644 (file)
@@ -2,10 +2,10 @@ package main
 
 import (
        "errors"
-       "log"
        "time"
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
+       log "github.com/Sirupsen/logrus"
 )
 
 // RunTrashWorker is used by Keepstore to initiate trash worker channel goroutine.
index 29f89f567d7b64729d10ecde3a4699186f233711..887cfd3a9edf80f5ef9a620efa880b9bc93856d8 100644 (file)
@@ -48,6 +48,10 @@ Listen:
     "address" is a host IP address or name and "port" is a port number
     or name.
 
+LogFormat:
+
+    Format of request/response and error logs: "json" or "text".
+
 PIDFile:
 
    Path to write PID file during startup. This file is kept open and
index f9812b07cd8630ae75f2a8e32feddd7b2d099a43..fff02aac260f59a6fc46fc24cbebea57b27e5743 100644 (file)
@@ -7,7 +7,6 @@ import (
        "fmt"
        "io"
        "io/ioutil"
-       "log"
        "os"
        "path/filepath"
        "regexp"
@@ -16,6 +15,8 @@ import (
        "sync"
        "syscall"
        "time"
+
+       log "github.com/Sirupsen/logrus"
 )
 
 type unixVolumeAdder struct {
diff --git a/services/nodemanager/arvnodeman/_version.py b/services/nodemanager/arvnodeman/_version.py
new file mode 100644 (file)
index 0000000..9a29cc1
--- /dev/null
@@ -0,0 +1,3 @@
+import pkg_resources
+
+__version__ = pkg_resources.require('arvados-node-manager')[0].version
index b853f00a6728693cce4b855021e18bb35c869087..1c6d214fe8818e9dd49e94b413daa6609096a4c8 100644 (file)
@@ -31,6 +31,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         create_kwargs = create_kwargs.copy()
         create_kwargs.setdefault('external_ip', None)
         create_kwargs.setdefault('ex_metadata', {})
+        self._project = auth_kwargs.get("project")
         super(ComputeNodeDriver, self).__init__(
             auth_kwargs, list_kwargs, create_kwargs,
             driver_class)
@@ -44,7 +45,7 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
 
     def _init_image(self, image_name):
         return 'image', self.search_for(
-            image_name, 'list_images', self._name_key)
+            image_name, 'list_images', self._name_key, ex_project=self._project)
 
     def _init_network(self, network_name):
         return 'ex_network', self.search_for(
index 1be7e46387ff6c5bfe38d4e4805694fb7986cfa7..87ce48769e30235c8407f65bb73508633f97f916 100644 (file)
@@ -17,6 +17,7 @@ from .daemon import NodeManagerDaemonActor
 from .jobqueue import JobQueueMonitorActor, ServerCalculator
 from .nodelist import ArvadosNodeListMonitorActor, CloudNodeListMonitorActor
 from .timedcallback import TimedCallBackActor
+from ._version import __version__
 
 node_daemon = None
 
@@ -28,6 +29,10 @@ def parse_cli(args):
     parser = argparse.ArgumentParser(
         prog='arvados-node-manager',
         description="Dynamically allocate Arvados cloud compute nodes")
+    parser.add_argument(
+        '--version', action='version',
+        version="%s %s" % (sys.argv[0], __version__),
+        help='Print version and exit.')
     parser.add_argument(
         '--foreground', action='store_true', default=False,
         help="Run in the foreground.  Don't daemonize.")
index 3d838e49b443750be9608eec67738bbdb9b679f2..c30108f44bb65a487945e665e7f2afae91528c00 100644 (file)
@@ -33,6 +33,7 @@ setup(name='arvados-node-manager',
         'arvados-python-client>=0.1.20150206225333',
         'pykka',
         'python-daemon',
+        'setuptools'
         ],
       dependency_links = [
           "https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.1.dev4.zip"
diff --git a/services/nodemanager/tests/test_arguments.py b/services/nodemanager/tests/test_arguments.py
new file mode 100644 (file)
index 0000000..f98309a
--- /dev/null
@@ -0,0 +1,27 @@
+#!/usr/bin/env python
+
+import io
+import os
+import sys
+import tempfile
+import unittest
+
+import arvnodeman.launcher as nodeman
+from . import testutil
+
+class ArvNodemArgumentsTestCase(unittest.TestCase):
+    def run_nodeman(self, args):
+        return nodeman.main(args)
+
+    def test_unsupported_arg(self):
+        with self.assertRaises(SystemExit):
+            self.run_nodeman(['-x=unknown'])
+
+    def test_version_argument(self):
+        err = io.BytesIO()
+        out = io.BytesIO()
+        with testutil.redirected_streams(stdout=out, stderr=err):
+            with self.assertRaises(SystemExit):
+                self.run_nodeman(['--version'])
+        self.assertEqual(out.getvalue(), '')
+        self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
index 1b6aab3cafed16cfc0960d1a39a32d669fe53ffb..15337c4120173b6a7f2ca6b21f0924bd571f2326 100644 (file)
@@ -2,13 +2,15 @@
 
 from __future__ import absolute_import, print_function
 
+import contextlib
 import datetime
+import mock
+import pykka
+import sys
 import threading
 import time
 
 import libcloud.common.types as cloud_types
-import mock
-import pykka
 
 from . import pykka_timeout
 
@@ -55,6 +57,17 @@ def cloud_node_fqdn(node):
 def ip_address_mock(last_octet):
     return '10.20.30.{}'.format(last_octet)
 
+@contextlib.contextmanager
+def redirected_streams(stdout=None, stderr=None):
+    orig_stdout, sys.stdout = sys.stdout, stdout or sys.stdout
+    orig_stderr, sys.stderr = sys.stderr, stderr or sys.stderr
+    try:
+        yield
+    finally:
+        sys.stdout = orig_stdout
+        sys.stderr = orig_stderr
+
+
 class MockShutdownTimer(object):
     def _set_state(self, is_open, next_opening):
         self.window_open = lambda: is_open
diff --git a/tools/arvbash/arvbash.sh b/tools/arvbash/arvbash.sh
new file mode 100755 (executable)
index 0000000..c2785c4
--- /dev/null
@@ -0,0 +1,124 @@
+#!/bin/bash
+# bash functions for managing Arvados tokens and other conveniences.
+
+read -rd "\000" helpmessage <<EOF
+$(basename $0): bash functions for managing Arvados tokens and other shortcuts.
+
+Syntax:
+        . $0            # activate for current shell
+        $0 --install    # install into .bashrc
+
+arvswitch <name>
+  Set ARVADOS_API_HOST and ARVADOS_API_TOKEN in the current environment based on
+  $HOME/.config/arvados/<name>.conf
+  With no arguments, list available Arvados configurations.
+
+arvsave <name>
+  Save values of ARVADOS_API_HOST and ARVADOS_API_TOKEN in the current environment to
+  $HOME/.config/arvados/<name>.conf
+
+arvrm <name>
+  Delete $HOME/.config/arvados/<name>.conf
+
+arvboxswitch <name>
+  Set ARVBOX_CONTAINER to <name>
+  With no arguments, list available arvboxes.
+
+arvopen:
+  Open an Arvados uuid in web browser (http://curover.se)
+
+arvissue
+  Open an Arvados ticket in web browser (http://dev.arvados.org)
+
+EOF
+
+if [[ "$1" = "--install" ]] ; then
+    this=$(readlink -f $0)
+    if ! grep ". $this" ~/.bashrc >/dev/null ; then
+        echo ". $this" >> ~/.bashrc
+        echo "Installed into ~/.bashrc"
+    else
+        echo "Already installed in ~/.bashrc"
+    fi
+elif ! [[ $0 =~ bash$ ]] ; then
+    echo "$helpmessage"
+fi
+
+HISTIGNORE=$HISTIGNORE:'export ARVADOS_API_TOKEN=*'
+
+arvswitch() {
+    if [[ -n "$1" ]] ; then
+        if [[ -f $HOME/.config/arvados/$1.conf ]] ; then
+            unset ARVADOS_API_HOST_INSECURE
+            for a in $(cat $HOME/.config/arvados/$1.conf) ; do export $a ; done
+            echo "Switched to $1"
+        else
+            echo "$1 unknown"
+        fi
+    else
+        echo "Switch Arvados environment conf"
+        echo "Usage: arvswitch name"
+        echo "Available confs:" $((cd $HOME/.config/arvados && ls --indicator-style=none *.conf) | rev | cut -c6- | rev)
+    fi
+}
+
+arvsave() {
+    if [[ -n "$1" ]] ; then
+       touch $HOME/.config/arvados/$1.conf
+       chmod 0600 $HOME/.config/arvados/$1.conf
+        env | grep ARVADOS_ > $HOME/.config/arvados/$1.conf
+    else
+        echo "Save current Arvados environment variables to conf file"
+        echo "Usage: arvsave name"
+    fi
+}
+
+arvrm() {
+    if [[ -n "$1" ]] ; then
+        if [[ -f $HOME/.config/arvados/$1.conf ]] ; then
+            rm $HOME/.config/arvados/$1.conf
+        else
+            echo "$1 unknown"
+        fi
+    else
+        echo "Delete Arvados environment conf"
+        echo "Usage: arvrm name"
+    fi
+}
+
+arvboxswitch() {
+    if [[ -n "$1" ]] ; then
+        if [[ -d $HOME/.arvbox/$1 ]] ; then
+            export ARVBOX_CONTAINER=$1
+            echo "Arvbox switched to $1"
+        else
+            echo "$1 unknown"
+        fi
+    else
+        if test -z "$ARVBOX_CONTAINER" ; then
+            ARVBOX_CONTAINER=arvbox
+        fi
+        echo "Switch Arvbox environment conf"
+        echo "Usage: arvboxswitch name"
+        echo "Your current container is: $ARVBOX_CONTAINER"
+        echo "Available confs:" $(cd $HOME/.arvbox && ls --indicator-style=none)
+    fi
+}
+
+arvopen() {
+    if [[ -n "$1" ]] ; then
+        xdg-open https://curover.se/$1
+    else
+        echo "Open Arvados uuid in browser"
+        echo "Usage: arvopen uuid"
+    fi
+}
+
+arvissue() {
+    if [[ -n "$1" ]] ; then
+        xdg-open https://dev.arvados.org/issues/$1
+    else
+        echo "Open Arvados issue in browser"
+        echo "Usage: arvissue uuid"
+    fi
+}
index 6568a887b3c829681de54cc294b68389f3cc0001..a894350970f115594300befe9eab2798946cdfe4 100644 (file)
@@ -68,6 +68,8 @@ RUN set -e && \
  tar -C /usr/local -xjf /tmp/$PJS.tar.bz2 && \
  ln -s ../$PJS/bin/phantomjs /usr/local/bin/
 
+RUN pip install -U setuptools
+
 ARG arvados_version
 RUN echo arvados_version is git commit $arvados_version
 
index b3dfedcf83c4d22d346010c8ddbee138d67ad8fd..230a189a9a244c79d94380960f82d4ad52ddb6c3 100644 (file)
@@ -1,4 +1,8 @@
 
+export PATH=${PATH}:/usr/local/go/bin:/var/lib/gems/bin
+export GEM_HOME=/var/lib/gems
+export GEM_PATH=/var/lib/gems
+
 if test -s /var/run/localip_override ; then
     localip=$(cat /var/run/localip_override)
 else
index a36205c9678d4e67063bbec141072df1a737cea9..5f72f1c613192e5b8aa806a0f490711b6055ff69 100755 (executable)
@@ -9,7 +9,8 @@ cd /usr/src/arvados/services/api
 export RAILS_ENV=development
 
 run_bundler --without=development
-bundle exec passenger start --runtime-check-only --runtime-dir=/var/lib/passenger
+bundle exec passenger-config build-native-support
+bundle exec passenger-config install-standalone-runtime
 
 if test "$1" = "--only-deps" ; then
     exit
@@ -23,6 +24,5 @@ if test "$1" = "--only-setup" ; then
 fi
 
 exec bundle exec passenger start --port=${services[api]} \
-                  --runtime-dir=/var/lib/passenger \
                   --ssl --ssl-certificate=/var/lib/arvados/self-signed.pem \
                   --ssl-certificate-key=/var/lib/arvados/self-signed.key