sdk/cli/binstubs/
sdk/cwl/arvados_cwl/_version.py
services/api/config/arvados-clients.yml
+*#*
+.DS_Store
## Development
[![Build Status](https://ci.curoverse.com/buildStatus/icon?job=run-tests)](https://ci.curoverse.com/job/run-tests/)
+[![Go Report Card](https://goreportcard.com/badge/github.com/curoverse/arvados)](https://goreportcard.com/report/github.com/curoverse/arvados)
The Arvados public bug tracker is located at https://dev.arvados.org/projects/arvados/issues
source 'https://rubygems.org'
-gem 'rails', '~> 4.1.0'
+gem 'rails', '~> 4.1'
gem 'arvados', '>= 0.1.20150511150219'
gem 'activerecord-nulldb-adapter'
# in production environments by default.
group :assets do
gem 'sass-rails'
- gem 'uglifier', '>= 1.0.3'
+ gem 'uglifier', '~> 2.0'
# See https://github.com/sstephenson/execjs#readme for more supported runtimes
gem 'therubyracer', :platforms => :ruby
end
group :test, :diagnostics, :performance do
- gem 'minitest', '>= 5.0.0'
+ gem 'minitest', '~> 5.0'
gem 'selenium-webdriver'
gem 'capybara'
gem 'poltergeist'
deep_merge (1.0.1)
docile (1.1.5)
erubis (2.7.0)
- execjs (2.2.2)
+ execjs (2.7.0)
extlib (0.9.16)
faraday (0.9.2)
multipart-post (>= 1.2, < 3)
signet (~> 0.7)
headless (1.0.2)
highline (1.6.21)
- httpclient (2.6.0.1)
+ httpclient (2.8.2.4)
i18n (0.7.0)
jquery-rails (3.1.2)
railties (>= 3.0, < 5.0)
metaclass (~> 0.0.1)
morrisjs-rails (0.5.1)
railties (> 3.1, < 5)
- multi_json (1.12.0)
+ multi_json (1.12.1)
multipart-post (2.0.0)
net-scp (1.2.1)
net-ssh (>= 2.6.5)
tilt (1.4.1)
tzinfo (1.2.2)
thread_safe (~> 0.1)
- uglifier (2.7.0)
+ uglifier (2.7.2)
execjs (>= 0.3.0)
json (>= 1.8.0)
websocket (1.2.2)
less-rails
lograge
logstash-event
- minitest (>= 5.0.0)
+ minitest (~> 5.0)
mocha
morrisjs-rails
multi_json
piwik_analytics
poltergeist
rack-mini-profiler
- rails (~> 4.1.0)
+ rails (~> 4.1)
rails-perftest
raphael-rails
ruby-debug-passenger
sshkey
themes_for_rails!
therubyracer
- uglifier (>= 1.0.3)
+ uglifier (~> 2.0)
wiselinks
BUNDLED WITH
- 1.12.1
+ 1.13.2
$(document).
- on('click', '.component-detail-panel', function(event) {
- var href = $($(event.target).attr('href'));
- if ($(href).attr("class").split(' ').indexOf("in") == -1) {
- return; // collapsed; nothing more to do
- }
-
+ on('click', '.component-detail-panel', function(event) {
+ var href = $($(event.target).attr('href'));
+ if ($(href).hasClass("in")) {
var content_div = href.find('.work-unit-component-detail-body');
content_div.html('<div class="spinner spinner-32px col-sm-1"></div>');
var content_url = href.attr('content-url');
var action_data = href.attr('action-data');
$.ajax(content_url, {dataType: 'html', type: 'POST', data: {action_data: action_data}}).
- done(function(data, status, jqxhr) {
- content_div.html(data);
- }).fail(function(jqxhr, status, error) {
- content_div.html(error);
- });
- });
+ done(function(data, status, jqxhr) {
+ content_div.html(data);
+ }).fail(function(jqxhr, status, error) {
+ content_div.html(error);
+ });
+ }
+ });
# from the top three levels.
# That is: get toplevel projects under home, get subprojects of
# these projects, and so on until we hit the limit.
- def my_wanted_projects user, page_size=100
+ def my_wanted_projects(user, page_size=100)
return @my_wanted_projects if @my_wanted_projects
from_top = []
break if current_level.results.size == 0
@too_many_projects = true if current_level.items_available > current_level.results.size
from_top.concat current_level.results
- uuids = current_level.results.collect { |x| x.uuid }
+ uuids = current_level.results.collect(&:uuid)
depth += 1
if depth >= 3
@reached_level_limit = true
end
helper_method :my_wanted_projects_tree
- def my_wanted_projects_tree user, page_size=100
- build_my_wanted_projects_tree user, page_size
+ def my_wanted_projects_tree(user, page_size=100)
+ build_my_wanted_projects_tree(user, page_size)
[@my_wanted_projects_tree, @too_many_projects, @reached_level_limit]
end
- def build_my_wanted_projects_tree user, page_size=100
+ def build_my_wanted_projects_tree(user, page_size=100)
return @my_wanted_projects_tree if @my_wanted_projects_tree
parent_of = {user.uuid => 'me'}
end
end
+ def copy
+ src = @object
+
+ @object = ContainerRequest.new
+
+ @object.command = src.command
+ @object.container_image = src.container_image
+ @object.cwd = src.cwd
+ @object.description = src.description
+ @object.environment = src.environment
+ @object.mounts = src.mounts
+ @object.name = src.name
+ @object.output_path = src.output_path
+ @object.priority = 1
+ @object.properties[:template_uuid] = src.properties[:template_uuid]
+ @object.runtime_constraints = src.runtime_constraints
+ @object.scheduling_parameters = src.scheduling_parameters
+ @object.state = 'Uncommitted'
+ @object.use_existing = false
+
+ # set owner_uuid to that of source, provided it is a project and writable by current user
+ current_project = Group.find(src.owner_uuid) rescue nil
+ if (current_project && current_project.writable_by.andand.include?(current_user.uuid))
+ @object.owner_uuid = src.owner_uuid
+ end
+
+ super
+ end
end
end
@object.state = 'New'
- # set owner_uuid to that of source, provided it is a project and wriable by current user
+ # set owner_uuid to that of source, provided it is a project and writable by current user
current_project = Group.find(source.owner_uuid) rescue nil
if (current_project && current_project.writable_by.andand.include?(current_user.uuid))
@object.owner_uuid = source.owner_uuid
link.destroy
end
- # If this object has the 'expires_at' attribute, then simply mark it
- # expired.
- if item.attributes.include?("expires_at")
- item.update_attributes expires_at: Time.now
+ # If this object has the 'trash_at' attribute, then simply mark it
+ # as trash.
+ if item.attributes.include?("trash_at")
+ item.update_attributes trash_at: Time.now
@removed_uuids << item.uuid
elsif item.owner_uuid == @object.uuid
# Object is owned by this project. Remove it from the project by
item.update_attributes owner_uuid: current_user.uuid
@removed_uuids << item.uuid
rescue ArvadosApiClient::ApiErrorResponseException => e
- if e.message.include? '_owner_uuid_name_unique'
+ if e.message.include? '_owner_uuid_'
rename_to = item.name + ' removed from ' +
(@object.name ? @object.name : @object.uuid) +
' at ' + Time.now.to_s
workflow = Workflow.find? template_uuid
if workflow.definition
begin
- wf_json = YAML::load(workflow.definition)
+ wf_json = ActiveSupport::HashWithIndifferentAccess.new YAML::load(workflow.definition)
rescue => e
logger.error "Error converting definition yaml to json: #{e.message}"
raise ArgumentError, "Error converting definition yaml to json: #{e.message}"
attrs['cwd'] = "/var/spool/cwl"
attrs['output_path'] = "/var/spool/cwl"
+ input_defaults = {}
+ if wf_json
+ inputs = get_cwl_inputs(wf_json)
+ inputs.each do |input|
+ if input[:default]
+ input_defaults[cwl_shortname(input[:id])] = input[:default]
+ end
+ end
+ end
+
# mounts
mounts = {
"/var/lib/cwl/cwl.input.json" => {
"kind" => "json",
- "content" => {}
+ "content" => input_defaults
},
"stdout" => {
"kind" => "file",
end
def log_collection
- get_combined(:log)
+ if @proxied.is_a?(ContainerRequest)
+ get(:log_uuid)
+ else
+ get(:log)
+ end
end
def outputs
items = []
- items << get_combined(:output) if get_combined(:output)
+ if @proxied.is_a?(ContainerRequest)
+ out = get(:output_uuid)
+ else
+ out = get(:output)
+ end
+ items << out if out
items
end
def cputime
if state_label != "Queued"
if started_at
- (runtime_constraints.andand[:min_nodes] || 1) * ((finished_at || Time.now()) - started_at)
+ (runtime_constraints.andand[:min_nodes] || 1).to_i * ((finished_at || Time.now()) - started_at)
end
end
end
if children.any?
cpu_time = children.map { |c|
if c.started_at
- (c.runtime_constraints.andand[:min_nodes] || 1) * ((c.finished_at || Time.now()) - c.started_at)
+ (c.runtime_constraints.andand[:min_nodes] || 1).to_i * ((c.finished_at || Time.now()) - c.started_at)
else
0
end
}.reduce(:+) || 0
else
if started_at
- cpu_time = (runtime_constraints.andand[:min_nodes] || 1) * ((finished_at || Time.now()) - started_at)
+ cpu_time = (runtime_constraints.andand[:min_nodes] || 1).to_i * ((finished_at || Time.now()) - started_at)
end
end
<% if @object.class.goes_in_projects? && @object.uuid != current_user.andand.uuid # Not the "Home" project %>
<% content_for :tab_line_buttons do %>
+ <% if current_user.andand.is_active %>
+ <%= render partial: 'extra_tab_line_buttons' %>
+ <% end %>
<% if current_user.andand.is_active && @object.class.copies_to_projects? %>
<%= link_to(
choose_projects_path(
--- /dev/null
+<%# "Re-run" button: only offered once the container request has finished. %>
+<% if @object.state == 'Final' %>
+  <%= link_to(copy_container_request_path('id' => @object.uuid),
+      class: 'btn btn-primary',
+      data: {toggle: :tooltip, placement: :top}, title: 'This will make a copy and take you there. You can then make any needed changes and run it',
+      method: :post,
+      ) do %>
+    <i class="fa fa-fw fa-play"></i> Re-run
+  <% end %>
+<% end %>
-<% n_inputs = cwl_inputs_required(@object, get_cwl_inputs(@object.mounts[:"/var/lib/cwl/workflow.json"][:content]), [:mounts, :"/var/lib/cwl/cwl.input.json", :content]) %>
+<%
+n_inputs = if @object.mounts[:"/var/lib/cwl/workflow.json"] && @object.mounts[:"/var/lib/cwl/cwl.input.json"]
+ cwl_inputs_required(@object, get_cwl_inputs(@object.mounts[:"/var/lib/cwl/workflow.json"][:content]), [:mounts, :"/var/lib/cwl/cwl.input.json", :content])
+ else
+ 0
+ end
+%>
<% content_for :pi_input_form do %>
<form role="form" style="width:60%">
<div class="form-group">
- <% workflow = @object.mounts[:"/var/lib/cwl/workflow.json"][:content] %>
- <% inputs = get_cwl_inputs(workflow) %>
- <% inputs.each do |input| %>
- <label for="#input-<%= cwl_shortname(input[:id]) %>">
- <%= input[:label] || cwl_shortname(input[:id]) %>
- </label>
- <div>
- <p class="form-control-static">
- <%= render_cwl_input @object, input, [:mounts, :"/var/lib/cwl/cwl.input.json", :content] %>
+ <% workflow = @object.mounts[:"/var/lib/cwl/workflow.json"].andand[:content] %>
+ <% if workflow %>
+ <% inputs = get_cwl_inputs(workflow) %>
+ <% inputs.each do |input| %>
+ <label for="#input-<%= cwl_shortname(input[:id]) %>">
+ <%= input[:label] || cwl_shortname(input[:id]) %>
+ </label>
+ <div>
+ <p class="form-control-static">
+ <%= render_cwl_input @object, input, [:mounts, :"/var/lib/cwl/cwl.input.json", :content] %>
+ </p>
+ </div>
+ <p class="help-block">
+ <%= input[:doc] %>
</p>
- </div>
- <p class="help-block">
- <%= input[:doc] %>
- </p>
+ <% end %>
<% end %>
</div>
</form>
<div class="col-md-3">
<% if current_job[:started_at] %>
<% walltime = ((if current_job[:finished_at] then current_job[:finished_at] else Time.now() end) - current_job[:started_at]) %>
- <% cputime = (current_job[:runtime_constraints].andand[:min_nodes] || 1) *
+ <% cputime = (current_job[:runtime_constraints].andand[:min_nodes] || 1).to_i *
((current_job[:finished_at] || Time.now()) - current_job[:started_at]) %>
<%= render_runtime(walltime, false) %>
<% if cputime > 0 %> / <%= render_runtime(cputime, false) %> (<%= (cputime/walltime).round(1) %>⨯)<% end %>
<%
cputime = pipeline_jobs.map { |j|
if j[:job][:started_at]
- (j[:job][:runtime_constraints].andand[:min_nodes] || 1) * ((j[:job][:finished_at] || Time.now()) - j[:job][:started_at])
+ (j[:job][:runtime_constraints].andand[:min_nodes] || 1).to_i * ((j[:job][:finished_at] || Time.now()) - j[:job][:started_at])
else
0
end
pipeline_2:
template_uuid: zzzzz-p5p6p-1xbobfobk94ppbv
input_paths: [zzzzz-4zz18-nz98douzhaa3jh2, zzzzz-4zz18-gpw9o5wpcti3nib]
+ container_requests_to_test:
+ container_request_1:
+ workflow_uuid: zzzzz-7fd4e-60e96shgwspt4mw
+ input_paths: []
+ max_wait_seconds: 10
# Below is a sample setting for performance testing.
# Configure workbench URL as "arvados_workbench_url"
resources :containers
resources :container_requests do
post 'cancel', :on => :member
+ post 'copy', on: :member
end
get '/virtual_machines/:id/webshell/:login' => 'virtual_machines#webshell', :as => :webshell_virtual_machine
resources :authorized_keys
--- /dev/null
+# `rake config:dump` — print the merged site configuration (the
+# $application_config global assembled at boot) to stdout as YAML.
+namespace :config do
+ desc 'Show site configuration'
+ task dump: :environment do
+ puts $application_config.to_yaml
+ end
+end
assert_includes @response.body, '<div id="event_log_div"'
assert_select 'Download the log', false
end
+
+ test "completed container request offers re-run option" do
+ use_token 'active'
+
+ uuid = api_fixture('container_requests')['completed']['uuid']
+
+ get :show, {id: uuid}, session_for(:active)
+ assert_response :success
+
+ assert_includes @response.body, "href=\"/container_requests/#{uuid}/copy\""
+ end
+
+ test "container request copy" do
+ completed_cr = api_fixture('container_requests')['completed']
+ post(:copy,
+ {
+ id: completed_cr['uuid']
+ },
+ session_for(:active))
+ assert_response 302
+ copied_cr = assigns(:object)
+ assert_not_nil copied_cr
+ assert_equal 'Uncommitted', copied_cr[:state]
+ assert_equal "Copy of #{completed_cr['name']}", copied_cr['name']
+ assert_equal completed_cr['cmd'], copied_cr['cmd']
+ assert_equal completed_cr['runtime_constraints']['ram'], copied_cr['runtime_constraints'][:ram]
+ end
end
end
test "project admin can remove collections from the project" do
- # Deleting an object that supports 'expires_at' should make it
- # completely inaccessible to API queries, not simply moved out of the project.
+ # Deleting an object that supports 'trash_at' should make it
+ # completely inaccessible to API queries, not simply moved out of
+ # the project.
coll_key = "collection_to_remove_from_subproject"
coll_uuid = api_fixture("collections")[coll_key]["uuid"]
delete(:remove_item,
use_token :subproject_admin
assert_raise ArvadosApiClient::NotFoundException do
- Collection.find(coll_uuid)
+ Collection.find(coll_uuid, cache: false)
end
end
test "project admin can remove items from project other than collections" do
- # An object which does not have an expired_at field (e.g. Specimen)
+ # An object which does not have a trash_at field (e.g. Specimen)
# should be implicitly moved to the user's Home project when removed.
specimen_uuid = api_fixture('specimens', 'in_asubproject')['uuid']
delete(:remove_item,
["user1_with_load", 2, ["project_with_10_collections"], "project_with_2_pipelines_and_60_crs"],
["admin", 5, ["anonymously_accessible_project", "subproject_in_anonymous_accessible_project"], "aproject"],
].each do |user, page_size, tree_segment, unexpected|
+ # Note: this test is sensitive to database collation. It passes
+ # with en_US.UTF-8.
test "build my projects tree for #{user} user and verify #{unexpected} is omitted" do
use_token user
- ctrl = ProjectsController.new
-
- current_user = User.find(api_fixture('users')[user]['uuid'])
- my_tree = ctrl.send :my_wanted_projects_tree, current_user, page_size
+ tree, _, _ = @controller.send(:my_wanted_projects_tree,
+ User.current,
+ page_size)
tree_segment_at_depth_1 = api_fixture('groups')[tree_segment[0]]
tree_segment_at_depth_2 = api_fixture('groups')[tree_segment[1]] if tree_segment[1]
- tree_nodes = {}
- my_tree[0].each do |x|
- tree_nodes[x[:object]['uuid']] = x[:depth]
+ node_depth = {}
+ tree.each do |x|
+ node_depth[x[:object]['uuid']] = x[:depth]
end
- assert_equal(1, tree_nodes[tree_segment_at_depth_1['uuid']])
- assert_equal(2, tree_nodes[tree_segment_at_depth_2['uuid']]) if tree_segment[1]
+ assert_equal(1, node_depth[tree_segment_at_depth_1['uuid']])
+ assert_equal(2, node_depth[tree_segment_at_depth_2['uuid']]) if tree_segment[1]
unexpected_project = api_fixture('groups')[unexpected]
- assert_nil(tree_nodes[unexpected_project['uuid']])
+ assert_nil(node_depth[unexpected_project['uuid']], node_depth.inspect)
end
end
--- /dev/null
+require 'diagnostics_test_helper'
+
+# This test assumes that the configured workflow_uuid corresponds to a cwl workflow.
+# Ex: configure a workflow using the steps below and use the resulting workflow uuid:
+# > cd arvados/doc/user/cwl/bwa-mem
+# > arvados-cwl-runner --create-workflow bwa-mem.cwl bwa-mem-input.yml
+
+class ContainerRequestTest < DiagnosticsTest
+ crs_to_test = Rails.configuration.container_requests_to_test.andand.keys
+
+ setup do
+ need_selenium 'to make websockets work'
+ end
+
+ crs_to_test.andand.each do |cr_to_test|
+ test "run container_request: #{cr_to_test}" do
+ cr_config = Rails.configuration.container_requests_to_test[cr_to_test]
+
+ visit_page_with_token 'active'
+
+ find('.btn', text: 'Run a process').click
+
+ within('.modal-dialog') do
+ page.find_field('Search').set cr_config['workflow_uuid']
+ wait_for_ajax
+ find('.selectable', text: 'bwa-mem.cwl').click
+ find('.btn', text: 'Next: choose inputs').click
+ end
+
+ page.assert_selector('a.disabled,button.disabled', text: 'Run') if cr_config['input_paths'].any?
+
+ # Choose input for the workflow
+ cr_config['input_paths'].each do |look_for|
+ select_input look_for
+ end
+ wait_for_ajax
+
+ # All needed input are already filled in. Run this workflow now
+ page.assert_no_selector('a.disabled,button.disabled', text: 'Run')
+ find('a,button', text: 'Run').click
+
+ # container_request is running. Run button is no longer available.
+ page.assert_no_selector('a', text: 'Run')
+
+ # Wait for container_request run to complete
+ wait_until_page_has 'completed', cr_config['max_wait_seconds']
+ end
+ end
+end
find('a,button', text: 'Components').click
find('a,button', text: 'Run').click
- # Pipeline is running. We have a "Stop" button instead now.
+ # Pipeline is running. We have a "Pause" button instead now.
page.assert_selector 'a,button', text: 'Pause'
# Wait for pipeline run to complete
wait_until_page_has 'completed', pipeline_config['max_wait_seconds']
end
end
-
- def select_input look_for
- inputs_needed = page.all('.btn', text: 'Choose')
- return if (!inputs_needed || !inputs_needed.any?)
-
- look_for_uuid = nil
- look_for_file = nil
- if look_for.andand.index('/').andand.>0
- partitions = look_for.partition('/')
- look_for_uuid = partitions[0]
- look_for_file = partitions[2]
- else
- look_for_uuid = look_for
- look_for_file = nil
- end
-
- assert_triggers_dom_event 'shown.bs.modal' do
- inputs_needed[0].click
- end
-
- within('.modal-dialog') do
- if look_for_uuid
- fill_in('Search', with: look_for_uuid, exact: true)
- wait_for_ajax
- end
-
- page.all('.selectable').first.click
- wait_for_ajax
- # ajax reload is wiping out input selection after search results; so, select again.
- page.all('.selectable').first.click
- wait_for_ajax
-
- if look_for_file
- wait_for_ajax
- within('.collection_files_name', text: look_for_file) do
- find('.fa-file').click
- end
- end
-
- find('button', text: 'OK').click
- wait_for_ajax
- end
- end
end
visit page_with_token(tokens[token_name], (workbench_url + path))
end
+ # Fill in one "Choose" input in the run-a-process modal.
+ # look_for is either a collection uuid, or "uuid/filename" to select a
+ # single file inside that collection. Returns immediately when the page
+ # shows no 'Choose' buttons (nothing left to fill in).
+ def select_input look_for
+ inputs_needed = page.all('.btn', text: 'Choose')
+ return if (!inputs_needed || !inputs_needed.any?)
+
+ look_for_uuid = nil
+ look_for_file = nil
+ if look_for.andand.index('/').andand.>0
+ partitions = look_for.partition('/')
+ look_for_uuid = partitions[0]
+ look_for_file = partitions[2]
+ else
+ look_for_uuid = look_for
+ look_for_file = nil
+ end
+
+ assert_triggers_dom_event 'shown.bs.modal' do
+ inputs_needed[0].click
+ end
+
+ within('.modal-dialog') do
+ if look_for_uuid
+ fill_in('Search', with: look_for_uuid, exact: true)
+ wait_for_ajax
+ end
+
+ page.all('.selectable').first.click
+ wait_for_ajax
+ # ajax reload is wiping out input selection after search results; so, select again.
+ page.all('.selectable').first.click
+ wait_for_ajax
+
+ if look_for_file
+ wait_for_ajax
+ within('.collection_files_name', text: look_for_file) do
+ find('.fa-file').click
+ end
+ end
+
+ find('button', text: 'OK').click
+ wait_for_ajax
+ end
+ end
+
# Looks for the text_to_look_for for up to the max_time provided
def wait_until_page_has text_to_look_for, max_time=30
max_time = 30 if (!max_time || (max_time.to_s != max_time.to_i.to_s))
end
within('.recent-processes') do
- assert_text 'pipeline_with_job'
+ assert_text 'running'
within('.row-zzzzz-xvhdp-cr4runningcntnr') do
assert_text 'requester_for_running_cr'
assert_text 'completed container request'
within('.row-zzzzz-xvhdp-cr4completedctr')do
- assert page.has_link? '1f4b0bc7583c2a7f9102c395f4ffc5e3+45'
+ assert page.has_link? 'foo_file'
end
end
wait_for_ajax
assert_text 'This container is queued'
end
+
+ test "Run button enabled when workflow is empty and no inputs are needed" do
+ visit page_with_token("active")
+
+ find('.btn', text: 'Run a process').click
+ within('.modal-dialog') do
+ find('.selectable', text: 'Valid workflow with no definition yaml').click
+ find('.btn', text: 'Next: choose inputs').click
+ end
+
+ assert_text 'This workflow does not need any further inputs'
+ page.assert_selector 'a', text: 'Run'
+ end
end
end
[
- ['Two Part Pipeline Template', 'part-one', 'Provide a value for the following'],
- ['Workflow with input specifications', 'this workflow has inputs specified', 'Provide a value for the following'],
+ ['Pipeline with default input specifications', 'part-one', 'Provide values for the following'],
+ ['Workflow with default input specifications', 'this workflow has inputs specified', 'Provide a value for the following'],
].each do |template_name, preview_txt, process_txt|
test "run a process using template #{template_name} from dashboard" do
visit page_with_token('admin')
# in the process page now
assert_text process_txt
assert_selector 'a', text: template_name
+
+ assert_equal "Set value for ex_string_def", find('div.form-group > div > p.form-control-static > a', text: "hello-testing-123")[:"data-title"]
+
+ page.assert_selector 'a.disabled,button.disabled', text: 'Run'
end
end
--- /dev/null
+case "$TARGET" in
+ ubuntu1204)
+ fpm_depends+=('libfuse2 = 2.9.2-5')
+ ;;
+esac
--- /dev/null
+case "$TARGET" in
+ ubuntu1204)
+ fpm_depends+=('libfuse2 = 2.9.2-5')
+ ;;
+esac
case "$TARGET" in
+ centos6)
+ build_depends+=('fuse-libs' 'fuse-devel')
+ fpm_depends+=(glibc 'fuse-libs = 2.9.2-5' 'fuse = 2.9.2-5')
+ ;;
centos*)
build_depends+=('fuse-devel')
fpm_depends+=(glibc fuse-libs)
;;
+ ubuntu1204)
+ build_depends+=(libfuse2 libfuse-dev)
+ fpm_depends+=(libc6 python-contextlib2 'libfuse2 = 2.9.2-5' 'fuse = 2.9.2-5')
+ ;;
debian* | ubuntu*)
build_depends+=('libfuse-dev')
- fpm_depends+=(libc6 libfuse2)
+ fpm_depends+=(libc6 'libfuse2 > 2.9.0' 'fuse > 2.9.0')
;;
esac
--- /dev/null
+case "$TARGET" in
+ centos6)
+ fpm_depends+=('fuse-libs = 2.9.2-5')
+ ;;
+esac
--- /dev/null
+case "$TARGET" in
+ centos6)
+ fpm_depends+=('fuse-libs = 2.9.2-5')
+ ;;
+esac
run-build-docker-images.sh Build arvbox Docker images.
-run-build-docker-jobs-image.sh Build arvados/jobs Docker image.
+run-build-docker-jobs-image.sh Build arvados/jobs Docker image
+ (uses published debian packages)
+
+build-dev-docker-jobs-image.sh Build developer arvados/jobs Docker image
+ (uses local git tree)
run-library.sh A library of functions shared by the
various scripts in this
--- /dev/null
+#!/bin/bash
+
+read -rd "\000" helpmessage <<EOF
+Build an arvados/jobs Docker image from local git tree.
+
+Intended for use by developers working on arvados-python-client or
+arvados-cwl-runner and need to run a crunch job with a custom package
+version. Also supports building custom cwltool if CWLTOOL is set.
+
+Syntax:
+ WORKSPACE=/path/to/arvados $(basename $0)
+
+WORKSPACE=path Path to the Arvados source tree to build packages from
+CWLTOOL=path (optional) Path to cwltool git repository.
+
+EOF
+
+set -e
+
+if [[ -z "$WORKSPACE" ]] ; then
+ echo "$helpmessage"
+ echo
+ echo "Must set WORKSPACE"
+ exit 1
+fi
+
+if [[ -z "$ARVADOS_API_HOST" || -z "$ARVADOS_API_TOKEN" ]] ; then
+ echo "$helpmessage"
+ echo
+ echo "Must set ARVADOS_API_HOST and ARVADOS_API_TOKEN"
+ exit 1
+fi
+
+cd "$WORKSPACE"
+
+(cd sdk/python && python setup.py sdist)
+sdk=$(cd sdk/python/dist && ls -t arvados-python-client-*.tar.gz | head -n1)
+
+(cd sdk/cwl && python setup.py sdist)
+runner=$(cd sdk/cwl/dist && ls -t arvados-cwl-runner-*.tar.gz | head -n1)
+
+rm -rf sdk/cwl/cwltool_dist
+mkdir -p sdk/cwl/cwltool_dist
+if [[ -n "$CWLTOOL" ]] ; then
+ (cd "$CWLTOOL" && python setup.py sdist)
+ cwltool=$(cd "$CWLTOOL/dist" && ls -t cwltool-*.tar.gz | head -n1)
+ cp "$CWLTOOL/dist/$cwltool" $WORKSPACE/sdk/cwl/cwltool_dist
+fi
+
+. build/run-library.sh
+
+python_sdk_ts=$(cd sdk/python && timestamp_from_git)
+cwl_runner_ts=$(cd sdk/cwl && timestamp_from_git)
+
+if [[ $python_sdk_ts -gt $cwl_runner_ts ]]; then
+ gittag=$(git log --first-parent --max-count=1 --format=format:%H sdk/python)
+else
+ gittag=$(git log --first-parent --max-count=1 --format=format:%H sdk/cwl)
+fi
+
+docker build --build-arg sdk=$sdk --build-arg runner=$runner --build-arg cwltool=$cwltool -f "$WORKSPACE/sdk/dev-jobs.dockerfile" -t arvados/jobs:$gittag "$WORKSPACE/sdk"
+echo arv-keepdocker arvados/jobs $gittag
+arv-keepdocker arvados/jobs $gittag
-LIBCLOUD_PIN=0.20.2.dev1
\ No newline at end of file
+LIBCLOUD_PIN=0.20.2.dev2
\ No newline at end of file
-all: centos6/generated centos7/generated debian7/generated debian8/generated ubuntu1204/generated ubuntu1404/generated
-
-centos6/generated: common-generated-all
- test -d centos6/generated || mkdir centos6/generated
- cp -rlt centos6/generated common-generated/*
+all: centos7/generated debian8/generated ubuntu1204/generated ubuntu1404/generated
centos7/generated: common-generated-all
test -d centos7/generated || mkdir centos7/generated
cp -rlt centos7/generated common-generated/*
-debian7/generated: common-generated-all
- test -d debian7/generated || mkdir debian7/generated
- cp -rlt debian7/generated common-generated/*
-
debian8/generated: common-generated-all
test -d debian8/generated || mkdir debian8/generated
cp -rlt debian8/generated common-generated/*
+++ /dev/null
-FROM centos:6
-MAINTAINER Brett Smith <brett@curoverse.com>
-
-# Install build dependencies provided in base distribution
-RUN yum -q -y install make automake gcc gcc-c++ libyaml-devel patch readline-devel zlib-devel libffi-devel openssl-devel bzip2 libtool bison sqlite-devel rpm-build git perl-ExtUtils-MakeMaker libattr-devel nss-devel libcurl-devel which tar unzip scl-utils centos-release-scl postgresql-devel
-
-# Install golang binary
-ADD generated/go1.7.1.linux-amd64.tar.gz /usr/local/
-RUN ln -s /usr/local/go/bin/go /usr/local/bin/
-
-# Install RVM
-RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
- curl -L https://get.rvm.io | bash -s stable && \
- /usr/local/rvm/bin/rvm install 2.1 && \
- /usr/local/rvm/bin/rvm alias create default ruby-2.1 && \
- /usr/local/rvm/bin/rvm-exec default gem install bundler && \
- /usr/local/rvm/bin/rvm-exec default gem install cure-fpm --version 1.6.0b
-
-# Need to "touch" RPM database to workaround bug in interaction between
-# overlayfs and yum (https://bugzilla.redhat.com/show_bug.cgi?id=1213602)
-RUN touch /var/lib/rpm/* && yum -q -y install python27 python33
-RUN scl enable python33 "easy_install-3.3 pip" && scl enable python27 "easy_install-2.7 pip"
-
-# fpm requires ffi which now wants xz-libs-5 which isn't packaged for centos6
-# but the library from xz-libs-4.999 appears to be good enough.
-RUN ln -s /usr/lib64/liblzma.so.0 /usr/lib64/lzma.so.5
-
-RUN cd /tmp && \
- (curl -OLf 'http://pkgs.repoforge.org/rpmforge-release/rpmforge-release-0.5.3-1.el6.rf.x86_64.rpm' || \
- curl -OLf 'http://repoforge.eecs.wsu.edu/redhat/el6/en/x86_64/rpmforge/RPMS/rpmforge-release-0.5.3-1.el6.rf.x86_64.rpm') && \
- rpm -ivh rpmforge-release-0.5.3-1.el6.rf.x86_64.rpm && \
- sed -i 's/enabled = 0/enabled = 1/' /etc/yum.repos.d/rpmforge.repo
-
-RUN touch /var/lib/rpm/* && yum install --assumeyes git
-
-ENV WORKSPACE /arvados
-CMD ["scl", "enable", "python33", "python27", "/usr/local/rvm/bin/rvm-exec default bash /jenkins/run-build-packages.sh --target centos6"]
+++ /dev/null
-FROM debian:wheezy
-MAINTAINER Ward Vandewege <ward@curoverse.com>
-
-# Install dependencies and set up system.
-RUN /usr/bin/apt-get update && /usr/bin/apt-get install -q -y python2.7-dev python3 python-setuptools python3-setuptools libcurl4-gnutls-dev curl git procps libattr1-dev libfuse-dev libpq-dev python-pip unzip
-
-# Install RVM
-RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
- curl -L https://get.rvm.io | bash -s stable && \
- /usr/local/rvm/bin/rvm install 2.1 && \
- /usr/local/rvm/bin/rvm alias create default ruby-2.1 && \
- /usr/local/rvm/bin/rvm-exec default gem install bundler && \
- /usr/local/rvm/bin/rvm-exec default gem install cure-fpm --version 1.6.0b
-
-# Install golang binary
-ADD generated/go1.7.1.linux-amd64.tar.gz /usr/local/
-RUN ln -s /usr/local/go/bin/go /usr/local/bin/
-
-ENV WORKSPACE /arvados
-CMD ["/usr/local/rvm/bin/rvm-exec", "default", "bash", "/jenkins/run-build-packages.sh", "--target", "debian7"]
+++ /dev/null
-FROM centos:6
-MAINTAINER Peter Amstutz <peter.amstutz@curoverse.com>
-
-RUN yum -q install --assumeyes scl-utils centos-release-scl \
- which tar
-
-# Install RVM
-RUN touch /var/lib/rpm/* && \
- gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
- curl -L https://get.rvm.io | bash -s stable && \
- /usr/local/rvm/bin/rvm install 2.1 && \
- /usr/local/rvm/bin/rvm alias create default ruby-2.1 && \
- /usr/local/rvm/bin/rvm-exec default gem install bundle && \
- /usr/local/rvm/bin/rvm-exec default gem install cure-fpm --version 1.6.0b
-
-RUN cd /tmp && \
- (curl -OLf 'http://pkgs.repoforge.org/rpmforge-release/rpmforge-release-0.5.3-1.el6.rf.x86_64.rpm' || \
- curl -OLf 'http://repoforge.eecs.wsu.edu/redhat/el6/en/x86_64/rpmforge/RPMS/rpmforge-release-0.5.3-1.el6.rf.x86_64.rpm') && \
- rpm -ivh rpmforge-release-0.5.3-1.el6.rf.x86_64.rpm && \
- sed -i 's/enabled = 0/enabled = 1/' /etc/yum.repos.d/rpmforge.repo
-
-COPY localrepo.repo /etc/yum.repos.d/localrepo.repo
+++ /dev/null
-[localrepo]
-name=Arvados Test
-baseurl=file:///arvados/packages/centos6
-gpgcheck=0
-enabled=1
RUN touch /var/lib/rpm/* && \
gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
curl -L https://get.rvm.io | bash -s stable && \
- /usr/local/rvm/bin/rvm install 2.1 && \
- /usr/local/rvm/bin/rvm alias create default ruby-2.1 && \
+ /usr/local/rvm/bin/rvm install 2.3 && \
+ /usr/local/rvm/bin/rvm alias create default ruby-2.3 && \
/usr/local/rvm/bin/rvm-exec default gem install bundle && \
/usr/local/rvm/bin/rvm-exec default gem install cure-fpm --version 1.6.0b
+++ /dev/null
-FROM debian:7
-MAINTAINER Peter Amstutz <peter.amstutz@curoverse.com>
-
-# Install RVM
-RUN apt-get update && apt-get -y install curl procps && \
- gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
- curl -L https://get.rvm.io | bash -s stable && \
- /usr/local/rvm/bin/rvm install 2.1 && \
- /usr/local/rvm/bin/rvm alias create default ruby-2.1
-
-# udev daemon can't start in a container, so don't try.
-RUN mkdir -p /etc/udev/disabled
-
-RUN echo "deb file:///arvados/packages/debian7/ /" >>/etc/apt/sources.list
MAINTAINER Peter Amstutz <peter.amstutz@curoverse.com>
# Install RVM
-RUN apt-get update && apt-get -y install curl && \
+RUN apt-get update && \
+ DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends curl ca-certificates && \
gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
curl -L https://get.rvm.io | bash -s stable && \
- /usr/local/rvm/bin/rvm install 2.1 && \
- /usr/local/rvm/bin/rvm alias create default ruby-2.1
+ /usr/local/rvm/bin/rvm install 2.3 && \
+ /usr/local/rvm/bin/rvm alias create default ruby-2.3
# udev daemon can't start in a container, so don't try.
RUN mkdir -p /etc/udev/disabled
MAINTAINER Peter Amstutz <peter.amstutz@curoverse.com>
# Install RVM
-RUN apt-get update && apt-get -y install curl && \
+RUN apt-get update && \
+ DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends curl ca-certificates && \
gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
curl -L https://get.rvm.io | bash -s stable && \
- /usr/local/rvm/bin/rvm install 2.1 && \
- /usr/local/rvm/bin/rvm alias create default ruby-2.1
+ /usr/local/rvm/bin/rvm install 2.3 && \
+ /usr/local/rvm/bin/rvm alias create default ruby-2.3
# udev daemon can't start in a container, so don't try.
RUN mkdir -p /etc/udev/disabled
-RUN echo "deb file:///arvados/packages/ubuntu1204/ /" >>/etc/apt/sources.list
\ No newline at end of file
+RUN echo "deb file:///arvados/packages/ubuntu1204/ /" >>/etc/apt/sources.list
MAINTAINER Peter Amstutz <peter.amstutz@curoverse.com>
# Install RVM
-RUN apt-get update && apt-get -y install curl && \
+RUN apt-get update && \
+ DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends curl ca-certificates && \
gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
curl -L https://get.rvm.io | bash -s stable && \
- /usr/local/rvm/bin/rvm install 2.1 && \
- /usr/local/rvm/bin/rvm alias create default ruby-2.1
+ /usr/local/rvm/bin/rvm install 2.3 && \
+ /usr/local/rvm/bin/rvm alias create default ruby-2.3
# udev daemon can't start in a container, so don't try.
RUN mkdir -p /etc/udev/disabled
-RUN echo "deb file:///arvados/packages/ubuntu1404/ /" >>/etc/apt/sources.list
\ No newline at end of file
+RUN echo "deb file:///arvados/packages/ubuntu1404/ /" >>/etc/apt/sources.list
+++ /dev/null
-rpm-common-test-packages.sh
\ No newline at end of file
prepare_database() {
DB_MIGRATE_STATUS=`$COMMAND_PREFIX bundle exec rake db:migrate:status 2>&1 || true`
- if echo $DB_MIGRATE_STATUS | grep -qF 'Schema migrations table does not exist yet.'; then
+ if echo "$DB_MIGRATE_STATUS" | grep -qF 'Schema migrations table does not exist yet.'; then
# The database exists, but the migrations table doesn't.
run_and_report "Setting up database" $COMMAND_PREFIX bundle exec \
rake "$RAILSPKG_DATABASE_LOAD_TASK" db:seed
- elif echo $DB_MIGRATE_STATUS | grep -q '^database: '; then
+ elif echo "$DB_MIGRATE_STATUS" | grep -q '^database: '; then
run_and_report "Running db:migrate" \
$COMMAND_PREFIX bundle exec rake db:migrate
- elif echo $DB_MIGRATE_STATUS | grep -q 'database .* does not exist'; then
+ elif echo "$DB_MIGRATE_STATUS" | grep -q 'database .* does not exist'; then
if ! run_and_report "Running db:setup" \
$COMMAND_PREFIX bundle exec rake db:setup 2>/dev/null; then
echo "Warning: unable to set up database." >&2
timer_reset
-if [[ "$ECODE" != "0" ]]; then
+if [[ "$EXITCODE" != "0" ]]; then
title "upload arvados images SKIPPED because build failed"
else
if [[ $upload == true ]]; then
FINAL_EXITCODE=0
-for dockerfile_path in $(find -name Dockerfile); do
+for dockerfile_path in $(find -name Dockerfile | grep package-build-dockerfiles); do
if ./run-build-packages-one-target.sh --target "$(basename $(dirname "$dockerfile_path"))" --command "$COMMAND" $DEBUG $TEST_PACKAGES $ONLY_TEST ; then
true
else
WORKSPACE=/path/to/arvados $(basename $0) [options]
--target <target>
- Distribution to build packages for (default: debian7)
+ Distribution to build packages for (default: debian8)
--command
Build command to execute (default: use built-in Docker image command)
--test-packages
Run package install test script "test-packages-$target.sh"
--debug
Output debug information (default: false)
---only-test
+--only-build <package>
+ Build only a specific package
+--only-test <package>
Test only a specific package
WORKSPACE=path Path to the Arvados source tree to build packages from
fi
PARSEDOPTS=$(getopt --name "$0" --longoptions \
- help,debug,test-packages,target:,command:,only-test: \
+ help,debug,test-packages,target:,command:,only-test:,only-build: \
-- "" "$@")
if [ $? -ne 0 ]; then
exit 1
fi
-TARGET=debian7
+TARGET=debian8
COMMAND=
DEBUG=
TARGET="$2"; shift
;;
--only-test)
+ test_packages=1
packages="$2"; shift
;;
+ --only-build)
+ ONLY_BUILD="$2"; shift
+ ;;
--debug)
DEBUG=" --debug"
;;
if test -z "$packages" ; then
packages="arvados-api-server
- arvados-data-manager
arvados-docker-cleaner
arvados-git-httpd
arvados-node-manager
libarvados-perl"
case "$TARGET" in
- centos6)
- packages="$packages python27-python-arvados-fuse
- python27-python-arvados-python-client python27-python-arvados-cwl-runner"
- ;;
*)
packages="$packages python-arvados-fuse
python-arvados-python-client python-arvados-cwl-runner"
if [[ -n "$test_packages" ]]; then
for p in $packages ; do
+ if [[ -n "$ONLY_BUILD" ]] && [[ "$p" != "$ONLY_BUILD" ]]; then
+ continue
+ fi
echo
echo "START: $p test on $IMAGE" >&2
if docker run --rm \
if docker run --rm \
"${docker_volume_args[@]}" \
--env ARVADOS_DEBUG=1 \
+ --env "ONLY_BUILD=$ONLY_BUILD" \
"$IMAGE" $COMMAND
then
echo
--debug
Output debug information (default: false)
--target
- Distribution to build packages for (default: debian7)
+ Distribution to build packages for (default: debian8)
WORKSPACE=path Path to the Arvados SSO source tree to build packages from
EXITCODE=0
DEBUG=${ARVADOS_DEBUG:-0}
-TARGET=debian7
+TARGET=debian8
PARSEDOPTS=$(getopt --name "$0" --longoptions \
help,build-bundle-packages,debug,target: \
fi
case "$TARGET" in
- debian7)
- FORMAT=deb
- ;;
debian8)
FORMAT=deb
;;
ubuntu1404)
FORMAT=deb
;;
- centos6|centos7)
+ centos7)
FORMAT=rpm
;;
*)
Build api server and workbench packages with vendor/bundle included
--debug
Output debug information (default: false)
---target
- Distribution to build packages for (default: debian7)
+--target <target>
+ Distribution to build packages for (default: debian8)
+--only-build <package>
+ Build only a specific package (or $ONLY_BUILD from environment)
--command
Build command to execute (defaults to the run command defined in the
Docker image)
EXITCODE=0
DEBUG=${ARVADOS_DEBUG:-0}
-TARGET=debian7
+TARGET=debian8
COMMAND=
PARSEDOPTS=$(getopt --name "$0" --longoptions \
- help,build-bundle-packages,debug,target: \
+ help,build-bundle-packages,debug,target:,only-build: \
-- "" "$@")
if [ $? -ne 0 ]; then
exit 1
--target)
TARGET="$2"; shift
;;
+ --only-build)
+ ONLY_BUILD="$2"; shift
+ ;;
--debug)
DEBUG=1
;;
## End Debian Python defaults.
case "$TARGET" in
- debian7)
- FORMAT=deb
- PYTHON_BACKPORTS=(python-gflags==2.0 google-api-python-client==1.4.2 \
- oauth2client==1.5.2 pyasn1==0.1.7 pyasn1-modules==0.0.5 \
- rsa uritemplate httplib2 ws4py pykka six \
- ciso8601 pycrypto backports.ssl_match_hostname llfuse==0.41.1 \
- 'pycurl<7.21.5' contextlib2 pyyaml 'rdflib>=4.2.0' \
- shellescape mistune typing avro ruamel.ordereddict
- cachecontrol requests)
- PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client)
- ;;
debian8)
FORMAT=deb
PYTHON_BACKPORTS=(python-gflags==2.0 google-api-python-client==1.4.2 \
oauth2client==1.5.2 pyasn1==0.1.7 pyasn1-modules==0.0.5 \
- rsa uritemplate httplib2 ws4py pykka six \
+ rsa uritemplate httplib2 ws4py pykka six \
ciso8601 pycrypto backports.ssl_match_hostname llfuse==0.41.1 \
'pycurl<7.21.5' pyyaml 'rdflib>=4.2.0' \
shellescape mistune typing avro ruamel.ordereddict
- cachecontrol)
- PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client)
+ cachecontrol 'pathlib2==2.1.0')
+ PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client==0.37.0)
;;
ubuntu1204)
FORMAT=deb
PYTHON_BACKPORTS=(python-gflags==2.0 google-api-python-client==1.4.2 \
oauth2client==1.5.2 pyasn1==0.1.7 pyasn1-modules==0.0.5 \
- rsa uritemplate httplib2 ws4py pykka six \
+ rsa uritemplate httplib2 ws4py pykka six \
ciso8601 pycrypto backports.ssl_match_hostname llfuse==0.41.1 \
contextlib2 'pycurl<7.21.5' pyyaml 'rdflib>=4.2.0' \
shellescape mistune typing avro isodate ruamel.ordereddict
- cachecontrol requests)
- PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client)
+ cachecontrol requests 'pathlib2==2.1.0')
+ PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client==0.37.0)
;;
ubuntu1404)
FORMAT=deb
google-api-python-client==1.4.2 six uritemplate oauth2client==1.5.2 httplib2 \
rsa 'pycurl<7.21.5' backports.ssl_match_hostname pyyaml 'rdflib>=4.2.0' \
shellescape mistune typing avro ruamel.ordereddict
- cachecontrol)
- PYTHON3_BACKPORTS=(docker-py==1.7.2 requests websocket-client)
- ;;
- centos6)
- FORMAT=rpm
- PYTHON2_PACKAGE=$(rpm -qf "$(which python$PYTHON2_VERSION)" --queryformat '%{NAME}\n')
- PYTHON2_PKG_PREFIX=$PYTHON2_PACKAGE
- PYTHON2_PREFIX=/opt/rh/python27/root/usr
- PYTHON2_INSTALL_LIB=lib/python$PYTHON2_VERSION/site-packages
- PYTHON3_PACKAGE=$(rpm -qf "$(which python$PYTHON3_VERSION)" --queryformat '%{NAME}\n')
- PYTHON3_PKG_PREFIX=$PYTHON3_PACKAGE
- PYTHON3_PREFIX=/opt/rh/python33/root/usr
- PYTHON3_INSTALL_LIB=lib/python$PYTHON3_VERSION/site-packages
- PYTHON_BACKPORTS=(python-gflags==2.0 google-api-python-client==1.4.2 \
- oauth2client==1.5.2 pyasn1==0.1.7 pyasn1-modules==0.0.5 \
- rsa uritemplate httplib2 ws4py pykka six \
- ciso8601 pycrypto backports.ssl_match_hostname 'pycurl<7.21.5' \
- python-daemon llfuse==0.41.1 'pbr<1.0' pyyaml \
- 'rdflib>=4.2.0' shellescape mistune typing avro requests \
- isodate pyparsing sparqlwrapper html5lib==0.9999999 keepalive \
- ruamel.ordereddict cachecontrol)
- PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client)
- export PYCURL_SSL_LIBRARY=nss
+ cachecontrol 'pathlib2==2.1.0')
+ PYTHON3_BACKPORTS=(docker-py==1.7.2 requests websocket-client==0.37.0)
;;
centos7)
FORMAT=rpm
oauth2client==1.5.2 pyasn1==0.1.7 pyasn1-modules==0.0.5 \
rsa uritemplate httplib2 ws4py pykka \
ciso8601 pycrypto 'pycurl<7.21.5' \
- python-daemon==2.1.1 llfuse==0.41.1 'pbr<1.0' pyyaml \
+ python-daemon==2.1.1 llfuse==0.41.1 'pbr<1.0' pyyaml contextlib2 \
'rdflib>=4.2.0' shellescape mistune typing avro \
isodate pyparsing sparqlwrapper html5lib==0.9999999 keepalive \
- ruamel.ordereddict cachecontrol)
- PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client)
+ ruamel.ordereddict cachecontrol 'pathlib2==2.1.0')
+ PYTHON3_BACKPORTS=(docker-py==1.7.2 six requests websocket-client==0.37.0)
export PYCURL_SSL_LIBRARY=nss
;;
*)
# Perl packages
debug_echo -e "\nPerl packages\n"
+if [[ -z "$ONLY_BUILD" ]] || [[ "libarvados-perl" = "$ONLY_BUILD" ]] ; then
cd "$WORKSPACE/sdk/perl"
if [[ -e Makefile ]]; then
"Curoverse, Inc." dir "$(version_from_git)" install/man/=/usr/share/man \
"$WORKSPACE/LICENSE-2.0.txt=/usr/share/doc/libarvados-perl/LICENSE-2.0.txt" && \
mv --no-clobber libarvados-perl*.$FORMAT "$WORKSPACE/packages/$TARGET/"
+fi
# Ruby gems
debug_echo -e "\nRuby gems\n"
"$WORKSPACE/packages/$TARGET/libfuse-dev_2.9.2-5_amd64.deb"
apt-get -y --no-install-recommends -f install
rm -rf $LIBFUSE_DIR
-elif [[ $TARGET =~ centos6 ]]; then
- # port fuse 2.9.2 to centos 6
- # install tools to build rpm from source
- yum install -y rpm-build redhat-rpm-config
- LIBFUSE_DIR=$(mktemp -d)
- (
- cd "$LIBFUSE_DIR"
- # download fuse 2.9.2 centos 7 source rpm
- file="fuse-2.9.2-6.el7.src.rpm" && curl -L -o "${file}" "http://vault.centos.org/7.2.1511/os/Source/SPackages/${file}"
- (
- # modify source rpm spec to remove conflict on filesystem version
- mkdir -p /root/rpmbuild/SOURCES
- cd /root/rpmbuild/SOURCES
- rpm2cpio ${LIBFUSE_DIR}/fuse-2.9.2-6.el7.src.rpm | cpio -i
- perl -pi -e 's/Conflicts:\s*filesystem.*//g' fuse.spec
- )
- # build rpms from source
- rpmbuild -bb /root/rpmbuild/SOURCES/fuse.spec
- rm -f fuse-2.9.2-6.el7.src.rpm
- # move built RPMs to LIBFUSE_DIR
- mv "/root/rpmbuild/RPMS/x86_64/fuse-2.9.2-6.el6.x86_64.rpm" ${LIBFUSE_DIR}/
- mv "/root/rpmbuild/RPMS/x86_64/fuse-libs-2.9.2-6.el6.x86_64.rpm" ${LIBFUSE_DIR}/
- mv "/root/rpmbuild/RPMS/x86_64/fuse-devel-2.9.2-6.el6.x86_64.rpm" ${LIBFUSE_DIR}/
- rm -rf /root/rpmbuild
- )
- fpm_build "$LIBFUSE_DIR/fuse-libs-2.9.2-6.el6.x86_64.rpm" fuse-libs "Centos Developers" rpm "2.9.2" --iteration 5
- fpm_build "$LIBFUSE_DIR/fuse-2.9.2-6.el6.x86_64.rpm" fuse "Centos Developers" rpm "2.9.2" --iteration 5 --no-auto-depends
- fpm_build "$LIBFUSE_DIR/fuse-devel-2.9.2-6.el6.x86_64.rpm" fuse-devel "Centos Developers" rpm "2.9.2" --iteration 5 --no-auto-depends
- yum install -y \
- "$WORKSPACE/packages/$TARGET/fuse-libs-2.9.2-5.x86_64.rpm" \
- "$WORKSPACE/packages/$TARGET/fuse-2.9.2-5.x86_64.rpm" \
- "$WORKSPACE/packages/$TARGET/fuse-devel-2.9.2-5.x86_64.rpm"
fi
# Go binaries
"Supervise a single Crunch container"
package_go_binary services/crunchstat crunchstat \
"Gather cpu/memory/network statistics of running Crunch jobs"
-package_go_binary services/datamanager arvados-data-manager \
- "Ensure block replication levels, report disk usage, and determine which blocks should be deleted when space is needed"
package_go_binary services/keep-balance keep-balance \
"Rebalance and garbage-collect data blocks stored in Arvados Keep"
package_go_binary services/keepproxy keepproxy \
"Keep storage daemon, accessible to clients on the LAN"
package_go_binary services/keep-web keep-web \
"Static web hosting service for user data stored in Arvados Keep"
+package_go_binary services/ws arvados-ws \
+ "Arvados Websocket server"
package_go_binary tools/keep-block-check keep-block-check \
"Verify that all data from one set of Keep servers to another was copied"
package_go_binary tools/keep-rsync keep-rsync \
# 2014-05-15
cd $WORKSPACE/packages/$TARGET
rm -rf "$WORKSPACE/sdk/python/build"
-fpm_build $WORKSPACE/sdk/python "${PYTHON2_PKG_PREFIX}-arvados-python-client" 'Curoverse, Inc.' 'python' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/sdk/python/arvados_python_client.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Arvados Python SDK" --deb-recommends=git
+fpm_build $WORKSPACE/sdk/python "${PYTHON2_PKG_PREFIX}-arvados-python-client" 'Curoverse, Inc.' 'python' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/sdk/python/arvados_python_client.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Arvados Python SDK" --depends "${PYTHON2_PKG_PREFIX}-setuptools" --deb-recommends=git
# cwl-runner
cd $WORKSPACE/packages/$TARGET
# So we build this thing separately.
#
# Ward, 2016-03-17
-fpm_build schema_salad "" "" python 1.20.20161122192122 --depends "${PYTHON2_PKG_PREFIX}-lockfile >= 1:0.12.2-2"
+saladversion=$(cat "$WORKSPACE/sdk/cwl/setup.py" | grep schema-salad== | sed "s/.*==\(.*\)'.*/\1/")
+fpm_build schema_salad "" "" python $saladversion --depends "${PYTHON2_PKG_PREFIX}-lockfile >= 1:0.12.2-2"
# And schema_salad now depends on ruamel-yaml, which apparently has a braindead setup.py that requires special arguments to build (otherwise, it aborts with 'error: you have to install with "pip install ."'). Sigh.
# Ward, 2016-05-26
-fpm_build ruamel.yaml "" "" python 0.12.4 --python-setup-py-arguments "--single-version-externally-managed"
+fpm_build ruamel.yaml "" "" python 0.13.7 --python-setup-py-arguments "--single-version-externally-managed"
# Dependency of cwltool. Fpm doesn't produce a package with the correct version
# number unless we build it explicitly
fpm_build cwltest "" "" python 1.0.20160907111242
# And for cwltool we have the same problem as for schema_salad. Ward, 2016-03-17
-fpm_build cwltool "" "" python 1.0.20161122201220
+cwltoolversion=$(cat "$WORKSPACE/sdk/cwl/setup.py" | grep cwltool== | sed "s/.*==\(.*\)'.*/\1/")
+fpm_build cwltool "" "" python $cwltoolversion
# FPM eats the trailing .0 in the python-rdflib-jsonld package when built with 'rdflib-jsonld>=0.3.0'. Force the version. Ward, 2016-03-25
-fpm_build rdflib-jsonld "" "" python 0.3.0
+fpm_build rdflib-jsonld "" "" python 0.4.0
# The PAM module
if [[ $TARGET =~ debian|ubuntu ]]; then
# not omit the python- prefix first.
cd $WORKSPACE/packages/$TARGET
rm -rf "$WORKSPACE/services/fuse/build"
-fpm_build $WORKSPACE/services/fuse "${PYTHON2_PKG_PREFIX}-arvados-fuse" 'Curoverse, Inc.' 'python' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/services/fuse/arvados_fuse.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Keep FUSE driver"
+fpm_build $WORKSPACE/services/fuse "${PYTHON2_PKG_PREFIX}-arvados-fuse" 'Curoverse, Inc.' 'python' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/services/fuse/arvados_fuse.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Keep FUSE driver" --depends "${PYTHON2_PKG_PREFIX}-setuptools"
# The node manager
cd $WORKSPACE/packages/$TARGET
rm -rf "$WORKSPACE/services/nodemanager/build"
-fpm_build $WORKSPACE/services/nodemanager arvados-node-manager 'Curoverse, Inc.' 'python' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/services/nodemanager/arvados_node_manager.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Arvados node manager"
+fpm_build $WORKSPACE/services/nodemanager arvados-node-manager 'Curoverse, Inc.' 'python' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/services/nodemanager/arvados_node_manager.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Arvados node manager" --depends "${PYTHON2_PKG_PREFIX}-setuptools"
# The Docker image cleaner
cd $WORKSPACE/packages/$TARGET
rm -rf "$WORKSPACE/services/dockercleaner/build"
-fpm_build $WORKSPACE/services/dockercleaner arvados-docker-cleaner 'Curoverse, Inc.' 'python3' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/services/dockercleaner/arvados_docker_cleaner.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Arvados Docker image cleaner"
+fpm_build $WORKSPACE/services/dockercleaner arvados-docker-cleaner 'Curoverse, Inc.' 'python3' "$(awk '($1 == "Version:"){print $2}' $WORKSPACE/services/dockercleaner/arvados_docker_cleaner.egg-info/PKG-INFO)" "--url=https://arvados.org" "--description=The Arvados Docker image cleaner" --depends "${PYTHON3_PKG_PREFIX}-websocket-client = 0.37.0" --iteration 3
# The Arvados crunchstat-summary tool
cd $WORKSPACE/packages/$TARGET
for deppkg in "${PYTHON_BACKPORTS[@]}"; do
outname=$(echo "$deppkg" | sed -e 's/^python-//' -e 's/[<=>].*//' -e 's/_/-/g' -e "s/^/${PYTHON2_PKG_PREFIX}-/")
+
+ if [[ -n "$ONLY_BUILD" ]] && [[ "$outname" != "$ONLY_BUILD" ]] ; then
+ continue
+ fi
+
case "$deppkg" in
httplib2|google-api-python-client)
# Work around 0640 permissions on some package files.
--license="GNU Affero General Public License, version 3.0"
# Build the workbench server package
+if [[ -z "$ONLY_BUILD" ]] || [[ "arvados-workbench" = "$ONLY_BUILD" ]] ; then
(
set -e
cd "$WORKSPACE/apps/workbench"
# Remove generated configuration files so they don't go in the package.
rm config/application.yml config/environments/production.rb
)
+fi
if [[ "$?" != "0" ]]; then
echo "ERROR: Asset precompilation failed"
WORKSPACE=/path/to/arvados $(basename $0) [options]
--target <target>
- Distribution to build packages for (default: debian7)
+ Distribution to build packages for (default: debian8)
--upload
If the build and test steps are successful, upload the packages
to a remote apt repository (default: false)
exit 1
fi
-TARGET=debian7
+TARGET=debian8
UPLOAD=0
eval set -- "$PARSEDOPTS"
local gem_version="$(nohash_version_from_git)"
local gem_src_dir="$(pwd)"
+ if [[ -n "$ONLY_BUILD" ]] && [[ "$gem_name" != "$ONLY_BUILD" ]] ; then
+ return 0
+ fi
+
if ! [[ -e "${gem_name}-${gem_version}.gem" ]]; then
find -maxdepth 1 -name "${gem_name}-*.gem" -delete
local description="$1"; shift
local license_file="${1:-agpl-3.0.txt}"; shift
+ if [[ -n "$ONLY_BUILD" ]] && [[ "$prog" != "$ONLY_BUILD" ]] ; then
+ return 0
+ fi
+
debug_echo "package_go_binary $src_path as $prog"
local basename="${src_path##*/}"
handle_rails_package() {
local pkgname="$1"; shift
+
+ if [[ -n "$ONLY_BUILD" ]] && [[ "$pkgname" != "$ONLY_BUILD" ]] ; then
+ return 0
+ fi
+
local srcdir="$1"; shift
local license_path="$1"; shift
local scripts_dir="$(mktemp --tmpdir -d "$pkgname-XXXXXXXX.scripts")" && \
local -a pos_args=("$srcdir/=$railsdir" "$pkgname" "Curoverse, Inc." dir
"$(cat "$version_file")")
local license_arg="$license_path=$railsdir/$(basename "$license_path")"
- local -a switches=(--iteration=6
+ local -a switches=(--iteration=7
--after-install "$scripts_dir/postinst"
--before-remove "$scripts_dir/prerm"
--after-remove "$scripts_dir/postrm")
VERSION=$1
shift
+ if [[ -n "$ONLY_BUILD" ]] && [[ "$PACKAGE_NAME" != "$ONLY_BUILD" ]] && [[ "$PACKAGE" != "$ONLY_BUILD" ]] ; then
+ return 0
+ fi
+
local default_iteration_value="$(default_iteration "$PACKAGE" "$VERSION")"
case "$PACKAGE_TYPE" in
declare -a fpm_dirs=(
# source dir part of 'dir' package ("/source=/dest" => "/source"):
"${PACKAGE%%=/*}"
- # backports ("llfuse==0.41.1" => "backports/python-llfuse")
+ # backports ("llfuse>=1.0" => "backports/python-llfuse")
"${WORKSPACE}/backports/${PACKAGE_TYPE}-${PACKAGE%%[<=>]*}")
if [[ -n "$PACKAGE_NAME" ]]; then
fpm_dirs+=("${WORKSPACE}/backports/${PACKAGE_NAME}")
--leave-temp Do not remove GOPATH, virtualenv, and other temp dirs at exit.
Instead, show the path to give as --temp to reuse them in
subsequent invocations.
+--repeat N Repeat each install/test step until it succeeds N times.
+--retry Prompt to retry if an install or test suite fails.
--skip-install Do not run any install steps. Just run tests.
You should provide GOPATH, GEMHOME, and VENVDIR options
from a previous invocation if you use this option.
services/crunch-run
services/crunch-dispatch-local
services/crunch-dispatch-slurm
+services/ws
sdk/cli
sdk/pam
sdk/python
sdk/go/manifest
sdk/go/blockdigest
sdk/go/streamer
+sdk/go/stats
sdk/go/crunchrunner
sdk/cwl
tools/crunchstat-summary
PERLINSTALLBASE=
short=
-skip_install=
+only_install=
temp=
temp_preserve=
echo -n 'virtualenv: '
virtualenv --version \
|| fatal "No virtualenv. Try: apt-get install virtualenv (on ubuntu: python-virtualenv)"
+ echo -n 'ruby: '
+ ruby -v \
+ || fatal "No ruby. Install >=2.1.9 (using rbenv, rvm, or source)"
+ echo -n 'bundler: '
+ bundle version \
+ || fatal "No bundler. Try: gem install bundler"
echo -n 'go: '
go version \
|| fatal "No go binary. See http://golang.org/doc/install"
exit 1
;;
--skip)
- skipwhat="$1"; shift
- if [[ "$skipwhat" == "apps/workbench" ]]; then
- skip["apps/workbench_units"]=1
- skip["apps/workbench_functionals"]=1
- skip["apps/workbench_integration"]=1
- else
- skip[$skipwhat]=1
- fi
+ skip[$1]=1; shift
;;
--only)
only="$1"; skip[$1]=""; shift
short=1
;;
--skip-install)
- skip_install=1
+ only_install=nothing
;;
--only-install)
- skip_install=1
only_install="$1"; shift
;;
--temp)
--leave-temp)
temp_preserve=1
;;
+ --repeat)
+ repeat=$((${1}+0)); shift
+ ;;
--retry)
retry=1
;;
&& eval $(python sdk/python/tests/run_test_server.py start --auth admin) \
&& export ARVADOS_TEST_API_HOST="$ARVADOS_API_HOST" \
&& export ARVADOS_TEST_API_INSTALLED="$$" \
+ && python sdk/python/tests/run_test_server.py start_ws \
+ && python sdk/python/tests/run_test_server.py start_nginx \
&& (env | egrep ^ARVADOS)
}
start_nginx_proxy_services() {
- echo 'Starting keepproxy, keep-web, arv-git-httpd, and nginx ssl proxy...'
+ echo 'Starting keepproxy, keep-web, ws, arv-git-httpd, and nginx ssl proxy...'
cd "$WORKSPACE" \
&& python sdk/python/tests/run_test_server.py start_keep_proxy \
&& python sdk/python/tests/run_test_server.py start_keep-web \
&& python sdk/python/tests/run_test_server.py start_arv-git-httpd \
+ && python sdk/python/tests/run_test_server.py start_ws \
&& python sdk/python/tests/run_test_server.py start_nginx \
&& export ARVADOS_TEST_PROXY_SERVICES=1
}
cd "$WORKSPACE" \
&& python sdk/python/tests/run_test_server.py stop_nginx \
&& python sdk/python/tests/run_test_server.py stop_arv-git-httpd \
+ && python sdk/python/tests/run_test_server.py stop_ws \
&& python sdk/python/tests/run_test_server.py stop_keep-web \
&& python sdk/python/tests/run_test_server.py stop_keep_proxy
fi
if [[ -n "$ARVADOS_TEST_API_HOST" ]]; then
unset ARVADOS_TEST_API_HOST
cd "$WORKSPACE" \
+ && python sdk/python/tests/run_test_server.py stop_nginx \
+ && python sdk/python/tests/run_test_server.py stop_ws \
&& python sdk/python/tests/run_test_server.py stop
fi
}
|| pip install --pre --ignore-installed https://github.com/curoverse/libcloud/archive/apache-libcloud-$LIBCLOUD_PIN.zip >/dev/null \
|| fatal "pip install apache-libcloud failed"
-# This will help people who reuse --temp dirs when we upgrade to llfuse 0.42
-if egrep -q 'llfuse.*>= *0\.42' "$WORKSPACE/services/fuse/setup.py"; then
- # Uninstall old llfuse, because services/fuse "pip install" won't
- # upgrade it by default.
- if pip freeze | egrep '^llfuse==0\.41\.'; then
- yes | pip uninstall 'llfuse<0.42'
- fi
+# Uninstall old llfuse (<1.0), because services/fuse "pip install"
+# won't upgrade it by default.
+if pip freeze | egrep '^llfuse==0'; then
+ yes | pip uninstall 'llfuse<1.0'
fi
# Deactivate Python 2 virtualenv
fi
retry() {
- while ! ${@} && [[ "$retry" == 1 ]]
+ remain="${repeat}"
+ while :
do
- read -p 'Try again? [Y/n] ' x
- if [[ "$x" != "y" ]] && [[ "$x" != "" ]]
- then
+ if ${@}; then
+ if [[ "$remain" -gt 1 ]]; then
+ remain=$((${remain}-1))
+ title "Repeating ${remain} more times"
+ else
+ break
+ fi
+ elif [[ "$retry" == 1 ]]; then
+ read -p 'Try again? [Y/n] ' x
+ if [[ "$x" != "y" ]] && [[ "$x" != "" ]]
+ then
+ break
+ fi
+ else
break
fi
done
}
do_test() {
- retry do_test_once ${@}
+ case "${1}" in
+    apps/workbench_units | apps/workbench_functionals | apps/workbench_integration)
+ suite=apps/workbench
+ ;;
+ *)
+ suite="${1}"
+ ;;
+ esac
+ if [[ -z "${skip[$suite]}" && -z "${skip[$1]}" && \
+ (-z "${only}" || "${only}" == "${suite}" || \
+ "${only}" == "${1}") ]]; then
+ retry do_test_once ${@}
+ else
+ title "Skipping ${1} tests"
+ fi
}
do_test_once() {
unset result
- to_test=$1
- if (( [[ "$only" == "apps/workbench" ]] ) &&
- ( [[ "$to_test" == "apps/workbench_units" ]] || [[ "$to_test" == "apps/workbench_functionals" ]] ||
- [[ "$to_test" == "apps/workbench_integration" ]])); then
- to_test="apps/workbench"
- fi
- if [[ -z "${skip[$1]}" ]] && ( [[ -z "$only" ]] || [[ "$only" == "$to_test" ]] )
+
+ title "Running $1 tests"
+ timer_reset
+ if [[ "$2" == "go" ]]
then
- title "Running $1 tests"
- timer_reset
- if [[ "$2" == "go" ]]
- then
- covername="coverage-$(echo "$1" | sed -e 's/\//_/g')"
- coverflags=("-covermode=count" "-coverprofile=$WORKSPACE/tmp/.$covername.tmp")
- # We do "go get -t" here to catch compilation errors
- # before trying "go test". Otherwise, coverage-reporting
- # mode makes Go show the wrong line numbers when reporting
- # compilation errors.
- go get -t "git.curoverse.com/arvados.git/$1" || return 1
- cd "$WORKSPACE/$1" || return 1
- gofmt -e -d . | egrep . && result=1
- if [[ -n "${testargs[$1]}" ]]
- then
- # "go test -check.vv giturl" doesn't work, but this
- # does:
- cd "$WORKSPACE/$1" && go test ${short:+-short} ${testargs[$1]}
- else
- # The above form gets verbose even when testargs is
- # empty, so use this form in such cases:
- go test ${short:+-short} ${coverflags[@]} "git.curoverse.com/arvados.git/$1"
- fi
- result=${result:-$?}
- if [[ -f "$WORKSPACE/tmp/.$covername.tmp" ]]
- then
- go tool cover -html="$WORKSPACE/tmp/.$covername.tmp" -o "$WORKSPACE/tmp/$covername.html"
- rm "$WORKSPACE/tmp/.$covername.tmp"
- fi
- elif [[ "$2" == "pip" ]]
+ covername="coverage-$(echo "$1" | sed -e 's/\//_/g')"
+ coverflags=("-covermode=count" "-coverprofile=$WORKSPACE/tmp/.$covername.tmp")
+ # We do "go get -t" here to catch compilation errors
+ # before trying "go test". Otherwise, coverage-reporting
+ # mode makes Go show the wrong line numbers when reporting
+ # compilation errors.
+ go get -t "git.curoverse.com/arvados.git/$1" || return 1
+ cd "$WORKSPACE/$1" || return 1
+ gofmt -e -d . | egrep . && result=1
+ if [[ -n "${testargs[$1]}" ]]
then
- tries=0
- cd "$WORKSPACE/$1" && while :
- do
- tries=$((${tries}+1))
- # $3 can name a path directory for us to use, including trailing
- # slash; e.g., the bin/ subdirectory of a virtualenv.
- "${3}python" setup.py ${short:+--short-tests-only} test ${testargs[$1]}
- result=$?
- if [[ ${tries} < 3 && ${result} == 137 ]]
- then
- printf '\n*****\n%s tests killed -- retrying\n*****\n\n' "$1"
- continue
- else
- break
- fi
- done
- elif [[ "$2" != "" ]]
- then
- "test_$2"
+ # "go test -check.vv giturl" doesn't work, but this
+ # does:
+ cd "$WORKSPACE/$1" && go test ${short:+-short} ${testargs[$1]}
else
- "test_$1"
+ # The above form gets verbose even when testargs is
+ # empty, so use this form in such cases:
+ go test ${short:+-short} ${coverflags[@]} "git.curoverse.com/arvados.git/$1"
fi
result=${result:-$?}
- checkexit $result "$1 tests"
- title "End of $1 tests (`timer`)"
- return $result
+ if [[ -f "$WORKSPACE/tmp/.$covername.tmp" ]]
+ then
+ go tool cover -html="$WORKSPACE/tmp/.$covername.tmp" -o "$WORKSPACE/tmp/$covername.html"
+ rm "$WORKSPACE/tmp/.$covername.tmp"
+ fi
+ elif [[ "$2" == "pip" ]]
+ then
+ tries=0
+ cd "$WORKSPACE/$1" && while :
+ do
+ tries=$((${tries}+1))
+ # $3 can name a path directory for us to use, including trailing
+ # slash; e.g., the bin/ subdirectory of a virtualenv.
+ "${3}python" setup.py ${short:+--short-tests-only} test ${testargs[$1]}
+ result=$?
+ if [[ ${tries} < 3 && ${result} == 137 ]]
+ then
+ printf '\n*****\n%s tests killed -- retrying\n*****\n\n' "$1"
+ continue
+ else
+ break
+ fi
+ done
+ elif [[ "$2" != "" ]]
+ then
+ "test_$2"
else
- title "Skipping $1 tests"
+ "test_$1"
fi
+ result=${result:-$?}
+ checkexit $result "$1 tests"
+ title "End of $1 tests (`timer`)"
+ return $result
}
do_install() {
- retry do_install_once ${@}
+ if [[ -z "${only_install}" || "${only_install}" == "${1}" ]]; then
+ retry do_install_once ${@}
+ else
+ title "Skipping $1 install"
+ fi
}
do_install_once() {
- if [[ -z "$skip_install" || (-n "$only_install" && "$only_install" == "$1") ]]
+ title "Running $1 install"
+ timer_reset
+ if [[ "$2" == "go" ]]
then
- title "Running $1 install"
- timer_reset
- if [[ "$2" == "go" ]]
- then
- go get -t "git.curoverse.com/arvados.git/$1"
- elif [[ "$2" == "pip" ]]
- then
- # $3 can name a path directory for us to use, including trailing
- # slash; e.g., the bin/ subdirectory of a virtualenv.
-
- # Need to change to a different directory after creating
- # the source dist package to avoid a pip bug.
- # see https://arvados.org/issues/5766 for details.
-
- # Also need to install twice, because if it believes the package is
- # already installed, pip it won't install it. So the first "pip
- # install" ensures that the dependencies are met, the second "pip
- # install" ensures that we've actually installed the local package
- # we just built.
- cd "$WORKSPACE/$1" \
- && "${3}python" setup.py sdist rotate --keep=1 --match .tar.gz \
- && cd "$WORKSPACE" \
- && "${3}pip" install --quiet "$WORKSPACE/$1/dist"/*.tar.gz \
- && "${3}pip" install --quiet --no-deps --ignore-installed "$WORKSPACE/$1/dist"/*.tar.gz
- elif [[ "$2" != "" ]]
- then
- "install_$2"
- else
- "install_$1"
- fi
- result=$?
- checkexit $result "$1 install"
- title "End of $1 install (`timer`)"
- return $result
+ go get -t "git.curoverse.com/arvados.git/$1"
+ elif [[ "$2" == "pip" ]]
+ then
+ # $3 can name a path directory for us to use, including trailing
+ # slash; e.g., the bin/ subdirectory of a virtualenv.
+
+ # Need to change to a different directory after creating
+ # the source dist package to avoid a pip bug.
+ # see https://arvados.org/issues/5766 for details.
+
+ # Also need to install twice, because if it believes the package is
+        # already installed, pip won't install it. So the first "pip
+ # install" ensures that the dependencies are met, the second "pip
+ # install" ensures that we've actually installed the local package
+ # we just built.
+ cd "$WORKSPACE/$1" \
+ && "${3}python" setup.py sdist rotate --keep=1 --match .tar.gz \
+ && cd "$WORKSPACE" \
+ && "${3}pip" install --quiet "$WORKSPACE/$1/dist"/*.tar.gz \
+ && "${3}pip" install --quiet --no-deps --ignore-installed "$WORKSPACE/$1/dist"/*.tar.gz
+ elif [[ "$2" != "" ]]
+ then
+ "install_$2"
else
- title "Skipping $1 install"
+ "install_$1"
fi
+ result=$?
+ checkexit $result "$1 install"
+ title "End of $1 install (`timer`)"
+ return $result
}
bundle_install_trylocal() {
sdk/go/manifest
sdk/go/streamer
sdk/go/crunchrunner
+ sdk/go/stats
lib/crunchstat
services/arv-git-httpd
services/crunchstat
sdk/go/keepclient
services/keep-balance
services/keepproxy
- services/datamanager/summary
- services/datamanager/collection
- services/datamanager/keep
- services/datamanager
services/crunch-dispatch-local
services/crunch-dispatch-slurm
services/crunch-run
+ services/ws
tools/keep-block-check
tools/keep-exercise
tools/keep-rsync
- Welcome:
- user/index.html.textile.liquid
- user/getting_started/community.html.textile.liquid
- - Run a pipeline using Workbench:
+ - Run a workflow using Workbench:
- user/getting_started/workbench.html.textile.liquid
- - user/tutorials/tutorial-pipeline-workbench.html.textile.liquid
+ - user/tutorials/tutorial-workflow-workbench.html.textile.liquid
- Access an Arvados virtual machine:
- user/getting_started/vm-login-with-webshell.html.textile.liquid
- user/getting_started/ssh-access-unix.html.textile.liquid
- user/tutorials/tutorial-keep-mount.html.textile.liquid
- user/topics/keep.html.textile.liquid
- user/topics/arv-copy.html.textile.liquid
- - Using Common Workflow Language:
+ - Running workflows at the command line:
- user/cwl/cwl-runner.html.textile.liquid
- - user/cwl/cwl-style.html.textile.liquid
- - Working on the command line:
+ - user/cwl/cwl-run-options.html.textile.liquid
- user/topics/running-pipeline-command-line.html.textile.liquid
- user/topics/arv-run.html.textile.liquid
- Working with git repositories:
- user/tutorials/add-new-repository.html.textile.liquid
- user/tutorials/git-arvados-guide.html.textile.liquid
- - Develop an Arvados pipeline:
+ - Develop an Arvados workflow:
- user/tutorials/intro-crunch.html.textile.liquid
+ - user/tutorials/writing-cwl-workflow.html.textile.liquid
+ - user/cwl/cwl-style.html.textile.liquid
+ - user/cwl/cwl-extensions.html.textile.liquid
+ - user/topics/arv-docker.html.textile.liquid
- user/tutorials/running-external-program.html.textile.liquid
- user/topics/crunch-tools-overview.html.textile.liquid
- user/tutorials/tutorial-firstscript.html.textile.liquid
- user/tutorials/tutorial-submit-job.html.textile.liquid
- user/topics/tutorial-parallel.html.textile.liquid
- - user/topics/arv-docker.html.textile.liquid
- Develop a web service:
- user/topics/arv-web.html.textile.liquid
- Reference:
|vcpus|integer|Number of cores to be used to run this process.|Optional. However, a ContainerRequest that is in "Committed" state must provide this.|
|keep_cache_ram|integer|Number of keep cache bytes to be used to run this process.|Optional.|
|API|boolean|When set, ARVADOS_API_HOST and ARVADOS_API_TOKEN will be set, and container will have networking enabled to access the Arvados API server.|Optional.|
-|partition|array of strings|Specify the names of one or more compute partitions that may run this container. If not provided, the system chooses where to run the container.|Optional.|
--- /dev/null
+Scheduling parameters
+
+Parameters to be passed to the container scheduler (e.g., SLURM) when running a container.
+
+table(table table-bordered table-condensed).
+|_. Key|_. Type|_. Description|_. Notes|
+|partitions|array of strings|The names of one or more compute partitions that may run this container. If not provided, the system will choose where to run the container.|Optional.|
--- /dev/null
+The "Common Workflow Language (CWL)":http://commonwl.org is a multi-vendor open standard for describing analysis tools and workflows that are portable across a variety of platforms. CWL is the recommended way to develop and run workflows for Arvados. Arvados supports the "CWL v1.0":http://commonwl.org/v1.0 specification.
"vcpus":2,
"API":true
}</code></pre>See "Runtime constraints":#runtime_constraints for more details.|
+|scheduling_parameters|hash|Parameters to be passed to the container scheduler when running this container.|e.g.,<pre><code>{
+"partitions":["fastcpu","vfastcpu"]
+}</code></pre>See "Scheduling parameters":#scheduling_parameters for more details.|
|container_image|string|Portable data hash of a collection containing the docker image to run the container.|Required.|
|environment|hash|Environment variables and values that should be set in the container environment (@docker run --env@). This augments and (when conflicts exist) overrides environment variables given in the image's Dockerfile.||
|cwd|string|Initial working directory, given as an absolute path (in the container) or a path relative to the WORKDIR given in the image's Dockerfile.|Required.|
|command|array of strings|Command to execute in the container.|Required. e.g., @["echo","hello"]@|
|output_path|string|Path to a directory or file inside the container that should be preserved as container's output when it finishes. This path must be, or be inside, one of the mount targets. For best performance, point output_path to a writable collection mount.|Required.|
-|priority|integer|Higher value means spend more resources on this container_request, i.e., go ahead of other queued containers, bring up more nodes etc.|Priority 0 means a container should not be run on behalf of this request. Clients are expected to submit container requests with zero priority in order to prevew the container that will be used to satisfy it. Priority can be null if and only if state!="Committed".|
+|priority|integer|Higher value means spend more resources on this container_request, i.e., go ahead of other queued containers, bring up more nodes etc.|Priority 0 means a container should not be run on behalf of this request. Clients are expected to submit container requests with zero priority in order to preview the container that will be used to satisfy it. Priority can be null if and only if state!="Committed".|
|expires_at|datetime|After this time, priority is considered to be zero.|Not yet implemented.|
|use_existing|boolean|If possible, use an existing (non-failed) container to satisfy the request instead of creating a new one.|Default is true|
+|log_uuid|string|Log collection containing log messages provided by the scheduler and crunch processes.|Null if the container has not yet completed.|
+|output_uuid|string|Output collection created when the container finished successfully.|Null if the container has failed or not yet completed.|
|filters|string|Additional constraints for satisfying the container_request, given in the same form as the filters parameter accepted by the container_requests.list API.|
h2(#mount_types). {% include 'mount_types' %}
h2(#runtime_constraints). {% include 'container_runtime_constraints' %}
+h2(#scheduling_parameters). {% include 'container_scheduling_parameters' %}
+
h2(#container_reuse). Container reuse
When a container request is "Committed", the system will try to find and reuse any preexisting Container with the same exact command, cwd, environment, output_path, container_image, mounts, and runtime_constraints as this container request. The serialized fields environment, mounts and runtime_constraints are sorted to facilitate comparison.
"vcpus":2,
"API":true
}</code></pre>See "Runtime constraints":#runtime_constraints for more details.|
+|scheduling_parameters|hash|Parameters to be passed to the container scheduler when running this container.|e.g.,<pre><code>{
+"partitions":["fastcpu","vfastcpu"]
+}</code></pre>See "Scheduling parameters":#scheduling_parameters for more details.|
|output|string|Portable data hash of the output collection.|Null if the container is not yet finished.|
|container_image|string|Portable data hash of a collection containing the docker image used to run the container.||
|progress|number|A number between 0.0 and 1.0 describing the fraction of work done.||
h2(#runtime_constraints). {% include 'container_runtime_constraints' %}
+h2(#scheduling_parameters). {% include 'container_scheduling_parameters' %}
+
h2. Methods
See "Common resource methods":{{site.baseurl}}/api/methods.html for more information about @create@, @delete@, @get@, @list@, and @update@.
--- /dev/null
+---
+layout: default
+navsection: installguide
+title: Install the websocket server
+...
+
+{% include 'notebox_begin_warning' %}
+
+This websocket server is an alternative to the puma server that comes with the API server. It is available as an *experimental pre-release* and is not recommended for production sites.
+
+{% include 'notebox_end' %}
+
+The arvados-ws server provides event notifications to websocket clients. It can be installed anywhere with access to the Postgres database and the Arvados API server, typically behind a web proxy that provides SSL support. See the "godoc page":http://godoc.org/github.com/curoverse/arvados/services/ws for additional information.
+
+By convention, we use the following hostname for the websocket service.
+
+<notextile>
+<pre><code>ws.<span class="userinput">uuid_prefix.your.domain</span></code></pre>
+</notextile>
+
+The above hostname should resolve from anywhere on the internet.
+
+h2. Install arvados-ws
+
+Typically arvados-ws runs on the same host as the API server.
+
+On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install arvados-ws</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install arvados-ws</span>
+</code></pre>
+</notextile>
+
+Verify that @arvados-ws@ is functional:
+
+<notextile>
+<pre><code>~$ <span class="userinput">arvados-ws -h</span>
+Usage of arvados-ws:
+ -config path
+ path to config file (default "/etc/arvados/ws/ws.yml")
+ -dump-config
+ show current configuration and exit
+</code></pre>
+</notextile>
+
+h3. Create a configuration file
+
+Create @/etc/arvados/ws/ws.yml@ using the following template. Replace @xxxxxxxx@ with the "password you generated during database setup":install-postgresql.html#api.
+
+<notextile>
+<pre><code>Client:
+ APIHost: <span class="userinput">uuid_prefix.your.domain</span>:443
+Listen: ":<span class="userinput">9003</span>"
+Postgres:
+ dbname: arvados_production
+ host: localhost
+ password: <span class="userinput">xxxxxxxx</span>
+ user: arvados
+</code></pre>
+</notextile>
+
+h3. Start the service (option 1: systemd)
+
+If your system does not use systemd, skip this section and follow the "runit instructions":#runit instead.
+
+If your system uses systemd, the arvados-ws service should already be set up. Start it and check its status:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo systemctl restart arvados-ws</span>
+~$ <span class="userinput">sudo systemctl status arvados-ws</span>
+● arvados-ws.service - Arvados websocket server
+ Loaded: loaded (/lib/systemd/system/arvados-ws.service; enabled)
+ Active: active (running) since Tue 2016-12-06 11:20:48 EST; 10s ago
+ Docs: https://doc.arvados.org/
+ Main PID: 9421 (arvados-ws)
+ CGroup: /system.slice/arvados-ws.service
+ └─9421 /usr/bin/arvados-ws
+
+Dec 06 11:20:48 zzzzz arvados-ws[9421]: {"level":"info","msg":"started","time":"2016-12-06T11:20:48.207617188-05:00"}
+Dec 06 11:20:48 zzzzz arvados-ws[9421]: {"Listen":":9003","level":"info","msg":"listening","time":"2016-12-06T11:20:48.244956506-05:00"}
+Dec 06 11:20:48 zzzzz systemd[1]: Started Arvados websocket server.
+</code></pre>
+</notextile>
+
+If it is not running, use @journalctl@ to check logs for errors:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo journalctl -n10 -u arvados-ws</span>
+...
+Dec 06 11:12:48 zzzzz systemd[1]: Starting Arvados websocket server...
+Dec 06 11:12:48 zzzzz arvados-ws[8918]: {"level":"info","msg":"started","time":"2016-12-06T11:12:48.030496636-05:00"}
+Dec 06 11:12:48 zzzzz arvados-ws[8918]: {"error":"pq: password authentication failed for user \"arvados\"","level":"fatal","msg":"db.Ping failed","time":"2016-12-06T11:12:48.058206400-05:00"}
+</code></pre>
+</notextile>
+
+Skip ahead to "confirm the service is working":#confirm.
+
+h3(#runit). Start the service (option 2: runit)
+
+Install runit to supervise the arvados-ws daemon. {% include 'install_runit' %}
+
+Create a supervised service.
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo mkdir /etc/service/arvados-ws</span>
+~$ <span class="userinput">cd /etc/service/arvados-ws</span>
+~$ <span class="userinput">sudo mkdir log log/main</span>
+~$ <span class="userinput">printf '#!/bin/sh\nexec arvados-ws 2>&1\n' | sudo tee run</span>
+~$ <span class="userinput">printf '#!/bin/sh\nexec svlogd main\n' | sudo tee log/run</span>
+~$ <span class="userinput">sudo chmod +x run log/run</span>
+~$ <span class="userinput">sudo sv exit .</span>
+~$ <span class="userinput">cd -</span>
+</code></pre>
+</notextile>
+
+Use @sv stat@ and check the log file to verify the service is running.
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo sv stat /etc/service/arvados-ws</span>
+run: /etc/service/arvados-ws: (pid 12520) 2s; run: log: (pid 12519) 2s
+~$ <span class="userinput">tail /etc/service/arvados-ws/log/main/current</span>
+{"level":"info","msg":"started","time":"2016-12-06T11:56:20.669171449-05:00"}
+{"Listen":":9003","level":"info","msg":"listening","time":"2016-12-06T11:56:20.708847627-05:00"}
+</code></pre>
+</notextile>
+
+h3(#confirm). Confirm the service is working
+
+Confirm the service is listening on its assigned port and responding to requests.
+
+<notextile>
+<pre><code>~$ <span class="userinput">curl http://0.0.0.0:<b>9003</b>/status.json</span>
+{"Clients":1}
+</code></pre>
+</notextile>
+
+h3. Set up a reverse proxy with SSL support
+
+The arvados-ws service will be accessible from anywhere on the internet, so we recommend using SSL for transport encryption.
+
+This is best achieved by putting a reverse proxy with SSL support in front of arvados-ws, running on port 443 and passing requests to arvados-ws on port 9003 (or whatever port you chose in your configuration file).
+
+For example, using Nginx:
+
+<notextile><pre>
+upstream arvados-ws {
+ server 127.0.0.1:<span class="userinput">9003</span>;
+}
+
+server {
+ listen <span class="userinput">[your public IP address]</span>:443 ssl;
+ server_name ws.<span class="userinput">uuid_prefix.your.domain</span>;
+
+ proxy_connect_timeout 90s;
+ proxy_read_timeout 300s;
+
+ ssl on;
+  ssl_certificate <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
+
+ location / {
+ proxy_pass http://arvados-ws;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection "upgrade";
+ proxy_set_header Host $host;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ }
+}
+</pre></notextile>
+
+If Nginx is already configured to proxy @ws@ requests to puma, move that configuration out of the way or change its @server_name@ so it doesn't conflict.
+
+h3. Update API server configuration
+
+Ensure the websocket server address is correct in the API server configuration file @/etc/arvados/api/application.yml@.
+
+<notextile>
+<pre><code>websocket_address: wss://ws.<span class="userinput">uuid_prefix.your.domain</span>/websocket
+</code></pre>
+</notextile>
+
+Restart Nginx to reload the API server configuration.
+
+<notextile>
+<pre><code>$ <span class="userinput">sudo nginx -s reload</span>
+</code></pre>
+</notextile>
+
+h3. Verify DNS and proxy setup
+
+Use a host elsewhere on the Internet to confirm that your DNS, proxy, and SSL are configured correctly.
+
+<notextile>
+<pre><code>$ <span class="userinput">curl https://ws.<b>uuid_prefix.your.domain</b>/status.json</span>
+{"Clients":1}
+</code></pre>
+</notextile>
--- /dev/null
+---
+layout: default
+navsection: userguide
+title: Arvados CWL Extensions
+...
+
+Arvados provides several extensions to CWL for workflow optimization, site-specific configuration, and to enable access to the Arvados API.
+
+To use Arvados CWL extensions, add the following @$namespaces@ section at the top of your CWL file:
+
+<pre>
+$namespaces:
+ arv: "http://arvados.org/cwl#"
+</pre>
+
+Arvados extensions must go into the @hints@ section, for example:
+
+<pre>
+hints:
+ arv:RunInSingleContainer: {}
+ arv:RuntimeConstraints:
+ keep_cache: 123456
+    outputDirType: local_output_dir
+ arv:PartitionRequirement:
+ partition: dev_partition
+ arv:APIRequirement: {}
+</pre>
+
+h2. arv:RunInSingleContainer
+
+Indicates that a subworkflow should run in a single container and not be scheduled as separate steps.
+
+h2. arv:RuntimeConstraints
+
+Set Arvados-specific runtime hints.
+
+table(table table-bordered table-condensed).
+|_. Field |_. Type |_. Description |
+|keep_cache|int|Size of file data buffer for Keep mount in MiB. Default is 256 MiB. Increase this to reduce cache thrashing in situations such as accessing multiple large (64+ MiB) files at the same time, or performing random access on a large file.|
+|outputDirType|enum|Preferred backing store for output staging. If not specified, the system may choose which one to use. One of *local_output_dir* or *keep_output_dir*|
+
+*local_output_dir*: Use regular file system local to the compute node. There must be sufficient local scratch space to store entire output; specify this with @outdirMin@ of @ResourceRequirement@. Files are batch uploaded to Keep when the process completes. Most compatible, but upload step can be time consuming for very large files.
+
+*keep_output_dir*: Use writable Keep mount. Files are streamed to Keep as they are written. Does not consume local scratch space, but does consume RAM for output buffers (up to 192 MiB per file simultaneously open for writing.) Best suited to processes which produce sequential output of large files (non-sequential writes may produce fragmented file manifests). Supports regular files and directories, does not support special files such as symlinks, hard links, named pipes, named sockets, or device nodes.
+
+h2. arv:PartitionRequirement
+
+Select preferred compute partitions on which to run jobs.
+
+table(table table-bordered table-condensed).
+|_. Field |_. Type |_. Description |
+|partition|string or array of strings||
+
+h2. arv:APIRequirement
+
+Indicates that the process wants access to the Arvados API. Will be granted limited network access and have @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ set in the environment.
--- /dev/null
+---
+layout: default
+navsection: userguide
+title: "Using arvados-cwl-runner"
+...
+
+The following command line options are available for @arvados-cwl-runner@:
+
+table(table table-bordered table-condensed).
+|_. Option |_. Description |
+|==--basedir== BASEDIR| Base directory used to resolve relative references in the input; defaults to the directory of the input object file, or the current directory if inputs are piped/provided on the command line.|
+|==--version==| Print version and exit|
+|==--verbose==| Default logging|
+|==--quiet==| Only print warnings and errors.|
+|==--debug==| Print even more logging|
+|==--tool-help==| Print command line help for tool|
+|==--enable-reuse==|Enable job reuse (default)|
+|==--disable-reuse==|Disable job reuse (always run new jobs).|
+|==--project-uuid UUID==| Project that will own the workflow jobs, if not provided, will go to home project.|
+|==--output-name OUTPUT_NAME==|Name to use for collection that stores the final output.|
+|==--output-tags OUTPUT_TAGS==|Tags for the final output collection separated by commas, e.g., =='--output-tags tag0,tag1,tag2'==.|
+|==--ignore-docker-for-reuse==|Ignore Docker image version when deciding whether to reuse past jobs.|
+|==--submit==| Submit workflow to run on Arvados.|
+|==--local==| Control workflow from local host (submits jobs to Arvados).|
+|==--create-template==| (Deprecated) synonym for ==--create-workflow.==|
+|==--create-workflow==| Create an Arvados workflow (if using the 'containers' API) or pipeline template (if using the 'jobs' API). See ==--api==.|
+|==--update-workflow== UUID|Update an existing Arvados workflow or pipeline template with the given UUID.|
+|==--wait==| After submitting workflow runner job, wait for completion.|
+|==--no-wait==| Submit workflow runner job and exit.|
+|==--api== WORK_API| Select work submission API, one of 'jobs' or 'containers'. Default is 'jobs' if that API is available, otherwise 'containers'.|
+|==--compute-checksum==| Compute checksum of contents while collecting outputs|
+|==--submit-runner-ram== SUBMIT_RUNNER_RAM|RAM (in MiB) required for the workflow runner job (default 1024)|
+|==--name== NAME| Name to use for workflow execution instance.|
+
+h3. Specify workflow and output names
+
+Use the @--name@ and @--output-name@ options to specify the name of the workflow and name of the output collection.
+
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --name "Example bwa run" --output-name "Example bwa output" bwa-mem.cwl bwa-mem-input.yml</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
+{
+ "aligned_sam": {
+ "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
+ "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
+ "class": "File",
+ "size": 30738986
+ }
+}
+</code></pre>
+</notextile>
+
+h3. Submit a workflow with no waiting
+
+To submit a workflow and exit immediately, use the @--no-wait@ option. This will submit the workflow to Arvados, print out the UUID of the job that was submitted to standard output, and exit.
+
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --no-wait bwa-mem.cwl bwa-mem-input.yml</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Upload local files: "bwa-mem.cwl"
+2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Uploaded to qr1hi-4zz18-eqnfwrow8aysa9q
+2016-06-30 15:07:52 arvados.cwl-runner[12480] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
+qr1hi-8i9sb-fm2n3b1w0l6bskg
+</code></pre>
+</notextile>
+
+h3. Control a workflow locally
+
+To run a workflow with local control, use @--local@. This means that the host where you run @arvados-cwl-runner@ will be responsible for submitting jobs, however, the jobs themselves will still run on the Arvados cluster. With @--local@, if you interrupt @arvados-cwl-runner@ or log out, the workflow will be terminated.
+
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --local bwa-mem.cwl bwa-mem-input.yml</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-07-01 10:05:19 arvados.cwl-runner[16290] INFO: Pipeline instance qr1hi-d1hrv-92wcu6ldtio74r4
+2016-07-01 10:05:28 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Queued
+2016-07-01 10:05:29 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Running
+2016-07-01 10:05:45 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Complete
+2016-07-01 10:05:46 arvados.cwl-runner[16290] INFO: Overall process status is success
+{
+ "aligned_sam": {
+ "size": 30738986,
+ "path": "keep:15f56bad0aaa7364819bf14ca2a27c63+88/HWI-ST1027_129_D0THKACXX.1_1.sam",
+ "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
+ "class": "File"
+ }
+}
+</code></pre>
+</notextile>
---
layout: default
navsection: userguide
-title: Using Common Workflow Language
+title: "Running an Arvados workflow"
...
-The "Common Workflow Language (CWL)":http://commonwl.org is a multi-vendor open standard for describing analysis tools and workflows that are portable across a variety of platforms. CWL is the recommended way to develop and run workflows for Arvados. Arvados supports the "CWL v1.0":http://commonwl.org/v1.0 specification.
+{% include 'what_is_cwl' %}
{% include 'tutorial_expectations' %}
-h2. Setting up
+{% include 'notebox_begin' %}
-The @arvados-cwl-runner@ client is installed by default on Arvados shell nodes. However, if you do not have @arvados-cwl-runner@, you may install it using @pip@:
+By default, the @arvados-cwl-runner@ is installed on Arvados shell nodes. If you want to submit jobs from somewhere else, such as your workstation, you may install "arvados-cwl-runner.":#setup
-<notextile>
-<pre><code>~$ <span class="userinput">virtualenv ~/venv</span>
-~$ <span class="userinput">. ~/venv/bin/activate</span>
-~$ <span class="userinput">pip install -U setuptools</span>
-~$ <span class="userinput">pip install arvados-cwl-runner</span>
-</code></pre>
-</notextile>
-
-h3. Docker
-
-Certain features of @arvados-cwl-runner@ require access to Docker. You can determine if you have access to Docker by running @docker version@:
-
-<notextile>
-<pre><code>~$ <span class="userinput">docker version</span>
-Client:
- Version: 1.9.1
- API version: 1.21
- Go version: go1.4.2
- Git commit: a34a1d5
- Built: Fri Nov 20 12:59:02 UTC 2015
- OS/Arch: linux/amd64
+{% include 'notebox_end' %}
-Server:
- Version: 1.9.1
- API version: 1.21
- Go version: go1.4.2
- Git commit: a34a1d5
- Built: Fri Nov 20 12:59:02 UTC 2015
- OS/Arch: linux/amd64
-</code></pre>
-</notextile>
+This tutorial will demonstrate how to submit a workflow at the command line using @arvados-cwl-runner@.
-If this returns an error, contact the sysadmin of your cluster for assistance. Alternatively, if you have Docker installed on your local workstation, you may follow the instructions above to install @arvados-cwl-runner@.
+h2. Running arvados-cwl-runner
-h3. Getting the example files
+h3. Get the example files
-The tutorial files are located in the documentation section of the Arvados source repository:
+The tutorial files are located in the "documentation section of the Arvados source repository:":https://github.com/curoverse/arvados/tree/master/doc/user/cwl/bwa-mem
<notextile>
<pre><code>~$ <span class="userinput">git clone https://github.com/curoverse/arvados</span>
<notextile>
<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst settings 2463fa9efeb75e099685528b3b9071e0+438</span>
~$ <span class="userinput">arv-copy --src qr1hi --dst settings ae480c5099b81e17267b7445e35b4bc7+180</span>
+~$ <span class="userinput">arv-copy --src qr1hi --dst settings 655c6cd07550151b210961ed1d3852cf+57</span>
</code></pre>
</notextile>
"https://cloud.curoverse.com/collections/ae480c5099b81e17267b7445e35b4bc7+180":https://cloud.curoverse.com/collections/ae480c5099b81e17267b7445e35b4bc7+180
+"https://cloud.curoverse.com/collections/655c6cd07550151b210961ed1d3852cf+57":https://cloud.curoverse.com/collections/655c6cd07550151b210961ed1d3852cf+57
+
h2. Submitting a workflow to an Arvados cluster
-Use @arvados-cwl-runner@ to submit CWL workflows to Arvados. After submitting the job, it will wait for the workflow to complete and print out the final result to standard output. Note that once submitted, the workflow runs entirely on Arvados, so even if you interrupt @arvados-cwl-runner@ or log out, the workflow will continue to run.
+h3. Submit a workflow and wait for results
+
+Use @arvados-cwl-runner@ to submit CWL workflows to Arvados. After submitting the job, it will wait for the workflow to complete and print out the final result to standard output.
+
+*Note:* Once submitted, the workflow runs entirely on Arvados, so even if you log out, the workflow will continue to run. However, if you interrupt @arvados-cwl-runner@ with control-C it will cancel the workflow.
<notextile>
<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner bwa-mem.cwl bwa-mem-input.yml</span>
</code></pre>
</notextile>
-To submit a workflow and exit immediately, use the @--no-wait@ option. This will print out the uuid of the job that was submitted to standard output.
-
-<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --no-wait bwa-mem.cwl bwa-mem-input.yml</span>
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Upload local files: "bwa-mem.cwl"
-2016-06-30 15:07:52 arvados.arv-run[12480] INFO: Uploaded to qr1hi-4zz18-eqnfwrow8aysa9q
-2016-06-30 15:07:52 arvados.cwl-runner[12480] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-qr1hi-8i9sb-fm2n3b1w0l6bskg
-</code></pre>
-</notextile>
-
-To run a workflow with local control, use @--local@. This means that the host where you run @arvados-cwl-runner@ will be responsible for submitting jobs. With @--local@, if you interrupt @arvados-cwl-runner@ or log out, the workflow will be terminated.
-
-<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --local bwa-mem.cwl bwa-mem-input.yml</span>
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-07-01 10:05:19 arvados.cwl-runner[16290] INFO: Pipeline instance qr1hi-d1hrv-92wcu6ldtio74r4
-2016-07-01 10:05:28 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Queued
-2016-07-01 10:05:29 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Running
-2016-07-01 10:05:45 arvados.cwl-runner[16290] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-2nzzfbuf9zjrj4g) is Complete
-2016-07-01 10:05:46 arvados.cwl-runner[16290] INFO: Overall process status is success
-{
- "aligned_sam": {
- "size": 30738986,
- "path": "keep:15f56bad0aaa7364819bf14ca2a27c63+88/HWI-ST1027_129_D0THKACXX.1_1.sam",
- "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
- "class": "File"
- }
-}
-</code></pre>
-</notextile>
-
-h2. Work reuse
-
-Workflows submitted with @arvados-cwl-runner@ will take advantage of Arvados job reuse. If you submit a workflow which is identical to one that has run before, it will short cut the execution and return the result of the previous run. This also applies to individual workflow steps. For example, a two step workflow where the first step has run before will reuse results for first step and only execute the new second step. You can disable this behavior with @--disable-reuse@.
-
-h2. Referencing files
+h3. Referencing files
When running a workflow on an Arvados cluster, the input files must be stored in Keep. There are several ways this can happen.
If you reference a local file which is not in @arv-mount@, then @arvados-cwl-runner@ will upload the file to Keep and use the Keep URI reference from the upload.
-h2. Registering a workflow to use in Workbench
-
-Use @--create-workflow@ to register a CWL workflow with Arvados. This enables you to share workflows with other Arvados users, and run them by clicking the <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a pipeline...</span> button on the Workbench Dashboard.
-
-<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --create-workflow bwa-mem.cwl</span>
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Upload local files: "bwa-mem.cwl"
-2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Uploaded to qr1hi-4zz18-7e0hedrmkuyoei3
-2016-07-01 12:21:01 arvados.cwl-runner[15796] INFO: Created template qr1hi-p5p6p-rjleou1dwr167v5
-qr1hi-p5p6p-rjleou1dwr167v5
-</code></pre>
-</notextile>
-
-You can provide a partial input file to set default values for the workflow input parameters:
-
-<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --create-workflow bwa-mem.cwl bwa-mem-template.yml</span>
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Upload local files: "bwa-mem.cwl"
-2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Uploaded to qr1hi-4zz18-0f91qkovk4ml18o
-2016-07-01 14:09:50 arvados.cwl-runner[3730] INFO: Created template qr1hi-p5p6p-0deqe6nuuyqns2i
-qr1hi-p5p6p-0deqe6nuuyqns2i
-</code></pre>
-</notextile>
-
-h2. Making workflows directly executable
-
-You can make a workflow file directly executable (@cwl-runner@ should be an alias to @arvados-cwl-runner@) by adding the following line to the top of the file:
+You can also execute CWL files directly from Keep:
<notextile>
-<pre><code>#!/usr/bin/env cwl-runner
-</code></pre>
-</notextile>
-
-<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">./bwa-mem.cwl bwa-mem-input.yml</span>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner keep:655c6cd07550151b210961ed1d3852cf+57/bwa-mem.cwl bwa-mem-input.yml</span>
arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
</code></pre>
</notextile>
-You can even make an input file directly executable the same way with the following two lines at the top:
+h3. Work reuse
-<notextile>
-<pre><code>#!/usr/bin/env cwl-runner
-cwl:tool: <span class="userinput">bwa-mem.cwl</span>
-</code></pre>
-</notextile>
+Workflows submitted with @arvados-cwl-runner@ will take advantage of Arvados job reuse. If you submit a workflow which is identical to one that has run before, it will short cut the execution and return the result of the previous run. This also applies to individual workflow steps. For example, a two step workflow where the first step has run before will reuse results for first step and only execute the new second step. You can disable this behavior with @--disable-reuse@.
+
+h3. Command line options
+
+See "Using arvados-cwl-runner":{{site.baseurl}}/user/cwl/cwl-run-options.html
+
+h2(#setup). Setting up arvados-cwl-runner
+
+By default, the @arvados-cwl-runner@ is installed on Arvados shell nodes. If you want to submit jobs from somewhere else, such as your workstation, you may install @arvados-cwl-runner@ using @pip@:
<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">./bwa-mem-input.yml</span>
-arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
-2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
-2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
-2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
-2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
-{
- "aligned_sam": {
- "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
- "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
- "class": "File",
- "size": 30738986
- }
-}
+<pre><code>~$ <span class="userinput">virtualenv ~/venv</span>
+~$ <span class="userinput">. ~/venv/bin/activate</span>
+~$ <span class="userinput">pip install -U setuptools</span>
+~$ <span class="userinput">pip install arvados-cwl-runner</span>
</code></pre>
</notextile>
-h2. Developing workflows
+h3. Check Docker access
-For an introduction and and detailed documentation about writing CWL, see the "User Guide":http://commonwl.org/v1.0/UserGuide.html and the "Specification":http://commonwl.org/v1.0 .
+In order to pull and upload Docker images, @arvados-cwl-runner@ requires access to Docker. You do not need Docker if the Docker images you intend to use are already available in Arvados.
-To run on Arvados, a workflow should provide a @DockerRequirement@ in the @hints@ section.
-
-When developing a workflow, it is often helpful to run it on the local host to avoid the overhead of submitting to the cluster. To execute a workflow only on the local host (without submitting jobs to an Arvados cluster) you can use the @cwltool@ command. Note that you must also have the input data accessible on the local host. You can use @arv-get@ to fetch the data from Keep.
+You can determine if you have access to Docker by running @docker version@:
<notextile>
-<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arv-get 2463fa9efeb75e099685528b3b9071e0+438/ .</span>
-156 MiB / 156 MiB 100.0%
-~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arv-get ae480c5099b81e17267b7445e35b4bc7+180/ .</span>
-23 MiB / 23 MiB 100.0%
-~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">cwltool bwa-mem-input.yml bwa-mem-input-local.yml</span>
-cwltool 1.0.20160629140624
-[job bwa-mem.cwl] /home/example/arvados/doc/user/cwl/bwa-mem$ docker \
- run \
- -i \
- --volume=/home/example/arvados/doc/user/cwl/bwa-mem/19.fasta.ann:/var/lib/cwl/job979368791_bwa-mem/19.fasta.ann:ro \
- --volume=/home/example/arvados/doc/user/cwl/bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.fastq:/var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.fastq:ro \
- --volume=/home/example/arvados/doc/user/cwl/bwa-mem/19.fasta.sa:/var/lib/cwl/job979368791_bwa-mem/19.fasta.sa:ro \
- --volume=/home/example/arvados/doc/user/cwl/bwa-mem/19.fasta.amb:/var/lib/cwl/job979368791_bwa-mem/19.fasta.amb:ro \
- --volume=/home/example/arvados/doc/user/cwl/bwa-mem/19.fasta.pac:/var/lib/cwl/job979368791_bwa-mem/19.fasta.pac:ro \
- --volume=/home/example/arvados/doc/user/cwl/bwa-mem/HWI-ST1027_129_D0THKACXX.1_2.fastq:/var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_2.fastq:ro \
- --volume=/home/example/arvados/doc/user/cwl/bwa-mem/19.fasta.bwt:/var/lib/cwl/job979368791_bwa-mem/19.fasta.bwt:ro \
- --volume=/home/example/arvados/doc/user/cwl/bwa-mem:/var/spool/cwl:rw \
- --volume=/tmp/tmpgzyou9:/tmp:rw \
- --workdir=/var/spool/cwl \
- --read-only=true \
- --log-driver=none \
- --user=1001 \
- --rm \
- --env=TMPDIR=/tmp \
- --env=HOME=/var/spool/cwl \
- biodckr/bwa \
- bwa \
- mem \
- -t \
- 1 \
- -R \
- '@RG ID:arvados_tutorial PL:illumina SM:HWI-ST1027_129' \
- /var/lib/cwl/job979368791_bwa-mem/19.fasta \
- /var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.fastq \
- /var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_2.fastq > /home/example/arvados/doc/user/cwl/bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.sam
-[M::bwa_idx_load_from_disk] read 0 ALT contigs
-[M::process] read 100000 sequences (10000000 bp)...
-[M::mem_pestat] # candidate unique pairs for (FF, FR, RF, RR): (0, 4745, 1, 0)
-[M::mem_pestat] skip orientation FF as there are not enough pairs
-[M::mem_pestat] analyzing insert size distribution for orientation FR...
-[M::mem_pestat] (25, 50, 75) percentile: (154, 181, 214)
-[M::mem_pestat] low and high boundaries for computing mean and std.dev: (34, 334)
-[M::mem_pestat] mean and std.dev: (185.63, 44.88)
-[M::mem_pestat] low and high boundaries for proper pairs: (1, 394)
-[M::mem_pestat] skip orientation RF as there are not enough pairs
-[M::mem_pestat] skip orientation RR as there are not enough pairs
-[M::mem_process_seqs] Processed 100000 reads in 9.848 CPU sec, 9.864 real sec
-[main] Version: 0.7.12-r1039
-[main] CMD: bwa mem -t 1 -R @RG ID:arvados_tutorial PL:illumina SM:HWI-ST1027_129 /var/lib/cwl/job979368791_bwa-mem/19.fasta /var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.fastq /var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_2.fastq
-[main] Real time: 10.061 sec; CPU: 10.032 sec
-Final process status is success
-{
- "aligned_sam": {
- "size": 30738959,
- "path": "/home/example/arvados/doc/user/cwl/bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.sam",
- "checksum": "sha1$0c668cca45fef02397bb5302880526d300ee4dac",
- "class": "File"
- }
-}
+<pre><code>~$ <span class="userinput">docker version</span>
+Client:
+ Version: 1.9.1
+ API version: 1.21
+ Go version: go1.4.2
+ Git commit: a34a1d5
+ Built: Fri Nov 20 12:59:02 UTC 2015
+ OS/Arch: linux/amd64
+
+Server:
+ Version: 1.9.1
+ API version: 1.21
+ Go version: go1.4.2
+ Git commit: a34a1d5
+ Built: Fri Nov 20 12:59:02 UTC 2015
+ OS/Arch: linux/amd64
</code></pre>
</notextile>
-If you get the error @JavascriptException: Long-running script killed after 20 seconds.@ this may be due to the Dockerized Node.js engine taking too long to start. You may address this by installing Node.js locally (run @apt-get install nodejs@ on Debian or Ubuntu) or by specifying a longer timeout with the @--eval-timeout@ option. For example, run the workflow with @cwltool --eval-timeout=40@ for a 40-second timeout.
+If this returns an error, contact the sysadmin of your cluster for assistance.
title: Best Practices for writing CWL
...
+* To run on Arvados, a workflow should provide a @DockerRequirement@ in the @hints@ section.
+
* Build a reusable library of components. Share tool wrappers and subworkflows between projects. Make use of and contribute to "community maintained workflows and tools":https://github.com/common-workflow-language/workflows and tool registries such as "Dockstore":http://dockstore.org .
* When combining a parameter value with a string, such as adding a filename extension, write @$(inputs.file.basename).ext@ instead of @$(inputs.file.basename + 'ext')@. The first form is evaluated as a simple text substitution, the second form (using the @+@ operator) is evaluated as an arbitrary Javascript expression and requires that you declare @InlineJavascriptRequirement@.
title: "Scripts provided by Arvados"
...
+{% include 'pipeline_deprecation_notice' %}
+
Several crunch scripts are included with Arvados in the "/crunch_scripts directory":https://dev.arvados.org/projects/arvados/repository/revisions/master/show/crunch_scripts. They are intended to provide examples and starting points for writing your own scripts.
h4. bwa-aln
You may be asked to log in using a Google account. Arvados uses only your name and email address from Google services for identification, and will never access any personal information. If you are accessing Arvados for the first time, the Workbench may indicate your account status is *New / inactive*. If this is the case, contact the administrator of the Arvados instance to request activation of your account.
-Once your account is active, logging in to the Workbench will present you with the Dashboard. This gives a summary of your projects and recent activity in the Arvados instance. "You are now ready to run your first pipeline.":{{ site.baseurl }}/user/tutorials/tutorial-pipeline-workbench.html
+Once your account is active, logging in to the Workbench will present you with the Dashboard. This gives a summary of your projects and recent activity in the Arvados instance. "You are now ready to run your first pipeline.":{{ site.baseurl }}/user/tutorials/tutorial-workflow-workbench.html
-!{{ site.baseurl }}/images/workbench-dashboard.png!
+!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/workbench-dashboard.png!
title: "Pipeline template reference"
...
+{% include 'pipeline_deprecation_notice' %}
+
Pipeline template options are described on the "pipeline template schema page.":{{site.baseurl}}/api/methods/pipeline_templates.html
title: "Using arv-copy"
...
+{% include 'crunch1only_begin' %}
+On those sites, the "copy a pipeline template" feature described below is not available, and the "copy a workflow" feature is not yet implemented.
+{% include 'crunch1only_end' %}
This tutorial describes how to copy Arvados objects from one cluster to another by using @arv-copy@.
</code></pre>
</notextile>
-You are now able to specify the runtime environment for your program using the @docker_image@ field of the @runtime_constaints@ section of your pipeline components:
+You are now able to specify the runtime environment for your program using @DockerRequirement@ in your workflow:
-<notextile>
-{% code 'example_docker' as javascript %}
-</notextile>
-
-* The @docker_image@ field can be one of: the Docker repository name (as shown above), the Docker image hash, or the Arvados collection portable data hash.
+<pre>
+hints:
+ DockerRequirement:
+ dockerPull: arvados/jobs-with-r
+</pre>
h2. Share Docker images
title: "run-command reference"
...
+{% include 'pipeline_deprecation_notice' %}
+
The @run-command@ crunch script enables you run command line programs.
{% include 'tutorial_expectations_workstation' %}
If the Jobs API is not available, use the "Common Workflow Language":{{site.baseurl}}/user/cwl/cwl-runner.html instead.
{% include 'crunch1only_end' %}
-This tutorial demonstrates how to use the command line to run the same pipeline as described in "running a pipeline using Workbench.":{{site.baseurl}}/user/tutorials/tutorial-pipeline-workbench.html
+This tutorial demonstrates how to use the command line to run the same pipeline as described in "running a pipeline using Workbench.":{{site.baseurl}}/user/tutorials/tutorial-workflow-workbench.html
{% include 'tutorial_expectations' %}
{% include 'tutorial_cluster_name' %}
h2. Running your pipeline
-Your new pipeline template should appear at the top of the Workbench "pipeline templates":{{site.arvados_workbench_host}}/pipeline_templates page. You can run your pipeline "using Workbench":tutorial-pipeline-workbench.html or the "command line.":{{site.baseurl}}/user/topics/running-pipeline-command-line.html
+Your new pipeline template should appear at the top of the Workbench "pipeline templates":{{site.arvados_workbench_host}}/pipeline_templates page. You can run your pipeline "using Workbench":tutorial-workflow-workbench.html or the "command line.":{{site.baseurl}}/user/topics/running-pipeline-command-line.html
Test data is available in the "Arvados Tutorial":{{site.arvados_workbench_host}}/projects/qr1hi-j7d0g-u7zg1qdaowykd8d project:
+++ /dev/null
----
-layout: default
-navsection: userguide
-title: "Running a pipeline using Workbench"
-...
-
-{% include 'crunch1only_begin' %}
-On those sites, the details will be slightly different and the example pipeline might not be available.
-{% include 'crunch1only_end' %}
-
-A "pipeline" (sometimes called a "workflow" in other systems) is a sequence of steps that apply various programs or tools to transform input data to output data. Pipelines are the principal means of performing computation with Arvados. This tutorial demonstrates how to run a single-stage pipeline to take a small data set of paired-end reads from a sample "exome":https://en.wikipedia.org/wiki/Exome in "FASTQ":https://en.wikipedia.org/wiki/FASTQ_format format and align them to "Chromosome 19":https://en.wikipedia.org/wiki/Chromosome_19_%28human%29 using the "bwa mem":http://bio-bwa.sourceforge.net/ tool, producing a "Sequence Alignment/Map (SAM)":https://samtools.github.io/ file. This tutorial will introduce the following Arvados features:
-
-<div>
-* How to create a new pipeline from an existing template.
-* How to browse and select input data for the pipeline and submit the pipeline to run on the Arvados cluster.
-* How to access your pipeline results.
-</div>
-
-notextile. <div class="spaced-out">
-
-h3. Steps
-
-# Start from the *Workbench Dashboard*. You can access the Dashboard by clicking on *<i class="fa fa-lg fa-fw fa-dashboard"></i> Dashboard* in the upper left corner of any Workbench page.
-# Click on the <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a pipeline...</span> button. This will open a dialog box titled *Choose a pipeline to run*.
-# In the search box, type in *Tutorial align using bwa mem*.
-# Select *<i class="fa fa-fw fa-gear"></i> Tutorial align using bwa mem* and click the <span class="btn btn-sm btn-primary" >Next: choose inputs <i class="fa fa-fw fa-arrow-circle-right"></i></span> button. This will create a new pipeline in your *Home* project and will open it. You can now supply the inputs for the pipeline.
-# The first input parameter to the pipeline is *"reference_collection" parameter for run-command script in bwa-mem component*. Click the <span class="btn btn-sm btn-primary">Choose</span> button beneath that header. This will open a dialog box titled *Choose a dataset for "reference_collection" parameter for run-command script in bwa-mem component*.
-# Open the *Home <span class="caret"></span>* menu and select *All Projects*. Search for and select *<i class="fa fa-fw fa-archive"></i> Tutorial chromosome 19 reference* and click the <span class="btn btn-sm btn-primary" >OK</span> button.
-# Repeat the previous two steps to set the *"sample" parameter for run-command script in bwa-mem component* parameter to *<i class="fa fa-fw fa-archive"></i> Tutorial sample exome*.
-# Click on the <span class="btn btn-sm btn-primary" >Run <i class="fa fa-fw fa-play"></i></span> button. The page updates to show you that the pipeline has been submitted to run on the Arvados cluster.
-# After the pipeline starts running, you can track the progress by watching log messages from jobs. This page refreshes automatically. You will see a <span class="label label-success">complete</span> label when the pipeline completes successfully.
-# Click on the *Output* link to see the results of the job. This will load a new page listing the output files from this pipeline. You'll see the output SAM file from the alignment tool under the *Files* tab.
-# Click on the <span class="btn btn-sm btn-info"><i class="fa fa-download"></i></span> download button to the right of the SAM file to download your results.
-
-notextile. </div>
h2. Running your pipeline
-Your new pipeline template should appear at the top of the Workbench "pipeline templates":{{site.arvados_workbench_host}}/pipeline_templates page. You can run your pipeline "using Workbench":tutorial-pipeline-workbench.html or the "command line.":{{site.baseurl}}/user/topics/running-pipeline-command-line.html
+Your new pipeline template should appear at the top of the Workbench "pipeline templates":{{site.arvados_workbench_host}}/pipeline_templates page. You can run your pipeline "using Workbench":tutorial-workflow-workbench.html or the "command line.":{{site.baseurl}}/user/topics/running-pipeline-command-line.html
For more information and examples for writing pipelines, see the "pipeline template reference":{{site.baseurl}}/api/methods/pipeline_templates.html
--- /dev/null
+---
+layout: default
+navsection: userguide
+title: "Running a workflow using Workbench"
+...
+
+A "workflow" (sometimes called a "pipeline" in other systems) is a sequence of steps that apply various programs or tools to transform input data to output data. Workflows are the principal means of performing computation with Arvados. This tutorial demonstrates how to run a single-stage workflow to take a small data set of paired-end reads from a sample "exome":https://en.wikipedia.org/wiki/Exome in "FASTQ":https://en.wikipedia.org/wiki/FASTQ_format format and align them to "Chromosome 19":https://en.wikipedia.org/wiki/Chromosome_19_%28human%29 using the "bwa mem":http://bio-bwa.sourceforge.net/ tool, producing a "Sequence Alignment/Map (SAM)":https://samtools.github.io/ file. This tutorial will introduce the following Arvados features:
+
+<div>
+* How to create a new process from an existing workflow.
+* How to browse and select input data for the workflow and submit the process to run on the Arvados cluster.
+* How to access your process results.
+</div>
+
+h3. Steps
+
+# Start from the *Workbench Dashboard*. You can access the Dashboard by clicking on *<i class="fa fa-lg fa-fw fa-dashboard"></i> Dashboard* in the upper left corner of any Workbench page.
+# Click on the <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a process...</span> button. This will open a dialog box titled *Choose a pipeline or workflow to run*.
+# In the search box, type in *Tutorial bwa mem cwl*.
+# Select *<i class="fa fa-fw fa-gear"></i> Tutorial bwa mem cwl* and click the <span class="btn btn-sm btn-primary" >Next: choose inputs <i class="fa fa-fw fa-arrow-circle-right"></i></span> button. This will create a new process in your *Home* project and will open it. You can now supply the inputs for the process. Please note that all required inputs are populated with default values and you can change them if you prefer.
+# For example, let's see how to change *"reference" parameter* for this workflow. Click the <span class="btn btn-sm btn-primary">Choose</span> button beneath the *"reference" parameter* header. This will open a dialog box titled *Choose a dataset for "reference" parameter for cwl-runner in bwa-mem.cwl component*.
+# Open the *Home <span class="caret"></span>* menu and select *All Projects*. Search for and select *<i class="fa fa-fw fa-archive"></i> Tutorial chromosome 19 reference*. You will then see a list of files. Select *<i class="fa fa-fw fa-file"></i> 19.fasta.bwt* and click the <span class="btn btn-sm btn-primary" >OK</span> button.
+# Repeat the previous two steps to set the *"read_p1" parameter for cwl-runner script in bwa-mem.cwl component* and *"read_p2" parameter for cwl-runner script in bwa-mem.cwl component* parameters.
+# Click on the <span class="btn btn-sm btn-primary" >Run <i class="fa fa-fw fa-play"></i></span> button. The page updates to show you that the process has been submitted to run on the Arvados cluster.
+# After the process starts running, you can track the progress by watching log messages from the component(s). This page refreshes automatically. You will see a <span class="label label-success">complete</span> label when the process completes successfully.
+# Click on the *Output* link to see the results of the process. This will load a new page listing the output files from this process. You'll see the output SAM file from the alignment tool under the *Files* tab.
+# Click on the <span class="btn btn-sm btn-info"><i class="fa fa-download"></i></span> download button to the right of the SAM file to download your results.
--- /dev/null
+---
+layout: default
+navsection: userguide
+title: "Writing a CWL workflow"
+...
+
+{% include 'what_is_cwl' %}
+
+{% include 'tutorial_expectations' %}
+
+h2. Developing workflows
+
+For an introduction and detailed documentation about writing CWL, see the "CWL User Guide":http://commonwl.org/v1.0/UserGuide.html and the "CWL Specification":http://commonwl.org/v1.0 .
+
+See "Best Practices for writing CWL":{{site.baseurl}}/user/cwl/cwl-style.html and "Arvados CWL Extensions":{{site.baseurl}}/user/cwl/cwl-extensions.html for additional information about using CWL on Arvados.
+
+h2. Registering a workflow to use in Workbench
+
+Use @--create-workflow@ to register a CWL workflow with Arvados. This enables you to share workflows with other Arvados users, and run them by clicking the <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a process...</span> button on the Workbench Dashboard and on the command line by UUID.
+
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --create-workflow bwa-mem.cwl</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Upload local files: "bwa-mem.cwl"
+2016-07-01 12:21:01 arvados.arv-run[15796] INFO: Uploaded to qr1hi-4zz18-7e0hedrmkuyoei3
+2016-07-01 12:21:01 arvados.cwl-runner[15796] INFO: Created template qr1hi-p5p6p-rjleou1dwr167v5
+qr1hi-p5p6p-rjleou1dwr167v5
+</code></pre>
+</notextile>
+
+You can provide a partial input file to set default values for the workflow input parameters. You can also use the @--name@ option to set the name of the workflow:
+
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner --name "My workflow with defaults" --create-workflow bwa-mem.cwl bwa-mem-template.yml</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Upload local files: "bwa-mem.cwl"
+2016-07-01 14:09:50 arvados.arv-run[3730] INFO: Uploaded to qr1hi-4zz18-0f91qkovk4ml18o
+2016-07-01 14:09:50 arvados.cwl-runner[3730] INFO: Created template qr1hi-p5p6p-zuniv58hn8d0qd8
+qr1hi-p5p6p-zuniv58hn8d0qd8
+</code></pre>
+</notextile>
+
+h3. Running registered workflows at the command line
+
+You can run a registered workflow at the command line by its UUID:
+
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arvados-cwl-runner qr1hi-p5p6p-zuniv58hn8d0qd8 --help</span>
+/home/peter/work/scripts/venv/bin/arvados-cwl-runner 0d62edcb9d25bf4dcdb20d8872ea7b438e12fc59 1.0.20161209192028, arvados-python-client 0.1.20161212125425, cwltool 1.0.20161207161158
+Resolved 'qr1hi-p5p6p-zuniv58hn8d0qd8' to 'keep:655c6cd07550151b210961ed1d3852cf+57/bwa-mem.cwl'
+usage: qr1hi-p5p6p-zuniv58hn8d0qd8 [-h] [--PL PL] --group_id GROUP_ID
+ --read_p1 READ_P1 [--read_p2 READ_P2]
+ [--reference REFERENCE] --sample_id
+ SAMPLE_ID
+ [job_order]
+
+positional arguments:
+ job_order Job input json file
+
+optional arguments:
+ -h, --help show this help message and exit
+ --PL PL
+ --group_id GROUP_ID
+ --read_p1 READ_P1 The reads, in fastq format.
+ --read_p2 READ_P2 For mate paired reads, the second file (optional).
+ --reference REFERENCE
+ The index files produced by `bwa index`
+ --sample_id SAMPLE_ID
+</code></pre>
+</notextile>
+
+h2. Using cwltool
+
+When developing a workflow, it is often helpful to run it on the local host to avoid the overhead of submitting to the cluster. To execute a workflow only on the local host (without submitting jobs to an Arvados cluster) you can use the @cwltool@ command. Note that when using @cwltool@ you must have the input data accessible on the local file system using either @arv-mount@ or @arv-get@ to fetch the data from Keep.
+
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arv-get 2463fa9efeb75e099685528b3b9071e0+438/ .</span>
+156 MiB / 156 MiB 100.0%
+~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">arv-get ae480c5099b81e17267b7445e35b4bc7+180/ .</span>
+23 MiB / 23 MiB 100.0%
+~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">cwltool bwa-mem-input.yml bwa-mem-input-local.yml</span>
+cwltool 1.0.20160629140624
+[job bwa-mem.cwl] /home/example/arvados/doc/user/cwl/bwa-mem$ docker \
+ run \
+ -i \
+ --volume=/home/example/arvados/doc/user/cwl/bwa-mem/19.fasta.ann:/var/lib/cwl/job979368791_bwa-mem/19.fasta.ann:ro \
+ --volume=/home/example/arvados/doc/user/cwl/bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.fastq:/var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.fastq:ro \
+ --volume=/home/example/arvados/doc/user/cwl/bwa-mem/19.fasta.sa:/var/lib/cwl/job979368791_bwa-mem/19.fasta.sa:ro \
+ --volume=/home/example/arvados/doc/user/cwl/bwa-mem/19.fasta.amb:/var/lib/cwl/job979368791_bwa-mem/19.fasta.amb:ro \
+ --volume=/home/example/arvados/doc/user/cwl/bwa-mem/19.fasta.pac:/var/lib/cwl/job979368791_bwa-mem/19.fasta.pac:ro \
+ --volume=/home/example/arvados/doc/user/cwl/bwa-mem/HWI-ST1027_129_D0THKACXX.1_2.fastq:/var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_2.fastq:ro \
+ --volume=/home/example/arvados/doc/user/cwl/bwa-mem/19.fasta.bwt:/var/lib/cwl/job979368791_bwa-mem/19.fasta.bwt:ro \
+ --volume=/home/example/arvados/doc/user/cwl/bwa-mem:/var/spool/cwl:rw \
+ --volume=/tmp/tmpgzyou9:/tmp:rw \
+ --workdir=/var/spool/cwl \
+ --read-only=true \
+ --log-driver=none \
+ --user=1001 \
+ --rm \
+ --env=TMPDIR=/tmp \
+ --env=HOME=/var/spool/cwl \
+ biodckr/bwa \
+ bwa \
+ mem \
+ -t \
+ 1 \
+ -R \
+ '@RG ID:arvados_tutorial PL:illumina SM:HWI-ST1027_129' \
+ /var/lib/cwl/job979368791_bwa-mem/19.fasta \
+ /var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.fastq \
+ /var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_2.fastq > /home/example/arvados/doc/user/cwl/bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.sam
+[M::bwa_idx_load_from_disk] read 0 ALT contigs
+[M::process] read 100000 sequences (10000000 bp)...
+[M::mem_pestat] # candidate unique pairs for (FF, FR, RF, RR): (0, 4745, 1, 0)
+[M::mem_pestat] skip orientation FF as there are not enough pairs
+[M::mem_pestat] analyzing insert size distribution for orientation FR...
+[M::mem_pestat] (25, 50, 75) percentile: (154, 181, 214)
+[M::mem_pestat] low and high boundaries for computing mean and std.dev: (34, 334)
+[M::mem_pestat] mean and std.dev: (185.63, 44.88)
+[M::mem_pestat] low and high boundaries for proper pairs: (1, 394)
+[M::mem_pestat] skip orientation RF as there are not enough pairs
+[M::mem_pestat] skip orientation RR as there are not enough pairs
+[M::mem_process_seqs] Processed 100000 reads in 9.848 CPU sec, 9.864 real sec
+[main] Version: 0.7.12-r1039
+[main] CMD: bwa mem -t 1 -R @RG ID:arvados_tutorial PL:illumina SM:HWI-ST1027_129 /var/lib/cwl/job979368791_bwa-mem/19.fasta /var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.fastq /var/lib/cwl/job979368791_bwa-mem/HWI-ST1027_129_D0THKACXX.1_2.fastq
+[main] Real time: 10.061 sec; CPU: 10.032 sec
+Final process status is success
+{
+ "aligned_sam": {
+ "size": 30738959,
+ "path": "/home/example/arvados/doc/user/cwl/bwa-mem/HWI-ST1027_129_D0THKACXX.1_1.sam",
+ "checksum": "sha1$0c668cca45fef02397bb5302880526d300ee4dac",
+ "class": "File"
+ }
+}
+</code></pre>
+</notextile>
+
+If you get the error @JavascriptException: Long-running script killed after 20 seconds.@ this may be due to the Dockerized Node.js engine taking too long to start. You may address this by installing Node.js locally (run @apt-get install nodejs@ on Debian or Ubuntu) or by specifying a longer timeout with the @--eval-timeout@ option. For example, run the workflow with @cwltool --eval-timeout=40@ for a 40-second timeout.
+
+h2. Making workflows directly executable
+
+You can make a workflow file directly executable (@cwl-runner@ should be an alias to @arvados-cwl-runner@) by adding the following line to the top of the file:
+
+<notextile>
+<pre><code>#!/usr/bin/env cwl-runner
+</code></pre>
+</notextile>
+
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">./bwa-mem.cwl bwa-mem-input.yml</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
+{
+ "aligned_sam": {
+ "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
+ "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
+ "class": "File",
+ "size": 30738986
+ }
+}
+</code></pre>
+</notextile>
+
+You can even make an input file directly executable the same way with the following two lines at the top:
+
+<notextile>
+<pre><code>#!/usr/bin/env cwl-runner
+cwl:tool: <span class="userinput">bwa-mem.cwl</span>
+</code></pre>
+</notextile>
+
+<notextile>
+<pre><code>~/arvados/doc/user/cwl/bwa-mem$ <span class="userinput">./bwa-mem-input.yml</span>
+arvados-cwl-runner 1.0.20160628195002, arvados-python-client 0.1.20160616015107, cwltool 1.0.20160629140624
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Upload local files: "bwa-mem.cwl"
+2016-06-30 14:56:36 arvados.arv-run[27002] INFO: Uploaded to qr1hi-4zz18-h7ljh5u76760ww2
+2016-06-30 14:56:40 arvados.cwl-runner[27002] INFO: Submitted job qr1hi-8i9sb-fm2n3b1w0l6bskg
+2016-06-30 14:56:41 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Running
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Job bwa-mem.cwl (qr1hi-8i9sb-fm2n3b1w0l6bskg) is Complete
+2016-06-30 14:57:12 arvados.cwl-runner[27002] INFO: Overall process status is success
+{
+ "aligned_sam": {
+ "path": "keep:54325254b226664960de07b3b9482349+154/HWI-ST1027_129_D0THKACXX.1_1.sam",
+ "checksum": "sha1$0dc46a3126d0b5d4ce213b5f0e86e2d05a54755a",
+ "class": "File",
+ "size": 30738986
+ }
+}
+</code></pre>
+</notextile>
."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP "
.q{&& declare -a VOLUMES=() }
- .q{&& if which crunchrunner >/dev/null ; then VOLUMES+=("--volume=$(which crunchrunner):/usr/local/bin/crunchrunner") ; fi }
- .q{&& if test -f /etc/ssl/certs/ca-certificates.crt ; then VOLUMES+=("--volume=/etc/ssl/certs/ca-certificates.crt:/etc/arvados/ca-certificates.crt") ; }
- .q{elif test -f /etc/pki/tls/certs/ca-bundle.crt ; then VOLUMES+=("--volume=/etc/pki/tls/certs/ca-bundle.crt:/etc/arvados/ca-certificates.crt") ; fi };
+ .q{&& if which crunchrunner >/dev/null ; then VOLUMES+=("--volume=$(which crunchrunner):/usr/local/bin/crunchrunner:ro") ; fi }
+ .q{&& if test -f /etc/ssl/certs/ca-certificates.crt ; then VOLUMES+=("--volume=/etc/ssl/certs/ca-certificates.crt:/etc/arvados/ca-certificates.crt:ro") ; }
+ .q{elif test -f /etc/pki/tls/certs/ca-bundle.crt ; then VOLUMES+=("--volume=/etc/pki/tls/certs/ca-bundle.crt:/etc/arvados/ca-certificates.crt:ro") ; fi };
$command .= "&& exec arv-mount --read-write --mount-by-pdh=by_pdh --mount-tmp=tmp --crunchstat-interval=10 --allow-other $arv_file_cache \Q$keep_mnt\E --exec ";
$ENV{TASK_KEEPMOUNT} = "$keep_mnt/by_pdh";
close($log_pipe_in);
my $logger_failed = 0;
- my $read_result = log_writer_read_output(120);
+ my $read_result = log_writer_read_output(600);
if ($read_result == -1) {
$logger_failed = -1;
Log (undef, "timed out reading from 'arv-put'");
assert_match /^usage:/, err
end
+ def test_get_version
+ out, err = capture_subprocess_io do
+ assert_arv_get '--version'
+ end
+ assert_empty(out, "STDOUT not expected: '#{out}'")
+ assert_match(/[0-9]+\.[0-9]+\.[0-9]+/, err, "Version information incorrect: '#{err}'")
+ end
+
def test_help
out, err = capture_subprocess_io do
assert_arv_get '-h'
def test_raw_file
out, err = capture_subprocess_io do
- assert arv_put('--raw', './tmp/foo')
+ assert arv_put('--no-cache', '--raw', './tmp/foo')
end
$stderr.write err
assert_match '', err
def test_as_stream
out, err = capture_subprocess_io do
- assert arv_put('--as-stream', './tmp/foo')
+ assert arv_put('--no-cache', '--as-stream', './tmp/foo')
end
$stderr.write err
assert_match '', err
def test_progress
out, err = capture_subprocess_io do
- assert arv_put('--manifest', '--progress', './tmp/foo')
+ assert arv_put('--no-cache', '--manifest', '--progress', './tmp/foo')
end
assert_match /%/, err
assert match_collection_uuid(out)
def test_batch_progress
out, err = capture_subprocess_io do
- assert arv_put('--manifest', '--batch-progress', './tmp/foo')
+ assert arv_put('--no-cache', '--manifest', '--batch-progress', './tmp/foo')
end
assert_match /: 0 written 3 total/, err
assert_match /: 3 written 3 total/, err
from cwltool.errors import WorkflowException
import cwltool.main
import cwltool.workflow
+import cwltool.process
import schema_salad
+from schema_salad.sourceline import SourceLine
import arvados
import arvados.config
+from arvados.keep import KeepClient
+from arvados.errors import ApiError
from .arvcontainer import ArvadosContainer, RunnerContainer
from .arvjob import ArvadosJob, RunnerJob, RunnerTemplate
from. runner import Runner, upload_instance
from .arvtool import ArvadosCommandTool
from .arvworkflow import ArvadosWorkflow, upload_workflow
-from .fsaccess import CollectionFsAccess
+from .fsaccess import CollectionFsAccess, CollectionFetcher, collectionResolver
from .perf import Perf
from .pathmapper import FinalOutputPathMapper
from ._version import __version__
metrics = logging.getLogger('arvados.cwl-runner.metrics')
logger.setLevel(logging.INFO)
+arvados.log_handler.setFormatter(logging.Formatter(
+ '%(asctime)s %(name)s %(levelname)s: %(message)s',
+ '%Y-%m-%d %H:%M:%S'))
class ArvCwlRunner(object):
"""Execute a CWL tool or workflow, submit work (using either jobs or
"""
- def __init__(self, api_client, work_api=None, keep_client=None, output_name=None, output_tags=None):
+ def __init__(self, api_client, work_api=None, keep_client=None, output_name=None, output_tags=None, num_retries=4):
self.api = api_client
self.processes = {}
self.lock = threading.Lock()
self.final_output = None
self.final_status = None
self.uploaded = {}
- self.num_retries = 4
+ self.num_retries = num_retries
self.uuid = None
self.stop_polling = threading.Event()
self.poll_api = None
else:
self.keep_client = arvados.keep.KeepClient(api_client=self.api, num_retries=self.num_retries)
- for api in ["jobs", "containers"]:
+ self.work_api = None
+ expected_api = ["jobs", "containers"]
+ for api in expected_api:
try:
methods = self.api._rootDesc.get('resources')[api]['methods']
if ('httpMethod' in methods['create'] and
break
except KeyError:
pass
+
if not self.work_api:
if work_api is None:
raise Exception("No supported APIs")
else:
- raise Exception("Unsupported API '%s'" % work_api)
+ raise Exception("Unsupported API '%s', expected one of %s" % (work_api, expected_api))
def arv_make_tool(self, toolpath_object, **kwargs):
kwargs["work_api"] = self.work_api
+ kwargs["fetcher_constructor"] = partial(CollectionFetcher,
+ api_client=self.api,
+ keep_client=self.keep_client)
if "class" in toolpath_object and toolpath_object["class"] == "CommandLineTool":
return ArvadosCommandTool(self, toolpath_object, **kwargs)
elif "class" in toolpath_object and toolpath_object["class"] == "Workflow":
uuid = event["object_uuid"]
with self.lock:
j = self.processes[uuid]
- logger.info("Job %s (%s) is Running", j.name, uuid)
+ logger.info("%s %s is Running", self.label(j), uuid)
j.running = True
j.update_pipeline_component(event["properties"]["new_attributes"])
- elif event["properties"]["new_attributes"]["state"] in ("Complete", "Failed", "Cancelled"):
+ elif event["properties"]["new_attributes"]["state"] in ("Complete", "Failed", "Cancelled", "Final"):
uuid = event["object_uuid"]
try:
self.cond.acquire()
j = self.processes[uuid]
- txt = self.work_api[0].upper() + self.work_api[1:-1]
- logger.info("%s %s (%s) is %s", txt, j.name, uuid, event["properties"]["new_attributes"]["state"])
+ logger.info("%s %s is %s", self.label(j), uuid, event["properties"]["new_attributes"]["state"])
with Perf(metrics, "done %s" % j.name):
j.done(event["properties"]["new_attributes"])
self.cond.notify()
finally:
self.cond.release()
+ def label(self, obj):
+ return "[%s %s]" % (self.work_api[0:-1], obj.name)
+
def poll_states(self):
"""Poll status of jobs or containers listed in the processes dict.
continue
if self.work_api == "containers":
- table = self.poll_api.containers()
+ table = self.poll_api.container_requests()
elif self.work_api == "jobs":
table = self.poll_api.jobs()
def add_uploaded(self, src, pair):
self.uploaded[src] = pair
- def check_writable(self, obj):
+ def check_features(self, obj):
if isinstance(obj, dict):
+ if obj.get("class") == "InitialWorkDirRequirement":
+ if self.work_api == "containers":
+ raise UnsupportedRequirement("InitialWorkDirRequirement not supported with --api=containers")
if obj.get("writable"):
- raise UnsupportedRequirement("InitialWorkDir feature 'writable: true' not supported")
+ raise SourceLine(obj, "writable", UnsupportedRequirement).makeError("InitialWorkDir feature 'writable: true' not supported")
+ if obj.get("class") == "CommandLineTool":
+ if self.work_api == "containers":
+ if obj.get("stdin"):
+ raise SourceLine(obj, "stdin", UnsupportedRequirement).makeError("Stdin redirection currently not suppported with --api=containers")
+ if obj.get("stderr"):
+ raise SourceLine(obj, "stderr", UnsupportedRequirement).makeError("Stderr redirection currently not suppported with --api=containers")
for v in obj.itervalues():
- self.check_writable(v)
- if isinstance(obj, list):
- for v in obj:
- self.check_writable(v)
+ self.check_features(v)
+ elif isinstance(obj, list):
+ for i,v in enumerate(obj):
+ with SourceLine(obj, i, UnsupportedRequirement):
+ self.check_features(v)
def make_output_collection(self, name, tagsString, outputObj):
outputObj = copy.deepcopy(outputObj)
if self.work_api == "containers":
try:
current = self.api.containers().current().execute(num_retries=self.num_retries)
+ except ApiError as e:
+ # Status code 404 just means we're not running in a container.
+ if e.resp.status != 404:
+ logger.info("Getting current container: %s", e)
+ return
+ try:
self.api.containers().update(uuid=current['uuid'],
body={
'output': self.final_output_collection.portable_data_hash(),
def arv_executor(self, tool, job_order, **kwargs):
self.debug = kwargs.get("debug")
- tool.visit(self.check_writable)
+ tool.visit(self.check_features)
self.project_uuid = kwargs.get("project_uuid")
self.pipeline = None
if self.work_api == "jobs":
tmpl = RunnerTemplate(self, tool, job_order,
kwargs.get("enable_reuse"),
- uuid=existing_uuid)
+ uuid=existing_uuid,
+ submit_runner_ram=kwargs.get("submit_runner_ram"),
+ name=kwargs.get("name"))
tmpl.save()
# cwltool.main will write our return value to stdout.
- return tmpl.uuid
+ return (tmpl.uuid, "success")
else:
- return upload_workflow(self, tool, job_order,
+ return (upload_workflow(self, tool, job_order,
self.project_uuid,
- uuid=existing_uuid)
+ uuid=existing_uuid,
+ submit_runner_ram=kwargs.get("submit_runner_ram"),
+ name=kwargs.get("name")), "success")
self.ignore_docker_for_reuse = kwargs.get("ignore_docker_for_reuse")
kwargs["enable_reuse"] = kwargs.get("enable_reuse")
kwargs["use_container"] = True
kwargs["tmpdir_prefix"] = "tmp"
- kwargs["on_error"] = "continue"
kwargs["compute_checksum"] = kwargs.get("compute_checksum")
+ if not kwargs["name"]:
+ del kwargs["name"]
+
if self.work_api == "containers":
kwargs["outdir"] = "/var/spool/cwl"
kwargs["docker_outdir"] = "/var/spool/cwl"
if kwargs.get("submit"):
if self.work_api == "containers":
if tool.tool["class"] == "CommandLineTool":
+ kwargs["runnerjob"] = tool.tool["id"]
runnerjob = tool.job(job_order,
self.output_callback,
**kwargs).next()
else:
- runnerjob = RunnerContainer(self, tool, job_order, kwargs.get("enable_reuse"), self.output_name, self.output_tags)
+ runnerjob = RunnerContainer(self, tool, job_order, kwargs.get("enable_reuse"), self.output_name,
+ self.output_tags, submit_runner_ram=kwargs.get("submit_runner_ram"),
+ name=kwargs.get("name"), on_error=kwargs.get("on_error"))
else:
- runnerjob = RunnerJob(self, tool, job_order, kwargs.get("enable_reuse"), self.output_name, self.output_tags)
+ runnerjob = RunnerJob(self, tool, job_order, kwargs.get("enable_reuse"), self.output_name,
+ self.output_tags, submit_runner_ram=kwargs.get("submit_runner_ram"),
+ name=kwargs.get("name"), on_error=kwargs.get("on_error"))
if not kwargs.get("submit") and "cwl_runner_job" not in kwargs and not self.work_api == "containers":
# Create pipeline for local run
self.pipeline = self.api.pipeline_instances().create(
body={
"owner_uuid": self.project_uuid,
- "name": shortname(tool.tool["id"]),
+ "name": kwargs["name"] if kwargs.get("name") else shortname(tool.tool["id"]),
"components": {},
"state": "RunningOnClient"}).execute(num_retries=self.num_retries)
logger.info("Pipeline instance %s", self.pipeline["uuid"])
if runnerjob and not kwargs.get("wait"):
runnerjob.run(wait=kwargs.get("wait"))
- return runnerjob.uuid
+ return (runnerjob.uuid, "success")
self.poll_api = arvados.api('v1')
self.polling_thread = threading.Thread(target=self.poll_states)
self.final_output, self.final_output_collection = self.make_output_collection(self.output_name, self.output_tags, self.final_output)
self.set_crunch_output()
- if self.final_status != "success":
- raise WorkflowException("Workflow failed.")
-
if kwargs.get("compute_checksum"):
adjustDirObjs(self.final_output, partial(getListing, self.fs_access))
adjustFileObjs(self.final_output, partial(compute_checksums, self.fs_access))
- return self.final_output
+ return (self.final_output, self.final_status)
def versionstring():
exgroup.add_argument("--no-wait", action="store_false", help="Submit workflow runner job and exit.",
default=True, dest="wait")
+ exgroup = parser.add_mutually_exclusive_group()
+ exgroup.add_argument("--log-timestamps", action="store_true", help="Prefix logging lines with timestamp",
+ default=True, dest="log_timestamps")
+ exgroup.add_argument("--no-log-timestamps", action="store_false", help="No timestamp on logging lines",
+ default=True, dest="log_timestamps")
+
parser.add_argument("--api", type=str,
default=None, dest="work_api",
help="Select work submission API, one of 'jobs' or 'containers'. Default is 'jobs' if that API is available, otherwise 'containers'.")
help="Compute checksum of contents while collecting outputs",
dest="compute_checksum")
+ parser.add_argument("--submit-runner-ram", type=int,
+ help="RAM (in MiB) required for the workflow runner job (default 1024)",
+ default=1024)
+
+ parser.add_argument("--name", type=str,
+ help="Name to use for workflow execution instance.",
+ default=None)
+
+ parser.add_argument("--on-error", type=str,
+ help="Desired workflow behavior when a step fails. One of 'stop' or 'continue'. "
+ "Default is 'continue'.", default="continue", choices=("stop", "continue"))
+
parser.add_argument("workflow", type=str, nargs="?", default=None, help="The workflow to execute")
parser.add_argument("job_order", nargs=argparse.REMAINDER, help="The input object to the workflow.")
job_order_object = None
arvargs = parser.parse_args(args)
+ if arvargs.version:
+ print versionstring()
+ return
+
if arvargs.update_workflow:
if arvargs.update_workflow.find('-7fd4e-') == 5:
want_api = 'containers'
try:
if api_client is None:
api_client=arvados.api('v1', model=OrderedJsonModel())
- runner = ArvCwlRunner(api_client, work_api=arvargs.work_api, keep_client=keep_client, output_name=arvargs.output_name, output_tags=arvargs.output_tags)
+ if keep_client is None:
+ keep_client = arvados.keep.KeepClient(api_client=api_client, num_retries=4)
+ runner = ArvCwlRunner(api_client, work_api=arvargs.work_api, keep_client=keep_client,
+ num_retries=4, output_name=arvargs.output_name,
+ output_tags=arvargs.output_tags)
except Exception as e:
logger.error(e)
return 1
metrics.setLevel(logging.DEBUG)
logging.getLogger("cwltool.metrics").setLevel(logging.DEBUG)
+ if arvargs.log_timestamps:
+ arvados.log_handler.setFormatter(logging.Formatter(
+ '%(asctime)s %(name)s %(levelname)s: %(message)s',
+ '%Y-%m-%d %H:%M:%S'))
+ else:
+ arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))
+
arvargs.conformance_test = None
arvargs.use_container = True
arvargs.relax_path_checks = True
+ arvargs.validate = None
return cwltool.main.main(args=arvargs,
stdout=stdout,
makeTool=runner.arv_make_tool,
versionfunc=versionstring,
job_order_object=job_order_object,
- make_fs_access=partial(CollectionFsAccess, api_client=api_client))
+ make_fs_access=partial(CollectionFsAccess,
+ api_client=api_client,
+ keep_client=keep_client),
+ fetcher_constructor=partial(CollectionFetcher,
+ api_client=api_client,
+ keep_client=keep_client),
+ resolver=partial(collectionResolver, api_client),
+ logger_handler=arvados.log_handler)
import json
import os
+import ruamel.yaml as yaml
+
from cwltool.errors import WorkflowException
from cwltool.process import get_feature, UnsupportedRequirement, shortname
from cwltool.pathmapper import adjustFiles
from .arvdocker import arv_docker_get_image
from . import done
from .runner import Runner, arvados_jobs_image
+from .fsaccess import CollectionFetcher
logger = logging.getLogger('arvados.cwl-runner')
"output_path": self.outdir,
"cwd": self.outdir,
"priority": 1,
- "state": "Committed"
+ "state": "Committed",
+ "properties": {}
}
runtime_constraints = {}
mounts = {
}
if self.generatefiles["listing"]:
- raise UnsupportedRequirement("Generate files not supported")
+ raise UnsupportedRequirement("InitialWorkDirRequirement not supported with --api=containers")
container_request["environment"] = {"TMPDIR": self.tmpdir, "HOME": self.outdir}
if self.environment:
container_request["use_existing"] = kwargs.get("enable_reuse", True)
container_request["scheduling_parameters"] = scheduling_parameters
+ if kwargs.get("runnerjob", "").startswith("arvwf:"):
+ wfuuid = kwargs["runnerjob"][6:kwargs["runnerjob"].index("#")]
+ wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
+ if container_request["name"] == "main":
+ container_request["name"] = wfrecord["name"]
+ container_request["properties"]["template_uuid"] = wfuuid
+
try:
response = self.arvrunner.api.container_requests().create(
body=container_request
).execute(num_retries=self.arvrunner.num_retries)
- self.arvrunner.processes[response["container_uuid"]] = self
+ self.uuid = response["uuid"]
+ self.arvrunner.processes[self.uuid] = self
- container = self.arvrunner.api.containers().get(
- uuid=response["container_uuid"]
- ).execute(num_retries=self.arvrunner.num_retries)
+ logger.info("%s %s state is %s", self.arvrunner.label(self), response["uuid"], response["state"])
- logger.info("Container request %s (%s) state is %s with container %s %s", self.name, response["uuid"], response["state"], container["uuid"], container["state"])
-
- if container["state"] in ("Complete", "Cancelled"):
- self.done(container)
+ if response["state"] == "Final":
+ self.done(response)
except Exception as e:
- logger.error("Got error %s" % str(e))
+ logger.error("%s got error %s" % (self.arvrunner.label(self), str(e)))
self.output_callback({}, "permanentFail")
def done(self, record):
try:
- if record["state"] == "Complete":
- rcode = record["exit_code"]
+ container = self.arvrunner.api.containers().get(
+ uuid=record["container_uuid"]
+ ).execute(num_retries=self.arvrunner.num_retries)
+ if container["state"] == "Complete":
+ rcode = container["exit_code"]
if self.successCodes and rcode in self.successCodes:
processStatus = "success"
elif self.temporaryFailCodes and rcode in self.temporaryFailCodes:
else:
processStatus = "permanentFail"
- try:
- outputs = {}
- if record["output"]:
- outputs = done.done(self, record, "/tmp", self.outdir, "/keep")
- except WorkflowException as e:
- logger.error("Error while collecting output for container %s:\n%s", self.name, e, exc_info=(e if self.arvrunner.debug else False))
- processStatus = "permanentFail"
- except Exception as e:
- logger.exception("Got unknown exception while collecting output for container %s:", self.name)
- processStatus = "permanentFail"
-
- # Note: Currently, on error output_callback is expecting an empty dict,
- # anything else will fail.
- if not isinstance(outputs, dict):
- logger.error("Unexpected output type %s '%s'", type(outputs), outputs)
- outputs = {}
- processStatus = "permanentFail"
-
- self.output_callback(outputs, processStatus)
+ if processStatus == "permanentFail":
+ logc = arvados.collection.CollectionReader(container["log"],
+ api_client=self.arvrunner.api,
+ keep_client=self.arvrunner.keep_client,
+ num_retries=self.arvrunner.num_retries)
+ done.logtail(logc, logger, "%s error log:" % self.arvrunner.label(self))
+
+ outputs = {}
+ if container["output"]:
+ outputs = done.done_outputs(self, container, "/tmp", self.outdir, "/keep")
+ except WorkflowException as e:
+ logger.error("%s unable to collect output from %s:\n%s",
+ self.arvrunner.label(self), container["output"], e, exc_info=(e if self.arvrunner.debug else False))
+ processStatus = "permanentFail"
+ except Exception as e:
+ logger.exception("%s while getting output object: %s", self.arvrunner.label(self), e)
+ processStatus = "permanentFail"
finally:
- del self.arvrunner.processes[record["uuid"]]
+ self.output_callback(outputs, processStatus)
+ if record["uuid"] in self.arvrunner.processes:
+ del self.arvrunner.processes[record["uuid"]]
class RunnerContainer(Runner):
workflowmapper = super(RunnerContainer, self).arvados_job_spec(dry_run=dry_run, pull_image=pull_image, **kwargs)
- with arvados.collection.Collection(api_client=self.arvrunner.api,
- keep_client=self.arvrunner.keep_client,
- num_retries=self.arvrunner.num_retries) as jobobj:
- with jobobj.open("cwl.input.json", "w") as f:
- json.dump(self.job_order, f, sort_keys=True, indent=4)
- jobobj.save_new(owner_uuid=self.arvrunner.project_uuid)
-
- workflowname = os.path.basename(self.tool.tool["id"])
- workflowpath = "/var/lib/cwl/workflow/%s" % workflowname
- workflowcollection = workflowmapper.mapper(self.tool.tool["id"])[1]
- workflowcollection = workflowcollection[5:workflowcollection.index('/')]
- jobpath = "/var/lib/cwl/job/cwl.input.json"
-
- command = ["arvados-cwl-runner", "--local", "--api=containers"]
- if self.output_name:
- command.append("--output-name=" + self.output_name)
-
- if self.output_tags:
- command.append("--output-tags=" + self.output_tags)
-
- if self.enable_reuse:
- command.append("--enable-reuse")
- else:
- command.append("--disable-reuse")
-
- command.extend([workflowpath, jobpath])
-
- return {
- "command": command,
+ container_req = {
"owner_uuid": self.arvrunner.project_uuid,
"name": self.name,
"output_path": "/var/spool/cwl",
"state": "Committed",
"container_image": arvados_jobs_image(self.arvrunner),
"mounts": {
- "/var/lib/cwl/workflow": {
- "kind": "collection",
- "portable_data_hash": "%s" % workflowcollection
- },
- jobpath: {
- "kind": "collection",
- "portable_data_hash": "%s/cwl.input.json" % jobobj.portable_data_hash()
+ "/var/lib/cwl/cwl.input.json": {
+ "kind": "json",
+ "content": self.job_order
},
"stdout": {
"kind": "file",
},
"runtime_constraints": {
"vcpus": 1,
- "ram": 1024*1024*256,
+ "ram": 1024*1024 * self.submit_runner_ram,
"API": True
- }
+ },
+ "properties": {}
}
+ workflowcollection = workflowmapper.mapper(self.tool.tool["id"])[1]
+ if workflowcollection.startswith("keep:"):
+ workflowcollection = workflowcollection[5:workflowcollection.index('/')]
+ workflowname = os.path.basename(self.tool.tool["id"])
+ workflowpath = "/var/lib/cwl/workflow/%s" % workflowname
+ container_req["mounts"]["/var/lib/cwl/workflow"] = {
+ "kind": "collection",
+ "portable_data_hash": "%s" % workflowcollection
+ }
+ elif workflowcollection.startswith("arvwf:"):
+ workflowpath = "/var/lib/cwl/workflow.json#main"
+ wfuuid = workflowcollection[6:workflowcollection.index("#")]
+ wfrecord = self.arvrunner.api.workflows().get(uuid=wfuuid).execute(num_retries=self.arvrunner.num_retries)
+ wfobj = yaml.safe_load(wfrecord["definition"])
+ if container_req["name"].startswith("arvwf:"):
+ container_req["name"] = wfrecord["name"]
+ container_req["mounts"]["/var/lib/cwl/workflow.json"] = {
+ "kind": "json",
+ "json": wfobj
+ }
+ container_req["properties"]["template_uuid"] = wfuuid
+
+ command = ["arvados-cwl-runner", "--local", "--api=containers", "--no-log-timestamps"]
+ if self.output_name:
+ command.append("--output-name=" + self.output_name)
+ container_req["output_name"] = self.output_name
+
+ if self.output_tags:
+ command.append("--output-tags=" + self.output_tags)
+
+ if kwargs.get("debug"):
+ command.append("--debug")
+
+ if self.enable_reuse:
+ command.append("--enable-reuse")
+ else:
+ command.append("--disable-reuse")
+
+ if self.on_error:
+ command.append("--on-error=" + self.on_error)
+
+ command.extend([workflowpath, "/var/lib/cwl/cwl.input.json"])
+
+ container_req["command"] = command
+
+ return container_req
+
+
def run(self, *args, **kwargs):
kwargs["keepprefix"] = "keep:"
job_spec = self.arvados_job_spec(*args, **kwargs)
).execute(num_retries=self.arvrunner.num_retries)
self.uuid = response["uuid"]
- self.arvrunner.processes[response["container_uuid"]] = self
+ self.arvrunner.processes[self.uuid] = self
- logger.info("Submitted container %s", response["uuid"])
+ logger.info("%s submitted container %s", self.arvrunner.label(self), response["uuid"])
- if response["state"] in ("Complete", "Failed", "Cancelled"):
+ if response["state"] == "Final":
self.done(response)
+
+ def done(self, record):
+ try:
+ container = self.arvrunner.api.containers().get(
+ uuid=record["container_uuid"]
+ ).execute(num_retries=self.arvrunner.num_retries)
+ except Exception as e:
+ logger.exception("%s while getting runner container: %s", self.arvrunner.label(self), e)
+ self.arvrunner.output_callback({}, "permanentFail")
+ else:
+ super(RunnerContainer, self).done(container)
+ finally:
+ if record["uuid"] in self.arvrunner.processes:
+ del self.arvrunner.processes[record["uuid"]]
import sys
import threading
+from schema_salad.sourceline import SourceLine
+
import cwltool.docker
from cwltool.errors import WorkflowException
import arvados.commands.keepdocker
if "dockerImageId" not in dockerRequirement and "dockerPull" in dockerRequirement:
dockerRequirement["dockerImageId"] = dockerRequirement["dockerPull"]
+ if hasattr(dockerRequirement, 'lc'):
+ dockerRequirement.lc.data["dockerImageId"] = dockerRequirement.lc.data["dockerPull"]
global cached_lookups
global cached_lookups_lock
if dockerRequirement["dockerImageId"] in cached_lookups:
return cached_lookups[dockerRequirement["dockerImageId"]]
- sp = dockerRequirement["dockerImageId"].split(":")
- image_name = sp[0]
- image_tag = sp[1] if len(sp) > 1 else None
-
- images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3,
- image_name=image_name,
- image_tag=image_tag)
-
- if not images:
- # Fetch Docker image if necessary.
- cwltool.docker.get_image(dockerRequirement, pull_image)
-
- # Upload image to Arvados
- args = []
- if project_uuid:
- args.append("--project-uuid="+project_uuid)
- args.append(image_name)
- if image_tag:
- args.append(image_tag)
- logger.info("Uploading Docker image %s", ":".join(args[1:]))
- try:
- arvados.commands.keepdocker.main(args, stdout=sys.stderr)
- except SystemExit as e:
- if e.code:
- raise WorkflowException("keepdocker exited with code %s" % e.code)
+ with SourceLine(dockerRequirement, "dockerImageId", WorkflowException):
+ sp = dockerRequirement["dockerImageId"].split(":")
+ image_name = sp[0]
+ image_tag = sp[1] if len(sp) > 1 else None
images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3,
image_name=image_name,
image_tag=image_tag)
- if not images:
- raise WorkflowException("Could not find Docker image %s:%s" % (image_name, image_tag))
-
- pdh = api_client.collections().get(uuid=images[0][0]).execute()["portable_data_hash"]
-
- with cached_lookups_lock:
- cached_lookups[dockerRequirement["dockerImageId"]] = pdh
-
- return pdh
+ if not images:
+ # Fetch Docker image if necessary.
+ cwltool.docker.get_image(dockerRequirement, pull_image)
+
+ # Upload image to Arvados
+ args = []
+ if project_uuid:
+ args.append("--project-uuid="+project_uuid)
+ args.append(image_name)
+ if image_tag:
+ args.append(image_tag)
+ logger.info("Uploading Docker image %s", ":".join(args[1:]))
+ try:
+ arvados.commands.keepdocker.main(args, stdout=sys.stderr)
+ except SystemExit as e:
+ if e.code:
+ raise WorkflowException("keepdocker exited with code %s" % e.code)
+
+ images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3,
+ image_name=image_name,
+ image_tag=image_tag)
+
+ if not images:
+ raise WorkflowException("Could not find Docker image %s:%s" % (image_name, image_tag))
+
+ pdh = api_client.collections().get(uuid=images[0][0]).execute()["portable_data_hash"]
+
+ with cached_lookups_lock:
+ cached_lookups[dockerRequirement["dockerImageId"]] = pdh
+
+ return pdh
def arv_docker_clear_cache():
global cached_lookups
crunchrunner_re = re.compile(r"^\S+ \S+ \d+ \d+ stderr \S+ \S+ crunchrunner: \$\(task\.(tmpdir|outdir|keep)\)=(.*)")
+crunchrunner_git_commit = 'a3f2cb186e437bfce0031b024b2157b73ed2717d'
+
class ArvadosJob(object):
"""Submit and manage a Crunch job for executing a CWL CommandLineTool."""
(docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
if docker_req and kwargs.get("use_container") is not False:
if docker_req.get("dockerOutputDirectory"):
- raise UnsupportedRequirement("Option 'dockerOutputDirectory' of DockerRequirement not supported.")
+ raise SourceLine(docker_req, "dockerOutputDirectory", UnsupportedRequirement).makeError(
+ "Option 'dockerOutputDirectory' of DockerRequirement not supported.")
runtime_constraints["docker_image"] = arv_docker_get_image(self.arvrunner.api, docker_req, pull_image, self.arvrunner.project_uuid)
else:
runtime_constraints["docker_image"] = arvados_jobs_image(self.arvrunner)
filters = [["repository", "=", "arvados"],
["script", "=", "crunchrunner"],
- ["script_version", "in git", "9e5b98e8f5f4727856b53447191f9c06e3da2ba6"]]
+ ["script_version", "in git", crunchrunner_git_commit]]
if not self.arvrunner.ignore_docker_for_reuse:
filters.append(["docker_image_locator", "in docker", runtime_constraints["docker_image"]])
"script": "crunchrunner",
"repository": "arvados",
"script_version": "master",
- "minimum_script_version": "9e5b98e8f5f4727856b53447191f9c06e3da2ba6",
+ "minimum_script_version": crunchrunner_git_commit,
"script_parameters": {"tasks": [script_parameters]},
"runtime_constraints": runtime_constraints
},
self.update_pipeline_component(response)
- logger.info("Job %s (%s) is %s", self.name, response["uuid"], response["state"])
+ logger.info("%s %s is %s", self.arvrunner.label(self), response["uuid"], response["state"])
if response["state"] in ("Complete", "Failed", "Cancelled"):
with Perf(metrics, "done %s" % self.name):
self.done(response)
except Exception as e:
- logger.exception("Job %s error" % (self.name))
+ logger.exception("%s error" % (self.arvrunner.label(self)))
self.output_callback({}, "permanentFail")
def update_pipeline_component(self, record):
if g:
dirs[g.group(1)] = g.group(2)
+ if processStatus == "permanentFail":
+ done.logtail(logc, logger, "%s error log:" % self.arvrunner.label(self))
+
with Perf(metrics, "output collection %s" % self.name):
outputs = done.done(self, record, dirs["tmpdir"],
dirs["outdir"], dirs["keep"])
except WorkflowException as e:
- logger.error("Error while collecting output for job %s:\n%s", self.name, e, exc_info=(e if self.arvrunner.debug else False))
+ logger.error("%s unable to collect output from %s:\n%s",
+ self.arvrunner.label(self), record["output"], e, exc_info=(e if self.arvrunner.debug else False))
processStatus = "permanentFail"
except Exception as e:
logger.exception("Got unknown exception while collecting output for job %s:", self.name)
logger.error("Unexpected output type %s '%s'", type(outputs), outputs)
outputs = {}
processStatus = "permanentFail"
-
- self.output_callback(outputs, processStatus)
finally:
- del self.arvrunner.processes[record["uuid"]]
-
+ self.output_callback(outputs, processStatus)
+ if record["uuid"] in self.arvrunner.processes:
+ del self.arvrunner.processes[record["uuid"]]
class RunnerJob(Runner):
"""Submit and manage a Crunch job that runs crunch_scripts/cwl-runner."""
workflowmapper = super(RunnerJob, self).arvados_job_spec(dry_run=dry_run, pull_image=pull_image, **kwargs)
- # Need to filter this out, gets added by cwltool when providing
- # parameters on the command line, and arv-run-pipeline-instance doesn't
- # like it.
- if "job_order" in self.job_order:
- del self.job_order["job_order"]
-
self.job_order["cwl:tool"] = workflowmapper.mapper(self.tool.tool["id"]).target[5:]
if self.output_name:
self.job_order["arv:enable_reuse"] = self.enable_reuse
+ if self.on_error:
+ self.job_order["arv:on_error"] = self.on_error
+
return {
"script": "cwl-runner",
- "script_version": __version__,
+ "script_version": "master",
+ "minimum_script_version": "570509ab4d2ef93d870fd2b1f2eab178afb1bad9",
"repository": "arvados",
"script_parameters": self.job_order,
"runtime_constraints": {
- "docker_image": arvados_jobs_image(self.arvrunner)
+ "docker_image": arvados_jobs_image(self.arvrunner),
+ "min_ram_mb_per_node": self.submit_runner_ram
}
}
self.arvrunner.pipeline = self.arvrunner.api.pipeline_instances().create(
body={
"owner_uuid": self.arvrunner.project_uuid,
- "name": shortname(self.tool.tool["id"]),
+ "name": self.name,
"components": {"cwl-runner": job_spec },
"state": "RunningOnServer"}).execute(num_retries=self.arvrunner.num_retries)
logger.info("Created pipeline %s", self.arvrunner.pipeline["uuid"])
'string': 'text',
}
- def __init__(self, runner, tool, job_order, enable_reuse, uuid):
+ def __init__(self, runner, tool, job_order, enable_reuse, uuid,
+ submit_runner_ram=0, name=None):
self.runner = runner
self.tool = tool
self.job = RunnerJob(
job_order=job_order,
enable_reuse=enable_reuse,
output_name=None,
- output_tags=None)
+ output_tags=None,
+ submit_runner_ram=submit_runner_ram,
+ name=name)
self.uuid = uuid
def pipeline_component_spec(self):
import copy
import logging
+from schema_salad.sourceline import SourceLine, cmap
+
from cwltool.pack import pack
from cwltool.load_tool import fetch_document
from cwltool.process import shortname
logger = logging.getLogger('arvados.cwl-runner')
metrics = logging.getLogger('arvados.cwl-runner.metrics')
-def upload_workflow(arvRunner, tool, job_order, project_uuid, uuid=None):
+def upload_workflow(arvRunner, tool, job_order, project_uuid, uuid=None,
+ submit_runner_ram=0, name=None):
upload_docker(arvRunner, tool)
document_loader, workflowobj, uri = (tool.doc_loader, tool.doc_loader.fetch(tool.tool["id"]), tool.tool["id"])
if sn in job_order:
inp["default"] = job_order[sn]
- name = os.path.basename(tool.tool["id"])
+ if not name:
+ name = tool.tool.get("label", os.path.basename(tool.tool["id"]))
+
upload_dependencies(arvRunner, name, document_loader,
packed, uri, False)
+ # TODO nowhere for submit_runner_ram to go.
+
body = {
"workflow": {
- "name": tool.tool.get("label", name),
+ "name": name,
"description": tool.tool.get("doc", ""),
- "definition":yaml.safe_dump(packed)
+ "definition":yaml.round_trip_dump(packed)
}}
if project_uuid:
body["workflow"]["owner_uuid"] = project_uuid
kwargs["work_api"] = self.work_api
req, _ = self.get_requirement("http://arvados.org/cwl#RunInSingleContainer")
if req:
+ with SourceLine(self.tool, None, WorkflowException):
+ if "id" not in self.tool:
+ raise WorkflowException("%s object must have 'id'" % (self.tool["class"]))
document_loader, workflowobj, uri = (self.doc_loader, self.doc_loader.fetch(self.tool["id"]), self.tool["id"])
with Perf(metrics, "subworkflow upload_deps"):
joborder_keepmount = copy.deepcopy(joborder)
def keepmount(obj):
- if "location" not in obj:
- raise WorkflowException("%s object is missing required 'location' field: %s" % (obj["class"], obj))
- if obj["location"].startswith("keep:"):
- obj["location"] = "/keep/" + obj["location"][5:]
- if "listing" in obj:
- del obj["listing"]
- elif obj["location"].startswith("_:"):
- del obj["location"]
- else:
- raise WorkflowException("Location is not a keep reference or a literal: '%s'" % obj["location"])
+ with SourceLine(obj, None, WorkflowException):
+ if "location" not in obj:
+ raise WorkflowException("%s object is missing required 'location' field: %s" % (obj["class"], obj))
+ with SourceLine(obj, "location", WorkflowException):
+ if obj["location"].startswith("keep:"):
+ obj["location"] = "/keep/" + obj["location"][5:]
+ if "listing" in obj:
+ del obj["listing"]
+ elif obj["location"].startswith("_:"):
+ del obj["location"]
+ else:
+ raise WorkflowException("Location is not a keep reference or a literal: '%s'" % obj["location"])
adjustFileObjs(joborder_keepmount, keepmount)
adjustDirObjs(joborder_keepmount, keepmount)
adjustFileObjs(packed, keepmount)
adjustDirObjs(packed, keepmount)
- wf_runner = {
+ wf_runner = cmap({
"class": "CommandLineTool",
"baseCommand": "cwltool",
"inputs": self.tool["inputs"],
"class": "InitialWorkDirRequirement",
"listing": [{
"entryname": "workflow.cwl",
- "entry": yaml.safe_dump(packed).replace("\\", "\\\\").replace('$(', '\$(').replace('${', '\${')
+ "entry": yaml.round_trip_dump(packed).replace("\\", "\\\\").replace('$(', '\$(').replace('${', '\${')
}, {
"entryname": "cwl.input.yml",
- "entry": yaml.safe_dump(joborder_keepmount).replace("\\", "\\\\").replace('$(', '\$(').replace('${', '\${')
+ "entry": yaml.round_trip_dump(joborder_keepmount).replace("\\", "\\\\").replace('$(', '\$(').replace('${', '\${')
}]
}],
"hints": workflowobj["hints"],
"arguments": ["--no-container", "--move-outputs", "--preserve-entire-environment", "workflow.cwl#main", "cwl.input.yml"]
- }
+ })
kwargs["loader"] = self.doc_loader
kwargs["avsc_names"] = self.doc_schema
return ArvadosCommandTool(self.arvrunner, wf_runner, **kwargs).job(joborder, output_callback, **kwargs)
logger = logging.getLogger('arvados.cwl-runner')
def run():
+ # Timestamps are added by crunch-job, so don't print redundant timestamps.
+ arvados.log_handler.setFormatter(logging.Formatter('%(name)s %(levelname)s: %(message)s'))
+
# Print package versions
logger.info(arvados_cwl.versionstring())
runner = None
try:
job_order_object = arvados.current_job()['script_parameters']
+ toolpath = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object.pop("cwl:tool"))
pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+(/.+)?$')
def keeppathObj(v):
v["location"] = keeppath(v["location"])
- job_order_object["cwl:tool"] = "file://%s/%s" % (os.environ['TASK_KEEPMOUNT'], job_order_object["cwl:tool"])
-
for k,v in job_order_object.items():
if isinstance(v, basestring) and arvados.util.keep_locator_pattern.match(v):
job_order_object[k] = {
output_name = None
output_tags = None
enable_reuse = True
+ on_error = "continue"
if "arv:output_name" in job_order_object:
output_name = job_order_object["arv:output_name"]
del job_order_object["arv:output_name"]
enable_reuse = job_order_object["arv:enable_reuse"]
del job_order_object["arv:enable_reuse"]
+ if "arv:on_error" in job_order_object:
+ on_error = job_order_object["arv:on_error"]
+ del job_order_object["arv:on_error"]
+
runner = arvados_cwl.ArvCwlRunner(api_client=arvados.api('v1', model=OrderedJsonModel()),
output_name=output_name, output_tags=output_tags)
- t = load_tool(job_order_object, runner.arv_make_tool)
+ t = load_tool(toolpath, runner.arv_make_tool)
args = argparse.Namespace()
args.project_uuid = arvados.current_job()["owner_uuid"]
args.enable_reuse = enable_reuse
+ args.on_error = on_error
args.submit = False
- args.debug = True
+ args.debug = False
args.quiet = False
args.ignore_docker_for_reuse = False
args.basedir = os.getcwd()
+ args.name = None
args.cwl_runner_job={"uuid": arvados.current_job()["uuid"], "state": arvados.current_job()["state"]}
- outputObj = runner.arv_executor(t, job_order_object, **vars(args))
+ runner.arv_executor(t, job_order_object, **vars(args))
except Exception as e:
if isinstance(e, WorkflowException):
logging.info("Workflow error %s", e)
+import re
from cwltool.errors import WorkflowException
+from collections import deque
def done(self, record, tmpdir, outdir, keepdir):
colname = "Output %s of %s" % (record["output"][0:7], self.name)
if not collections["items"]:
raise WorkflowException(
- "Job output '%s' cannot be found on API server" % (
- record["output"]))
+ "[job %s] output '%s' cannot be found on API server" % (
+ self.name, record["output"]))
# Create new collection in the parent project
# with the output contents.
}, ensure_unique_name=True).execute(
num_retries=self.arvrunner.num_retries)
+ return done_outputs(self, record, tmpdir, outdir, keepdir)
+
+def done_outputs(self, record, tmpdir, outdir, keepdir):
self.builder.outdir = outdir
self.builder.pathmapper.keepdir = keepdir
return self.collect_outputs("keep:" + record["output"])
+
+crunchstat_re = re.compile(r"^\d{4}-\d\d-\d\d_\d\d:\d\d:\d\d [a-z0-9]{5}-8i9sb-[a-z0-9]{15} \d+ \d+ stderr crunchstat:")
+timestamp_re = re.compile(r"^(\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d\.\d+Z) (.*)")
+
+def logtail(logcollection, logger, header, maxlen=25):
+ logtail = deque([], maxlen*len(logcollection))
+ containersapi = ("crunch-run.txt" in logcollection)
+
+ for log in logcollection.keys():
+ if not containersapi or log in ("crunch-run.txt", "stdout.txt", "stderr.txt"):
+ logname = log[:-4]
+ with logcollection.open(log) as f:
+ for l in f:
+ if containersapi:
+ g = timestamp_re.match(l)
+ logtail.append("%s %s %s" % (g.group(1), logname, g.group(2)))
+ elif not crunchstat_re.match(l):
+ logtail.append(l)
+ if len(logcollection) > 1:
+ logtail = sorted(logtail)[-maxlen:]
+ logtxt = "\n ".join(l.strip() for l in logtail)
+ logger.info(header)
+ logger.info("\n %s", logtxt)
import fnmatch
import os
import errno
+import urlparse
+import re
+
+import ruamel.yaml as yaml
import cwltool.stdfsaccess
from cwltool.pathmapper import abspath
+import cwltool.resolver
import arvados.util
import arvados.collection
import arvados.arvfile
+from schema_salad.ref_resolver import DefaultFetcher
+
class CollectionFsAccess(cwltool.stdfsaccess.StdFsAccess):
"""Implement the cwltool FsAccess interface for Arvados Collections."""
return path
else:
return os.path.realpath(path)
+
+class CollectionFetcher(DefaultFetcher):
+ def __init__(self, cache, session, api_client=None, keep_client=None):
+ super(CollectionFetcher, self).__init__(cache, session)
+ self.api_client = api_client
+ self.fsaccess = CollectionFsAccess("", api_client=api_client, keep_client=keep_client)
+
+ def fetch_text(self, url):
+ if url.startswith("keep:"):
+ with self.fsaccess.open(url, "r") as f:
+ return f.read()
+ if url.startswith("arvwf:"):
+ return self.api_client.workflows().get(uuid=url[6:]).execute()["definition"]
+ return super(CollectionFetcher, self).fetch_text(url)
+
+ def check_exists(self, url):
+ if url.startswith("keep:"):
+ return self.fsaccess.exists(url)
+ if url.startswith("arvwf:"):
+ if self.fetch_text(url):
+ return True
+ return super(CollectionFetcher, self).check_exists(url)
+
+ def urljoin(self, base_url, url):
+ if not url:
+ return base_url
+
+ urlsp = urlparse.urlsplit(url)
+ if urlsp.scheme or not base_url:
+ return url
+
+ basesp = urlparse.urlsplit(base_url)
+ if basesp.scheme in ("keep", "arvwf"):
+ if not basesp.path:
+ raise IOError(errno.EINVAL, "Invalid Keep locator", base_url)
+
+ baseparts = basesp.path.split("/")
+ urlparts = urlsp.path.split("/") if urlsp.path else []
+
+ pdh = baseparts.pop(0)
+
+ if basesp.scheme == "keep" and not arvados.util.keep_locator_pattern.match(pdh):
+ raise IOError(errno.EINVAL, "Invalid Keep locator", base_url)
+
+ if urlsp.path.startswith("/"):
+ baseparts = []
+ urlparts.pop(0)
+
+ if baseparts and urlsp.path:
+ baseparts.pop()
+
+ path = "/".join([pdh] + baseparts + urlparts)
+ return urlparse.urlunsplit((basesp.scheme, "", path, "", urlsp.fragment))
+
+ return super(CollectionFetcher, self).urljoin(base_url, url)
+
+workflow_uuid_pattern = re.compile(r'[a-z0-9]{5}-7fd4e-[a-z0-9]{15}')
+pipeline_template_uuid_pattern = re.compile(r'[a-z0-9]{5}-p5p6p-[a-z0-9]{15}')
+
+def collectionResolver(api_client, document_loader, uri):
+ if workflow_uuid_pattern.match(uri):
+ return "arvwf:%s#main" % (uri)
+
+ if pipeline_template_uuid_pattern.match(uri):
+ pt = api_client.pipeline_templates().get(uuid=uri).execute()
+ return "keep:" + pt["components"].values()[0]["script_parameters"]["cwl:tool"]
+
+ p = uri.split("/")
+ if arvados.util.keep_locator_pattern.match(p[0]):
+ return "keep:%s" % (uri)
+
+ if arvados.util.collection_uuid_pattern.match(p[0]):
+ return "keep:%s%s" % (api_client.collections().
+ get(uuid=p[0]).execute()["portable_data_hash"],
+ uri[len(p[0]):])
+
+ return cwltool.resolver.tool_resolver(document_loader, uri)
import arvados.commands.run
import arvados.collection
+from schema_salad.sourceline import SourceLine
+
from cwltool.pathmapper import PathMapper, MapperEnt, abspath, adjustFileObjs, adjustDirObjs
from cwltool.workflow import WorkflowException
# mount.
ab = abspath(src, self.input_basedir)
st = arvados.commands.run.statfile("", ab, fnPattern="keep:%s/%s")
- if isinstance(st, arvados.commands.run.UploadFile):
- uploadfiles.add((src, ab, st))
- elif isinstance(st, arvados.commands.run.ArvFile):
- self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % st.fn[5:], "File")
- elif src.startswith("_:"):
- if "contents" in srcobj:
- pass
+ with SourceLine(srcobj, "location", WorkflowException):
+ if isinstance(st, arvados.commands.run.UploadFile):
+ uploadfiles.add((src, ab, st))
+ elif isinstance(st, arvados.commands.run.ArvFile):
+ self._pathmap[src] = MapperEnt(st.fn, self.collection_pattern % st.fn[5:], "File")
+ elif src.startswith("_:"):
+ if "contents" in srcobj:
+ pass
+ else:
+ raise WorkflowException("File literal '%s' is missing contents" % src)
+ elif src.startswith("arvwf:"):
+ self._pathmap[src] = MapperEnt(src, src, "File")
else:
- raise WorkflowException("File literal '%s' is missing contents" % src)
- else:
- raise WorkflowException("Input file path '%s' is invalid" % st)
+ raise WorkflowException("Input file path '%s' is invalid" % st)
if "secondaryFiles" in srcobj:
for l in srcobj["secondaryFiles"]:
self.visit(l, uploadfiles)
with c.open(path + "/" + obj["basename"], "w") as f:
f.write(obj["contents"].encode("utf-8"))
else:
- raise WorkflowException("Don't know what to do with '%s'" % obj["location"])
+ raise SourceLine(obj, "location", WorkflowException).makeError("Don't know what to do with '%s'" % obj["location"])
def setup(self, referenced_files, basedir):
# type: (List[Any], unicode) -> None
import re
from cStringIO import StringIO
+from schema_salad.sourceline import SourceLine
+
import cwltool.draft2tool
from cwltool.draft2tool import CommandLineTool
import cwltool.workflow
from .arvdocker import arv_docker_get_image
from .pathmapper import ArvPathMapper
from ._version import __version__
+from . import done
logger = logging.getLogger('arvados.cwl-runner')
loaded = set()
def loadref(b, u):
- joined = urlparse.urljoin(b, u)
+ joined = document_loader.fetcher.urljoin(b, u)
defrg, _ = urlparse.urldefrag(joined)
if defrg not in loaded:
loaded.add(defrg)
sc = scandeps(uri, scanobj,
loadref_fields,
set(("$include", "$schemas", "location")),
- loadref)
+ loadref, urljoin=document_loader.fetcher.urljoin)
normalizeFilesDirs(sc)
if docker_req:
if docker_req.get("dockerOutputDirectory"):
# TODO: can be supported by containers API, but not jobs API.
- raise UnsupportedRequirement("Option 'dockerOutputDirectory' of DockerRequirement not supported.")
+ raise SourceLine(docker_req, "dockerOutputDirectory", UnsupportedRequirement).makeError(
+ "Option 'dockerOutputDirectory' of DockerRequirement not supported.")
arv_docker_get_image(arvrunner.api, docker_req, True, arvrunner.project_uuid)
elif isinstance(tool, cwltool.workflow.Workflow):
for s in tool.steps:
return img
class Runner(object):
- def __init__(self, runner, tool, job_order, enable_reuse, output_name, output_tags):
+ def __init__(self, runner, tool, job_order, enable_reuse,
+ output_name, output_tags, submit_runner_ram=0,
+ name=None, on_error=None):
self.arvrunner = runner
self.tool = tool
self.job_order = job_order
self.final_output = None
self.output_name = output_name
self.output_tags = output_tags
+ self.name = name
+ self.on_error = on_error
+
+ if submit_runner_ram:
+ self.submit_runner_ram = submit_runner_ram
+ else:
+ self.submit_runner_ram = 1024
+
+ if self.submit_runner_ram <= 0:
+ raise Exception("Value of --submit-runner-ram must be greater than zero")
def update_pipeline_component(self, record):
pass
def arvados_job_spec(self, *args, **kwargs):
- self.name = os.path.basename(self.tool.tool["id"])
+ if self.name is None:
+ self.name = self.tool.tool.get("label") or os.path.basename(self.tool.tool["id"])
+
+ # Need to filter this out, gets added by cwltool when providing
+ # parameters on the command line.
+ if "job_order" in self.job_order:
+ del self.job_order["job_order"]
+
workflowmapper = upload_instance(self.arvrunner, self.name, self.tool, self.job_order)
adjustDirObjs(self.job_order, trim_listing)
return workflowmapper
def done(self, record):
- if record["state"] == "Complete":
- if record.get("exit_code") is not None:
- if record["exit_code"] == 33:
- processStatus = "UnsupportedRequirement"
- elif record["exit_code"] == 0:
- processStatus = "success"
+ try:
+ if record["state"] == "Complete":
+ if record.get("exit_code") is not None:
+ if record["exit_code"] == 33:
+ processStatus = "UnsupportedRequirement"
+ elif record["exit_code"] == 0:
+ processStatus = "success"
+ else:
+ processStatus = "permanentFail"
else:
- processStatus = "permanentFail"
+ processStatus = "success"
else:
- processStatus = "success"
- else:
- processStatus = "permanentFail"
+ processStatus = "permanentFail"
- outputs = {}
- try:
- try:
- self.final_output = record["output"]
- outc = arvados.collection.CollectionReader(self.final_output,
+ outputs = {}
+
+ if processStatus == "permanentFail":
+ logc = arvados.collection.CollectionReader(record["log"],
api_client=self.arvrunner.api,
keep_client=self.arvrunner.keep_client,
num_retries=self.arvrunner.num_retries)
+ done.logtail(logc, logger, "%s error log:" % self.arvrunner.label(self), maxlen=40)
+
+ self.final_output = record["output"]
+ outc = arvados.collection.CollectionReader(self.final_output,
+ api_client=self.arvrunner.api,
+ keep_client=self.arvrunner.keep_client,
+ num_retries=self.arvrunner.num_retries)
+ if "cwl.output.json" in outc:
with outc.open("cwl.output.json") as f:
- outputs = json.load(f)
- def keepify(fileobj):
- path = fileobj["location"]
- if not path.startswith("keep:"):
- fileobj["location"] = "keep:%s/%s" % (record["output"], path)
- adjustFileObjs(outputs, keepify)
- adjustDirObjs(outputs, keepify)
- except Exception as e:
- logger.exception("While getting final output object: %s", e)
+ if f.size() > 0:
+ outputs = json.load(f)
+ def keepify(fileobj):
+ path = fileobj["location"]
+ if not path.startswith("keep:"):
+ fileobj["location"] = "keep:%s/%s" % (record["output"], path)
+ adjustFileObjs(outputs, keepify)
+ adjustDirObjs(outputs, keepify)
+ except Exception as e:
+ logger.exception("[%s] While getting final output object: %s", self.name, e)
+ self.arvrunner.output_callback({}, "permanentFail")
+ else:
self.arvrunner.output_callback(outputs, processStatus)
finally:
- del self.arvrunner.processes[record["uuid"]]
+ if record["uuid"] in self.arvrunner.processes:
+ del self.arvrunner.processes[record["uuid"]]
'bin/cwl-runner',
'bin/arvados-cwl-runner'
],
- # Make sure to update arvados/build/run-build-packages.sh as well
- # when updating the cwltool version pin.
+ # Note that arvados/build/run-build-packages.sh looks at this
+ # file to determine what version of cwltool and schema-salad to build.
install_requires=[
- 'cwltool==1.0.20161122201220',
- 'arvados-python-client>=0.1.20160826210445'
+ 'cwltool==1.0.20170105144051',
+ 'schema-salad==2.1.20161227191302',
+ 'ruamel.yaml==0.13.7',
+ 'arvados-python-client>=0.1.20160826210445',
+ 'setuptools'
],
data_files=[
('share/doc/arvados-cwl-runner', ['LICENSE-2.0.txt', 'README.rst']),
import functools
import cwltool.process
from schema_salad.ref_resolver import Loader
+from schema_salad.sourceline import cmap
-from schema_salad.ref_resolver import Loader
+from .matcher import JsonDiffMatcher
if not os.getenv('ARVADOS_DEBUG'):
logging.getLogger('arvados.cwl-runner').setLevel(logging.WARN)
document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")
- tool = {
+ tool = cmap({
"inputs": [],
"outputs": [],
"baseCommand": "ls",
"arguments": [{"valueFrom": "$(runtime.outdir)"}]
- }
+ })
make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess, api_client=runner.api)
arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers", avsc_names=avsc_names,
basedir="", make_fs_access=make_fs_access, loader=Loader({}))
make_fs_access=make_fs_access, tmpdir="/tmp"):
j.run(enable_reuse=enable_reuse)
runner.api.container_requests().create.assert_called_with(
- body={
+ body=JsonDiffMatcher({
'environment': {
'HOME': '/var/spool/cwl',
'TMPDIR': '/tmp'
'container_image': '99999999999999999999999999999993+99',
'command': ['ls', '/var/spool/cwl'],
'cwd': '/var/spool/cwl',
- 'scheduling_parameters': {}
- })
+ 'scheduling_parameters': {},
+ 'properties': {},
+ }))
# The test passes some fields in builder.resources
# For the remaining fields, the defaults will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
runner.api.collections().get().execute.return_value = {
"portable_data_hash": "99999999999999999999999999999993+99"}
- tool = {
+ tool = cmap({
"inputs": [],
"outputs": [],
"hints": [{
"partition": "blurb"
}],
"baseCommand": "ls"
- }
+ })
make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess, api_client=runner.api)
arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="containers",
avsc_names=avsc_names, make_fs_access=make_fs_access,
'cwd': '/var/spool/cwl',
'scheduling_parameters': {
'partitions': ['blurb']
- }
+ },
+ 'properties': {}
}
call_body = call_kwargs.get('body', None)
runner.num_retries = 0
runner.ignore_docker_for_reuse = False
- col().open.return_value = []
- api.collections().list().execute.side_effect = ({"items": []},
- {"items": [{"manifest_text": "XYZ"}]})
-
- arvjob = arvados_cwl.ArvadosContainer(runner)
- arvjob.name = "testjob"
- arvjob.builder = mock.MagicMock()
- arvjob.output_callback = mock.MagicMock()
- arvjob.collect_outputs = mock.MagicMock()
- arvjob.successCodes = [0]
- arvjob.outdir = "/var/spool/cwl"
-
- arvjob.done({
- "state": "Complete",
- "output": "99999999999999999999999999999993+99",
- "log": "99999999999999999999999999999994+99",
- "uuid": "zzzzz-8i9sb-zzzzzzzzzzzzzzz",
- "exit_code": 0
- })
-
- api.collections().list.assert_has_calls([
- mock.call(),
- mock.call(filters=[['owner_uuid', '=', 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'],
- ['portable_data_hash', '=', '99999999999999999999999999999993+99'],
- ['name', '=', 'Output 9999999 of testjob']]),
- mock.call().execute(num_retries=0),
- mock.call(limit=1, filters=[['portable_data_hash', '=', '99999999999999999999999999999993+99']],
- select=['manifest_text']),
- mock.call().execute(num_retries=0)])
-
- api.collections().create.assert_called_with(
- ensure_unique_name=True,
- body={'portable_data_hash': '99999999999999999999999999999993+99',
- 'manifest_text': 'XYZ',
- 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
- 'name': 'Output 9999999 of testjob'})
-
- @mock.patch("arvados.collection.Collection")
- def test_done_use_existing_collection(self, col):
- api = mock.MagicMock()
-
- runner = mock.MagicMock()
- runner.api = api
- runner.project_uuid = "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
- runner.num_retries = 0
+ runner.api.containers().get().execute.return_value = {"state":"Complete",
+ "output": "abc+123",
+ "exit_code": 0}
col().open.return_value = []
- api.collections().list().execute.side_effect = ({"items": [{"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz2"}]},)
arvjob = arvados_cwl.ArvadosContainer(runner)
arvjob.name = "testjob"
arvjob.successCodes = [0]
arvjob.outdir = "/var/spool/cwl"
+ arvjob.collect_outputs.return_value = {"out": "stuff"}
+
arvjob.done({
- "state": "Complete",
- "output": "99999999999999999999999999999993+99",
- "log": "99999999999999999999999999999994+99",
- "uuid": "zzzzz-8i9sb-zzzzzzzzzzzzzzz",
- "exit_code": 0
+ "state": "Final",
+ "log_uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz1",
+ "output_uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz2",
+ "uuid": "zzzzz-xvhdp-zzzzzzzzzzzzzzz",
+ "container_uuid": "zzzzz-8i9sb-zzzzzzzzzzzzzzz"
})
- api.collections().list.assert_has_calls([
- mock.call(),
- mock.call(filters=[['owner_uuid', '=', 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'],
- ['portable_data_hash', '=', '99999999999999999999999999999993+99'],
- ['name', '=', 'Output 9999999 of testjob']]),
- mock.call().execute(num_retries=0)])
-
self.assertFalse(api.collections().create.called)
+
+ arvjob.collect_outputs.assert_called_with("keep:abc+123")
+ arvjob.output_callback.assert_called_with({"out": "stuff"}, "success")
import arvados_cwl
import cwltool.process
from schema_salad.ref_resolver import Loader
+from schema_salad.sourceline import cmap
from .mock_discovery import get_rootDesc
+from .matcher import JsonDiffMatcher
if not os.getenv('ARVADOS_DEBUG'):
logging.getLogger('arvados.cwl-runner').setLevel(logging.WARN)
list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
runner.api.collections().get().execute.return_vaulue = {"portable_data_hash": "99999999999999999999999999999993+99"}
- tool = {
+ tool = cmap({
"inputs": [],
"outputs": [],
"baseCommand": "ls",
"arguments": [{"valueFrom": "$(runtime.outdir)"}]
- }
+ })
make_fs_access=functools.partial(arvados_cwl.CollectionFsAccess, api_client=runner.api)
arvtool = arvados_cwl.ArvadosCommandTool(runner, tool, work_api="jobs", avsc_names=avsc_names,
basedir="", make_fs_access=make_fs_access, loader=Loader({}))
for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
j.run(enable_reuse=enable_reuse)
runner.api.jobs().create.assert_called_with(
- body={
+ body=JsonDiffMatcher({
'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
'runtime_constraints': {},
'script_parameters': {
}],
},
'script_version': 'master',
- 'minimum_script_version': '9e5b98e8f5f4727856b53447191f9c06e3da2ba6',
+ 'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
'repository': 'arvados',
'script': 'crunchrunner',
'runtime_constraints': {
'min_ram_mb_per_node': 1024,
'min_scratch_mb_per_node': 2048 # tmpdirSize + outdirSize
}
- },
+ }),
find_or_create=enable_reuse,
filters=[['repository', '=', 'arvados'],
['script', '=', 'crunchrunner'],
- ['script_version', 'in git', '9e5b98e8f5f4727856b53447191f9c06e3da2ba6'],
+ ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
['docker_image_locator', 'in docker', 'arvados/jobs:'+arvados_cwl.__version__]]
)
for j in arvtool.job({}, mock.MagicMock(), basedir="", make_fs_access=make_fs_access):
j.run()
runner.api.jobs().create.assert_called_with(
- body={
+ body=JsonDiffMatcher({
'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
'runtime_constraints': {},
'script_parameters': {
}]
},
'script_version': 'master',
- 'minimum_script_version': '9e5b98e8f5f4727856b53447191f9c06e3da2ba6',
+ 'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
'repository': 'arvados',
'script': 'crunchrunner',
'runtime_constraints': {
'min_scratch_mb_per_node': 5024, # tmpdirSize + outdirSize
'keep_cache_mb_per_task': 512
}
- },
+ }),
find_or_create=True,
filters=[['repository', '=', 'arvados'],
['script', '=', 'crunchrunner'],
- ['script_version', 'in git', '9e5b98e8f5f4727856b53447191f9c06e3da2ba6'],
+ ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
['docker_image_locator', 'in docker', 'arvados/jobs:'+arvados_cwl.__version__]])
@mock.patch("arvados.collection.CollectionReader")
arvjob.builder = mock.MagicMock()
arvjob.output_callback = mock.MagicMock()
arvjob.collect_outputs = mock.MagicMock()
+ arvjob.collect_outputs.return_value = {"out": "stuff"}
arvjob.done({
"state": "Complete",
'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
'name': 'Output 9999999 of testjob'})
+ arvjob.output_callback.assert_called_with({"out": "stuff"}, "success")
+
@mock.patch("arvados.collection.CollectionReader")
def test_done_use_existing_collection(self, reader):
api = mock.MagicMock()
arvjob.builder = mock.MagicMock()
arvjob.output_callback = mock.MagicMock()
arvjob.collect_outputs = mock.MagicMock()
+ arvjob.collect_outputs.return_value = {"out": "stuff"}
arvjob.done({
"state": "Complete",
self.assertFalse(api.collections().create.called)
+ arvjob.output_callback.assert_called_with({"out": "stuff"}, "success")
+
class TestWorkflow(unittest.TestCase):
# The test passes no builder.resources
subwf = f.read()
runner.api.jobs().create.assert_called_with(
- body={
- 'minimum_script_version': '9e5b98e8f5f4727856b53447191f9c06e3da2ba6',
+ body=JsonDiffMatcher({
+ 'minimum_script_version': 'a3f2cb186e437bfce0031b024b2157b73ed2717d',
'repository': 'arvados',
'script_version': 'master',
'script': 'crunchrunner',
'docker_image': 'arvados/jobs:'+arvados_cwl.__version__,
'min_ram_mb_per_node': 1024
},
- 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'},
+ 'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'}),
filters=[['repository', '=', 'arvados'],
['script', '=', 'crunchrunner'],
- ['script_version', 'in git', '9e5b98e8f5f4727856b53447191f9c06e3da2ba6'],
+ ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
['docker_image_locator', 'in docker', 'arvados/jobs:'+arvados_cwl.__version__]],
find_or_create=True)
mockcollection().open().__enter__().write.assert_has_calls([mock.call(subwf)])
- mockcollection().open().__enter__().write.assert_has_calls([mock.call('{sleeptime: 5}')])
+ mockcollection().open().__enter__().write.assert_has_calls([mock.call('sleeptime: 5')])
def test_default_work_api(self):
arvados_cwl.add_arv_hints()
}
stubs.expect_job_spec = {
'runtime_constraints': {
- 'docker_image': 'arvados/jobs:'+arvados_cwl.__version__
+ 'docker_image': 'arvados/jobs:'+arvados_cwl.__version__,
+ 'min_ram_mb_per_node': 1024
},
'script_parameters': {
'x': {
'99999999999999999999999999999991+99/wf/submit_wf.cwl'
},
'repository': 'arvados',
- 'script_version': arvados_cwl.__version__,
+ 'script_version': 'master',
+ 'minimum_script_version': '570509ab4d2ef93d870fd2b1f2eab178afb1bad9',
'script': 'cwl-runner'
}
stubs.pipeline_component = stubs.expect_job_spec.copy()
'owner_uuid': None,
"components": {
"cwl-runner": {
- 'runtime_constraints': {'docker_image': 'arvados/jobs:'+arvados_cwl.__version__},
+ 'runtime_constraints': {'docker_image': 'arvados/jobs:'+arvados_cwl.__version__, 'min_ram_mb_per_node': 1024},
'script_parameters': {
'y': {"value": {'basename': '99999999999999999999999999999998+99', 'location': 'keep:99999999999999999999999999999998+99', 'class': 'Directory'}},
'x': {"value": {'basename': 'blorp.txt', 'class': 'File', 'location': 'keep:99999999999999999999999999999994+99/blorp.txt'}},
{'basename': 'renamed.txt', 'class': 'File', 'location': 'keep:99999999999999999999999999999998+99/file1.txt'}
]}},
'cwl:tool': '99999999999999999999999999999991+99/wf/submit_wf.cwl',
- 'arv:enable_reuse': True
+ 'arv:enable_reuse': True,
+ 'arv:on_error': 'continue'
},
'repository': 'arvados',
- 'script_version': arvados_cwl.__version__,
+ 'script_version': 'master',
+ 'minimum_script_version': '570509ab4d2ef93d870fd2b1f2eab178afb1bad9',
'script': 'cwl-runner',
'job': {'state': 'Queued', 'uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'}
}
'path': '/var/spool/cwl/cwl.output.json',
'kind': 'file'
},
- '/var/lib/cwl/job/cwl.input.json': {
- 'portable_data_hash': 'd20d7cddd1984f105dd3702c7f125afb+60/cwl.input.json',
- 'kind': 'collection'
+ '/var/lib/cwl/cwl.input.json': {
+ 'kind': 'json',
+ 'content': {
+ 'y': {'basename': '99999999999999999999999999999998+99', 'location': 'keep:99999999999999999999999999999998+99', 'class': 'Directory'},
+ 'x': {'basename': u'blorp.txt', 'class': 'File', 'location': u'keep:99999999999999999999999999999994+99/blorp.txt'},
+ 'z': {'basename': 'anonymous', 'class': 'Directory', 'listing': [
+ {'basename': 'renamed.txt', 'class': 'File', 'location': 'keep:99999999999999999999999999999998+99/file1.txt'}
+ ]}
+ },
+ 'kind': 'json'
}
},
'state': 'Committed',
'owner_uuid': None,
- 'command': ['arvados-cwl-runner', '--local', '--api=containers', '--enable-reuse', '/var/lib/cwl/workflow/submit_wf.cwl', '/var/lib/cwl/job/cwl.input.json'],
+ 'command': ['arvados-cwl-runner', '--local', '--api=containers', '--no-log-timestamps',
+ '--enable-reuse', '--on-error=continue',
+ '/var/lib/cwl/workflow/submit_wf.cwl', '/var/lib/cwl/cwl.input.json'],
'name': 'submit_wf.cwl',
'container_image': 'arvados/jobs:'+arvados_cwl.__version__,
'output_path': '/var/spool/cwl',
'runtime_constraints': {
'API': True,
'vcpus': 1,
- 'ram': 268435456
- }
+ 'ram': 1024*1024*1024
+ },
+ "properties": {}
}
stubs.expect_workflow_uuid = "zzzzz-7fd4e-zzzzzzzzzzzzzzz"
expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
stubs.api.pipeline_instances().create.assert_called_with(
- body=expect_pipeline)
+ body=JsonDiffMatcher(expect_pipeline))
+ self.assertEqual(capture_stdout.getvalue(),
+ stubs.expect_pipeline_uuid + '\n')
+
+ @mock.patch("time.sleep")
+ @stubs
+ def test_submit_on_error(self, stubs, tm):
+ capture_stdout = cStringIO.StringIO()
+ exited = arvados_cwl.main(
+ ["--submit", "--no-wait", "--debug", "--on-error=stop",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ capture_stdout, sys.stderr, api_client=stubs.api)
+ self.assertEqual(exited, 0)
+
+ stubs.expect_pipeline_instance["components"]["cwl-runner"]["script_parameters"]["arv:on_error"] = "stop"
+
+ expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
+ stubs.api.pipeline_instances().create.assert_called_with(
+ body=JsonDiffMatcher(expect_pipeline))
self.assertEqual(capture_stdout.getvalue(),
stubs.expect_pipeline_uuid + '\n')
+
+ @mock.patch("time.sleep")
+ @stubs
+ def test_submit_runner_ram(self, stubs, tm):
+ capture_stdout = cStringIO.StringIO()
+ exited = arvados_cwl.main(
+ ["--submit", "--no-wait", "--debug", "--submit-runner-ram=2048",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ capture_stdout, sys.stderr, api_client=stubs.api)
+ self.assertEqual(exited, 0)
+
+ stubs.expect_pipeline_instance["components"]["cwl-runner"]["runtime_constraints"]["min_ram_mb_per_node"] = 2048
+
+ expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
+ stubs.api.pipeline_instances().create.assert_called_with(
+ body=JsonDiffMatcher(expect_pipeline))
+ self.assertEqual(capture_stdout.getvalue(),
+ stubs.expect_pipeline_uuid + '\n')
+
+
+ @mock.patch("time.sleep")
+ @stubs
+ def test_submit_invalid_runner_ram(self, stubs, tm):
+ capture_stdout = cStringIO.StringIO()
+ exited = arvados_cwl.main(
+ ["--submit", "--no-wait", "--debug", "--submit-runner-ram=-2048",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ capture_stdout, sys.stderr, api_client=stubs.api)
+ self.assertEqual(exited, 1)
+
@mock.patch("time.sleep")
@stubs
def test_submit_output_name(self, stubs, tm):
stubs.expect_pipeline_instance["components"]["cwl-runner"]["script_parameters"]["arv:output_name"] = output_name
+ expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
+ stubs.api.pipeline_instances().create.assert_called_with(
+ body=JsonDiffMatcher(expect_pipeline))
+ self.assertEqual(capture_stdout.getvalue(),
+ stubs.expect_pipeline_uuid + '\n')
+
+
+ @mock.patch("time.sleep")
+ @stubs
+ def test_submit_pipeline_name(self, stubs, tm):
+ capture_stdout = cStringIO.StringIO()
+ exited = arvados_cwl.main(
+ ["--submit", "--no-wait", "--debug", "--name=hello job 123",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ capture_stdout, sys.stderr, api_client=stubs.api)
+ self.assertEqual(exited, 0)
+
+ stubs.expect_pipeline_instance["name"] = "hello job 123"
+
expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
stubs.api.pipeline_instances().create.assert_called_with(
body=expect_pipeline)
expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
stubs.api.pipeline_instances().create.assert_called_with(
- body=expect_pipeline)
+ body=JsonDiffMatcher(expect_pipeline))
self.assertEqual(capture_stdout.getvalue(),
stubs.expect_pipeline_uuid + '\n')
expect_pipeline = copy.deepcopy(stubs.expect_pipeline_instance)
expect_pipeline["owner_uuid"] = project_uuid
stubs.api.pipeline_instances().create.assert_called_with(
- body=expect_pipeline)
+ body=JsonDiffMatcher(expect_pipeline))
@stubs
def test_submit_container(self, stubs):
expect_container = copy.deepcopy(stubs.expect_container_spec)
stubs.api.container_requests().create.assert_called_with(
- body=expect_container)
+ body=JsonDiffMatcher(expect_container))
self.assertEqual(capture_stdout.getvalue(),
stubs.expect_container_request_uuid + '\n')
except:
logging.exception("")
- stubs.expect_container_spec["command"] = ['arvados-cwl-runner', '--local', '--api=containers', '--disable-reuse', '/var/lib/cwl/workflow/submit_wf.cwl', '/var/lib/cwl/job/cwl.input.json']
+ stubs.expect_container_spec["command"] = ['arvados-cwl-runner', '--local', '--api=containers', '--no-log-timestamps',
+ '--disable-reuse', '--on-error=continue',
+ '/var/lib/cwl/workflow/submit_wf.cwl', '/var/lib/cwl/cwl.input.json']
expect_container = copy.deepcopy(stubs.expect_container_spec)
stubs.api.container_requests().create.assert_called_with(
- body=expect_container)
+ body=JsonDiffMatcher(expect_container))
+ self.assertEqual(capture_stdout.getvalue(),
+ stubs.expect_container_request_uuid + '\n')
+
+
+ @stubs
+ def test_submit_container_on_error(self, stubs):
+ capture_stdout = cStringIO.StringIO()
+ try:
+ exited = arvados_cwl.main(
+ ["--submit", "--no-wait", "--api=containers", "--debug", "--on-error=stop",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+ self.assertEqual(exited, 0)
+ except:
+ logging.exception("")
+
+ stubs.expect_container_spec["command"] = ['arvados-cwl-runner', '--local', '--api=containers', '--no-log-timestamps',
+ '--enable-reuse', '--on-error=stop',
+ '/var/lib/cwl/workflow/submit_wf.cwl', '/var/lib/cwl/cwl.input.json']
+
+ expect_container = copy.deepcopy(stubs.expect_container_spec)
+ stubs.api.container_requests().create.assert_called_with(
+ body=JsonDiffMatcher(expect_container))
self.assertEqual(capture_stdout.getvalue(),
stubs.expect_container_request_uuid + '\n')
except:
logging.exception("")
- stubs.expect_container_spec["command"] = ['arvados-cwl-runner', '--local', '--api=containers', "--output-name="+output_name, '--enable-reuse', '/var/lib/cwl/workflow/submit_wf.cwl', '/var/lib/cwl/job/cwl.input.json']
+ stubs.expect_container_spec["command"] = ['arvados-cwl-runner', '--local', '--api=containers', '--no-log-timestamps',
+ "--output-name="+output_name, '--enable-reuse', '--on-error=continue',
+ '/var/lib/cwl/workflow/submit_wf.cwl', '/var/lib/cwl/cwl.input.json']
+ stubs.expect_container_spec["output_name"] = output_name
expect_container = copy.deepcopy(stubs.expect_container_spec)
stubs.api.container_requests().create.assert_called_with(
- body=expect_container)
+ body=JsonDiffMatcher(expect_container))
self.assertEqual(capture_stdout.getvalue(),
stubs.expect_container_request_uuid + '\n')
except:
logging.exception("")
- stubs.expect_container_spec["command"] = ['arvados-cwl-runner', '--local', '--api=containers', "--output-tags="+output_tags, '--enable-reuse', '/var/lib/cwl/workflow/submit_wf.cwl', '/var/lib/cwl/job/cwl.input.json']
+ stubs.expect_container_spec["command"] = ['arvados-cwl-runner', '--local', '--api=containers', '--no-log-timestamps',
+ "--output-tags="+output_tags, '--enable-reuse', '--on-error=continue',
+ '/var/lib/cwl/workflow/submit_wf.cwl', '/var/lib/cwl/cwl.input.json']
+
+ expect_container = copy.deepcopy(stubs.expect_container_spec)
+ stubs.api.container_requests().create.assert_called_with(
+ body=JsonDiffMatcher(expect_container))
+ self.assertEqual(capture_stdout.getvalue(),
+ stubs.expect_container_request_uuid + '\n')
+
+ @stubs
+ def test_submit_container_runner_ram(self, stubs):
+ capture_stdout = cStringIO.StringIO()
+ try:
+ exited = arvados_cwl.main(
+ ["--submit", "--no-wait", "--api=containers", "--debug", "--submit-runner-ram=2048",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+ self.assertEqual(exited, 0)
+ except:
+ logging.exception("")
+
+ stubs.expect_container_spec["runtime_constraints"]["ram"] = 2048*1024*1024
+
+ expect_container = copy.deepcopy(stubs.expect_container_spec)
+ stubs.api.container_requests().create.assert_called_with(
+ body=JsonDiffMatcher(expect_container))
+ self.assertEqual(capture_stdout.getvalue(),
+ stubs.expect_container_request_uuid + '\n')
+
+ @mock.patch("arvados.collection.CollectionReader")
+ @mock.patch("time.sleep")
+ @stubs
+ def test_submit_file_keepref(self, stubs, tm, collectionReader):
+ capture_stdout = cStringIO.StringIO()
+ exited = arvados_cwl.main(
+ ["--submit", "--no-wait", "--api=containers", "--debug",
+ "tests/wf/submit_keepref_wf.cwl"],
+ capture_stdout, sys.stderr, api_client=stubs.api)
+ self.assertEqual(exited, 0)
+
+
+ @mock.patch("arvados.collection.CollectionReader")
+ @mock.patch("time.sleep")
+ @stubs
+ def test_submit_keepref(self, stubs, tm, reader):
+ capture_stdout = cStringIO.StringIO()
+
+ with open("tests/wf/expect_arvworkflow.cwl") as f:
+ reader().open().__enter__().read.return_value = f.read()
+
+ exited = arvados_cwl.main(
+ ["--submit", "--no-wait", "--api=containers", "--debug",
+ "keep:99999999999999999999999999999994+99/expect_arvworkflow.cwl#main", "-x", "XxX"],
+ capture_stdout, sys.stderr, api_client=stubs.api)
+ self.assertEqual(exited, 0)
+
+ expect_container = {
+ 'priority': 1,
+ 'mounts': {
+ '/var/spool/cwl': {
+ 'writable': True,
+ 'kind': 'collection'
+ },
+ 'stdout': {
+ 'path': '/var/spool/cwl/cwl.output.json',
+ 'kind': 'file'
+ },
+ '/var/lib/cwl/workflow': {
+ 'portable_data_hash': '99999999999999999999999999999994+99',
+ 'kind': 'collection'
+ },
+ '/var/lib/cwl/cwl.input.json': {
+ 'content': {
+ 'x': 'XxX'
+ },
+ 'kind': 'json'
+ }
+ }, 'state': 'Committed',
+ 'owner_uuid': None,
+ 'output_path': '/var/spool/cwl',
+ 'name': 'expect_arvworkflow.cwl#main',
+ 'container_image': 'arvados/jobs:'+arvados_cwl.__version__,
+ 'command': ['arvados-cwl-runner', '--local', '--api=containers', '--no-log-timestamps',
+ '--enable-reuse', '--on-error=continue',
+ '/var/lib/cwl/workflow/expect_arvworkflow.cwl#main', '/var/lib/cwl/cwl.input.json'],
+ 'cwd': '/var/spool/cwl',
+ 'runtime_constraints': {
+ 'API': True,
+ 'vcpus': 1,
+ 'ram': 1073741824
+ },
+ "properties": {}
+ }
+
+ stubs.api.container_requests().create.assert_called_with(
+ body=JsonDiffMatcher(expect_container))
+ self.assertEqual(capture_stdout.getvalue(),
+ stubs.expect_container_request_uuid + '\n')
+
+ @mock.patch("time.sleep")
+ @stubs
+ def test_submit_arvworkflow(self, stubs, tm):
+ capture_stdout = cStringIO.StringIO()
+
+ with open("tests/wf/expect_arvworkflow.cwl") as f:
+ stubs.api.workflows().get().execute.return_value = {"definition": f.read(), "name": "a test workflow"}
+
+ exited = arvados_cwl.main(
+ ["--submit", "--no-wait", "--api=containers", "--debug",
+ "962eh-7fd4e-gkbzl62qqtfig37", "-x", "XxX"],
+ capture_stdout, sys.stderr, api_client=stubs.api)
+ self.assertEqual(exited, 0)
+
+ expect_container = {
+ 'priority': 1,
+ 'mounts': {
+ '/var/spool/cwl': {
+ 'writable': True,
+ 'kind': 'collection'
+ },
+ 'stdout': {
+ 'path': '/var/spool/cwl/cwl.output.json',
+ 'kind': 'file'
+ },
+ '/var/lib/cwl/workflow.json': {
+ 'kind': 'json',
+ 'json': {
+ 'cwlVersion': 'v1.0',
+ '$graph': [
+ {
+ 'inputs': [
+ {
+ 'inputBinding': {'position': 1},
+ 'type': 'string',
+ 'id': '#submit_tool.cwl/x'}
+ ],
+ 'requirements': [
+ {'dockerPull': 'debian:8', 'class': 'DockerRequirement'}
+ ],
+ 'id': '#submit_tool.cwl',
+ 'outputs': [],
+ 'baseCommand': 'cat',
+ 'class': 'CommandLineTool'
+ }, {
+ 'id': '#main',
+ 'inputs': [
+ {'type': 'string', 'id': '#main/x'}
+ ],
+ 'steps': [
+ {'in': [{'source': '#main/x', 'id': '#main/step1/x'}],
+ 'run': '#submit_tool.cwl',
+ 'id': '#main/step1',
+ 'out': []}
+ ],
+ 'class': 'Workflow',
+ 'outputs': []
+ }
+ ]
+ }
+ },
+ '/var/lib/cwl/cwl.input.json': {
+ 'content': {
+ 'x': 'XxX'
+ },
+ 'kind': 'json'
+ }
+ }, 'state': 'Committed',
+ 'owner_uuid': None,
+ 'output_path': '/var/spool/cwl',
+ 'name': 'a test workflow',
+ 'container_image': 'arvados/jobs:'+arvados_cwl.__version__,
+ 'command': ['arvados-cwl-runner', '--local', '--api=containers', '--no-log-timestamps',
+ '--enable-reuse', '--on-error=continue',
+ '/var/lib/cwl/workflow.json#main', '/var/lib/cwl/cwl.input.json'],
+ 'cwd': '/var/spool/cwl',
+ 'runtime_constraints': {
+ 'API': True,
+ 'vcpus': 1,
+ 'ram': 1073741824
+ },
+ "properties": {
+ "template_uuid": "962eh-7fd4e-gkbzl62qqtfig37"
+ }
+ }
+
+ stubs.api.container_requests().create.assert_called_with(
+ body=JsonDiffMatcher(expect_container))
+ self.assertEqual(capture_stdout.getvalue(),
+ stubs.expect_container_request_uuid + '\n')
+
+
+ @stubs
+ def test_submit_container_name(self, stubs):
+ capture_stdout = cStringIO.StringIO()
+ try:
+ exited = arvados_cwl.main(
+ ["--submit", "--no-wait", "--api=containers", "--debug", "--name=hello container 123",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ capture_stdout, sys.stderr, api_client=stubs.api, keep_client=stubs.keep_client)
+ self.assertEqual(exited, 0)
+ except:
+ logging.exception("")
+
+ stubs.expect_container_spec["name"] = "hello container 123"
expect_container = copy.deepcopy(stubs.expect_container_spec)
stubs.api.container_requests().create.assert_called_with(
self.assertEqual(capture_stdout.getvalue(),
stubs.expect_container_request_uuid + '\n')
+
@mock.patch("arvados.commands.keepdocker.find_one_image_hash")
@mock.patch("cwltool.docker.get_image")
@mock.patch("arvados.api")
self.assertEqual("arvados/jobs:"+arvados_cwl.__version__, arvados_cwl.runner.arvados_jobs_image(arvrunner))
class TestCreateTemplate(unittest.TestCase):
- @stubs
- def test_create(self, stubs):
- project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+ existing_template_uuid = "zzzzz-d1hrv-validworkfloyml"
- capture_stdout = cStringIO.StringIO()
-
- exited = arvados_cwl.main(
- ["--create-workflow", "--debug",
- "--project-uuid", project_uuid,
- "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
- capture_stdout, sys.stderr, api_client=stubs.api)
- self.assertEqual(exited, 0)
-
- stubs.api.pipeline_instances().create.refute_called()
- stubs.api.jobs().create.refute_called()
-
- expect_component = copy.deepcopy(stubs.expect_job_spec)
+ def _adjust_script_params(self, expect_component):
expect_component['script_parameters']['x'] = {
'dataclass': 'File',
'required': True,
'required': True,
'type': 'Directory',
}
+
+ @stubs
+ def test_create(self, stubs):
+ project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+
+ capture_stdout = cStringIO.StringIO()
+
+ exited = arvados_cwl.main(
+ ["--create-workflow", "--debug",
+ "--api=jobs",
+ "--project-uuid", project_uuid,
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ capture_stdout, sys.stderr, api_client=stubs.api)
+ self.assertEqual(exited, 0)
+
+ stubs.api.pipeline_instances().create.refute_called()
+ stubs.api.jobs().create.refute_called()
+
+ expect_component = copy.deepcopy(stubs.expect_job_spec)
+ self._adjust_script_params(expect_component)
expect_template = {
"components": {
"submit_wf.cwl": expect_component,
stubs.expect_pipeline_template_uuid + '\n')
+ @stubs
+ def test_create_name(self, stubs):
+ project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+
+ capture_stdout = cStringIO.StringIO()
+
+ exited = arvados_cwl.main(
+ ["--create-workflow", "--debug",
+ "--project-uuid", project_uuid,
+ "--api=jobs",
+ "--name", "testing 123",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ capture_stdout, sys.stderr, api_client=stubs.api)
+ self.assertEqual(exited, 0)
+
+ stubs.api.pipeline_instances().create.refute_called()
+ stubs.api.jobs().create.refute_called()
+
+ expect_component = copy.deepcopy(stubs.expect_job_spec)
+ self._adjust_script_params(expect_component)
+ expect_template = {
+ "components": {
+ "testing 123": expect_component,
+ },
+ "name": "testing 123",
+ "owner_uuid": project_uuid,
+ }
+ stubs.api.pipeline_templates().create.assert_called_with(
+ body=JsonDiffMatcher(expect_template), ensure_unique_name=True)
+
+ self.assertEqual(capture_stdout.getvalue(),
+ stubs.expect_pipeline_template_uuid + '\n')
+
+
+ @stubs
+ def test_update_name(self, stubs):
+ project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+
+ capture_stdout = cStringIO.StringIO()
+
+ exited = arvados_cwl.main(
+ ["--update-workflow", self.existing_template_uuid,
+ "--debug",
+ "--project-uuid", project_uuid,
+ "--api=jobs",
+ "--name", "testing 123",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ capture_stdout, sys.stderr, api_client=stubs.api)
+ self.assertEqual(exited, 0)
+
+ stubs.api.pipeline_instances().create.refute_called()
+ stubs.api.jobs().create.refute_called()
+
+ expect_component = copy.deepcopy(stubs.expect_job_spec)
+ self._adjust_script_params(expect_component)
+ expect_template = {
+ "components": {
+ "testing 123": expect_component,
+ },
+ "name": "testing 123",
+ "owner_uuid": project_uuid,
+ }
+ stubs.api.pipeline_templates().create.refute_called()
+ stubs.api.pipeline_templates().update.assert_called_with(
+ body=JsonDiffMatcher(expect_template), uuid=self.existing_template_uuid)
+
+ self.assertEqual(capture_stdout.getvalue(),
+ self.existing_template_uuid + '\n')
+
+
class TestCreateWorkflow(unittest.TestCase):
existing_workflow_uuid = "zzzzz-7fd4e-validworkfloyml"
expect_workflow = open("tests/wf/expect_packed.cwl").read()
self.assertEqual(capture_stdout.getvalue(),
stubs.expect_workflow_uuid + '\n')
+
+ @stubs
+ def test_create_name(self, stubs):
+ project_uuid = 'zzzzz-j7d0g-zzzzzzzzzzzzzzz'
+
+ capture_stdout = cStringIO.StringIO()
+
+ exited = arvados_cwl.main(
+ ["--create-workflow", "--debug",
+ "--api=containers",
+ "--project-uuid", project_uuid,
+ "--name", "testing 123",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ capture_stdout, sys.stderr, api_client=stubs.api)
+ self.assertEqual(exited, 0)
+
+ stubs.api.pipeline_templates().create.refute_called()
+ stubs.api.container_requests().create.refute_called()
+
+ body = {
+ "workflow": {
+ "owner_uuid": project_uuid,
+ "name": "testing 123",
+ "description": "",
+ "definition": self.expect_workflow,
+ }
+ }
+ stubs.api.workflows().create.assert_called_with(
+ body=JsonDiffMatcher(body))
+
+ self.assertEqual(capture_stdout.getvalue(),
+ stubs.expect_workflow_uuid + '\n')
+
@stubs
def test_incompatible_api(self, stubs):
capture_stderr = cStringIO.StringIO()
self.existing_workflow_uuid + '\n')
+ @stubs
+ def test_update_name(self, stubs):
+ capture_stdout = cStringIO.StringIO()
+
+ exited = arvados_cwl.main(
+ ["--update-workflow", self.existing_workflow_uuid,
+ "--debug", "--name", "testing 123",
+ "tests/wf/submit_wf.cwl", "tests/submit_test_job.json"],
+ capture_stdout, sys.stderr, api_client=stubs.api)
+ self.assertEqual(exited, 0)
+
+ body = {
+ "workflow": {
+ "name": "testing 123",
+ "description": "",
+ "definition": self.expect_workflow,
+ }
+ }
+ stubs.api.workflows().update.assert_called_with(
+ uuid=self.existing_workflow_uuid,
+ body=JsonDiffMatcher(body))
+ self.assertEqual(capture_stdout.getvalue(),
+ self.existing_workflow_uuid + '\n')
+
+
class TestTemplateInputs(unittest.TestCase):
expect_template = {
"components": {
"inputs_test.cwl": {
'runtime_constraints': {
'docker_image': 'arvados/jobs:'+arvados_cwl.__version__,
+ 'min_ram_mb_per_node': 1024
},
'script_parameters': {
'cwl:tool':
},
},
'repository': 'arvados',
- 'script_version': arvados_cwl.__version__,
+ 'script_version': 'master',
+ 'minimum_script_version': '570509ab4d2ef93d870fd2b1f2eab178afb1bad9',
'script': 'cwl-runner',
},
},
@stubs
def test_inputs_empty(self, stubs):
exited = arvados_cwl.main(
- ["--create-template", "--no-wait",
+ ["--create-template",
"tests/wf/inputs_test.cwl", "tests/order/empty_order.json"],
cStringIO.StringIO(), sys.stderr, api_client=stubs.api)
self.assertEqual(exited, 0)
@stubs
def test_inputs(self, stubs):
exited = arvados_cwl.main(
- ["--create-template", "--no-wait",
+ ["--create-template",
"tests/wf/inputs_test.cwl", "tests/order/inputs_test_order.json"],
cStringIO.StringIO(), sys.stderr, api_client=stubs.api)
self.assertEqual(exited, 0)
--- /dev/null
+import functools
+import mock
+import sys
+import unittest
+import json
+import logging
+import os
+
+import arvados
+import arvados.keep
+import arvados.collection
+import arvados_cwl
+
+from arvados_cwl.fsaccess import CollectionFetcher
+
+class TestUrljoin(unittest.TestCase):
+ def test_urljoin(self):
+ """Test path joining for keep references."""
+
+ cf = CollectionFetcher({}, None)
+
+ self.assertEquals("keep:99999999999999999999999999999991+99/hw.py",
+ cf.urljoin("keep:99999999999999999999999999999991+99", "hw.py"))
+
+ self.assertEquals("keep:99999999999999999999999999999991+99/hw.py",
+ cf.urljoin("keep:99999999999999999999999999999991+99/", "hw.py"))
+
+ self.assertEquals("keep:99999999999999999999999999999991+99/hw.py#main",
+ cf.urljoin("keep:99999999999999999999999999999991+99", "hw.py#main"))
+
+ self.assertEquals("keep:99999999999999999999999999999991+99/hw.py#main",
+ cf.urljoin("keep:99999999999999999999999999999991+99/hw.py", "#main"))
+
+ self.assertEquals("keep:99999999999999999999999999999991+99/dir/hw.py#main",
+ cf.urljoin("keep:99999999999999999999999999999991+99/dir/hw.py", "#main"))
+
+ self.assertEquals("keep:99999999999999999999999999999991+99/dir/wh.py",
+ cf.urljoin("keep:99999999999999999999999999999991+99/dir/hw.py", "wh.py"))
+
+ self.assertEquals("keep:99999999999999999999999999999991+99/wh.py",
+ cf.urljoin("keep:99999999999999999999999999999991+99/dir/hw.py", "/wh.py"))
+
+ self.assertEquals("keep:99999999999999999999999999999991+99/wh.py#main",
+ cf.urljoin("keep:99999999999999999999999999999991+99/dir/hw.py", "/wh.py#main"))
+
+ self.assertEquals("keep:99999999999999999999999999999991+99/wh.py",
+ cf.urljoin("keep:99999999999999999999999999999991+99/hw.py#main", "wh.py"))
+
+ self.assertEquals("keep:99999999999999999999999999999992+99",
+ cf.urljoin("keep:99999999999999999999999999999991+99", "keep:99999999999999999999999999999992+99"))
+
+ self.assertEquals("keep:99999999999999999999999999999991+99/dir/wh.py",
+ cf.urljoin("keep:99999999999999999999999999999991+99/dir/", "wh.py"))
+
+ def test_resolver(self):
+ pass
--- /dev/null
+$graph:
+- baseCommand: cat
+ class: CommandLineTool
+ id: '#submit_tool.cwl'
+ inputs:
+ - id: '#submit_tool.cwl/x'
+ inputBinding: {position: 1}
+ type: string
+ outputs: []
+ requirements:
+ - {class: DockerRequirement, dockerPull: 'debian:8'}
+- class: Workflow
+ id: '#main'
+ inputs:
+ - id: '#main/x'
+ type: string
+ outputs: []
+ steps:
+ - id: '#main/step1'
+ in:
+ - {id: '#main/step1/x', source: '#main/x'}
+ out: []
+ run: '#submit_tool.cwl'
+cwlVersion: v1.0
+cwlVersion: v1.0
$graph:
-- baseCommand: cat
- class: CommandLineTool
- id: '#submit_tool.cwl'
+- class: CommandLineTool
+ requirements:
+ - class: DockerRequirement
+ dockerPull: debian:8
inputs:
- - default: {class: File, location: 'keep:99999999999999999999999999999991+99/tool/blub.txt'}
- id: '#submit_tool.cwl/x'
- inputBinding: {position: 1}
+ - id: '#submit_tool.cwl/x'
type: File
+ default:
+ class: File
+ location: keep:99999999999999999999999999999991+99/tool/blub.txt
+ inputBinding:
+ position: 1
outputs: []
- requirements:
- - {class: DockerRequirement, dockerImageId: 'debian:8', dockerPull: 'debian:8'}
+ baseCommand: cat
+ id: '#submit_tool.cwl'
- class: Workflow
- id: '#main'
inputs:
- - default: {basename: blorp.txt, class: File, location: 'keep:99999999999999999999999999999991+99/input/blorp.txt'}
- id: '#main/x'
+ - id: '#main/x'
type: File
- - default: {basename: 99999999999999999999999999999998+99, class: Directory, location: 'keep:99999999999999999999999999999998+99'}
- id: '#main/y'
+ default: {class: File, location: 'keep:99999999999999999999999999999991+99/input/blorp.txt',
+ basename: blorp.txt}
+ - id: '#main/y'
type: Directory
- - default:
- basename: anonymous
- class: Directory
- listing:
- - {basename: renamed.txt, class: File, location: 'keep:99999999999999999999999999999998+99/file1.txt'}
- id: '#main/z'
+ default: {class: Directory, location: 'keep:99999999999999999999999999999998+99',
+ basename: 99999999999999999999999999999998+99}
+ - id: '#main/z'
type: Directory
+ default: {class: Directory, basename: anonymous, listing: [{basename: renamed.txt,
+ class: File, location: 'keep:99999999999999999999999999999998+99/file1.txt'}]}
outputs: []
steps:
- id: '#main/step1'
- {id: '#main/step1/x', source: '#main/x'}
out: []
run: '#submit_tool.cwl'
-cwlVersion: v1.0
+ id: '#main'
+cwlVersion: v1.0
$graph:
- class: Workflow
- hints:
- - {class: 'http://arvados.org/cwl#RunInSingleContainer'}
id: '#main'
inputs:
- - {id: '#main/sleeptime', type: int}
+ - type: int
+ id: '#main/sleeptime'
outputs:
- - {id: '#main/out', outputSource: '#main/sleep1/out', type: string}
- requirements:
- - {class: InlineJavascriptRequirement}
- - {class: ScatterFeatureRequirement}
- - {class: StepInputExpressionRequirement}
- - {class: SubworkflowFeatureRequirement}
+ - type: string
+ outputSource: '#main/sleep1/out'
+ id: '#main/out'
steps:
- - id: '#main/sleep1'
- in:
- - {id: '#main/sleep1/blurb', valueFrom: "${\n return String(inputs.sleeptime)\
- \ + \"b\";\n}\n"}
- - {id: '#main/sleep1/sleeptime', source: '#main/sleeptime'}
+ - in:
+ - valueFrom: |
+ ${
+ return String(inputs.sleeptime) + "b";
+ }
+ id: '#main/sleep1/blurb'
+ - source: '#main/sleeptime'
+ id: '#main/sleep1/sleeptime'
out: ['#main/sleep1/out']
run:
- baseCommand: sleep
class: CommandLineTool
inputs:
- - id: '#main/sleep1/sleeptime'
+ - type: int
inputBinding: {position: 1}
- type: int
+ id: '#main/sleep1/sleeptime'
outputs:
- - id: '#main/sleep1/out'
- outputBinding: {outputEval: out}
- type: string
-cwlVersion: v1.0
\ No newline at end of file
+ - type: string
+ outputBinding:
+ outputEval: out
+ id: '#main/sleep1/out'
+ baseCommand: sleep
+ id: '#main/sleep1'
+ requirements:
+ - {class: InlineJavascriptRequirement}
+ - {class: ScatterFeatureRequirement}
+ - {class: StepInputExpressionRequirement}
+ - {class: SubworkflowFeatureRequirement}
+ hints:
+ - class: http://arvados.org/cwl#RunInSingleContainer
\ No newline at end of file
--- /dev/null
+# Test case for arvados-cwl-runner
+#
+# Used to test whether scanning a workflow file for dependencies
+# (e.g. submit_tool.cwl) and uploading to Keep works as intended.
+
+class: Workflow
+cwlVersion: v1.0
+inputs:
+ x:
+ type: File
+ default:
+ class: File
+ location: keep:99999999999999999999999999999994+99/blorp.txt
+outputs: []
+steps:
+ step1:
+ in:
+ x: x
+ out: []
+ run: ../tool/submit_tool.cwl
--- /dev/null
+# Dockerfile for building an arvados/jobs Docker image from local git tree.
+#
+# Intended for use by developers working on arvados-python-client or
+# arvados-cwl-runner who need to run a crunch job with a custom package
+# version.
+#
+# Use arvados/build/build-dev-docker-jobs-image.sh to build.
+#
+# (This Dockerfile must be located in the arvados/sdk/ directory because
+# of the docker build root.)
+
+FROM debian:jessie
+MAINTAINER Ward Vandewege <ward@curoverse.com>
+
+ENV DEBIAN_FRONTEND noninteractive
+
+RUN apt-get update -q && apt-get install -qy git python-pip python-virtualenv python-dev libcurl4-gnutls-dev libgnutls28-dev nodejs python-pyasn1-modules
+
+RUN pip install -U setuptools
+
+ARG sdk
+ARG runner
+ARG cwltool
+
+ADD python/dist/$sdk /tmp/
+ADD cwl/cwltool_dist/$cwltool /tmp/
+ADD cwl/dist/$runner /tmp/
+
+RUN cd /tmp/arvados-python-client-* && python setup.py install
+RUN if test -d /tmp/cwltool-* ; then cd /tmp/cwltool-* && python setup.py install ; fi
+RUN cd /tmp/arvados-cwl-runner-* && python setup.py install
+
+# Install dependencies and set up system.
+RUN /usr/sbin/adduser --disabled-password \
+ --gecos 'Crunch execution user' crunch && \
+ /usr/bin/install --directory --owner=crunch --group=crunch --mode=0700 /keep /tmp/crunch-src /tmp/crunch-job
+
+USER crunch
// callers who use a Client to initialize an
// arvadosclient.ArvadosClient.)
KeepServiceURIs []string `json:",omitempty"`
+
+ dd *DiscoveryDocument
}
// The default http.Client used by a Client with Insecure==true and
// DiscoveryDocument is the Arvados server's description of itself.
type DiscoveryDocument struct {
- DefaultCollectionReplication int `json:"defaultCollectionReplication"`
- BlobSignatureTTL int64 `json:"blobSignatureTtl"`
+ BasePath string `json:"basePath"`
+ DefaultCollectionReplication int `json:"defaultCollectionReplication"`
+ BlobSignatureTTL int64 `json:"blobSignatureTtl"`
+ Schemas map[string]Schema `json:"schemas"`
+ Resources map[string]Resource `json:"resources"`
+}
+
+type Resource struct {
+ Methods map[string]ResourceMethod `json:"methods"`
+}
+
+type ResourceMethod struct {
+ HTTPMethod string `json:"httpMethod"`
+ Path string `json:"path"`
+ Response MethodResponse `json:"response"`
+}
+
+type MethodResponse struct {
+ Ref string `json:"$ref"`
+}
+
+type Schema struct {
+ UUIDPrefix string `json:"uuidPrefix"`
}
// DiscoveryDocument returns a *DiscoveryDocument. The returned object
// should not be modified: the same object may be returned by
// subsequent calls.
func (c *Client) DiscoveryDocument() (*DiscoveryDocument, error) {
+ if c.dd != nil {
+ return c.dd, nil
+ }
var dd DiscoveryDocument
- return &dd, c.RequestAndDecode(&dd, "GET", "discovery/v1/apis/arvados/v1/rest", nil, nil)
+ err := c.RequestAndDecode(&dd, "GET", "discovery/v1/apis/arvados/v1/rest", nil, nil)
+ if err != nil {
+ return nil, err
+ }
+ c.dd = &dd
+ return c.dd, nil
+}
+
+func (c *Client) modelForUUID(dd *DiscoveryDocument, uuid string) (string, error) {
+ if len(uuid) != 27 {
+ return "", fmt.Errorf("invalid UUID: %q", uuid)
+ }
+ infix := uuid[6:11]
+ var model string
+ for m, s := range dd.Schemas {
+ if s.UUIDPrefix == infix {
+ model = m
+ break
+ }
+ }
+ if model == "" {
+ return "", fmt.Errorf("unrecognized type portion %q in UUID %q", infix, uuid)
+ }
+ return model, nil
+}
+
+func (c *Client) KindForUUID(uuid string) (string, error) {
+ dd, err := c.DiscoveryDocument()
+ if err != nil {
+ return "", err
+ }
+ model, err := c.modelForUUID(dd, uuid)
+ if err != nil {
+ return "", err
+ }
+ return "arvados#" + strings.ToLower(model[:1]) + model[1:], nil
+}
+
+func (c *Client) PathForUUID(method, uuid string) (string, error) {
+ dd, err := c.DiscoveryDocument()
+ if err != nil {
+ return "", err
+ }
+ model, err := c.modelForUUID(dd, uuid)
+ if err != nil {
+ return "", err
+ }
+ var resource string
+ for r, rsc := range dd.Resources {
+ if rsc.Methods["get"].Response.Ref == model {
+ resource = r
+ break
+ }
+ }
+ if resource == "" {
+ return "", fmt.Errorf("no resource for model: %q", model)
+ }
+ m, ok := dd.Resources[resource].Methods[method]
+ if !ok {
+ return "", fmt.Errorf("no method %q for resource %q", method, resource)
+ }
+ path := dd.BasePath + strings.Replace(m.Path, "{uuid}", uuid, -1)
+ if path[0] == '/' {
+ path = path[1:]
+ }
+ return path, nil
}
// Collection is an arvados#collection resource.
type Collection struct {
UUID string `json:"uuid,omitempty"`
- ExpiresAt *time.Time `json:"expires_at,omitempty"`
+ TrashAt *time.Time `json:"trash_at,omitempty"`
ManifestText string `json:"manifest_text,omitempty"`
UnsignedManifestText string `json:"unsigned_manifest_text,omitempty"`
CreatedAt *time.Time `json:"created_at,omitempty"`
ReplicationConfirmed *int `json:"replication_confirmed,omitempty"`
ReplicationConfirmedAt *time.Time `json:"replication_confirmed_at,omitempty"`
ReplicationDesired *int `json:"replication_desired,omitempty"`
+ DeleteAt *time.Time `json:"delete_at,omitempty"`
+ IsTrashed bool `json:"is_trashed,omitempty"`
}
// SizedDigests returns the hash+size part of each data block
--- /dev/null
+package arvados
+
+import (
+ "time"
+)
+
+// Log is an arvados#log record
+type Log struct {
+ ID uint64 `json:"id"`
+ UUID string `json:"uuid"`
+ ObjectUUID string `json:"object_uuid"`
+ ObjectOwnerUUID string `json:"object_owner_uuid"`
+ EventType string `json:"event_type"`
+ EventAt *time.Time `json:"event,omitempty"`
+ Properties map[string]interface{} `json:"properties"`
+ CreatedAt *time.Time `json:"created_at,omitempty"`
+}
+
+// LogList is an arvados#logList resource.
+type LogList struct {
+ Items []Log `json:"items"`
+ ItemsAvailable int `json:"items_available"`
+ Offset int `json:"offset"`
+ Limit int `json:"limit"`
+}
// ResourceListParams expresses which results are requested in a
// list/index API.
type ResourceListParams struct {
- Select []string `json:"select,omitempty"`
- Filters []Filter `json:"filters,omitempty"`
- Limit *int `json:"limit,omitempty"`
- Offset int `json:"offset,omitempty"`
- Order string `json:"order,omitempty"`
+ Select []string `json:"select,omitempty"`
+ Filters []Filter `json:"filters,omitempty"`
+ IncludeTrash bool `json:"include_trash,omitempty"`
+ Limit *int `json:"limit,omitempty"`
+ Offset int `json:"offset,omitempty"`
+ Order string `json:"order,omitempty"`
}
// A Filter restricts the set of records returned by a list/index API.
import (
"bytes"
"crypto/tls"
+ "crypto/x509"
"encoding/json"
"errors"
"fmt"
"io"
+ "io/ioutil"
"net/http"
"net/url"
"os"
Retries int
}
+var CertFiles = []string{
+ "/etc/arvados/ca-certificates.crt",
+ "/etc/ssl/certs/ca-certificates.crt", // Debian/Ubuntu/Gentoo etc.
+ "/etc/pki/tls/certs/ca-bundle.crt", // Fedora/RHEL
+}
+
+// MakeTLSConfig sets up TLS configuration for communicating with Arvados and Keep services.
+func MakeTLSConfig(insecure bool) *tls.Config {
+ tlsconfig := tls.Config{InsecureSkipVerify: insecure}
+
+ if !insecure {
+ // Look for /etc/arvados/ca-certificates.crt in addition to normal system certs.
+ certs := x509.NewCertPool()
+ for _, file := range CertFiles {
+ data, err := ioutil.ReadFile(file)
+ if err == nil {
+ success := certs.AppendCertsFromPEM(data)
+ if !success {
+ fmt.Printf("Unable to load any certificates from %v", file)
+ } else {
+ tlsconfig.RootCAs = certs
+ break
+ }
+ }
+ }
+ // Will use system default CA roots instead.
+ }
+
+ return &tlsconfig
+}
+
// New returns an ArvadosClient using the given arvados.Client
// configuration. This is useful for callers who load arvados.Client
// fields from configuration files but still need to use the
// arvadosclient.ArvadosClient package.
func New(c *arvados.Client) (*ArvadosClient, error) {
- return &ArvadosClient{
+ ac := &ArvadosClient{
Scheme: "https",
ApiServer: c.APIHost,
ApiToken: c.AuthToken,
ApiInsecure: c.Insecure,
Client: &http.Client{Transport: &http.Transport{
- TLSClientConfig: &tls.Config{InsecureSkipVerify: c.Insecure}}},
+ TLSClientConfig: MakeTLSConfig(c.Insecure)}},
External: false,
Retries: 2,
lastClosedIdlesAt: time.Now(),
- }, nil
+ }
+
+ return ac, nil
}
// MakeArvadosClient creates a new ArvadosClient using the standard
ApiToken: os.Getenv("ARVADOS_API_TOKEN"),
ApiInsecure: insecure,
Client: &http.Client{Transport: &http.Transport{
- TLSClientConfig: &tls.Config{InsecureSkipVerify: insecure}}},
+ TLSClientConfig: MakeTLSConfig(insecure)}},
External: external,
Retries: 2}
}
return nil
}
+
+// Dump returns a YAML representation of cfg.
+func Dump(cfg interface{}) ([]byte, error) {
+ return yaml.Marshal(cfg)
+}
package main
import (
- "crypto/x509"
"encoding/json"
"fmt"
"git.curoverse.com/arvados.git/sdk/go/arvados"
"io"
"io/ioutil"
"log"
- "net/http"
"os"
"os/exec"
"os/signal"
log.Fatal(err)
}
- // Container may not have certificates installed, so need to look for
- // /etc/arvados/ca-certificates.crt in addition to normal system certs.
- var certFiles = []string{
- "/etc/ssl/certs/ca-certificates.crt", // Debian
- "/etc/pki/tls/certs/ca-bundle.crt", // Red Hat
- "/etc/arvados/ca-certificates.crt",
- }
-
- certs := x509.NewCertPool()
- for _, file := range certFiles {
- data, err := ioutil.ReadFile(file)
- if err == nil {
- log.Printf("Using TLS certificates at %v", file)
- certs.AppendCertsFromPEM(data)
- }
- }
- api.Client.Transport.(*http.Transport).TLSClientConfig.RootCAs = certs
-
jobUuid := os.Getenv("JOB_UUID")
taskUuid := os.Getenv("TASK_UUID")
tmpdir := os.Getenv("TASK_WORK")
--- /dev/null
+package ctxlog
+
+import (
+ "context"
+
+ "github.com/Sirupsen/logrus"
+)
+
+var (
+ loggerCtxKey = new(int)
+ rootLogger = logrus.New()
+)
+
+const rfc3339NanoFixed = "2006-01-02T15:04:05.000000000Z07:00"
+
+// Context returns a new child context such that FromContext(child)
+// returns the given logger.
+func Context(ctx context.Context, logger *logrus.Entry) context.Context {
+ return context.WithValue(ctx, loggerCtxKey, logger)
+}
+
+// FromContext returns the logger suitable for the given context -- the one
+// attached by Context() if applicable, otherwise the
+// top-level logger with no fields/values.
+func FromContext(ctx context.Context) *logrus.Entry {
+ if ctx != nil {
+ if logger, ok := ctx.Value(loggerCtxKey).(*logrus.Entry); ok {
+ return logger
+ }
+ }
+ return rootLogger.WithFields(nil)
+}
+
+// SetLevel sets the current logging level. See logrus for level
+// names.
+func SetLevel(level string) {
+ lvl, err := logrus.ParseLevel(level)
+ if err != nil {
+ logrus.Fatal(err)
+ }
+ rootLogger.Level = lvl
+}
+
+// SetFormat sets the current logging format to "json" or "text".
+func SetFormat(format string) {
+ switch format {
+ case "text":
+ rootLogger.Formatter = &logrus.TextFormatter{
+ FullTimestamp: true,
+ TimestampFormat: rfc3339NanoFixed,
+ }
+ case "json":
+ rootLogger.Formatter = &logrus.JSONFormatter{
+ TimestampFormat: rfc3339NanoFixed,
+ }
+ default:
+ logrus.WithField("LogFormat", format).Fatal("unknown log format")
+ }
+}
"net/http"
)
+// RequestCounter is an http.Handler that tracks the number of
+// requests in progress.
+type RequestCounter interface {
+ http.Handler
+
+ // Current() returns the number of requests in progress.
+ Current() int
+
+ // Max() returns the maximum number of concurrent requests
+ // that will be accepted.
+ Max() int
+}
+
type limiterHandler struct {
requests chan struct{}
handler http.Handler
}
-func NewRequestLimiter(maxRequests int, handler http.Handler) http.Handler {
+// NewRequestLimiter returns a RequestCounter that delegates up to
+// maxRequests at a time to the given handler, and responds 503 to all
+// incoming requests beyond that limit.
+func NewRequestLimiter(maxRequests int, handler http.Handler) RequestCounter {
return &limiterHandler{
requests: make(chan struct{}, maxRequests),
handler: handler,
}
}
+func (h *limiterHandler) Current() int {
+ return len(h.requests)
+}
+
+func (h *limiterHandler) Max() int {
+ return cap(h.requests)
+}
+
func (h *limiterHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
select {
case h.requests <- struct{}{}:
import (
"bytes"
"crypto/md5"
- "crypto/tls"
"errors"
"fmt"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
Arvados: arv,
Want_replicas: defaultReplicationLevel,
Client: &http.Client{Transport: &http.Transport{
- TLSClientConfig: &tls.Config{InsecureSkipVerify: arv.ApiInsecure}}},
+ TLSClientConfig: arvadosclient.MakeTLSConfig(arv.ApiInsecure)}},
Retries: 2,
}
return kc
}
func (kc *KeepClient) getOrHead(method string, locator string) (io.ReadCloser, int64, string, error) {
+ if strings.HasPrefix(locator, "d41d8cd98f00b204e9800998ecf8427e+0") {
+ return ioutil.NopCloser(bytes.NewReader(nil)), 0, "", nil
+ }
+
var errs []string
tries_remaining := 1 + kc.Retries
import (
"crypto/md5"
- "flag"
"fmt"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
"git.curoverse.com/arvados.git/sdk/go/arvadostest"
var _ = Suite(&ServerRequiredSuite{})
var _ = Suite(&StandaloneSuite{})
-var no_server = flag.Bool("no-server", false, "Skip 'ServerRequireSuite'")
-
// Tests that require the Keep server running
type ServerRequiredSuite struct{}
}
func (s *ServerRequiredSuite) SetUpSuite(c *C) {
- if *no_server {
- c.Skip("Skipping tests that require server")
- return
- }
arvadostest.StartAPI()
arvadostest.StartKeep(2, false)
}
func (s *ServerRequiredSuite) TearDownSuite(c *C) {
- if *no_server {
- return
- }
arvadostest.StopKeep(2)
arvadostest.StopAPI()
}
c.Check(r, Equals, nil)
}
+func (s *StandaloneSuite) TestGetEmptyBlock(c *C) {
+ st := Error404Handler{make(chan string, 1)}
+
+ ks := RunFakeKeepServer(st)
+ defer ks.listener.Close()
+
+ arv, err := arvadosclient.MakeArvadosClient()
+ kc, _ := MakeKeepClient(arv)
+ arv.ApiToken = "abc123"
+ kc.SetServiceRoots(map[string]string{"x": ks.url}, nil, nil)
+
+ r, n, url2, err := kc.Get("d41d8cd98f00b204e9800998ecf8427e+0")
+ c.Check(err, IsNil)
+ c.Check(n, Equals, int64(0))
+ c.Check(url2, Equals, "")
+ c.Assert(r, NotNil)
+ buf, err := ioutil.ReadAll(r)
+ c.Check(err, IsNil)
+ c.Check(buf, DeepEquals, []byte{})
+}
+
func (s *StandaloneSuite) TestGetFail(c *C) {
hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+++ /dev/null
-// Logger periodically writes a log to the Arvados SDK.
-//
-// This package is useful for maintaining a log object that is updated
-// over time. This log object will be periodically written to the log,
-// as specified by WriteInterval in the Params.
-//
-// This package is safe for concurrent use as long as:
-// The maps passed to a LogMutator are not accessed outside of the
-// LogMutator
-//
-// Usage:
-// arvLogger := logger.NewLogger(params)
-// arvLogger.Update(func(properties map[string]interface{},
-// entry map[string]interface{}) {
-// // Modifiy properties and entry however you want
-// // properties is a shortcut for entry["properties"].(map[string]interface{})
-// // properties can take any (valid) values you want to give it,
-// // entry will only take the fields listed at
-// // http://doc.arvados.org/api/schema/Log.html
-// // Valid values for properties are anything that can be json
-// // encoded (i.e. will not error if you call json.Marshal() on it.
-// })
-package logger
-
-import (
- "fmt"
- "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
- "log"
- "time"
-)
-
-const (
- startSuffix = "-start"
- partialSuffix = "-partial"
- finalSuffix = "-final"
- numberNoMoreWorkMessages = 2 // To return from FinalUpdate() & Work().
-)
-
-type LoggerParams struct {
- Client *arvadosclient.ArvadosClient // The client we use to write log entries
- EventTypePrefix string // The prefix we use for the event type in the log entry
- WriteInterval time.Duration // Wait at least this long between log writes
-}
-
-// A LogMutator is a function which modifies the log entry.
-// It takes two maps as arguments, properties is the first and entry
-// is the second
-// properties is a shortcut for entry["properties"].(map[string]interface{})
-// properties can take any values you want to give it.
-// entry will only take the fields listed at http://doc.arvados.org/api/schema/Log.html
-// properties and entry are only safe to access inside the LogMutator,
-// they should not be stored anywhere, otherwise you'll risk
-// concurrent access.
-type LogMutator func(map[string]interface{}, map[string]interface{})
-
-// A Logger is used to build up a log entry over time and write every
-// version of it.
-type Logger struct {
- // The data we write
- data map[string]interface{} // The entire map that we give to the api
- entry map[string]interface{} // Convenience shortcut into data
- properties map[string]interface{} // Convenience shortcut into data
-
- params LoggerParams // Parameters we were given
-
- // Variables to coordinate updating and writing.
- modified bool // Has this data been modified since the last write?
- workToDo chan LogMutator // Work to do in the worker thread.
- writeTicker *time.Ticker // On each tick we write the log data to arvados, if it has been modified.
- hasWritten bool // Whether we've written at all yet.
- noMoreWork chan bool // Signals that we're done writing.
-
- writeHooks []LogMutator // Mutators we call before each write.
-}
-
-// Create a new logger based on the specified parameters.
-func NewLogger(params LoggerParams) (l *Logger, err error) {
- // sanity check parameters
- if &params.Client == nil {
- err = fmt.Errorf("Nil arvados client in LoggerParams passed in to NewLogger()")
- return
- }
- if params.EventTypePrefix == "" {
- err = fmt.Errorf("Empty event type prefix in LoggerParams passed in to NewLogger()")
- return
- }
-
- l = &Logger{
- data: make(map[string]interface{}),
- entry: make(map[string]interface{}),
- properties: make(map[string]interface{}),
- params: params,
- workToDo: make(chan LogMutator, 10),
- writeTicker: time.NewTicker(params.WriteInterval),
- noMoreWork: make(chan bool, numberNoMoreWorkMessages)}
-
- l.data["log"] = l.entry
- l.entry["properties"] = l.properties
-
- // Start the worker goroutine.
- go l.work()
-
- return l, nil
-}
-
-// Exported functions will be called from other goroutines, therefore
-// all they are allowed to do is enqueue work to be done in the worker
-// goroutine.
-
-// Enqueues an update. This will happen in another goroutine after
-// this method returns.
-func (l *Logger) Update(mutator LogMutator) {
- l.workToDo <- mutator
-}
-
-// Similar to Update(), but writes the log entry as soon as possible
-// (ignoring MinimumWriteInterval) and blocks until the entry has been
-// written. This is useful if you know that you're about to quit
-// (e.g. if you discovered a fatal error, or you're finished), since
-// go will not wait for timers (including the pending write timer) to
-// go off before exiting.
-func (l *Logger) FinalUpdate(mutator LogMutator) {
- // TODO(misha): Consider not accepting any future updates somehow,
- // since they won't get written if they come in after this.
-
- // Stop the periodic write ticker. We'll perform the final write
- // before returning from this function.
- l.workToDo <- func(p map[string]interface{}, e map[string]interface{}) {
- l.writeTicker.Stop()
- }
-
- // Apply the final update
- l.workToDo <- mutator
-
- // Perform the final write and signal that we can return.
- l.workToDo <- func(p map[string]interface{}, e map[string]interface{}) {
- l.write(true)
- for i := 0; i < numberNoMoreWorkMessages; {
- l.noMoreWork <- true
- }
- }
-
- // Wait until we've performed the write.
- <-l.noMoreWork
-}
-
-// Adds a hook which will be called every time this logger writes an entry.
-func (l *Logger) AddWriteHook(hook LogMutator) {
- // We do the work in a LogMutator so that it happens in the worker
- // goroutine.
- l.workToDo <- func(p map[string]interface{}, e map[string]interface{}) {
- l.writeHooks = append(l.writeHooks, hook)
- }
-}
-
-// The worker loop
-func (l *Logger) work() {
- for {
- select {
- case <-l.writeTicker.C:
- if l.modified {
- l.write(false)
- l.modified = false
- }
- case mutator := <-l.workToDo:
- mutator(l.properties, l.entry)
- l.modified = true
- case <-l.noMoreWork:
- return
- }
- }
-}
-
-// Actually writes the log entry.
-func (l *Logger) write(isFinal bool) {
-
- // Run all our hooks
- for _, hook := range l.writeHooks {
- hook(l.properties, l.entry)
- }
-
- // Update the event type.
- if isFinal {
- l.entry["event_type"] = l.params.EventTypePrefix + finalSuffix
- } else if l.hasWritten {
- l.entry["event_type"] = l.params.EventTypePrefix + partialSuffix
- } else {
- l.entry["event_type"] = l.params.EventTypePrefix + startSuffix
- }
- l.hasWritten = true
-
- // Write the log entry.
- // This is a network write and will take a while, which is bad
- // because we're blocking all the other work on this goroutine.
- //
- // TODO(misha): Consider rewriting this so that we can encode l.data
- // into a string, and then perform the actual write in another
- // routine. This will be tricky and will require support in the
- // client.
- err := l.params.Client.Create("logs", l.data, nil)
- if err != nil {
- log.Printf("Received error writing %v: %v", l.data, err)
- }
-}
+++ /dev/null
-// Helper methods for interacting with Logger.
-package logger
-
-// Retrieves the map[string]interface{} stored at parent[key] if it
-// exists, otherwise it makes it and stores it there.
-// This is useful for logger because you may not know if a map you
-// need has already been created.
-func GetOrCreateMap(
- parent map[string]interface{},
- key string) (child map[string]interface{}) {
- read, exists := parent[key]
- if exists {
- child = read.(map[string]interface{})
-
- } else {
- child = make(map[string]interface{})
- parent[key] = child
- }
- return
-}
--- /dev/null
+package stats
+
+import (
+ "fmt"
+ "strconv"
+ "time"
+)
+
+// Duration is a duration that is displayed as a number of seconds in
+// fixed-point notation.
+type Duration time.Duration
+
+// MarshalJSON implements json.Marshaler.
+func (d Duration) MarshalJSON() ([]byte, error) {
+ return []byte(d.String()), nil
+}
+
+// String implements fmt.Stringer.
+func (d Duration) String() string {
+ return fmt.Sprintf("%.6f", time.Duration(d).Seconds())
+}
+
+// UnmarshalJSON implements json.Unmarshaler
+func (d *Duration) UnmarshalJSON(data []byte) error {
+ return d.Set(string(data))
+}
+
+// Value implements flag.Value
+func (d *Duration) Set(s string) error {
+ sec, err := strconv.ParseFloat(s, 64)
+ if err == nil {
+ *d = Duration(sec * float64(time.Second))
+ }
+ return err
+}
--- /dev/null
+package stats
+
+import (
+ "testing"
+ "time"
+)
+
+func TestString(t *testing.T) {
+ d := Duration(123123123123 * time.Nanosecond)
+ if s, expect := d.String(), "123.123123"; s != expect {
+ t.Errorf("got %s, expect %s", s, expect)
+ }
+}
+
+func TestSet(t *testing.T) {
+ var d Duration
+ if err := d.Set("123.456"); err != nil {
+ t.Fatal(err)
+ }
+ if got, expect := time.Duration(d).Nanoseconds(), int64(123456000000); got != expect {
+ t.Errorf("got %d, expect %d", got, expect)
+ }
+}
+++ /dev/null
-/* Helper methods for dealing with responses from API Server. */
-
-package util
-
-import (
- "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-)
-
-func UserIsAdmin(arv *arvadosclient.ArvadosClient) (is_admin bool, err error) {
- type user struct {
- IsAdmin bool `json:"is_admin"`
- }
- var u user
- err = arv.Call("GET", "users", "", "current", nil, &u)
- return u.IsAdmin, err
-}
-
-// Returns the total count of a particular type of resource
-//
-// resource - the arvados resource to count
-// return
-// count - the number of items of type resource the api server reports, if no error
-// err - error accessing the resource, or nil if no error
-func NumberItemsAvailable(client *arvadosclient.ArvadosClient, resource string) (count int, err error) {
- var response struct {
- ItemsAvailable int `json:"items_available"`
- }
- sdkParams := arvadosclient.Dict{"limit": 0}
- err = client.List(resource, sdkParams, &response)
- if err == nil {
- count = response.ItemsAvailable
- }
- return
-}
--- /dev/null
+import pkg_resources
+
+__version__ = pkg_resources.require('arvados-python-client')[0].version
def writable(self):
return self.parent.writable()
+ @synchronized
+ def permission_expired(self, as_of_dt=None):
+ """Returns True if any of the segment's locators is expired"""
+ for r in self._segments:
+ if KeepLocator(r.locator).permission_expired(as_of_dt):
+ return True
+ return False
+
@synchronized
def segments(self):
return copy.copy(self._segments)
def find(self, path):
"""Recursively search the specified file path.
- May return either a Collection or ArvadosFile. Return None if not
+ May return either a Collection or ArvadosFile. Return None if not
found.
+ If path is invalid (ex: starts with '/'), an IOError exception will be
+ raised.
"""
if not path:
raise errors.ArgumentError("Parameter 'path' is empty.")
pathcomponents = path.split("/", 1)
+ if pathcomponents[0] == '':
+ raise IOError(errno.ENOTDIR, "Not a directory", pathcomponents[0])
+
item = self._items.get(pathcomponents[0])
- if len(pathcomponents) == 1:
+ if item is None:
+ return None
+ elif len(pathcomponents) == 1:
return item
else:
if isinstance(item, RichCollectionBase):
if target_dir is None:
raise IOError(errno.ENOENT, "Target directory not found", target_name)
- if target_name in target_dir and isinstance(self[target_name], RichCollectionBase) and sourcecomponents:
+ if target_name in target_dir and isinstance(target_dir[target_name], RichCollectionBase) and sourcecomponents:
target_dir = target_dir[target_name]
target_name = sourcecomponents[-1]
import arvados.commands.keepdocker
from arvados.api import OrderedJsonModel
+from arvados._version import __version__
COMMIT_HASH_RE = re.compile(r'^[0-9a-f]{1,40}$')
def main():
copy_opts = argparse.ArgumentParser(add_help=False)
+ copy_opts.add_argument(
+ '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
+ help='Print version and exit.')
copy_opts.add_argument(
'-v', '--verbose', dest='verbose', action='store_true',
help='Verbose output.')
import arvados.commands.put as arv_put
import ciso8601
+from arvados._version import __version__
+
EARLIEST_DATETIME = datetime.datetime(datetime.MINYEAR, 1, 1, 0, 0, 0)
STAT_CACHE_ERRORS = (IOError, OSError, ValueError)
'DockerImage', ['repo', 'tag', 'hash', 'created', 'vsize'])
keepdocker_parser = argparse.ArgumentParser(add_help=False)
+keepdocker_parser.add_argument(
+ '--version', action='version', version="%s %s" % (sys.argv[0], __version__),
+ help='Print version and exit.')
keepdocker_parser.add_argument(
'-f', '--force', action='store_true', default=False,
help="Re-upload the image even if it already exists on the server")
from __future__ import print_function
import argparse
+import sys
import arvados
import arvados.commands._util as arv_cmd
+from arvados._version import __version__
+
def parse_args(args):
parser = argparse.ArgumentParser(
description='List contents of a manifest',
help="""Collection UUID or locator""")
parser.add_argument('-s', action='store_true',
help="""List file sizes, in KiB.""")
+ parser.add_argument('--version', action='version',
+ version="%s %s" % (sys.argv[0], __version__),
+ help='Print version and exit.')
return parser.parse_args(args)
import arvados
import arvados.collection
import base64
+import copy
import datetime
import errno
import fcntl
import hashlib
import json
+import logging
import os
import pwd
-import time
+import re
import signal
import socket
import sys
import tempfile
import threading
-import copy
-import logging
+import time
from apiclient import errors as apiclient_errors
+from arvados._version import __version__
import arvados.commands._util as arv_cmd
upload_opts = argparse.ArgumentParser(add_help=False)
+upload_opts.add_argument('--version', action='version',
+ version="%s %s" % (sys.argv[0], __version__),
+ help='Print version and exit.')
upload_opts.add_argument('paths', metavar='path', type=str, nargs='*',
help="""
Local file or directory. Default: read from standard input.
_group = upload_opts.add_mutually_exclusive_group()
_group.add_argument('--max-manifest-depth', type=int, metavar='N',
- default=-1, help="""
-Maximum depth of directory tree to represent in the manifest
-structure. A directory structure deeper than this will be represented
-as a single stream in the manifest. If N=0, the manifest will contain
-a single stream. Default: -1 (unlimited), i.e., exactly one manifest
-stream per filesystem directory that contains files.
-""")
+ default=-1, help=argparse.SUPPRESS)
_group.add_argument('--normalize', action='store_true',
help="""
data.
""")
+_group.add_argument('--dry-run', action='store_true', default=False,
+ help="""
+Don't actually upload files, but only check if any file should be
+uploaded. Exit with code=2 when files are pending for upload.
+""")
+
_group = upload_opts.add_mutually_exclusive_group()
_group.add_argument('--as-stream', action='store_true', dest='stream',
manifest.
""")
+upload_opts.add_argument('--update-collection', type=str, default=None,
+ dest='update_collection', metavar="UUID", help="""
+Update an existing collection identified by the given Arvados collection
+UUID. All new local files will be uploaded.
+""")
+
upload_opts.add_argument('--use-filename', type=str, default=None,
dest='filename', help="""
Synonym for --filename.
Do not continue interrupted uploads from cached state.
""")
+_group = run_opts.add_mutually_exclusive_group()
+_group.add_argument('--cache', action='store_true', dest='use_cache', default=True,
+ help="""
+Save upload state in a cache file for resuming (default).
+""")
+_group.add_argument('--no-cache', action='store_false', dest='use_cache',
+ help="""
+Do not save upload state in a cache file for resuming.
+""")
+
arg_parser = argparse.ArgumentParser(
description='Copy data from the local filesystem to Keep.',
parents=[upload_opts, run_opts, arv_cmd.retry_opt])
and os.isatty(sys.stderr.fileno())):
args.progress = True
+ # Turn off --resume (default) if --no-cache is used.
+ if not args.use_cache:
+ args.resume = False
+
if args.paths == ['-']:
+ if args.update_collection:
+ arg_parser.error("""
+ --update-collection cannot be used when reading from stdin.
+ """)
args.resume = False
+ args.use_cache = False
if not args.filename:
args.filename = 'stdin'
return args
+
+class CollectionUpdateError(Exception):
+ pass
+
+
class ResumeCacheConflict(Exception):
pass
+class ArvPutArgumentConflict(Exception):
+ pass
+
+
+class ArvPutUploadIsPending(Exception):
+ pass
+
+
+class ArvPutUploadNotPending(Exception):
+ pass
+
+
+class FileUploadList(list):
+ def __init__(self, dry_run=False):
+ list.__init__(self)
+ self.dry_run = dry_run
+
+ def append(self, other):
+ if self.dry_run:
+ raise ArvPutUploadIsPending()
+ super(FileUploadList, self).append(other)
+
+
class ResumeCache(object):
CACHE_DIR = '.cache/arvados/arv-put'
realpaths = sorted(os.path.realpath(path) for path in args.paths)
md5.update('\0'.join(realpaths))
if any(os.path.isdir(path) for path in realpaths):
- md5.update(str(max(args.max_manifest_depth, -1)))
+ md5.update("-1")
elif args.filename:
md5.update(args.filename)
return os.path.join(
'files' : {} # Previous run file list: {path : {size, mtime}}
}
- def __init__(self, paths, resume=True, reporter=None, bytes_expected=None,
- name=None, owner_uuid=None, ensure_unique_name=False,
- num_retries=None, replication_desired=None,
- filename=None, update_time=1.0):
+ def __init__(self, paths, resume=True, use_cache=True, reporter=None,
+ bytes_expected=None, name=None, owner_uuid=None,
+ ensure_unique_name=False, num_retries=None, replication_desired=None,
+ filename=None, update_time=20.0, update_collection=None,
+ logger=logging.getLogger('arvados.arv_put'), dry_run=False):
self.paths = paths
self.resume = resume
+ self.use_cache = use_cache
+ self.update = False
self.reporter = reporter
self.bytes_expected = bytes_expected
self.bytes_written = 0
self._state = None # Previous run state (file list & manifest)
self._current_files = [] # Current run file list
self._cache_file = None
- self._collection = None
self._collection_lock = threading.Lock()
+ self._remote_collection = None # Collection being updated (if asked)
+ self._local_collection = None # Collection from previous run manifest
+ self._file_paths = [] # Files to be updated in remote collection
self._stop_checkpointer = threading.Event()
self._checkpointer = threading.Thread(target=self._update_task)
+ self._checkpointer.daemon = True
self._update_task_time = update_time # How many seconds wait between update runs
- self.logger = logging.getLogger('arvados.arv_put')
+ self._files_to_upload = FileUploadList(dry_run=dry_run)
+ self.logger = logger
+ self.dry_run = dry_run
+
+ if not self.use_cache and self.resume:
+ raise ArvPutArgumentConflict('resume cannot be True when use_cache is False')
+
+ # Check for obvious dry-run responses
+ if self.dry_run and (not self.use_cache or not self.resume):
+ raise ArvPutUploadIsPending()
+
# Load cached data if any and if needed
- self._setup_state()
+ self._setup_state(update_collection)
- def start(self):
+ def start(self, save_collection):
"""
Start supporting thread & file uploading
"""
- self._checkpointer.daemon = True
- self._checkpointer.start()
+ if not self.dry_run:
+ self._checkpointer.start()
try:
for path in self.paths:
# Test for stdin first, in case some file named '-' exist
if path == '-':
+ if self.dry_run:
+ raise ArvPutUploadIsPending()
self._write_stdin(self.filename or 'stdin')
elif os.path.isdir(path):
- self._write_directory_tree(path)
+ # Use absolute paths on cache index so CWD doesn't interfere
+ # with the caching logic.
+ prefixdir = path = os.path.abspath(path)
+ if prefixdir != '/':
+ prefixdir += '/'
+ for root, dirs, files in os.walk(path):
+ # Make os.walk()'s dir traversing order deterministic
+ dirs.sort()
+ files.sort()
+ for f in files:
+ self._check_file(os.path.join(root, f),
+ os.path.join(root[len(prefixdir):], f))
else:
- self._write_file(path, self.filename or os.path.basename(path))
- finally:
- # Stop the thread before doing anything else
- self._stop_checkpointer.set()
- self._checkpointer.join()
- # Commit all & one last _update()
- self.manifest_text()
+ self._check_file(os.path.abspath(path),
+ self.filename or os.path.basename(path))
+ # If dry-mode is on, and got up to this point, then we should notify that
+        # there aren't any files to upload.
+ if self.dry_run:
+ raise ArvPutUploadNotPending()
+ # Remove local_collection's files that don't exist locally anymore, so the
+ # bytes_written count is correct.
+ for f in self.collection_file_paths(self._local_collection,
+ path_prefix=""):
+ if f != 'stdin' and f != self.filename and not f in self._file_paths:
+ self._local_collection.remove(f)
+ # Update bytes_written from current local collection and
+ # report initial progress.
self._update()
- if self.resume:
+ # Actual file upload
+ self._upload_files()
+ finally:
+ if not self.dry_run:
+ # Stop the thread before doing anything else
+ self._stop_checkpointer.set()
+ self._checkpointer.join()
+ # Commit all pending blocks & one last _update()
+ self._local_collection.manifest_text()
+ self._update(final=True)
+ if save_collection:
+ self.save_collection()
+ if self.use_cache:
self._cache_file.close()
- # Correct the final written bytes count
- self.bytes_written -= self.bytes_skipped
def save_collection(self):
- with self._collection_lock:
- self._my_collection().save_new(
+ if self.update:
+ # Check if files should be updated on the remote collection.
+ for fp in self._file_paths:
+ remote_file = self._remote_collection.find(fp)
+ if not remote_file:
+                # File doesn't exist on remote collection, copy it.
+ self._remote_collection.copy(fp, fp, self._local_collection)
+ elif remote_file != self._local_collection.find(fp):
+                # A different file exists on remote collection, overwrite it.
+ self._remote_collection.copy(fp, fp, self._local_collection, overwrite=True)
+ else:
+                # The file already exists on remote collection, skip it.
+ pass
+ self._remote_collection.save(num_retries=self.num_retries)
+ else:
+ self._local_collection.save_new(
name=self.name, owner_uuid=self.owner_uuid,
ensure_unique_name=self.ensure_unique_name,
num_retries=self.num_retries)
def destroy_cache(self):
- if self.resume:
+ if self.use_cache:
try:
os.unlink(self._cache_filename)
except OSError as error:
while not self._stop_checkpointer.wait(self._update_task_time):
self._update()
- def _update(self):
+ def _update(self, final=False):
"""
Update cached manifest text and report progress.
"""
with self._collection_lock:
- self.bytes_written = self._collection_size(self._my_collection())
- # Update cache, if resume enabled
- if self.resume:
+ self.bytes_written = self._collection_size(self._local_collection)
+ if self.use_cache:
+ # Update cache
with self._state_lock:
- # Get the manifest text without comitting pending blocks
- self._state['manifest'] = self._my_collection()._get_manifest_text(".", strip=False, normalize=False, only_committed=True)
+ if final:
+ self._state['manifest'] = self._local_collection.manifest_text()
+ else:
+                    # Get the manifest text without committing pending blocks
+ self._state['manifest'] = self._local_collection._get_manifest_text(".", strip=False, normalize=False, only_committed=True)
self._save_state()
# Call the reporter, if any
self.report_progress()
if self.reporter is not None:
self.reporter(self.bytes_written, self.bytes_expected)
- def _write_directory_tree(self, path, stream_name="."):
- # TODO: Check what happens when multiple directories are passed as
- # arguments.
- # If the code below is uncommented, integration test
- # test_ArvPutSignedManifest (tests.test_arv_put.ArvPutIntegrationTest)
- # fails, I suppose it is because the manifest_uuid changes because
- # of the dir addition to stream_name.
-
- # if stream_name == '.':
- # stream_name = os.path.join('.', os.path.basename(path))
- for item in os.listdir(path):
- if os.path.isdir(os.path.join(path, item)):
- self._write_directory_tree(os.path.join(path, item),
- os.path.join(stream_name, item))
- else:
- self._write_file(os.path.join(path, item),
- os.path.join(stream_name, item))
-
def _write_stdin(self, filename):
- with self._collection_lock:
- output = self._my_collection().open(filename, 'w')
+ output = self._local_collection.open(filename, 'w')
self._write(sys.stdin, output)
output.close()
- def _write_file(self, source, filename):
+ def _check_file(self, source, filename):
+ """Check if this file needs to be uploaded"""
resume_offset = 0
- if self.resume:
- # Check if file was already uploaded (at least partially)
- with self._collection_lock:
- try:
- file_in_collection = self._my_collection().find(filename)
- except IOError:
- # Not found
- file_in_collection = None
+ should_upload = False
+ new_file_in_cache = False
+ # Record file path for updating the remote collection before exiting
+ self._file_paths.append(filename)
+
+ with self._state_lock:
# If no previous cached data on this file, store it for an eventual
# repeated run.
if source not in self._state['files']:
- with self._state_lock:
- self._state['files'][source] = {
- 'mtime': os.path.getmtime(source),
- 'size' : os.path.getsize(source)
- }
- with self._state_lock:
- cached_file_data = self._state['files'][source]
- # See if this file was already uploaded at least partially
- if file_in_collection:
- if cached_file_data['mtime'] == os.path.getmtime(source) and cached_file_data['size'] == os.path.getsize(source):
- if cached_file_data['size'] == file_in_collection.size():
- # File already there, skip it.
- self.bytes_skipped += cached_file_data['size']
- return
- elif cached_file_data['size'] > file_in_collection.size():
- # File partially uploaded, resume!
- resume_offset = file_in_collection.size()
- else:
- # Inconsistent cache, re-upload the file
- self.logger.warning("Uploaded version of file '{}' is bigger than local version, will re-upload it from scratch.".format(source))
- else:
- # Local file differs from cached data, re-upload it
- pass
- with open(source, 'r') as source_fd:
- if resume_offset > 0:
- # Start upload where we left off
- with self._collection_lock:
- output = self._my_collection().open(filename, 'a')
- source_fd.seek(resume_offset)
+ self._state['files'][source] = {
+ 'mtime': os.path.getmtime(source),
+ 'size' : os.path.getsize(source)
+ }
+ new_file_in_cache = True
+ cached_file_data = self._state['files'][source]
+
+ # Check if file was already uploaded (at least partially)
+ file_in_local_collection = self._local_collection.find(filename)
+
+ # If not resuming, upload the full file.
+ if not self.resume:
+ should_upload = True
+ # New file detected from last run, upload it.
+ elif new_file_in_cache:
+ should_upload = True
+ # Local file didn't change from last run.
+ elif cached_file_data['mtime'] == os.path.getmtime(source) and cached_file_data['size'] == os.path.getsize(source):
+ if not file_in_local_collection:
+ # File not uploaded yet, upload it completely
+ should_upload = True
+ elif file_in_local_collection.permission_expired():
+ # Permission token expired, re-upload file. This will change whenever
+ # we have a API for refreshing tokens.
+ should_upload = True
+ self._local_collection.remove(filename)
+ elif cached_file_data['size'] == file_in_local_collection.size():
+ # File already there, skip it.
+ self.bytes_skipped += cached_file_data['size']
+ elif cached_file_data['size'] > file_in_local_collection.size():
+ # File partially uploaded, resume!
+ resume_offset = file_in_local_collection.size()
self.bytes_skipped += resume_offset
+ should_upload = True
else:
- # Start from scratch
- with self._collection_lock:
- output = self._my_collection().open(filename, 'w')
- self._write(source_fd, output)
- output.close(flush=False)
+ # Inconsistent cache, re-upload the file
+ should_upload = True
+ self._local_collection.remove(filename)
+ self.logger.warning("Uploaded version of file '{}' is bigger than local version, will re-upload it from scratch.".format(source))
+ # Local file differs from cached data, re-upload it.
+ else:
+ if file_in_local_collection:
+ self._local_collection.remove(filename)
+ should_upload = True
+
+ if should_upload:
+ self._files_to_upload.append((source, resume_offset, filename))
+
+ def _upload_files(self):
+ for source, resume_offset, filename in self._files_to_upload:
+ with open(source, 'r') as source_fd:
+ with self._state_lock:
+ self._state['files'][source]['mtime'] = os.path.getmtime(source)
+ self._state['files'][source]['size'] = os.path.getsize(source)
+ if resume_offset > 0:
+ # Start upload where we left off
+ output = self._local_collection.open(filename, 'a')
+ source_fd.seek(resume_offset)
+ else:
+ # Start from scratch
+ output = self._local_collection.open(filename, 'w')
+ self._write(source_fd, output)
+ output.close(flush=False)
def _write(self, source_fd, output):
- first_read = True
while True:
data = source_fd.read(arvados.config.KEEP_BLOCK_SIZE)
- # Allow an empty file to be written
- if not data and not first_read:
+ if not data:
break
- if first_read:
- first_read = False
output.write(data)
def _my_collection(self):
- """
- Create a new collection if none cached. Load it from cache otherwise.
- """
- if self._collection is None:
- with self._state_lock:
- manifest = self._state['manifest']
- if self.resume and manifest is not None:
- # Create collection from saved state
- self._collection = arvados.collection.Collection(
- manifest,
- replication_desired=self.replication_desired)
- else:
- # Create new collection
- self._collection = arvados.collection.Collection(
- replication_desired=self.replication_desired)
- return self._collection
+ return self._remote_collection if self.update else self._local_collection
- def _setup_state(self):
+ def _setup_state(self, update_collection):
"""
Create a new cache file or load a previously existing one.
"""
- if self.resume:
+ # Load an already existing collection for update
+ if update_collection and re.match(arvados.util.collection_uuid_pattern,
+ update_collection):
+ try:
+ self._remote_collection = arvados.collection.Collection(update_collection)
+ except arvados.errors.ApiError as error:
+ raise CollectionUpdateError("Cannot read collection {} ({})".format(update_collection, error))
+ else:
+ self.update = True
+ elif update_collection:
+ # Collection locator provided, but unknown format
+ raise CollectionUpdateError("Collection locator unknown: '{}'".format(update_collection))
+
+ if self.use_cache:
+ # Set up cache file name from input paths.
md5 = hashlib.md5()
md5.update(arvados.config.get('ARVADOS_API_HOST', '!nohost'))
realpaths = sorted(os.path.realpath(path) for path in self.paths)
if self.filename:
md5.update(self.filename)
cache_filename = md5.hexdigest()
- self._cache_file = open(os.path.join(
+ cache_filepath = os.path.join(
arv_cmd.make_home_conf_dir(self.CACHE_DIR, 0o700, 'raise'),
- cache_filename), 'a+')
+ cache_filename)
+ if self.resume:
+ self._cache_file = open(cache_filepath, 'a+')
+ else:
+ # --no-resume means start with a empty cache file.
+ self._cache_file = open(cache_filepath, 'w+')
self._cache_filename = self._cache_file.name
self._lock_file(self._cache_file)
self._cache_file.seek(0)
- with self._state_lock:
+
+ with self._state_lock:
+ if self.use_cache:
try:
self._state = json.load(self._cache_file)
if not set(['manifest', 'files']).issubset(set(self._state.keys())):
except ValueError:
# Cache file empty, set up new cache
self._state = copy.deepcopy(self.EMPTY_STATE)
- # Load how many bytes were uploaded on previous run
- with self._collection_lock:
- self.bytes_written = self._collection_size(self._my_collection())
- # No resume required
- else:
- with self._state_lock:
+ else:
+ # No cache file, set empty state
self._state = copy.deepcopy(self.EMPTY_STATE)
+ # Load the previous manifest so we can check if files were modified remotely.
+ self._local_collection = arvados.collection.Collection(self._state['manifest'], replication_desired=self.replication_desired)
+
+ def collection_file_paths(self, col, path_prefix='.'):
+ """Return a list of file paths by recursively go through the entire collection `col`"""
+ file_paths = []
+ for name, item in col.items():
+ if isinstance(item, arvados.arvfile.ArvadosFile):
+ file_paths.append(os.path.join(path_prefix, name))
+ elif isinstance(item, arvados.collection.Subcollection):
+ new_prefix = os.path.join(path_prefix, name)
+ file_paths += self.collection_file_paths(item, path_prefix=new_prefix)
+ return file_paths
def _lock_file(self, fileobj):
try:
"""
try:
with self._state_lock:
- state = self._state
+ state = copy.deepcopy(self._state)
new_cache_fd, new_cache_name = tempfile.mkstemp(
dir=os.path.dirname(self._cache_filename))
self._lock_file(new_cache_fd)
self._cache_file = new_cache
def collection_name(self):
- with self._collection_lock:
- name = self._my_collection().api_response()['name'] if self._my_collection().api_response() else None
- return name
+ return self._my_collection().api_response()['name'] if self._my_collection().api_response() else None
def manifest_locator(self):
- with self._collection_lock:
- locator = self._my_collection().manifest_locator()
- return locator
+ return self._my_collection().manifest_locator()
def portable_data_hash(self):
- with self._collection_lock:
- datahash = self._my_collection().portable_data_hash()
- return datahash
+ return self._my_collection().portable_data_hash()
def manifest_text(self, stream_name=".", strip=False, normalize=False):
- with self._collection_lock:
- manifest = self._my_collection().manifest_text(stream_name, strip, normalize)
- return manifest
+ return self._my_collection().manifest_text(stream_name, strip, normalize)
def _datablocks_on_item(self, item):
"""
def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
global api_client
+ logger = logging.getLogger('arvados.arv_put')
args = parse_arguments(arguments)
status = 0
if api_client is None:
# Determine the name to use
if args.name:
if args.stream or args.raw:
- print >>stderr, "Cannot use --name with --stream or --raw"
+ logger.error("Cannot use --name with --stream or --raw")
+ sys.exit(1)
+ elif args.update_collection:
+ logger.error("Cannot use --name with --update-collection")
sys.exit(1)
collection_name = args.name
else:
socket.gethostname())
if args.project_uuid and (args.stream or args.raw):
- print >>stderr, "Cannot use --project-uuid with --stream or --raw"
+ logger.error("Cannot use --project-uuid with --stream or --raw")
sys.exit(1)
# Determine the parent project
project_uuid = desired_project_uuid(api_client, args.project_uuid,
args.retries)
except (apiclient_errors.Error, ValueError) as error:
- print >>stderr, error
+ logger.error(error)
sys.exit(1)
if args.progress:
reporter = None
bytes_expected = expected_bytes_for(args.paths)
+
try:
writer = ArvPutUploadJob(paths = args.paths,
resume = args.resume,
+ use_cache = args.use_cache,
filename = args.filename,
reporter = reporter,
bytes_expected = bytes_expected,
replication_desired = args.replication,
name = collection_name,
owner_uuid = project_uuid,
- ensure_unique_name = True)
+ ensure_unique_name = True,
+ update_collection = args.update_collection,
+ logger=logger,
+ dry_run=args.dry_run)
except ResumeCacheConflict:
- print >>stderr, "\n".join([
+ logger.error("\n".join([
"arv-put: Another process is already uploading this data.",
- " Use --no-resume if this is really what you want."])
+ " Use --no-cache if this is really what you want."]))
sys.exit(1)
+ except CollectionUpdateError as error:
+ logger.error("\n".join([
+ "arv-put: %s" % str(error)]))
+ sys.exit(1)
+ except ArvPutUploadIsPending:
+ # Dry run check successful, return proper exit code.
+ sys.exit(2)
+ except ArvPutUploadNotPending:
+ # No files pending for upload
+ sys.exit(0)
# Install our signal handler for each code in CAUGHT_SIGNALS, and save
# the originals.
orig_signal_handlers = {sigcode: signal.signal(sigcode, exit_signal_handler)
for sigcode in CAUGHT_SIGNALS}
- if args.resume and writer.bytes_written > 0:
- print >>stderr, "\n".join([
- "arv-put: Resuming previous upload from last checkpoint.",
- " Use the --no-resume option to start over."])
+ if not args.dry_run and not args.update_collection and args.resume and writer.bytes_written > 0:
+ logger.warning("\n".join([
+ "arv-put: Resuming previous upload from last checkpoint.",
+ " Use the --no-resume option to start over."]))
- writer.report_progress()
+ if not args.dry_run:
+ writer.report_progress()
output = None
- writer.start()
+ try:
+ writer.start(save_collection=not(args.stream or args.raw))
+ except arvados.errors.ApiError as error:
+ logger.error("\n".join([
+ "arv-put: %s" % str(error)]))
+ sys.exit(1)
+ except ArvPutUploadIsPending:
+ # Dry run check successful, return proper exit code.
+ sys.exit(2)
+ except ArvPutUploadNotPending:
+ # No files pending for upload
+ sys.exit(0)
+
if args.progress: # Print newline to split stderr from stdout for humans.
- print >>stderr
+ logger.info("\n")
if args.stream:
if args.normalize:
output = ','.join(writer.data_locators())
else:
try:
- writer.save_collection()
- print >>stderr, "Collection saved as '%s'" % writer.collection_name()
+ if args.update_collection:
+ logger.info("Collection updated: '{}'".format(writer.collection_name()))
+ else:
+ logger.info("Collection saved as '{}'".format(writer.collection_name()))
if args.portable_data_hash:
output = writer.portable_data_hash()
else:
output = writer.manifest_locator()
except apiclient_errors.Error as error:
- print >>stderr, (
+ logger.error(
"arv-put: Error creating Collection on project: {}.".format(
error))
status = 1
sys.exit(status)
# Success!
- writer.destroy_cache()
return output
import time
import subprocess
import logging
+import sys
import arvados.commands._util as arv_cmd
+from arvados._version import __version__
+
logger = logging.getLogger('arvados.arv-run')
logger.setLevel(logging.INFO)
arvrun_parser = argparse.ArgumentParser(parents=[arv_cmd.retry_opt])
-arvrun_parser.add_argument('--dry-run', action="store_true", help="Print out the pipeline that would be submitted and exit")
-arvrun_parser.add_argument('--local', action="store_true", help="Run locally using arv-run-pipeline-instance")
-arvrun_parser.add_argument('--docker-image', type=str, help="Docker image to use, otherwise use instance default.")
-arvrun_parser.add_argument('--ignore-rcode', action="store_true", help="Commands that return non-zero return codes should not be considered failed.")
-arvrun_parser.add_argument('--no-reuse', action="store_true", help="Do not reuse past jobs.")
-arvrun_parser.add_argument('--no-wait', action="store_true", help="Do not wait and display logs after submitting command, just exit.")
-arvrun_parser.add_argument('--project-uuid', type=str, help="Parent project of the pipeline")
-arvrun_parser.add_argument('--git-dir', type=str, default="", help="Git repository passed to arv-crunch-job when using --local")
-arvrun_parser.add_argument('--repository', type=str, default="arvados", help="repository field of component, default 'arvados'")
-arvrun_parser.add_argument('--script-version', type=str, default="master", help="script_version field of component, default 'master'")
+arvrun_parser.add_argument('--dry-run', action="store_true",
+ help="Print out the pipeline that would be submitted and exit")
+arvrun_parser.add_argument('--local', action="store_true",
+ help="Run locally using arv-run-pipeline-instance")
+arvrun_parser.add_argument('--docker-image', type=str,
+ help="Docker image to use, otherwise use instance default.")
+arvrun_parser.add_argument('--ignore-rcode', action="store_true",
+ help="Commands that return non-zero return codes should not be considered failed.")
+arvrun_parser.add_argument('--no-reuse', action="store_true",
+ help="Do not reuse past jobs.")
+arvrun_parser.add_argument('--no-wait', action="store_true",
+ help="Do not wait and display logs after submitting command, just exit.")
+arvrun_parser.add_argument('--project-uuid', type=str,
+ help="Parent project of the pipeline")
+arvrun_parser.add_argument('--git-dir', type=str, default="",
+ help="Git repository passed to arv-crunch-job when using --local")
+arvrun_parser.add_argument('--repository', type=str, default="arvados",
+ help="repository field of component, default 'arvados'")
+arvrun_parser.add_argument('--script-version', type=str, default="master",
+ help="script_version field of component, default 'master'")
+arvrun_parser.add_argument('--version', action='version',
+ version="%s %s" % (sys.argv[0], __version__),
+ help='Print version and exit.')
arvrun_parser.add_argument('args', nargs=argparse.REMAINDER)
class ArvFile(object):
import arvados
import json
from arvados.events import subscribe
+from arvados._version import __version__
import signal
def main(arguments=None):
logger = logging.getLogger('arvados.arv-ws')
parser = argparse.ArgumentParser()
+ parser.add_argument('--version', action='version',
+ version="%s %s" % (sys.argv[0], __version__),
+ help='Print version and exit.')
parser.add_argument('-u', '--uuid', type=str, default="", help="Filter events on object_uuid")
parser.add_argument('-f', '--filters', type=str, default="", help="Arvados query filter to apply to log events (JSON encoded)")
parser.add_argument('-s', '--start-time', type=str, default="", help="Arvados query filter to fetch log events created at or after this time. This will be server time in UTC. Allowed format: YYYY-MM-DD or YYYY-MM-DD hh:mm:ss")
def _get_user_agent(self):
try:
- return self._user_agent_pool.get(False)
+ return self._user_agent_pool.get(block=False)
except Queue.Empty:
return pycurl.Curl()
def _put_user_agent(self, ua):
try:
ua.reset()
- self._user_agent_pool.put(ua, False)
+ self._user_agent_pool.put(ua, block=False)
except:
ua.close()
with self.successful_copies_lock:
self.successful_copies += replicas_nr
self.response = response
+ with self.pending_tries_notification:
+ self.pending_tries_notification.notify_all()
- def write_fail(self, ks, status_code):
+ def write_fail(self, ks):
with self.pending_tries_notification:
self.pending_tries += 1
self.pending_tries_notification.notify()
def pending_copies(self):
with self.successful_copies_lock:
return self.wanted_copies - self.successful_copies
-
-
+
+ def get_next_task(self):
+ with self.pending_tries_notification:
+ while True:
+ if self.pending_copies() < 1:
+ # This notify_all() is unnecessary --
+ # write_success() already called notify_all()
+ # when pending<1 became true, so it's not
+ # possible for any other thread to be in
+ # wait() now -- but it's cheap insurance
+ # against deadlock so we do it anyway:
+ self.pending_tries_notification.notify_all()
+ # Drain the queue and then raise Queue.Empty
+ while True:
+ self.get_nowait()
+ self.task_done()
+ elif self.pending_tries > 0:
+ service, service_root = self.get_nowait()
+ if service.finished():
+ self.task_done()
+ continue
+ self.pending_tries -= 1
+ return service, service_root
+ elif self.empty():
+ self.pending_tries_notification.notify_all()
+ raise Queue.Empty
+ else:
+ self.pending_tries_notification.wait()
+
+
class KeepWriterThreadPool(object):
def __init__(self, data, data_hash, copies, max_service_replicas, timeout=None):
self.total_task_nr = 0
worker.start()
# Wait for finished work
self.queue.join()
- with self.queue.pending_tries_notification:
- self.queue.pending_tries_notification.notify_all()
- for worker in self.workers:
- worker.join()
def response(self):
return self.queue.response
class KeepWriterThread(threading.Thread):
+ TaskFailed = RuntimeError()
+
def __init__(self, queue, data, data_hash, timeout=None):
super(KeepClient.KeepWriterThread, self).__init__()
self.timeout = timeout
self.queue = queue
self.data = data
self.data_hash = data_hash
-
+ self.daemon = True
+
def run(self):
- while not self.queue.empty():
- if self.queue.pending_copies() > 0:
- # Avoid overreplication, wait for some needed re-attempt
- with self.queue.pending_tries_notification:
- if self.queue.pending_tries <= 0:
- self.queue.pending_tries_notification.wait()
- continue # try again when awake
- self.queue.pending_tries -= 1
-
- # Get to work
- try:
- service, service_root = self.queue.get_nowait()
- except Queue.Empty:
- continue
- if service.finished():
- self.queue.task_done()
- continue
- success = bool(service.put(self.data_hash,
- self.data,
- timeout=self.timeout))
- result = service.last_result()
- if success:
- _logger.debug("KeepWriterThread %s succeeded %s+%i %s",
- str(threading.current_thread()),
- self.data_hash,
- len(self.data),
- service_root)
- try:
- replicas_stored = int(result['headers']['x-keep-replicas-stored'])
- except (KeyError, ValueError):
- replicas_stored = 1
-
- self.queue.write_success(result['body'].strip(), replicas_stored)
- else:
- if result.get('status_code', None):
- _logger.debug("Request fail: PUT %s => %s %s",
- self.data_hash,
- result['status_code'],
- result['body'])
- self.queue.write_fail(service, result.get('status_code', None)) # Schedule a re-attempt with next service
- # Mark as done so the queue can be join()ed
- self.queue.task_done()
+ while True:
+ try:
+ service, service_root = self.queue.get_next_task()
+ except Queue.Empty:
+ return
+ try:
+ locator, copies = self.do_task(service, service_root)
+ except Exception as e:
+ if e is not self.TaskFailed:
+ _logger.exception("Exception in KeepWriterThread")
+ self.queue.write_fail(service)
else:
- # Remove the task from the queue anyways
- try:
- self.queue.get_nowait()
- # Mark as done so the queue can be join()ed
- self.queue.task_done()
- except Queue.Empty:
- continue
+ self.queue.write_success(locator, copies)
+ finally:
+ self.queue.task_done()
+
+ def do_task(self, service, service_root):
+ success = bool(service.put(self.data_hash,
+ self.data,
+ timeout=self.timeout))
+ result = service.last_result()
+
+ if not success:
+ if result.get('status_code', None):
+ _logger.debug("Request fail: PUT %s => %s %s",
+ self.data_hash,
+ result['status_code'],
+ result['body'])
+ raise self.TaskFailed
+
+ _logger.debug("KeepWriterThread %s succeeded %s+%i %s",
+ str(threading.current_thread()),
+ self.data_hash,
+ len(self.data),
+ service_root)
+ try:
+ replicas_stored = int(result['headers']['x-keep-replicas-stored'])
+ except (KeyError, ValueError):
+ replicas_stored = 1
+
+ return result['body'].strip(), replicas_stored
def __init__(self, api_client=None, proxy=None,
it returns the value of `fallback` (httplib2's CA certs by default).
"""
for ca_certs_path in [
+ # Arvados specific:
+ '/etc/arvados/ca-certificates.crt',
# Debian:
'/etc/ssl/certs/ca-certificates.crt',
# Red Hat:
import arvados
import arvados.commands._util as arv_cmd
+from arvados._version import __version__
+
logger = logging.getLogger('arvados.arv-get')
def abort(msg, code=1):
parser = argparse.ArgumentParser(
description='Copy data from Keep to a local file or pipe.',
parents=[arv_cmd.retry_opt])
+parser.add_argument('--version', action='version',
+ version="%s %s" % (sys.argv[0], __version__),
+ help='Print version and exit.')
parser.add_argument('locator', type=str,
help="""
Collection locator, optionally with a file path or prefix.
import string
import sys
+import arvados
+from arvados._version import __version__
+
parser = argparse.ArgumentParser(
description='Read manifest on standard input and put normalized manifest on standard output.')
-parser.add_argument('--extract', type=str, help="The file to extract from the input manifest")
-parser.add_argument('--strip', action='store_true', help="Strip authorization tokens")
+parser.add_argument('--extract', type=str,
+ help="The file to extract from the input manifest")
+parser.add_argument('--strip', action='store_true',
+ help="Strip authorization tokens")
+parser.add_argument('--version', action='version',
+ version="%s %s" % (sys.argv[0], __version__),
+ help='Print version and exit.')
args = parser.parse_args()
-import arvados
-
r = sys.stdin.read()
cr = arvados.CollectionReader(r)
install_requires=[
'google-api-python-client==1.4.2',
'oauth2client >=1.4.6, <2',
- 'pyasn1-modules==0.0.5',
'ciso8601',
'httplib2',
'pycurl >=7.19.5.1, <7.21.5',
'python-gflags<3.0',
+ 'setuptools',
'ws4py'
],
test_suite='tests',
#!/usr/bin/env python
import arvados
+import contextlib
import errno
import hashlib
import httplib
import pycurl
import Queue
import shutil
+import sys
import tempfile
import unittest
def str_keep_locator(s):
return '{}+{}'.format(hashlib.md5(s).hexdigest(), len(s))
+@contextlib.contextmanager
+def redirected_streams(stdout=None, stderr=None):
+ orig_stdout, sys.stdout = sys.stdout, stdout or sys.stdout
+ orig_stderr, sys.stderr = sys.stderr, stderr or sys.stderr
+ try:
+ yield
+ finally:
+ sys.stdout = orig_stdout
+ sys.stderr = orig_stderr
+
+
class FakeCurl:
@classmethod
def make(cls, code, body='', headers={}):
proxy_redirect //download:{{KEEPWEBPORT}}/ https://$host:{{KEEPWEBDLSSLPORT}}/;
}
}
+ upstream ws {
+ server localhost:{{WSPORT}};
+ }
+ server {
+ listen *:{{WSSPORT}} ssl default_server;
+ server_name ~^(?<request_host>.*)$;
+ ssl_certificate {{SSLCERT}};
+ ssl_certificate_key {{SSLKEY}};
+ location / {
+ proxy_pass http://ws;
+ proxy_set_header Upgrade $http_upgrade;
+ proxy_set_header Connection "upgrade";
+ proxy_set_header Host $request_host:{{WSPORT}};
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ }
+ }
}
my_api_host = None
_cached_config = {}
+_cached_db_config = {}
def find_server_pid(PID_PATH, wait=10):
now = time.time()
os.makedirs(gitdir)
subprocess.check_output(['tar', '-xC', gitdir, '-f', gittarball])
+ # The nginx proxy isn't listening here yet, but we need to choose
+ # the wss:// port now so we can write the API server config file.
+ wss_port = find_available_port()
+ _setport('wss', wss_port)
+
port = find_available_port()
env = os.environ.copy()
env['RAILS_ENV'] = 'test'
- env['ARVADOS_WEBSOCKETS'] = 'yes'
+ env['ARVADOS_TEST_WSS_PORT'] = str(wss_port)
+ if env.get('ARVADOS_TEST_EXPERIMENTAL_WS'):
+ env.pop('ARVADOS_WEBSOCKETS', None)
+ else:
+ env['ARVADOS_WEBSOCKETS'] = 'yes'
env.pop('ARVADOS_TEST_API_HOST', None)
env.pop('ARVADOS_API_HOST', None)
env.pop('ARVADOS_API_HOST_INSECURE', None)
kill_server_pid(_pidfile('api'))
my_api_host = None
+def run_ws():
+ if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+ return
+ stop_ws()
+ port = find_available_port()
+ conf = os.path.join(TEST_TMPDIR, 'ws.yml')
+ with open(conf, 'w') as f:
+ f.write("""
+Client:
+ APIHost: {}
+ Insecure: true
+Listen: :{}
+LogLevel: {}
+Postgres:
+ host: {}
+ dbname: {}
+ user: {}
+ password: {}
+ sslmode: require
+ """.format(os.environ['ARVADOS_API_HOST'],
+ port,
+ ('info' if os.environ.get('ARVADOS_DEBUG', '') in ['','0'] else 'debug'),
+ _dbconfig('host'),
+ _dbconfig('database'),
+ _dbconfig('username'),
+ _dbconfig('password')))
+ logf = open(_fifo2stderr('ws'), 'w')
+ ws = subprocess.Popen(
+ ["ws", "-config", conf],
+ stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True)
+ with open(_pidfile('ws'), 'w') as f:
+ f.write(str(ws.pid))
+ _wait_until_port_listens(port)
+ _setport('ws', port)
+ return port
+
+def stop_ws():
+ if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+ return
+ kill_server_pid(_pidfile('ws'))
+
def _start_keep(n, keep_args):
keep0 = tempfile.mkdtemp()
port = find_available_port()
def run_nginx():
if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
return
+ stop_nginx()
nginxconf = {}
nginxconf['KEEPWEBPORT'] = _getport('keep-web')
nginxconf['KEEPWEBDLSSLPORT'] = find_available_port()
nginxconf['KEEPPROXYSSLPORT'] = find_available_port()
nginxconf['GITPORT'] = _getport('arv-git-httpd')
nginxconf['GITSSLPORT'] = find_available_port()
+ nginxconf['WSPORT'] = _getport('ws')
+ nginxconf['WSSPORT'] = _getport('wss')
nginxconf['SSLCERT'] = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'self-signed.pem')
nginxconf['SSLKEY'] = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'self-signed.key')
nginxconf['ACCESSLOG'] = _fifo2stderr('nginx_access_log')
except IOError:
return 9
+def _dbconfig(key):
+ global _cached_db_config
+ if not _cached_db_config:
+ _cached_db_config = yaml.load(open(os.path.join(
+ SERVICES_SRC_DIR, 'api', 'config', 'database.yml')))
+ return _cached_db_config['test'][key]
+
def _apiconfig(key):
+ global _cached_config
if _cached_config:
return _cached_config[key]
def _load(f, required=True):
original environment.
"""
MAIN_SERVER = None
+ WS_SERVER = None
KEEP_SERVER = None
KEEP_PROXY_SERVER = None
KEEP_WEB_SERVER = None
os.environ.pop('ARVADOS_EXTERNAL_CLIENT', None)
for server_kwargs, start_func, stop_func in (
(cls.MAIN_SERVER, run, reset),
+ (cls.WS_SERVER, run_ws, stop_ws),
(cls.KEEP_SERVER, run_keep, stop_keep),
(cls.KEEP_PROXY_SERVER, run_keep_proxy, stop_keep_proxy),
(cls.KEEP_WEB_SERVER, run_keep_web, stop_keep_web)):
if __name__ == "__main__":
actions = [
'start', 'stop',
+ 'start_ws', 'stop_ws',
'start_keep', 'stop_keep',
'start_keep_proxy', 'stop_keep_proxy',
'start_keep-web', 'stop_keep-web',
print(host)
elif args.action == 'stop':
stop(force=('ARVADOS_TEST_API_HOST' not in os.environ))
+ elif args.action == 'start_ws':
+ run_ws()
+ elif args.action == 'stop_ws':
+ stop_ws()
elif args.action == 'start_keep':
run_keep(enforce_permissions=args.keep_enforce_permissions, num_servers=args.num_keep_servers)
elif args.action == 'stop_keep':
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import io
+import os
+import sys
+import tempfile
+import unittest
+
+import arvados.commands.arv_copy as arv_copy
+import arvados_testutil as tutil
+
+class ArvCopyTestCase(unittest.TestCase):
+ def run_copy(self, args):
+ sys.argv = ['arv-copy'] + args
+ return arv_copy.main()
+
+ def test_unsupported_arg(self):
+ with self.assertRaises(SystemExit):
+ self.run_copy(['-x=unknown'])
+
+ def test_version_argument(self):
+ err = io.BytesIO()
+ out = io.BytesIO()
+ with tutil.redirected_streams(stdout=out, stderr=err):
+ with self.assertRaises(SystemExit):
+ self.run_copy(['--version'])
+ self.assertEqual(out.getvalue(), '')
+ self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import io
+import os
+import sys
+import tempfile
+import unittest
+
+import arvados.commands.keepdocker as arv_keepdocker
+import arvados_testutil as tutil
+
+
+class ArvKeepdockerTestCase(unittest.TestCase):
+ def run_arv_keepdocker(self, args):
+ sys.argv = ['arv-keepdocker'] + args
+ return arv_keepdocker.main()
+
+ def test_unsupported_arg(self):
+ with self.assertRaises(SystemExit):
+ self.run_arv_keepdocker(['-x=unknown'])
+
+ def test_version_argument(self):
+ err = io.BytesIO()
+ out = io.BytesIO()
+ with tutil.redirected_streams(stdout=out, stderr=err):
+ with self.assertRaises(SystemExit):
+ self.run_arv_keepdocker(['--version'])
+ self.assertEqual(out.getvalue(), '')
+ self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
# -*- coding: utf-8 -*-
import io
+import os
import random
-
+import sys
import mock
+import tempfile
import arvados.errors as arv_error
import arvados.commands.ls as arv_ls
import run_test_server
-from arvados_testutil import str_keep_locator
+from arvados_testutil import str_keep_locator, redirected_streams
class ArvLsTestCase(run_test_server.TestCaseWithServers):
FAKE_UUID = 'zzzzz-4zz18-12345abcde12345'
arv_error.NotFoundError)
self.assertNotEqual(0, self.run_ls([self.FAKE_UUID], api_client))
self.assertNotEqual('', self.stderr.getvalue())
+
+ def test_version_argument(self):
+ err = io.BytesIO()
+ out = io.BytesIO()
+ with redirected_streams(stdout=out, stderr=err):
+ with self.assertRaises(SystemExit):
+ self.run_ls(['--version'], None)
+ self.assertEqual(out.getvalue(), '')
+ self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import subprocess
+import sys
+import tempfile
+import unittest
+
+
+class ArvNormalizeTestCase(unittest.TestCase):
+ def run_arv_normalize(self, args=[]):
+ p = subprocess.Popen([sys.executable, 'bin/arv-normalize'] + args,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ (stdout, stderr) = p.communicate()
+ return p.returncode, stdout, stderr
+
+ def test_unsupported_arg(self):
+ returncode, out, err = self.run_arv_normalize(['-x=unknown'])
+ self.assertNotEqual(0, returncode)
+
+ def test_version_argument(self):
+ returncode, out, err = self.run_arv_normalize(['--version'])
+ self.assertEqual(0, returncode)
+ self.assertEqual('', out)
+ self.assertNotEqual('', err)
+ self.assertRegexpMatches(err, "[0-9]+\.[0-9]+\.[0-9]+")
# -*- coding: utf-8 -*-
import apiclient
+import io
import mock
import os
import pwd
[],
['/dev/null'],
['/dev/null', '--filename', 'empty'],
- ['/tmp'],
- ['/tmp', '--max-manifest-depth', '0'],
- ['/tmp', '--max-manifest-depth', '1']
+ ['/tmp']
]
def tearDown(self):
class ArvPutUploadJobTest(run_test_server.TestCaseWithServers,
ArvadosBaseTestCase):
+
def setUp(self):
super(ArvPutUploadJobTest, self).setUp()
run_test_server.authorize_with('active')
def test_writer_works_without_cache(self):
cwriter = arv_put.ArvPutUploadJob(['/dev/null'], resume=False)
- cwriter.start()
+ cwriter.start(save_collection=False)
self.assertEqual(". d41d8cd98f00b204e9800998ecf8427e+0 0:0:null\n", cwriter.manifest_text())
def test_writer_works_with_cache(self):
f.write('foo')
f.flush()
cwriter = arv_put.ArvPutUploadJob([f.name])
- cwriter.start()
- self.assertEqual(3, cwriter.bytes_written)
+ cwriter.start(save_collection=False)
+ self.assertEqual(3, cwriter.bytes_written - cwriter.bytes_skipped)
# Don't destroy the cache, and start another upload
cwriter_new = arv_put.ArvPutUploadJob([f.name])
- cwriter_new.start()
+ cwriter_new.start(save_collection=False)
cwriter_new.destroy_cache()
- self.assertEqual(0, cwriter_new.bytes_written)
+ self.assertEqual(0, cwriter_new.bytes_written - cwriter_new.bytes_skipped)
def make_progress_tester(self):
progression = []
progression, reporter = self.make_progress_tester()
cwriter = arv_put.ArvPutUploadJob([f.name],
reporter=reporter, bytes_expected=expect_count)
- cwriter.start()
+ cwriter.start(save_collection=False)
cwriter.destroy_cache()
self.assertIn((3, expect_count), progression)
def test_writer_upload_directory(self):
cwriter = arv_put.ArvPutUploadJob([self.tempdir])
- cwriter.start()
+ cwriter.start(save_collection=False)
cwriter.destroy_cache()
self.assertEqual(1024*(1+2+3+4+5), cwriter.bytes_written)
writer = arv_put.ArvPutUploadJob([self.large_file_name],
replication_desired=1)
with self.assertRaises(SystemExit):
- writer.start()
- self.assertLess(writer.bytes_written,
- os.path.getsize(self.large_file_name))
+ writer.start(save_collection=False)
+ # Confirm that the file was partially uploaded
+ self.assertGreater(writer.bytes_written, 0)
+ self.assertLess(writer.bytes_written,
+ os.path.getsize(self.large_file_name))
# Retry the upload
writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
replication_desired=1)
- writer2.start()
- self.assertEqual(writer.bytes_written + writer2.bytes_written,
+ writer2.start(save_collection=False)
+ self.assertEqual(writer.bytes_written + writer2.bytes_written - writer2.bytes_skipped,
+ os.path.getsize(self.large_file_name))
+ writer2.destroy_cache()
+
+ def test_no_resume_when_asked(self):
+ def wrapped_write(*args, **kwargs):
+ data = args[1]
+ # Exit only on last block
+ if len(data) < arvados.config.KEEP_BLOCK_SIZE:
+ raise SystemExit("Simulated error")
+ return self.arvfile_write(*args, **kwargs)
+
+ with mock.patch('arvados.arvfile.ArvadosFileWriter.write',
+ autospec=True) as mocked_write:
+ mocked_write.side_effect = wrapped_write
+ writer = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1)
+ with self.assertRaises(SystemExit):
+ writer.start(save_collection=False)
+ # Confirm that the file was partially uploaded
+ self.assertGreater(writer.bytes_written, 0)
+ self.assertLess(writer.bytes_written,
+ os.path.getsize(self.large_file_name))
+ # Retry the upload, this time without resume
+ writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ resume=False)
+ writer2.start(save_collection=False)
+ self.assertEqual(writer2.bytes_skipped, 0)
+ self.assertEqual(writer2.bytes_written,
os.path.getsize(self.large_file_name))
writer2.destroy_cache()
+ def test_no_resume_when_no_cache(self):
+ def wrapped_write(*args, **kwargs):
+ data = args[1]
+ # Exit only on last block
+ if len(data) < arvados.config.KEEP_BLOCK_SIZE:
+ raise SystemExit("Simulated error")
+ return self.arvfile_write(*args, **kwargs)
+
+ with mock.patch('arvados.arvfile.ArvadosFileWriter.write',
+ autospec=True) as mocked_write:
+ mocked_write.side_effect = wrapped_write
+ writer = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1)
+ with self.assertRaises(SystemExit):
+ writer.start(save_collection=False)
+ # Confirm that the file was partially uploaded
+ self.assertGreater(writer.bytes_written, 0)
+ self.assertLess(writer.bytes_written,
+ os.path.getsize(self.large_file_name))
+ # Retry the upload, this time without cache usage
+ writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ resume=False,
+ use_cache=False)
+ writer2.start(save_collection=False)
+ self.assertEqual(writer2.bytes_skipped, 0)
+ self.assertEqual(writer2.bytes_written,
+ os.path.getsize(self.large_file_name))
+ writer2.destroy_cache()
+
+
+ def test_dry_run_feature(self):
+ def wrapped_write(*args, **kwargs):
+ data = args[1]
+ # Exit only on last block
+ if len(data) < arvados.config.KEEP_BLOCK_SIZE:
+ raise SystemExit("Simulated error")
+ return self.arvfile_write(*args, **kwargs)
+
+ with mock.patch('arvados.arvfile.ArvadosFileWriter.write',
+ autospec=True) as mocked_write:
+ mocked_write.side_effect = wrapped_write
+ writer = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1)
+ with self.assertRaises(SystemExit):
+ writer.start(save_collection=False)
+ # Confirm that the file was partially uploaded
+ self.assertGreater(writer.bytes_written, 0)
+ self.assertLess(writer.bytes_written,
+ os.path.getsize(self.large_file_name))
+ # Retry the upload using dry_run to check if there is a pending upload
+ writer2 = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ dry_run=True)
+ with self.assertRaises(arv_put.ArvPutUploadIsPending):
+ writer2.start(save_collection=False)
+ # Complete the pending upload
+ writer3 = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1)
+ writer3.start(save_collection=False)
+ # Confirm there's no pending upload with dry_run=True
+ writer4 = arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ dry_run=True)
+ with self.assertRaises(arv_put.ArvPutUploadNotPending):
+ writer4.start(save_collection=False)
+ writer4.destroy_cache()
+ # Test obvious cases
+ with self.assertRaises(arv_put.ArvPutUploadIsPending):
+ arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ dry_run=True,
+ resume=False,
+ use_cache=False)
+ with self.assertRaises(arv_put.ArvPutUploadIsPending):
+ arv_put.ArvPutUploadJob([self.large_file_name],
+ replication_desired=1,
+ dry_run=True,
+ resume=False)
+
class ArvadosExpectedBytesTest(ArvadosBaseTestCase):
TEST_SIZE = os.path.getsize(__file__)
delattr(self, outbuf)
super(ArvadosPutTest, self).tearDown()
+ def test_version_argument(self):
+ err = io.BytesIO()
+ out = io.BytesIO()
+ with tutil.redirected_streams(stdout=out, stderr=err):
+ with self.assertRaises(SystemExit):
+ self.call_main_with_args(['--version'])
+ self.assertEqual(out.getvalue(), '')
+ self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
+
def test_simple_file_put(self):
self.call_main_on_test_file()
self.assertEqual(1, len(collection_list))
return collection_list[0]
+ def test_put_collection_with_later_update(self):
+ tmpdir = self.make_tmpdir()
+ with open(os.path.join(tmpdir, 'file1'), 'w') as f:
+ f.write('Relaxing in basins at the end of inlets terminates the endless tests from the box')
+ col = self.run_and_find_collection("", ['--no-progress', tmpdir])
+ self.assertNotEqual(None, col['uuid'])
+ # Add a new file to the directory
+ with open(os.path.join(tmpdir, 'file2'), 'w') as f:
+ f.write('The quick brown fox jumped over the lazy dog')
+ updated_col = self.run_and_find_collection("", ['--no-progress', '--update-collection', col['uuid'], tmpdir])
+ self.assertEqual(col['uuid'], updated_col['uuid'])
+ # Get the manifest and check that the new file is being included
+ c = arv_put.api_client.collections().get(uuid=updated_col['uuid']).execute()
+ self.assertRegexpMatches(c['manifest_text'], r'^\. .*:44:file2\n')
+
def test_put_collection_with_high_redundancy(self):
# Write empty data: we're not testing CollectionWriter, just
# making sure collections.create tells the API server what our
--- /dev/null
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import io
+import os
+import sys
+import tempfile
+import unittest
+
+import arvados.commands.run as arv_run
+import arvados_testutil as tutil
+
+class ArvRunTestCase(unittest.TestCase):
+ def run_arv_run(self, args):
+ sys.argv = ['arv-run'] + args
+ return arv_run.main()
+
+ def test_unsupported_arg(self):
+ with self.assertRaises(SystemExit):
+ self.run_arv_run(['-x=unknown'])
+
+ def test_version_argument(self):
+ err = io.BytesIO()
+ out = io.BytesIO()
+ with tutil.redirected_streams(stdout=out, stderr=err):
+ with self.assertRaises(SystemExit):
+ self.run_arv_run(['--version'])
+ self.assertEqual(out.getvalue(), '')
+ self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
#!/usr/bin/env python
+import io
+import os
+import sys
+import tempfile
import unittest
+
import arvados.errors as arv_error
import arvados.commands.ws as arv_ws
+import arvados_testutil as tutil
class ArvWsTestCase(unittest.TestCase):
def run_ws(self, args):
def test_unsupported_arg(self):
with self.assertRaises(SystemExit):
self.run_ws(['-x=unknown'])
+
+ def test_version_argument(self):
+ err = io.BytesIO()
+ out = io.BytesIO()
+ with tutil.redirected_streams(stdout=out, stderr=err):
+ with self.assertRaises(SystemExit):
+ self.run_ws(['--version'])
+ self.assertEqual(out.getvalue(), '')
+ self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
#!/usr/bin/env python
import bz2
+import datetime
import gzip
import io
import mock
def read_for_test(self, reader, byte_count, **kwargs):
return ''.join(reader.readlines(**kwargs))
+
+class ArvadosFileTestCase(unittest.TestCase):
+ def datetime_to_hex(self, dt):
+ return hex(int(time.mktime(dt.timetuple())))[2:]
+
+ def test_permission_expired(self):
+ base_manifest = ". 781e5e245d69b566979b86e28d23f2c7+10+A715fd31f8111894f717eb1003c1b0216799dd9ec@{} 0:10:count.txt\n"
+ now = datetime.datetime.now()
+ a_week_ago = now - datetime.timedelta(days=7)
+ a_month_ago = now - datetime.timedelta(days=30)
+ a_week_from_now = now + datetime.timedelta(days=7)
+ with Collection(base_manifest.format(self.datetime_to_hex(a_week_from_now))) as c:
+ self.assertFalse(c.find('count.txt').permission_expired())
+ with Collection(base_manifest.format(self.datetime_to_hex(a_week_ago))) as c:
+ f = c.find('count.txt')
+ self.assertTrue(f.permission_expired())
+ self.assertTrue(f.permission_expired(a_week_from_now))
+ self.assertFalse(f.permission_expired(a_month_ago))
+
+
class BlockManagerTest(unittest.TestCase):
def test_bufferblock_append(self):
keep = ArvadosFileWriterTestCase.MockKeep({})
c.find("/.")
with self.assertRaises(arvados.errors.ArgumentError):
c.find("")
+ self.assertIs(c.find("./nonexistant.txt"), None)
+ self.assertIs(c.find("./nonexistantsubdir/nonexistant.txt"), None)
def test_remove_in_subdir(self):
c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
self.assertEqual(200, events.get(True, 5)['status'])
human = arvados.api('v1').humans().create(body={}).execute()
- log_object_uuids = []
- for i in range(0, expected):
- log_object_uuids.append(events.get(True, 5)['object_uuid'])
-
+ want_uuids = []
if expected > 0:
- self.assertIn(human['uuid'], log_object_uuids)
-
+ want_uuids.append(human['uuid'])
if expected > 1:
- self.assertIn(ancestor['uuid'], log_object_uuids)
+ want_uuids.append(ancestor['uuid'])
+ log_object_uuids = []
+ while set(want_uuids) - set(log_object_uuids):
+ log_object_uuids.append(events.get(True, 5)['object_uuid'])
- with self.assertRaises(Queue.Empty):
- # assertEqual just serves to show us what unexpected thing
- # comes out of the queue when the assertRaises fails; when
- # the test passes, this assertEqual doesn't get called.
- self.assertEqual(events.get(True, 2), None)
+ if expected < 2:
+ with self.assertRaises(Queue.Empty):
+ # assertEqual just serves to show us what unexpected
+ # thing comes out of the queue when the assertRaises
+ # fails; when the test passes, this assertEqual
+ # doesn't get called.
+ self.assertEqual(events.get(True, 2), None)
def test_subscribe_websocket(self):
self._test_subscribe(
return time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(t)) + self.isotz(-time.timezone/60)
def isotz(self, offset):
- """Convert minutes-east-of-UTC to ISO8601 time zone designator"""
- return '{:+03d}{:02d}'.format(offset/60, offset%60)
+ """Convert minutes-east-of-UTC to RFC3339- and ISO-compatible time zone designator"""
+ return '{:+03d}:{:02d}'.format(offset/60, offset%60)
# Test websocket reconnection on (un)execpted close
def _test_websocket_reconnect(self, close_unexpected):
self.check_exception(copies=2, num_retries=3)
-class KeepClientAvoidClientOverreplicationTestCase(unittest.TestCase, tutil.ApiClientMock):
-
-
+class AvoidOverreplication(unittest.TestCase, tutil.ApiClientMock):
+
class FakeKeepService(object):
- def __init__(self, delay, will_succeed, replicas=1):
+ def __init__(self, delay, will_succeed=False, will_raise=None, replicas=1):
self.delay = delay
- self.success = will_succeed
+ self.will_succeed = will_succeed
+ self.will_raise = will_raise
self._result = {}
self._result['headers'] = {}
self._result['headers']['x-keep-replicas-stored'] = str(replicas)
self._result['body'] = 'foobar'
-
+
def put(self, data_hash, data, timeout):
time.sleep(self.delay)
- return self.success
-
+ if self.will_raise is not None:
+ raise self.will_raise
+ return self.will_succeed
+
def last_result(self):
- return self._result
-
+ if self.will_succeed:
+ return self._result
+
def finished(self):
return False
-
- def test_only_write_enough_on_success(self):
- copies = 3
- pool = arvados.KeepClient.KeepWriterThreadPool(
+ def setUp(self):
+ self.copies = 3
+ self.pool = arvados.KeepClient.KeepWriterThreadPool(
data = 'foo',
data_hash = 'acbd18db4cc2f85cedef654fccc4a4d8+3',
- max_service_replicas = copies,
- copies = copies
+ max_service_replicas = self.copies,
+ copies = self.copies
)
+
+ def test_only_write_enough_on_success(self):
for i in range(10):
ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
- pool.add_task(ks, None)
- pool.join()
- self.assertEqual(pool.done(), copies)
+ self.pool.add_task(ks, None)
+ self.pool.join()
+ self.assertEqual(self.pool.done(), self.copies)
def test_only_write_enough_on_partial_success(self):
- copies = 3
- pool = arvados.KeepClient.KeepWriterThreadPool(
- data = 'foo',
- data_hash = 'acbd18db4cc2f85cedef654fccc4a4d8+3',
- max_service_replicas = copies,
- copies = copies
- )
for i in range(5):
ks = self.FakeKeepService(delay=i/10.0, will_succeed=False)
- pool.add_task(ks, None)
+ self.pool.add_task(ks, None)
+ ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
+ self.pool.add_task(ks, None)
+ self.pool.join()
+ self.assertEqual(self.pool.done(), self.copies)
+
+ def test_only_write_enough_when_some_crash(self):
+ for i in range(5):
+ ks = self.FakeKeepService(delay=i/10.0, will_raise=Exception())
+ self.pool.add_task(ks, None)
+ ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
+ self.pool.add_task(ks, None)
+ self.pool.join()
+ self.assertEqual(self.pool.done(), self.copies)
+
+ def test_fail_when_too_many_crash(self):
+ for i in range(self.copies+1):
+ ks = self.FakeKeepService(delay=i/10.0, will_raise=Exception())
+ self.pool.add_task(ks, None)
+ for i in range(self.copies-1):
ks = self.FakeKeepService(delay=i/10.0, will_succeed=True)
- pool.add_task(ks, None)
- pool.join()
- self.assertEqual(pool.done(), copies)
+ self.pool.add_task(ks, None)
+ self.pool.join()
+ self.assertEqual(self.pool.done(), self.copies-1)
@tutil.skip_sleep
source 'https://rubygems.org'
-gem 'rails', '~> 3.2.0'
+gem 'rails', '~> 3.2'
# Bundle edge Rails instead:
# gem 'rails', :git => 'git://github.com/rails/rails.git'
# Note: "require: false" here tells bunder not to automatically
# 'require' the packages during application startup. Installation is
# still mandatory.
+ gem 'test-unit', '~> 3.0', require: false
gem 'simplecov', '~> 0.7.1', require: false
gem 'simplecov-rcov', require: false
gem 'mocha', require: false
end
-# This might not be needed in :test and :development, but we load it
-# anyway to make sure it always gets in Gemfile.lock and to help
-# reveal install problems sooner rather than later.
+# pg is the only supported database driver.
gem 'pg'
# Start using multi_json once we are on Rails 3.2;
# Gems used only for assets and not required
# in production environments by default.
group :assets do
- gem 'sass-rails', '>= 3.2.0'
- gem 'coffee-rails', '~> 3.2.0'
+ gem 'sass-rails', '~> 3.2'
+ gem 'coffee-rails', '~> 3.2'
# See https://github.com/sstephenson/execjs#readme for more supported runtimes
gem 'therubyracer'
- gem 'uglifier', '>= 1.0.3'
+ gem 'uglifier', '~> 2.0'
end
gem 'jquery-rails'
gem 'passenger'
-gem 'omniauth', '1.1.1'
-gem 'omniauth-oauth2', '1.1.1'
+gem 'omniauth', '~> 1.1'
+gem 'omniauth-oauth2', '~> 1.1'
gem 'andand'
# pg_power lets us use partial indexes in schema.rb in Rails 3
gem 'pg_power'
-gem 'puma'
+gem 'puma', '~> 2.0'
gem 'sshkey'
gem 'safe_yaml'
gem 'lograge'
gem 'logstash-event'
+
+# Install any plugin gems
+Dir.glob(File.join(File.dirname(__FILE__), 'lib', '**', "Gemfile")) do |gemfile|
+ eval(IO.read(gemfile), binding)
+end
GEM
remote: https://rubygems.org/
specs:
- actionmailer (3.2.17)
- actionpack (= 3.2.17)
+ actionmailer (3.2.22.5)
+ actionpack (= 3.2.22.5)
mail (~> 2.5.4)
- actionpack (3.2.17)
- activemodel (= 3.2.17)
- activesupport (= 3.2.17)
+ actionpack (3.2.22.5)
+ activemodel (= 3.2.22.5)
+ activesupport (= 3.2.22.5)
builder (~> 3.0.0)
erubis (~> 2.7.0)
journey (~> 1.0.4)
rack-cache (~> 1.2)
rack-test (~> 0.6.1)
sprockets (~> 2.2.1)
- activemodel (3.2.17)
- activesupport (= 3.2.17)
+ activemodel (3.2.22.5)
+ activesupport (= 3.2.22.5)
builder (~> 3.0.0)
- activerecord (3.2.17)
- activemodel (= 3.2.17)
- activesupport (= 3.2.17)
+ activerecord (3.2.22.5)
+ activemodel (= 3.2.22.5)
+ activesupport (= 3.2.22.5)
arel (~> 3.0.2)
tzinfo (~> 0.3.29)
- activeresource (3.2.17)
- activemodel (= 3.2.17)
- activesupport (= 3.2.17)
- activesupport (3.2.17)
+ activeresource (3.2.22.5)
+ activemodel (= 3.2.22.5)
+ activesupport (= 3.2.22.5)
+ activesupport (3.2.22.5)
i18n (~> 0.6, >= 0.6.4)
multi_json (~> 1.0)
- acts_as_api (0.4.2)
+ acts_as_api (0.4.3)
activemodel (>= 3.0.0)
activesupport (>= 3.0.0)
rack (>= 1.1.0)
addressable (2.4.0)
andand (1.3.3)
arel (3.0.3)
- arvados (0.1.20160420143004)
+ arvados (0.1.20160513152536)
activesupport (>= 3, < 4.2.6)
andand (~> 1.3, >= 1.3.3)
- google-api-client (>= 0.7, < 0.9)
+ google-api-client (>= 0.7, < 0.8.9)
i18n (~> 0)
json (~> 1.7, >= 1.7.7)
jwt (>= 0.1.5, < 2)
extlib (>= 0.9.15)
multi_json (>= 1.0.0)
builder (3.0.4)
- capistrano (2.15.5)
+ capistrano (2.15.9)
highline
net-scp (>= 1.0.0)
net-sftp (>= 2.0.0)
net-ssh (>= 2.0.14)
net-ssh-gateway (>= 1.1.0)
- coffee-rails (3.2.1)
+ coffee-rails (3.2.2)
coffee-script (>= 2.2.0)
- railties (~> 3.2.0.beta)
- coffee-script (2.2.0)
+ railties (~> 3.2.0)
+ coffee-script (2.4.1)
coffee-script-source
execjs
- coffee-script-source (1.7.0)
+ coffee-script-source (1.10.0)
curb (0.9.3)
- daemon_controller (1.2.0)
- database_cleaner (1.2.0)
+ database_cleaner (1.5.3)
erubis (2.7.0)
- eventmachine (1.0.3)
- execjs (2.0.2)
+ eventmachine (1.2.0.1)
+ execjs (2.7.0)
extlib (0.9.16)
- factory_girl (4.4.0)
+ factory_girl (4.7.0)
activesupport (>= 3.0.0)
- factory_girl_rails (4.4.1)
- factory_girl (~> 4.4.0)
+ factory_girl_rails (4.7.0)
+ factory_girl (~> 4.7.0)
railties (>= 3.0.0)
faraday (0.9.2)
multipart-post (>= 1.2, < 3)
- faye-websocket (0.7.2)
+ faye-websocket (0.10.4)
eventmachine (>= 0.12.0)
- websocket-driver (>= 0.3.1)
- google-api-client (0.7.1)
- addressable (>= 2.3.2)
- autoparse (>= 0.3.3)
- extlib (>= 0.9.15)
- faraday (>= 0.9.0)
- jwt (>= 0.1.5)
- launchy (>= 2.1.1)
- multi_json (>= 1.0.0)
- retriable (>= 1.4)
- signet (>= 0.5.0)
- uuidtools (>= 2.1.0)
- hashie (1.2.0)
- highline (1.6.21)
+ websocket-driver (>= 0.5.1)
+ google-api-client (0.8.7)
+ activesupport (>= 3.2, < 5.0)
+ addressable (~> 2.3)
+ autoparse (~> 0.3)
+ extlib (~> 0.9)
+ faraday (~> 0.9)
+ googleauth (~> 0.3)
+ launchy (~> 2.4)
+ multi_json (~> 1.10)
+ retriable (~> 1.4)
+ signet (~> 0.6)
+ googleauth (0.5.1)
+ faraday (~> 0.9)
+ jwt (~> 1.4)
+ logging (~> 2.0)
+ memoist (~> 0.12)
+ multi_json (~> 1.11)
+ os (~> 0.9)
+ signet (~> 0.7)
+ hashie (3.4.6)
+ highline (1.7.8)
hike (1.2.3)
- httpauth (0.2.1)
i18n (0.7.0)
journey (1.0.4)
- jquery-rails (3.1.0)
+ jquery-rails (3.1.4)
railties (>= 3.0, < 5.0)
thor (>= 0.14, < 2.0)
json (1.8.3)
- jwt (0.1.13)
- multi_json (>= 1.5)
+ jwt (1.5.6)
launchy (2.4.3)
addressable (~> 2.3)
- libv8 (3.16.14.3)
+ libv8 (3.16.14.15)
+ little-plugger (1.1.4)
+ logging (2.1.0)
+ little-plugger (~> 1.1)
+ multi_json (~> 1.10)
lograge (0.3.6)
actionpack (>= 3)
activesupport (>= 3)
mail (2.5.4)
mime-types (~> 1.16)
treetop (~> 1.4.8)
+ memoist (0.15.0)
metaclass (0.0.4)
mime-types (1.25.1)
- mocha (1.1.0)
+ mocha (1.2.0)
metaclass (~> 0.0.1)
- multi_json (1.12.0)
+ multi_json (1.12.1)
+ multi_xml (0.5.5)
multipart-post (2.0.0)
- net-scp (1.2.0)
+ net-scp (1.2.1)
net-ssh (>= 2.6.5)
net-sftp (2.1.2)
net-ssh (>= 2.6.5)
- net-ssh (2.8.0)
+ net-ssh (3.2.0)
net-ssh-gateway (1.2.0)
net-ssh (>= 2.6.5)
- oauth2 (0.8.1)
- faraday (~> 0.8)
- httpauth (~> 0.1)
- jwt (~> 0.1.4)
- multi_json (~> 1.0)
- rack (~> 1.2)
+ oauth2 (1.2.0)
+ faraday (>= 0.8, < 0.10)
+ jwt (~> 1.0)
+ multi_json (~> 1.3)
+ multi_xml (~> 0.5)
+ rack (>= 1.2, < 3)
oj (2.15.0)
- omniauth (1.1.1)
- hashie (~> 1.2)
- rack
- omniauth-oauth2 (1.1.1)
- oauth2 (~> 0.8.0)
- omniauth (~> 1.0)
- passenger (4.0.41)
- daemon_controller (>= 1.2.0)
+ omniauth (1.3.1)
+ hashie (>= 1.2, < 4)
+ rack (>= 1.0, < 3)
+ omniauth-oauth2 (1.4.0)
+ oauth2 (~> 1.0)
+ omniauth (~> 1.2)
+ os (0.9.6)
+ passenger (5.0.30)
rack
rake (>= 0.8.1)
- pg (0.17.1)
+ pg (0.19.0)
pg_power (1.6.4)
pg
rails (~> 3.1)
- polyglot (0.3.4)
- puma (2.8.2)
- rack (>= 1.1, < 2.0)
- rack (1.4.5)
- rack-cache (1.2)
+ polyglot (0.3.5)
+ power_assert (0.3.1)
+ puma (2.16.0)
+ rack (1.4.7)
+ rack-cache (1.6.1)
rack (>= 0.4)
rack-ssl (1.3.4)
rack
- rack-test (0.6.2)
+ rack-test (0.6.3)
rack (>= 1.0)
- rails (3.2.17)
- actionmailer (= 3.2.17)
- actionpack (= 3.2.17)
- activerecord (= 3.2.17)
- activeresource (= 3.2.17)
- activesupport (= 3.2.17)
+ rails (3.2.22.5)
+ actionmailer (= 3.2.22.5)
+ actionpack (= 3.2.22.5)
+ activerecord (= 3.2.22.5)
+ activeresource (= 3.2.22.5)
+ activesupport (= 3.2.22.5)
bundler (~> 1.0)
- railties (= 3.2.17)
- railties (3.2.17)
- actionpack (= 3.2.17)
- activesupport (= 3.2.17)
+ railties (= 3.2.22.5)
+ railties (3.2.22.5)
+ actionpack (= 3.2.22.5)
+ activesupport (= 3.2.22.5)
rack-ssl (~> 1.3.2)
rake (>= 0.8.7)
rdoc (~> 3.4)
thor (>= 0.14.6, < 2.0)
- rake (10.2.2)
+ rake (11.3.0)
rdoc (3.12.2)
json (~> 1.4)
- ref (1.0.5)
- retriable (2.1.0)
- ruby-prof (0.15.2)
- rvm-capistrano (1.5.1)
+ ref (2.0.0)
+ retriable (1.4.1)
+ ruby-prof (0.16.2)
+ rvm-capistrano (1.5.6)
capistrano (~> 2.15.4)
safe_yaml (1.0.4)
- sass (3.3.4)
+ sass (3.4.22)
sass-rails (3.2.6)
railties (~> 3.2.0)
sass (>= 3.1.10)
tilt (~> 1.3)
- signet (0.5.1)
- addressable (>= 2.2.3)
- faraday (>= 0.9.0.rc5)
- jwt (>= 0.1.5)
- multi_json (>= 1.0.0)
+ signet (0.7.3)
+ addressable (~> 2.3)
+ faraday (~> 0.9)
+ jwt (~> 1.5)
+ multi_json (~> 1.10)
simplecov (0.7.1)
multi_json (~> 1.0)
simplecov-html (~> 0.7.1)
simplecov-html (0.7.1)
simplecov-rcov (0.2.3)
simplecov (>= 0.4.1)
- sprockets (2.2.2)
+ sprockets (2.2.3)
hike (~> 1.2)
multi_json (~> 1.0)
rack (~> 1.0)
tilt (~> 1.1, != 1.3.0)
- sshkey (1.6.1)
- test_after_commit (0.2.3)
+ sshkey (1.8.0)
+ test-unit (3.2.1)
+ power_assert
+ test_after_commit (1.1.0)
+ activerecord (>= 3.2)
themes_for_rails (0.5.1)
rails (>= 3.0.0)
- therubyracer (0.12.1)
+ therubyracer (0.12.2)
libv8 (~> 3.16.14.0)
ref
thor (0.19.1)
polyglot
polyglot (>= 0.3.1)
trollop (2.1.2)
- tzinfo (0.3.39)
- uglifier (2.5.0)
+ tzinfo (0.3.51)
+ uglifier (2.7.2)
execjs (>= 0.3.0)
json (>= 1.8.0)
- uuidtools (2.1.5)
- websocket-driver (0.3.2)
+ websocket-driver (0.6.4)
+ websocket-extensions (>= 0.1.0)
+ websocket-extensions (0.1.2)
PLATFORMS
ruby
andand
arvados (>= 0.1.20150615153458)
arvados-cli (>= 0.1.20161017193526)
- coffee-rails (~> 3.2.0)
+ coffee-rails (~> 3.2)
database_cleaner
factory_girl_rails
faye-websocket
mocha
multi_json
oj
- omniauth (= 1.1.1)
- omniauth-oauth2 (= 1.1.1)
+ omniauth (~> 1.1)
+ omniauth-oauth2 (~> 1.1)
passenger
pg
pg_power
- puma
- rails (~> 3.2.0)
+ puma (~> 2.0)
+ rails (~> 3.2)
ruby-prof
rvm-capistrano
safe_yaml
- sass-rails (>= 3.2.0)
+ sass-rails (~> 3.2)
simplecov (~> 0.7.1)
simplecov-rcov
sshkey
+ test-unit (~> 3.0)
test_after_commit
themes_for_rails
therubyracer
trollop
- uglifier (>= 1.0.3)
+ uglifier (~> 2.0)
+
+BUNDLED WITH
+ 1.13.6
theme :select_theme
- attr_accessor :resource_attrs
+ attr_writer :resource_attrs
begin
rescue_from(Exception,
:with => :render_not_found)
end
+ def initialize *args
+ super
+ @object = nil
+ @objects = nil
+ @offset = nil
+ @limit = nil
+ @select = nil
+ @distinct = nil
+ @response_resource_name = nil
+ @attrs = nil
+ end
+
def default_url_options
if Rails.configuration.host
{:host => Rails.configuration.host}
end
def find_object_by_uuid
- if params[:id] and params[:id].match /\D/
+ if params[:id] and params[:id].match(/\D/)
params[:uuid] = params.delete :id
end
@where = { uuid: params[:uuid] }
}
end
end
- super *opts
+ super(*opts)
end
def select_theme
require "arvados/keep"
class Arvados::V1::CollectionsController < ApplicationController
+ include DbCurrentTime
+
def self.limit_index_columns_read
["manifest_text"]
end
super
end
+ def find_objects_for_index
+ if params[:include_trash] || ['destroy', 'trash'].include?(action_name)
+ @objects = Collection.unscoped.readable_by(*@read_users)
+ end
+ super
+ end
+
def find_object_by_uuid
if loc = Keep::Locator.parse(params[:id])
loc.strip_hints!
manifest_text: c.signed_manifest_text,
}
end
+ true
else
super
end
- true
end
def show
end
end
+ def destroy
+ if !@object.is_trashed
+ @object.update_attributes!(trash_at: db_current_time)
+ end
+ earliest_delete = (@object.trash_at +
+ Rails.configuration.blob_signature_ttl.seconds)
+ if @object.delete_at > earliest_delete
+ @object.update_attributes!(delete_at: earliest_delete)
+ end
+ show
+ end
+
+ def trash
+ if !@object.is_trashed
+ @object.update_attributes!(trash_at: db_current_time)
+ end
+ show
+ end
+
def find_collections(visited, sp, &b)
case sp
when ArvadosModel
visited[uuid] = job.as_api_response
if direction == :search_up
# Follow upstream collections referenced in the script parameters
- find_collections(visited, job) do |hash, uuid|
+ find_collections(visited, job) do |hash, col_uuid|
search_edges(visited, hash, :search_up) if hash
- search_edges(visited, uuid, :search_up) if uuid
+ search_edges(visited, col_uuid, :search_up) if col_uuid
end
elsif direction == :search_down
# Follow downstream job output
Collection,
Human, Specimen, Trait]
- table_names = klasses.map(&:table_name)
+ table_names = Hash[klasses.collect { |k| [k, k.table_name] }]
+
+ disabled_methods = Rails.configuration.disable_api_methods
+ avail_klasses = table_names.select{|k, t| !disabled_methods.include?(t+'.index')}
+ klasses = avail_klasses.keys
+
request_filters.each do |col, op, val|
- if col.index('.') && !table_names.include?(col.split('.', 2)[0])
+ if col.index('.') && !table_names.values.include?(col.split('.', 2)[0])
raise ArgumentError.new("Invalid attribute '#{col}' in filter")
end
end
if Rails.application.config.websocket_address
discovery[:websocketUrl] = Rails.application.config.websocket_address
elsif ENV['ARVADOS_WEBSOCKETS']
- discovery[:websocketUrl] = (root_url.sub /^http/, 'ws') + "websocket"
+ discovery[:websocketUrl] = root_url.sub(/^http/, 'ws') + "websocket"
end
ActiveRecord::Base.descendants.reject(&:abstract_class?).each do |k|
method = d_methods[action.to_sym]
end
if ctl_class.respond_to? "_#{action}_requires_parameters".to_sym
- ctl_class.send("_#{action}_requires_parameters".to_sym).each do |k, v|
+ ctl_class.send("_#{action}_requires_parameters".to_sym).each do |l, v|
if v.is_a? Hash
- method[:parameters][k] = v
+ method[:parameters][l] = v
else
- method[:parameters][k] = {}
+ method[:parameters][l] = {}
end
- if !method[:parameters][k][:default].nil?
+ if !method[:parameters][l][:default].nil?
# The JAVA SDK is sensitive to all values being strings
- method[:parameters][k][:default] = method[:parameters][k][:default].to_s
+ method[:parameters][l][:default] = method[:parameters][l][:default].to_s
end
- method[:parameters][k][:type] ||= 'string'
- method[:parameters][k][:description] ||= ''
- method[:parameters][k][:location] = (route.segment_keys.include?(k) ? 'path' : 'query')
- if method[:parameters][k][:required].nil?
- method[:parameters][k][:required] = v != false
+ method[:parameters][l][:type] ||= 'string'
+ method[:parameters][l][:description] ||= ''
+ method[:parameters][l][:location] = (route.segment_keys.include?(l) ? 'path' : 'query')
+ if method[:parameters][l][:required].nil?
+ method[:parameters][l][:required] = v != false
end
end
end
# use this installation.
@objects = []
else
- current_user_uuid = current_user.uuid
act_as_system_user do
uuids = Link.where("owner_uuid = ? and link_class = ? and name = ? and tail_uuid = ? and head_uuid like ?",
system_user_uuid,
'require',
system_user_uuid,
Collection.uuid_like_pattern).
- collect &:head_uuid
+ collect(&:head_uuid)
@objects = Collection.where('uuid in (?)', uuids)
end
end
end
def apply_filters(model_class=nil)
- return super if @read_users.any? &:is_admin
+ return super if @read_users.any?(&:is_admin)
if params[:uuid] != current_user.andand.uuid
# Non-admin index/show returns very basic information about readable users.
safe_attrs = ["uuid", "is_active", "email", "first_name", "last_name"]
@users = {}
User.eager_load(:authorized_keys).
where('users.uuid in (?)',
- @vms.map { |vm| vm.login_permissions.map &:tail_uuid }.flatten.uniq).
+ @vms.map { |vm| vm.login_permissions.map(&:tail_uuid) }.flatten.uniq).
each do |u|
@users[u.uuid] = u
end
# we can tell they're not valuable.
user_uuids = User.
where('email is null or email not like ?', '%@example.com').
- collect &:uuid
+ collect(&:uuid)
fixture_uuids =
YAML::load_file(File.expand_path('../../../test/fixtures/users.yml',
__FILE__)).
# Create a new ArvadosApiToken handler
# +app+ The next layer of the Rack stack.
def initialize(app = nil, options = nil)
- @app = app if app.respond_to?(:call)
+ @app = app.respond_to?(:call) ? app : nil
end
def call env
if forked && EM.reactor_running?
EM.stop
end
- Thread.new {
- EM.run
- }
+ Thread.new do
+ begin
+ EM.run
+ ensure
+ ActiveRecord::Base.connection.close
+ end
+ end
die_gracefully_on_signal
end
else
# faciliates debugging
Thread.abort_on_exception = true
# just spawn a thread and start it up
- Thread.new {
- EM.run
- }
+ Thread.new do
+ begin
+ EM.run
+ ensure
+ ActiveRecord::Base.connection.close
+ end
+ end
end
# Create actual handler instance object from handler class.
end
def logged_attributes
- attributes.except *Rails.configuration.unlogged_attributes
+ attributes.except(*Rails.configuration.unlogged_attributes)
end
def self.full_text_searchable_columns
parts = full_text_searchable_columns.collect do |column|
"coalesce(#{column},'')"
end
- # We prepend a space to the tsvector() argument here. Otherwise,
- # it might start with a column that has its own (non-full-text)
- # index, which causes Postgres to use the column index instead of
- # the tsvector index, which causes full text queries to be just as
- # slow as if we had no index at all.
- "to_tsvector('english', ' ' || #{parts.join(" || ' ' || ")})"
+ "to_tsvector('english', #{parts.join(" || ' ' || ")})"
end
def self.apply_filters query, filters
end
def foreign_key_attributes
- attributes.keys.select { |a| a.match /_uuid$/ }
+ attributes.keys.select { |a| a.match(/_uuid$/) }
end
def skip_uuid_read_permission_check
foreign_key_attributes.each do |attr|
attr_value = send attr
if attr_value.is_a? String and
- attr_value.match /^[0-9a-f]{32,}(\+[@\w]+)*$/
+ attr_value.match(/^[0-9a-f]{32,}(\+[@\w]+)*$/)
begin
send "#{attr}=", Collection.normalize_uuid(attr_value)
rescue
unless uuid.is_a? String
return nil
end
- resource_class = nil
uuid.match HasUuid::UUID_REGEX do |re|
return uuid_prefixes[re[1]] if uuid_prefixes[re[1]]
end
- if uuid.match /.+@.+/
+ if uuid.match(/.+@.+/)
return Email
end
if self == ArvadosModel
# If called directly as ArvadosModel.find_by_uuid rather than via subclass,
# delegate to the appropriate subclass based on the given uuid.
- self.resource_class_for_uuid(uuid).find_by_uuid(uuid)
+ self.resource_class_for_uuid(uuid).unscoped.find_by_uuid(uuid)
else
super
end
# Return value: true if the locator has a valid signature, false otherwise
# Arguments: signed_blob_locator, opts
#
- def self.verify_signature *args
+ def self.verify_signature(*args)
begin
- self.verify_signature! *args
+ self.verify_signature!(*args)
true
rescue Blob::InvalidSignatureError
false
require 'arvados/keep'
+require 'sweep_trashed_collections'
class Collection < ArvadosModel
extend DbCurrentTime
serialize :properties, Hash
+ before_validation :set_validation_timestamp
before_validation :default_empty_manifest
before_validation :check_encoding
before_validation :check_manifest_validity
before_validation :check_signatures
before_validation :strip_signatures_and_update_replication_confirmed
+ before_validation :ensure_trash_at_not_in_past
+ before_validation :sync_trash_state
+ before_validation :default_trash_interval
validate :ensure_pdh_matches_manifest_text
+ validate :validate_trash_and_delete_timing
before_save :set_file_names
- before_save :expires_at_not_in_past
- # Query only undeleted collections by default.
- default_scope where("expires_at IS NULL or expires_at > statement_timestamp()")
+ # Query only untrashed collections by default.
+ default_scope where("is_trashed = false")
api_accessible :user, extend: :common do |t|
t.add :name
t.add :replication_desired
t.add :replication_confirmed
t.add :replication_confirmed_at
- t.add :expires_at
+ t.add :delete_at
+ t.add :trash_at
+ t.add :is_trashed
+ end
+
+ after_initialize do
+ @signatures_checked = false
+ @computed_pdh_for_manifest_text = false
end
def self.attributes_required_columns
# API response, and never let clients select the
# manifest_text column.
#
- # We need expires_at to determine the correct
- # timestamp in signed_manifest_text.
- 'manifest_text' => ['manifest_text', 'expires_at'],
+ # We need trash_at and is_trashed to determine the
+ # correct timestamp in signed_manifest_text.
+ 'manifest_text' => ['manifest_text', 'trash_at', 'is_trashed'],
'unsigned_manifest_text' => ['manifest_text'],
)
end
# subsequent passes without checking any signatures. This is
# important because the signatures have probably been stripped off
# by the time we get to a second validation pass!
- return true if @signatures_checked and @signatures_checked == computed_pdh
+ if @signatures_checked && @signatures_checked == computed_pdh
+ return true
+ end
if self.manifest_text_changed?
# Check permissions on the collection manifest.
api_token = current_api_client_authorization.andand.api_token
signing_opts = {
api_token: api_token,
- now: db_current_time.to_i,
+ now: @validation_timestamp.to_i,
}
self.manifest_text.each_line do |entry|
entry.split.each do |tok|
utf8 = manifest_text
utf8.force_encoding Encoding::UTF_8
if utf8.valid_encoding? and utf8 == manifest_text.encode(Encoding::UTF_8)
- manifest_text = utf8
+ self.manifest_text = utf8
return true
end
rescue
end
def signed_manifest_text
- if has_attribute? :manifest_text
+ if !has_attribute? :manifest_text
+ return nil
+ elsif is_trashed
+ return manifest_text
+ else
token = current_api_client_authorization.andand.api_token
exp = [db_current_time.to_i + Rails.configuration.blob_signature_ttl,
- expires_at].compact.map(&:to_i).min
- @signed_manifest_text = self.class.sign_manifest manifest_text, token, exp
+ trash_at].compact.map(&:to_i).min
+ self.class.sign_manifest manifest_text, token, exp
end
end
hash_part = nil
size_part = nil
uuid.split('+').each do |token|
- if token.match /^[0-9a-f]{32,}$/
+ if token.match(/^[0-9a-f]{32,}$/)
raise "uuid #{uuid} has multiple hash parts" if hash_part
hash_part = token
- elsif token.match /^\d+$/
+ elsif token.match(/^\d+$/)
raise "uuid #{uuid} has multiple size parts" if size_part
size_part = token
end
super - ["manifest_text"]
end
+ # Intercept all Collection queries to opportunistically run a trash
+ # sweep first, so results never include collections whose trash/delete
+ # deadline has already passed. sweep_if_stale is presumably
+ # rate-limited by Rails.configuration.trash_sweep_interval — confirm
+ # in lib/sweep_trashed_collections.
+ def self.where *args
+ SweepTrashedCollections.sweep_if_stale
+ super
+ end
+
protected
def portable_manifest_text
self.class.munge_manifest_locators(manifest_text) do |match|
super
end
- # If expires_at is being changed to a time in the past, change it to
+ # Use a single timestamp for all validations, even though each
+ # validation runs at a different time.
+ def set_validation_timestamp
+ @validation_timestamp = db_current_time
+ end
+
+ # If trash_at is being changed to a time in the past, change it to
# now. This allows clients to say "expires {client-current-time}"
# without failing due to clock skew, while avoiding odd log entries
# like "expiry date changed to {1 year ago}".
- def expires_at_not_in_past
- if expires_at_changed? and expires_at
- self.expires_at = [db_current_time, expires_at].max
+ def ensure_trash_at_not_in_past
+ if trash_at_changed? && trash_at
+ self.trash_at = [@validation_timestamp, trash_at].max
end
end
+
+
+ # Caller can move into/out of trash by setting/clearing is_trashed
+ # -- however, if the caller also changes trash_at, then any changes
+ # to is_trashed are ignored.
+ def sync_trash_state
+ if is_trashed_changed? && !trash_at_changed?
+ if is_trashed
+ self.trash_at = @validation_timestamp
+ else
+ self.trash_at = nil
+ self.delete_at = nil
+ end
+ end
+ # Derive is_trashed from trash_at. Note precedence: this parses as
+ # (trash_at && trash_at <= @validation_timestamp) || false, so a nil
+ # trash_at is coerced to false rather than stored as nil.
+ self.is_trashed = trash_at && trash_at <= @validation_timestamp || false
+ # Return true so this before_validation callback never halts the chain.
+ true
+ end
+
+ # If trash_at is updated without touching delete_at, automatically
+ # update delete_at to a sensible value.
+ def default_trash_interval
+ if trash_at_changed? && !delete_at_changed?
+ if trash_at.nil?
+ self.delete_at = nil
+ else
+ self.delete_at = trash_at + Rails.configuration.default_trash_lifetime.seconds
+ end
+ end
+ end
+
+ # Validate the relationship between trash_at and delete_at:
+ # - they must be set (or nil) together;
+ # - delete_at must leave at least blob_signature_ttl of grace after
+ #   the earlier of now and the previous trash_at, so signed blob
+ #   references remain valid until they expire;
+ # - delete_at must not precede trash_at.
+ def validate_trash_and_delete_timing
+ if trash_at.nil? != delete_at.nil?
+ errors.add :delete_at, "must be set if trash_at is set, and must be nil otherwise"
+ end
+
+ earliest_delete = ([@validation_timestamp, trash_at_was].compact.min +
+ Rails.configuration.blob_signature_ttl.seconds)
+ if delete_at && delete_at < earliest_delete
+ errors.add :delete_at, "#{delete_at} is too soon: earliest allowed is #{earliest_delete}"
+ end
+
+ # Guard trash_at for nil: when delete_at is set but trash_at is not
+ # (already reported above), comparing Time < nil would raise
+ # ArgumentError instead of returning a validation error.
+ if delete_at && trash_at && delete_at < trash_at
+ errors.add :delete_at, "must not be earlier than trash_at"
+ end
+
+ true
+ end
end
@gitdirbase = Rails.configuration.git_repositories_dir
self.is = nil
Dir.foreach @gitdirbase do |repo|
- next if repo.match /^\./
+ next if repo.match(/^\./)
git_dir = repo.match(/\.git$/) ? repo : File.join(repo, '.git')
repo_name = repo.sub(/\.git$/, '')
ENV['GIT_DIR'] = File.join(@gitdirbase, git_dir)
- IO.foreach("|git rev-list --format=oneline '#{self.descendant.gsub /[^0-9a-f]/,""}'") do |line|
+ IO.foreach("|git rev-list --format=oneline '#{self.descendant.gsub(/[^0-9a-f]/,"")}'") do |line|
self.is = false
- sha1, message = line.strip.split(" ", 2)
+ sha1, _ = line.strip.split(" ", 2)
if sha1 == self.ancestor
self.is = true
break
act_as_system_user do
if self.state == Cancelled
- retryable_requests = ContainerRequest.where("priority > 0 and state = 'Committed' and container_count < container_count_max")
+ retryable_requests = ContainerRequest.where("container_uuid = ? and priority > 0 and state = 'Committed' and container_count < container_count_max", uuid)
else
retryable_requests = []
end
t.add :environment
t.add :expires_at
t.add :filters
+ t.add :log_uuid
t.add :mounts
t.add :name
+ t.add :output_name
t.add :output_path
+ t.add :output_uuid
t.add :priority
t.add :properties
t.add :requesting_container_uuid
t.add :runtime_constraints
+ t.add :scheduling_parameters
t.add :state
t.add :use_existing
- t.add :output_uuid
- t.add :log_uuid
- t.add :scheduling_parameters
end
# Supported states for a container request
['output', 'log'].each do |out_type|
pdh = c.send(out_type)
next if pdh.nil?
+ if self.output_name and out_type == 'output'
+ coll_name = self.output_name
+ else
+ coll_name = "Container #{out_type} for request #{uuid}"
+ end
manifest = Collection.where(portable_data_hash: pdh).first.manifest_text
- coll = Collection.create!(owner_uuid: owner_uuid,
- manifest_text: manifest,
- portable_data_hash: pdh,
- name: "Container #{out_type} for request #{uuid}",
- properties: {
- 'type' => out_type,
- 'container_request' => uuid,
- })
+ begin
+ coll = Collection.create!(owner_uuid: owner_uuid,
+ manifest_text: manifest,
+ portable_data_hash: pdh,
+ name: coll_name,
+ properties: {
+ 'type' => out_type,
+ 'container_request' => uuid,
+ })
+ rescue ActiveRecord::RecordNotUnique => rn
+ # In case this is executed as part of a transaction: When a Postgres exception happens,
+ # the following statements on the same transaction become invalid, so a rollback is
+ # needed. One example is unit tests: every test is enclosed inside a transaction so
+ # that the database can be reverted before every new test starts.
+ # See: http://api.rubyonrails.org/classes/ActiveRecord/Transactions/ClassMethods.html#module-ActiveRecord::Transactions::ClassMethods-label-Exception+handling+and+rolling+back
+ ActiveRecord::Base.connection.execute 'ROLLBACK'
+ raise unless out_type == 'output' and self.output_name
+ # Postgres specific unique name check. See ApplicationController#create for
+ # a detailed explanation.
+ raise unless rn.original_exception.is_a? PG::UniqueViolation
+ err = rn.original_exception
+ detail = err.result.error_field(PG::Result::PG_DIAG_MESSAGE_DETAIL)
+ raise unless /^Key \(owner_uuid, name\)=\([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}, .*?\) already exists\./.match detail
+ # Output collection name collision detected: append a timestamp.
+ coll_name = "#{self.output_name} #{Time.now.getgm.strftime('%FT%TZ')}"
+ retry
+ end
if out_type == 'output'
out_coll = coll.uuid
else
:container_image, :cwd, :description, :environment,
:filters, :mounts, :name, :output_path, :priority,
:properties, :requesting_container_uuid, :runtime_constraints,
- :state, :container_uuid, :use_existing, :scheduling_parameters
+ :state, :container_uuid, :use_existing, :scheduling_parameters,
+ :output_name
when Committed
if container_uuid.nil?
end
# Can update priority, container count, name and description
- permitted.push :priority, :container_count, :container_count_max, :container_uuid, :name, :description
+ permitted.push :priority, :container_count, :container_count_max, :container_uuid,
+ :name, :description
if self.state_changed?
# Allow create-and-commit in a single operation.
permitted.push :command, :container_image, :cwd, :description, :environment,
:filters, :mounts, :name, :output_path, :properties,
:requesting_container_uuid, :runtime_constraints,
- :state, :container_uuid, :use_existing, :scheduling_parameters
+ :state, :container_uuid, :use_existing, :scheduling_parameters,
+ :output_name
end
when Final
(Complete = 'Complete'),
]
+ after_initialize do
+ @need_crunch_dispatch_trigger = false
+ end
+
def assert_finished
update_attributes(finished_at: finished_at || db_current_time,
success: success.nil? ? false : success,
super - ["script_parameters_digest"]
end
+ def self.full_text_searchable_columns
+ super - ["script_parameters_digest"]
+ end
+
def self.load_job_specific_filters attrs, orig_filters, read_users
# Convert Job-specific @filters entries into general SQL filters.
script_info = {"repository" => nil, "script" => nil}
assign_uuid
Commit.tag_in_internal_repository repository, script_version, uuid
rescue
- uuid = uuid_was
+ self.uuid = uuid_was
raise
end
end
output_changed? or
log_changed? or
tasks_summary_changed? or
- state_changed? or
+ (state_changed? && state != Cancelled) or
components_changed?
logger.warn "User #{current_user.uuid if current_user} tried to change protected job attributes on locked #{self.class.to_s} #{uuid_was}"
return false
end
def ensure_no_collection_uuids_in_script_params
- # recursive_hash_search searches recursively through hashes and
- # arrays in 'thing' for string fields matching regular expression
- # 'pattern'. Returns true if pattern is found, false otherwise.
- def recursive_hash_search thing, pattern
- if thing.is_a? Hash
- thing.each do |k, v|
- return true if recursive_hash_search v, pattern
- end
- elsif thing.is_a? Array
- thing.each do |k|
- return true if recursive_hash_search k, pattern
- end
- elsif thing.is_a? String
- return true if thing.match pattern
- end
- false
- end
-
# Fail validation if any script_parameters field includes a string containing a
# collection uuid pattern.
if self.script_parameters_changed?
end
true
end
+
+ # recursive_hash_search searches recursively through hashes and
+ # arrays in 'thing' for string fields matching regular expression
+ # 'pattern'. Returns true if pattern is found, false otherwise.
+ def recursive_hash_search thing, pattern
+ if thing.is_a? Hash
+ thing.each do |k, v|
+ return true if recursive_hash_search v, pattern
+ end
+ elsif thing.is_a? Array
+ thing.each do |k|
+ return true if recursive_hash_search k, pattern
+ end
+ elsif thing.is_a? String
+ return true if thing.match pattern
+ end
+ false
+ end
end
after_update :maybe_invalidate_permissions_cache
after_create :maybe_invalidate_permissions_cache
after_destroy :maybe_invalidate_permissions_cache
- attr_accessor :head_kind, :tail_kind
validate :name_links_are_obsolete
api_accessible :user, extend: :common do |t|
include CommonApiTemplate
serialize :properties, Hash
before_validation :set_default_event_at
- attr_accessor :object, :object_kind
after_save :send_notify
api_accessible :user, extend: :common do |t|
t.add lambda { |x| Rails.configuration.compute_node_nameservers }, :as => :nameservers
end
+ after_initialize do
+ @bypass_arvados_authorization = false
+ end
+
def domain
super || Rails.configuration.compute_node_domain
end
(0..Rails.configuration.max_compute_nodes-1).each do |slot_number|
hostname = hostname_for_slot(slot_number)
hostfile = File.join Rails.configuration.dns_server_conf_dir, "#{hostname}.conf"
- if !File.exists? hostfile
+ if !File.exist? hostfile
n = Node.where(:slot_number => slot_number).first
if n.nil? or n.ip_address.nil?
dns_server_update(hostname, UNUSED_NODE_IP)
before_validation :bootstrap_components
before_validation :update_state
before_validation :verify_status
+ before_validation :update_timestamps_when_state_changes
before_create :set_state_before_save
before_save :set_state_before_save
end
end
+ # Maintain started_at/finished_at automatically as the record moves
+ # through its state machine. Runs only when state changed or the
+ # record is new; existing timestamps are never overwritten (||=).
+ def update_timestamps_when_state_changes
+ return unless state_changed? || new_record?
+
+ case state
+ when RunningOnServer, RunningOnClient
+ self.started_at ||= db_current_time
+ when Failed, Complete
+ # Use a single clock reading so started_at never exceeds finished_at.
+ current_time = db_current_time
+ self.started_at ||= current_time
+ self.finished_at ||= current_time
+ end
+ end
+
end
prefix_match = Regexp.escape(owner.username + "/")
errmsg_start = "must be the owner's username, then '/', then"
end
- if not /^#{prefix_match}[A-Za-z][A-Za-z0-9]*$/.match(name)
+ if not (/^#{prefix_match}[A-Za-z][A-Za-z0-9]*$/.match(name))
errors.add(:name,
"#{errmsg_start} a letter followed by alphanumerics")
false
ALL_PERMISSIONS = {read: true, write: true, manage: true}
+ # Map numeric permission levels (see lib/create_permission_view.sql)
+ # back to read/write/manage flags.
+ PERMS_FOR_VAL =
+ [{},
+ {read: true},
+ {read: true, write: true},
+ {read: true, write: true, manage: true}]
+
def full_name
"#{first_name} #{last_name}".strip
end
def is_invited
!!(self.is_active ||
Rails.configuration.new_users_are_active ||
- self.groups_i_can(:read).select { |x| x.match /-f+$/ }.first)
+ self.groups_i_can(:read).select { |x| x.match(/-f+$/) }.first)
end
def groups_i_can(verb)
# Return a hash of {group_uuid: perm_hash} where perm_hash[:read]
# and perm_hash[:write] are true if this user can read and write
# objects owned by group_uuid.
- #
- # The permission graph is built by repeatedly enumerating all
- # permission links reachable from self.uuid, and then calling
- # search_permissions
def calculate_group_permissions
- permissions_from = {}
- todo = {self.uuid => true}
- done = {}
- # Build the equivalence class of permissions starting with
- # self.uuid. On each iteration of this loop, todo contains
- # the next set of uuids in the permission equivalence class
- # to evaluate.
- while !todo.empty?
- lookup_uuids = todo.keys
- lookup_uuids.each do |uuid| done[uuid] = true end
- todo = {}
- newgroups = []
- # include all groups owned by the current set of uuids.
- Group.where('owner_uuid in (?)', lookup_uuids).each do |group|
- newgroups << [group.owner_uuid, group.uuid, 'can_manage']
- end
- # add any permission links from the current lookup_uuids to a Group.
- Link.where('link_class = ? and tail_uuid in (?) and ' \
- '(head_uuid like ? or (name = ? and head_uuid like ?))',
- 'permission',
- lookup_uuids,
- Group.uuid_like_pattern,
- 'can_manage',
- User.uuid_like_pattern).each do |link|
- newgroups << [link.tail_uuid, link.head_uuid, link.name]
- end
- newgroups.each do |tail_uuid, head_uuid, perm_name|
- unless done.has_key? head_uuid
- todo[head_uuid] = true
- end
- link_permissions = {}
- case perm_name
- when 'can_read'
- link_permissions = {read:true}
- when 'can_write'
- link_permissions = {read:true,write:true}
- when 'can_manage'
- link_permissions = ALL_PERMISSIONS
- end
- permissions_from[tail_uuid] ||= {}
- permissions_from[tail_uuid][head_uuid] ||= {}
- link_permissions.each do |k,v|
- permissions_from[tail_uuid][head_uuid][k] ||= v
- end
- end
+ conn = ActiveRecord::Base.connection
+ self.class.transaction do
+ # Check whether the temporary view has already been created
+ # during this connection. If not, create it.
+ conn.exec_query 'SAVEPOINT check_permission_view'
+ begin
+ conn.exec_query('SELECT 1 FROM permission_view LIMIT 0')
+ rescue
+ conn.exec_query 'ROLLBACK TO SAVEPOINT check_permission_view'
+ sql = File.read(Rails.root.join('lib', 'create_permission_view.sql'))
+ conn.exec_query(sql)
+ ensure
+ conn.exec_query 'RELEASE SAVEPOINT check_permission_view'
end
- perms = search_permissions(self.uuid, permissions_from)
- Rails.cache.write "groups_for_user_#{self.uuid}", perms
- perms
+ end
+
+ group_perms = {}
+ conn.exec_query('SELECT target_owner_uuid, max(perm_level)
+ FROM permission_view
+ WHERE user_uuid = $1
+ AND target_owner_uuid IS NOT NULL
+ GROUP BY target_owner_uuid',
+ # "name" arg is a query label that appears in logs:
+ "group_permissions for #{uuid}",
+ # "binds" arg is an array of [col_id, value] for '$1' vars:
+ [[nil, uuid]],
+ ).rows.each do |group_uuid, max_p_val|
+ group_perms[group_uuid] = PERMS_FOR_VAL[max_p_val.to_i]
+ end
+ Rails.cache.write "groups_for_user_#{self.uuid}", group_perms
+ group_perms
end
# Return a hash of {group_uuid: perm_hash} where perm_hash[:read]
# delete "All users" group read permissions for this user
group = Group.where(name: 'All users').select do |g|
- g[:uuid].match /-f+$/
+ g[:uuid].match(/-f+$/)
end.first
Link.destroy_all(tail_uuid: self.uuid,
head_uuid: group[:uuid],
# The default is 2 weeks.
blob_signature_ttl: 1209600
- # Default lifetime for ephemeral collections: 2 weeks.
+ # Default lifetime for ephemeral collections: 2 weeks. This must not
+ # be less than blob_signature_ttl.
default_trash_lifetime: 1209600
+ # Interval (seconds) between trash sweeps. During a trash sweep,
+ # collections are marked as trash if their trash_at time has
+ # arrived, and deleted if their delete_at time has arrived.
+ trash_sweep_interval: 60
+
# Maximum characters of (JSON-encoded) query parameters to include
# in each request log entry. When params exceed this size, they will
# be JSON-encoded, truncated to this size, and logged as
workbench_address: https://localhost:3001/
git_repositories_dir: <%= Rails.root.join 'tmp', 'git', 'test' %>
git_internal_dir: <%= Rails.root.join 'tmp', 'internal.git' %>
+ websocket_address: <% if ENV['ARVADOS_TEST_EXPERIMENTAL_WS'] %>"wss://0.0.0.0:<%= ENV['ARVADOS_TEST_WSS_PORT'] %>/websocket"<% else %>false<% end %>
+ trash_sweep_interval: -1
config.filter_parameters += [:password]
I18n.enforce_available_locales = false
+
+ # Before using the filesystem backend for Rails.cache, check
+ # whether we own the relevant directory. If we don't, using it is
+ # likely to either fail or (if we're root) pollute it and cause
+ # other processes to fail later.
+ default_cache_path = Rails.root.join('tmp', 'cache')
+ if not File.owned?(default_cache_path)
+ if File.exist?(default_cache_path)
+ why = "owner (uid=#{File::Stat.new(default_cache_path).uid}) " +
+ "is not me (uid=#{Process.euid})"
+ else
+ why = "does not exist"
+ end
+ STDERR.puts("Defaulting to memory cache, " +
+ "because #{default_cache_path} #{why}")
+ config.cache_store = :memory_store
+ end
end
end
# Set up gems listed in the Gemfile.
ENV['BUNDLE_GEMFILE'] ||= File.expand_path('../../Gemfile', __FILE__)
-require 'bundler/setup' if File.exists?(ENV['BUNDLE_GEMFILE'])
+require 'bundler/setup' if File.exist?(ENV['BUNDLE_GEMFILE'])
# end
ActiveSupport::Inflector.inflections do |inflect|
- inflect.plural /^([Ss]pecimen)$/i, '\1s'
- inflect.singular /^([Ss]pecimen)s?/i, '\1'
- inflect.plural /^([Hh]uman)$/i, '\1s'
- inflect.singular /^([Hh]uman)s?/i, '\1'
+ inflect.plural(/^([Ss]pecimen)$/i, '\1s')
+ inflect.singular(/^([Ss]pecimen)s?/i, '\1')
+ inflect.plural(/^([Hh]uman)$/i, '\1s')
+ inflect.singular(/^([Hh]uman)s?/i, '\1')
end
# configured by application.yml (i.e., here!) instead.
end
-if (File.exists?(File.expand_path '../omniauth.rb', __FILE__) and
+if (File.exist?(File.expand_path '../omniauth.rb', __FILE__) and
not defined? WARNED_OMNIAUTH_CONFIG)
Rails.logger.warn <<-EOS
DEPRECATED CONFIGURATION:
%w(application.default application).each do |cfgfile|
path = "#{::Rails.root.to_s}/config/#{cfgfile}.yml"
- if File.exists? path
+ if File.exist? path
yaml = ERB.new(IO.read path).result(binding)
confs = YAML.load(yaml, deserialize_symbols: true)
# Ignore empty YAML file:
if Rails.env == 'development'
Dir.foreach("#{Rails.root}/app/models") do |model_file|
- require_dependency model_file if model_file.match /\.rb$/
+ require_dependency model_file if model_file.match(/\.rb$/)
end
end
resources :collections do
get 'provenance', on: :member
get 'used_by', on: :member
+ post 'trash', on: :member
end
resources :groups do
get 'contents', on: :collection
--- /dev/null
+# Rebuild every full-text-search GIN index so its indexed expression
+# matches the updated full_text_tsvector definition (the leading-space
+# padding was removed from the tsvector argument).
+class FullTextSearchIndexes < ActiveRecord::Migration
+ # Map of table name => full-text-search index name for every table
+ # that carries a FTS index.
+ def fts_indexes
+ {
+ "collections" => "collections_full_text_search_idx",
+ "container_requests" => "container_requests_full_text_search_idx",
+ "groups" => "groups_full_text_search_idx",
+ "jobs" => "jobs_full_text_search_idx",
+ "pipeline_instances" => "pipeline_instances_full_text_search_idx",
+ "pipeline_templates" => "pipeline_templates_full_text_search_idx",
+ "workflows" => "workflows_full_text_search_idx",
+ }
+ end
+
+ def up
+ # remove existing fts indexes and create up to date ones with no leading space
+ fts_indexes.each do |t, i|
+ # Refresh cached column info so full_text_tsvector reflects any
+ # columns added by earlier migrations in the same run.
+ t.classify.constantize.reset_column_information
+ ActiveRecord::Base.connection.indexes(t).each do |idx|
+ if idx.name == i
+ remove_index t.to_sym, :name => i
+ break
+ end
+ end
+ execute "CREATE INDEX #{i} ON #{t} USING gin(#{t.classify.constantize.full_text_tsvector});"
+ end
+ end
+
+ def down
+ # Drops the rebuilt indexes; note this does NOT restore the previous
+ # (leading-space) index expressions.
+ fts_indexes.each do |t, i|
+ remove_index t.to_sym, :name => i
+ end
+ end
+end
--- /dev/null
+# Split the single collections.expires_at column into the trash model:
+# trash_at (hide from queries) + delete_at (permanently remove) +
+# is_trashed flag. Existing expired rows are deleted outright; rows with
+# a pending expiry are marked trashed. The unique (owner_uuid, name)
+# index is rebuilt to exclude trashed rows instead of expired rows.
+class SplitExpiryToTrashAndDelete < ActiveRecord::Migration
+ def up
+ Collection.transaction do
+ add_column(:collections, :trash_at, :datetime)
+ add_index(:collections, :trash_at)
+ add_column(:collections, :is_trashed, :boolean, null: false, default: false)
+ add_index(:collections, :is_trashed)
+ rename_column(:collections, :expires_at, :delete_at)
+ add_index(:collections, :delete_at)
+
+ Collection.reset_column_information
+ # Rows already past expiry are gone for good; rows with a future
+ # expiry become trashed as of now.
+ Collection.
+ where('delete_at is not null and delete_at <= statement_timestamp()').
+ delete_all
+ Collection.
+ where('delete_at is not null').
+ update_all('is_trashed = true, trash_at = statement_timestamp()')
+ add_index(:collections, [:owner_uuid, :name],
+ unique: true,
+ where: 'is_trashed = false',
+ name: 'index_collections_on_owner_uuid_and_name')
+ remove_index(:collections,
+ name: 'collection_owner_uuid_name_unique')
+ end
+ end
+
+ def down
+ # Reverses the schema change only; rows deleted/trashed by up() are
+ # not restored.
+ Collection.transaction do
+ remove_index(:collections, :delete_at)
+ rename_column(:collections, :delete_at, :expires_at)
+ add_index(:collections, [:owner_uuid, :name],
+ unique: true,
+ where: 'expires_at is null',
+ name: 'collection_owner_uuid_name_unique')
+ remove_index(:collections,
+ name: 'index_collections_on_owner_uuid_and_name')
+ remove_column(:collections, :is_trashed)
+ remove_index(:collections, :trash_at)
+ remove_column(:collections, :trash_at)
+ end
+ end
+end
--- /dev/null
+# Add container_requests.output_name: an optional caller-chosen name for
+# the output collection created when the request finalizes.
+class AddOutputNameToContainerRequests < ActiveRecord::Migration
+ def up
+ add_column :container_requests, :output_name, :string, :default => nil
+ end
+
+ def down
+ remove_column :container_requests, :output_name
+ end
+ end
--- /dev/null
+# Recreate container_requests_search_index with output_name appended to
+# the indexed column list.
+class AddOutputNameToContainerRequestSearchIndex < ActiveRecord::Migration
+ def up
+ begin
+ remove_index :container_requests, :name => 'container_requests_search_index'
+ rescue
+ # Bare rescue: tolerate the index not existing yet (e.g. fresh
+ # database) so the migration is idempotent.
+ end
+ add_index :container_requests,
+ ["uuid", "owner_uuid", "modified_by_client_uuid", "modified_by_user_uuid", "name", "state", "requesting_container_uuid", "container_uuid", "container_image", "cwd", "output_path", "output_uuid", "log_uuid", "output_name"],
+ name: "container_requests_search_index"
+ end
+
+ def down
+ begin
+ remove_index :container_requests, :name => 'container_requests_search_index'
+ rescue
+ # Same best-effort drop as up().
+ end
+ add_index :container_requests,
+ ["uuid", "owner_uuid", "modified_by_client_uuid", "modified_by_user_uuid", "name", "state", "requesting_container_uuid", "container_uuid", "container_image", "cwd", "output_path", "output_uuid", "log_uuid"],
+ name: "container_requests_search_index"
+ end
+end
--- /dev/null
+# Rebuild the container_requests full-text-search GIN index so its
+# tsvector expression picks up the new output_name column.
+class AddOutputNameToCrFtsIndex < ActiveRecord::Migration
+ def up
+ t = "container_requests"
+ i = "container_requests_full_text_search_idx"
+ # Refresh cached column info so full_text_tsvector sees output_name
+ # added by the preceding migration in the same run.
+ t.classify.constantize.reset_column_information
+ ActiveRecord::Base.connection.indexes(t).each do |idx|
+ if idx.name == i
+ remove_index t.to_sym, :name => i
+ break
+ end
+ end
+ # By now, container_request should have the new column "output_name" so full_text_tsvector
+ # would include it on its results
+ execute "CREATE INDEX #{i} ON #{t} USING gin(#{t.classify.constantize.full_text_tsvector});"
+ end
+
+ def down
+ # Drops the index entirely; it is not recreated without output_name.
+ t = "container_requests"
+ i = "container_requests_full_text_search_idx"
+ remove_index t.to_sym, :name => i
+ end
+end
name character varying(255),
description character varying(524288),
properties text,
- expires_at timestamp without time zone,
- file_names character varying(8192)
+ delete_at timestamp without time zone,
+ file_names character varying(8192),
+ trash_at timestamp without time zone,
+ is_trashed boolean DEFAULT false NOT NULL
);
use_existing boolean DEFAULT true,
scheduling_parameters text,
output_uuid character varying(255),
- log_uuid character varying(255)
+ log_uuid character varying(255),
+ output_name character varying(255) DEFAULT NULL::character varying
);
CREATE INDEX authorized_keys_search_index ON authorized_keys USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name, key_type, authorized_user_uuid);
---
--- Name: collection_owner_uuid_name_unique; Type: INDEX; Schema: public; Owner: -; Tablespace:
---
-
-CREATE UNIQUE INDEX collection_owner_uuid_name_unique ON collections USING btree (owner_uuid, name) WHERE (expires_at IS NULL);
-
-
--
-- Name: collections_full_text_search_idx; Type: INDEX; Schema: public; Owner: -; Tablespace:
--
-CREATE INDEX collections_full_text_search_idx ON collections USING gin (to_tsvector('english'::regconfig, (((((((((((((((((' '::text || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(file_names, ''::character varying))::text)));
+CREATE INDEX collections_full_text_search_idx ON collections USING gin (to_tsvector('english'::regconfig, (((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(file_names, ''::character varying))::text)));
--
-- Name: container_requests_full_text_search_idx; Type: INDEX; Schema: public; Owner: -; Tablespace:
--
-CREATE INDEX container_requests_full_text_search_idx ON container_requests USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((((((((((((((((' '::text || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(mounts, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text))));
+CREATE INDEX container_requests_full_text_search_idx ON container_requests USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(requesting_container_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(container_uuid, ''::character varying))::text) || ' '::text) || COALESCE(mounts, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(container_image, ''::character varying))::text) || ' '::text) || COALESCE(environment, ''::text)) || ' '::text) || (COALESCE(cwd, ''::character varying))::text) || ' '::text) || COALESCE(command, ''::text)) || ' '::text) || (COALESCE(output_path, ''::character varying))::text) || ' '::text) || COALESCE(filters, ''::text)) || ' '::text) || COALESCE(scheduling_parameters, ''::text)) || ' '::text) || (COALESCE(output_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output_name, ''::character varying))::text)));
--
-- Name: container_requests_search_index; Type: INDEX; Schema: public; Owner: -; Tablespace:
--
-CREATE INDEX container_requests_search_index ON container_requests USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name, state, requesting_container_uuid, container_uuid, container_image, cwd, output_path, output_uuid, log_uuid);
+CREATE INDEX container_requests_search_index ON container_requests USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name, state, requesting_container_uuid, container_uuid, container_image, cwd, output_path, output_uuid, log_uuid, output_name);
--
-- Name: groups_full_text_search_idx; Type: INDEX; Schema: public; Owner: -; Tablespace:
--
-CREATE INDEX groups_full_text_search_idx ON groups USING gin (to_tsvector('english'::regconfig, (((((((((((((' '::text || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text)));
+CREATE INDEX groups_full_text_search_idx ON groups USING gin (to_tsvector('english'::regconfig, (((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text)));
--
CREATE INDEX index_collections_on_created_at ON collections USING btree (created_at);
+--
+-- Name: index_collections_on_delete_at; Type: INDEX; Schema: public; Owner: -; Tablespace:
+--
+
+CREATE INDEX index_collections_on_delete_at ON collections USING btree (delete_at);
+
+
+--
+-- Name: index_collections_on_is_trashed; Type: INDEX; Schema: public; Owner: -; Tablespace:
+--
+
+CREATE INDEX index_collections_on_is_trashed ON collections USING btree (is_trashed);
+
+
--
-- Name: index_collections_on_modified_at; Type: INDEX; Schema: public; Owner: -; Tablespace:
--
CREATE INDEX index_collections_on_owner_uuid ON collections USING btree (owner_uuid);
+--
+-- Name: index_collections_on_owner_uuid_and_name; Type: INDEX; Schema: public; Owner: -; Tablespace:
+--
+
+CREATE UNIQUE INDEX index_collections_on_owner_uuid_and_name ON collections USING btree (owner_uuid, name) WHERE (is_trashed = false);
+
+
+--
+-- Name: index_collections_on_trash_at; Type: INDEX; Schema: public; Owner: -; Tablespace:
+--
+
+CREATE INDEX index_collections_on_trash_at ON collections USING btree (trash_at);
+
+
--
-- Name: index_collections_on_uuid; Type: INDEX; Schema: public; Owner: -; Tablespace:
--
-- Name: jobs_full_text_search_idx; Type: INDEX; Schema: public; Owner: -; Tablespace:
--
-CREATE INDEX jobs_full_text_search_idx ON jobs USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((((((((((((((((((((((' '::text || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text)));
+CREATE INDEX jobs_full_text_search_idx ON jobs USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text))));
--
-- Name: pipeline_instances_full_text_search_idx; Type: INDEX; Schema: public; Owner: -; Tablespace:
--
-CREATE INDEX pipeline_instances_full_text_search_idx ON pipeline_instances USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((' '::text || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)));
+CREATE INDEX pipeline_instances_full_text_search_idx ON pipeline_instances USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)));
--
-- Name: pipeline_templates_full_text_search_idx; Type: INDEX; Schema: public; Owner: -; Tablespace:
--
-CREATE INDEX pipeline_templates_full_text_search_idx ON pipeline_templates USING gin (to_tsvector('english'::regconfig, (((((((((((((' '::text || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)));
+CREATE INDEX pipeline_templates_full_text_search_idx ON pipeline_templates USING gin (to_tsvector('english'::regconfig, (((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)));
--
-- Name: workflows_full_text_search_idx; Type: INDEX; Schema: public; Owner: -; Tablespace:
--
-CREATE INDEX workflows_full_text_search_idx ON workflows USING gin (to_tsvector('english'::regconfig, (((((((((((((' '::text || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE(definition, ''::text))));
+CREATE INDEX workflows_full_text_search_idx ON workflows USING gin (to_tsvector('english'::regconfig, (((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(description, ''::text)) || ' '::text) || COALESCE(definition, ''::text))));
--
INSERT INTO schema_migrations (version) VALUES ('20161115171221');
-INSERT INTO schema_migrations (version) VALUES ('20161115174218');
\ No newline at end of file
+INSERT INTO schema_migrations (version) VALUES ('20161115174218');
+
+INSERT INTO schema_migrations (version) VALUES ('20161213172944');
+
+INSERT INTO schema_migrations (version) VALUES ('20161222153434');
+
+INSERT INTO schema_migrations (version) VALUES ('20161223090712');
+
+INSERT INTO schema_migrations (version) VALUES ('20170102153111');
+
+INSERT INTO schema_migrations (version) VALUES ('20170105160301');
\ No newline at end of file
--- /dev/null
+CREATE TEMPORARY VIEW permission_view AS
+WITH RECURSIVE
+perm_value (name, val) AS (
+ VALUES
+ ('can_read', 1::smallint),
+ ('can_login', 1),
+ ('can_write', 2),
+ ('can_manage', 3)
+ ),
+perm_edges (tail_uuid, head_uuid, val, follow) AS (
+ SELECT links.tail_uuid,
+ links.head_uuid,
+ pv.val,
+ (pv.val = 3 OR groups.uuid IS NOT NULL) AS follow
+ FROM links
+ LEFT JOIN perm_value pv ON pv.name = links.name
+ LEFT JOIN groups ON pv.val<3 AND groups.uuid = links.head_uuid
+ WHERE links.link_class = 'permission'
+ UNION ALL
+ SELECT owner_uuid, uuid, 3, true FROM groups
+ ),
+perm (val, follow, user_uuid, target_uuid) AS (
+ SELECT 3::smallint AS val,
+ true AS follow,
+ users.uuid::varchar(32) AS user_uuid,
+ users.uuid::varchar(32) AS target_uuid
+ FROM users
+ UNION
+ SELECT LEAST(perm.val, edges.val)::smallint AS val,
+ edges.follow AS follow,
+ perm.user_uuid::varchar(32) AS user_uuid,
+ edges.head_uuid::varchar(32) AS target_uuid
+ FROM perm
+ INNER JOIN perm_edges edges
+ ON perm.follow AND edges.tail_uuid = perm.target_uuid
+)
+SELECT user_uuid,
+ target_uuid,
+ val AS perm_level,
+ CASE follow WHEN true THEN target_uuid ELSE NULL END AS target_owner_uuid
+ FROM perm;
api_client_auth = ApiClientAuthorization.
where(api_token: supplied_token).
first
- if api_client_auth && !api_client_auth.user.uuid.match(/-000000000000000$/)
- raise "Token already exists but is not a superuser token."
+ if !api_client_auth
+ # fall through to create a token
+ elsif !api_client_auth.user.uuid.match(/-000000000000000$/)
+ raise "Token exists but is not a superuser token."
+ elsif api_client_auth.scopes != ['all']
+ raise "Token exists but has limited scope #{api_client_auth.scopes.inspect}."
end
end
# Check if there is an unexpired superuser token corresponding to this api client
api_client_auth = ApiClientAuthorization.where(
- 'user_id = (?) AND
- api_client_id = (?) AND
+ 'user_id = ? AND
+ api_client_id = ? AND
+ scopes = ? AND
(expires_at IS NULL OR expires_at > CURRENT_TIMESTAMP)',
- system_user.id, apiClient.id).first
+ system_user.id, apiClient.id, ['all'].to_yaml).first
# none exist; create one with the supplied token
if !api_client_auth
@cgroup_root = ENV['CRUNCH_CGROUP_ROOT']
@arvados_internal = Rails.configuration.git_internal_dir
- if not File.exists? @arvados_internal
+ if not File.exist? @arvados_internal
$stderr.puts `mkdir -p #{@arvados_internal.shellescape} && git init --bare #{@arvados_internal.shellescape}`
raise "No internal git repository available" unless ($? == 0)
end
# into multiple rows with one hostname each.
`#{cmd} --noheader -o '%N:#{outfmt}'`.each_line do |line|
tokens = line.chomp.split(":", max_fields)
- if (re = tokens[0].match /^(.*?)\[([-,\d]+)\]$/)
+ if (re = tokens[0].match(/^(.*?)\[([-,\d]+)\]$/))
tokens.shift
re[2].split(",").each do |range|
range = range.split("-").collect(&:to_i)
end
def update_node_status
- return unless Server::Application.config.crunch_job_wrapper.to_s.match /^slurm/
+ return unless Server::Application.config.crunch_job_wrapper.to_s.match(/^slurm/)
slurm_status.each_pair do |hostname, slurmdata|
next if @node_state[hostname] == slurmdata
begin
end
usable_nodes << node
if usable_nodes.count >= min_node_count
- return usable_nodes.map { |node| node.hostname }
+ return usable_nodes.map { |n| n.hostname }
end
end
nil
def read_pipes
@running.each do |job_uuid, j|
- job = j[:job]
-
now = Time.now
if now > j[:log_throttle_reset_time]
# It has been more than throttle_period seconds since the last
+$system_user = nil
+$system_group = nil
+$all_users_group = nil
+$anonymous_user = nil
+$anonymous_group = nil
+$anonymous_group_read_permission = nil
+$empty_collection = nil
+
module CurrentApiClient
def current_user
Thread.current[:user]
User.all.collect(&:uuid).each do |user_uuid|
Link.create!(link_class: 'permission',
name: 'can_manage',
- tail_kind: 'arvados#group',
tail_uuid: system_group_uuid,
- head_kind: 'arvados#user',
head_uuid: user_uuid)
end
end
@connection_count = 0
end
+ def send_message(ws, obj)
+ ws.send(Oj.dump(obj, mode: :compat))
+ end
+
# Push out any pending events to the connection +ws+
# +notify_id+ the id of the most recent row in the log table, may be nil
#
logs.select('logs.id').find_each do |l|
if not ws.sent_ids.include?(l.id)
# only send if not a duplicate
- ws.send(Log.find(l.id).as_api_response.to_json)
+ send_message(ws, Log.find(l.id).as_api_response)
end
if not ws.last_log_id.nil?
# record ids only when sending "catchup" messages, not notifies
rescue ArgumentError => e
# There was some kind of user error.
Rails.logger.warn "Error publishing event: #{$!}"
- ws.send ({status: 500, message: $!}.to_json)
+ send_message(ws, {status: 500, message: $!})
ws.close
rescue => e
Rails.logger.warn "Error publishing event: #{$!}"
Rails.logger.warn "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
- ws.send ({status: 500, message: $!}.to_json)
+ send_message(ws, {status: 500, message: $!})
ws.close
# These exceptions typically indicate serious server trouble:
# out of memory issues, database connection problems, etc. Go ahead and
p = (Oj.strict_load event.data).symbolize_keys
filter = Filter.new(p)
rescue Oj::Error => e
- ws.send ({status: 400, message: "malformed request"}.to_json)
+ send_message(ws, {status: 400, message: "malformed request"})
return
end
# Add a filter. This gets the :filters field which is the same
# format as used for regular index queries.
ws.filters << filter
- ws.send ({status: 200, message: 'subscribe ok', filter: p}.to_json)
+ send_message(ws, {status: 200, message: 'subscribe ok', filter: p})
# Send any pending events
push_events ws, nil
else
- ws.send ({status: 403, message: "maximum of #{Rails.configuration.websocket_max_filters} filters allowed per connection"}.to_json)
+ send_message(ws, {status: 403, message: "maximum of #{Rails.configuration.websocket_max_filters} filters allowed per connection"})
end
elsif p[:method] == 'unsubscribe'
len = ws.filters.length
ws.filters.select! { |f| not ((f.filters == p[:filters]) or (f.filters.empty? and p[:filters].nil?)) }
if ws.filters.length < len
- ws.send ({status: 200, message: 'unsubscribe ok'}.to_json)
+ send_message(ws, {status: 200, message: 'unsubscribe ok'})
else
- ws.send ({status: 404, message: 'filter not found'}.to_json)
+ send_message(ws, {status: 404, message: 'filter not found'})
end
else
- ws.send ({status: 400, message: "missing or unrecognized method"}.to_json)
+ send_message(ws, {status: 400, message: "missing or unrecognized method"})
end
rescue => e
Rails.logger.warn "Error handling message: #{$!}"
Rails.logger.warn "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
- ws.send ({status: 500, message: 'error'}.to_json)
+ send_message(ws, {status: 500, message: 'error'})
ws.close
end
end
# Disconnect if no valid API token.
# current_user is included from CurrentApiClient
if not current_user
- ws.send ({status: 401, message: "Valid API token required"}.to_json)
- ws.close
+ send_message(ws, {status: 401, message: "Valid API token required"})
+ # Wait for the handshake to complete before closing the
+      # socket. Otherwise, nginx responds with HTTP 502 Bad Gateway,
+ # and the client never sees our real error message.
+ ws.on :open do |event|
+ ws.close
+ end
return
end
# forward them to the thread associated with the connection.
sub = @channel.subscribe do |msg|
if ws.queue.length > Rails.configuration.websocket_max_notify_backlog
- ws.send ({status: 500, message: 'Notify backlog too long'}.to_json)
+ send_message(ws, {status: 500, message: 'Notify backlog too long'})
ws.close
@channel.unsubscribe sub
ws.queue.clear
@mtx.synchronize do
@connection_count -= 1
end
+ ActiveRecord::Base.connection.close
end
end
# has used set_table_name to use an alternate table name from the Rails standard.
# I could not find a perfect way to handle this well, but ActiveRecord::Base.send(:descendants)
# would be a place to start if this ever becomes necessary.
- if attr.match /^[a-z][_a-z0-9]+$/ and
+ if attr.match(/^[a-z][_a-z0-9]+$/) and
model_class.columns.collect(&:name).index(attr) and
['asc','desc'].index direction.downcase
@orders << "#{table_name}.#{attr} #{direction.downcase}"
- elsif attr.match /^([a-z][_a-z0-9]+)\.([a-z][_a-z0-9]+)$/ and
+ elsif attr.match(/^([a-z][_a-z0-9]+)\.([a-z][_a-z0-9]+)$/) and
['asc','desc'].index(direction.downcase) and
ActiveRecord::Base.connection.tables.include?($1) and
$1.classify.constantize.columns.collect(&:name).index($2)
# Any ordering columns must be selected when doing select,
# otherwise it is an SQL error, so filter out invalid orderings.
@orders.select! { |o|
+ col, dir = o.split
# match select column against order array entry
- @select.select { |s| /^#{table_name}.#{s}( (asc|desc))?$/.match o }.any?
+ @select.select { |s| col == "#{table_name}.#{s}" }.any?
}
end
def salvage_collection_locator_data manifest
locators = []
size = 0
- manifest.scan /(^|[^[:xdigit:]])([[:xdigit:]]{32})((\+\d+)(\+|\b))?/ do |_, hash, _, sizehint, _|
+ manifest.scan(/(^|[^[:xdigit:]])([[:xdigit:]]{32})((\+\d+)(\+|\b))?/) do |_, hash, _, sizehint, _|
if sizehint
locators << hash.downcase + sizehint
size += sizehint.to_i
--- /dev/null
+require 'current_api_client'
+
+module SweepTrashedCollections
+ extend CurrentApiClient
+
+ def self.sweep_now
+ act_as_system_user do
+ Collection.unscoped.
+ where('delete_at is not null and delete_at < statement_timestamp()').
+ destroy_all
+ Collection.unscoped.
+ where('is_trashed = false and trash_at < statement_timestamp()').
+ update_all('is_trashed = true')
+ end
+ end
+
+ def self.sweep_if_stale
+ return if Rails.configuration.trash_sweep_interval <= 0
+ exp = Rails.configuration.trash_sweep_interval.seconds
+ need = false
+ Rails.cache.fetch('SweepTrashedCollections', expires_in: exp) do
+ need = true
+ end
+ if need
+ Thread.new do
+ begin
+ sweep_now
+ ensure
+ ActiveRecord::Base.connection.close
+ end
+ end
+ end
+ end
+end
--- /dev/null
+namespace :config do
+ desc 'Show site configuration'
+ task dump: :environment do
+ puts $application_config.to_yaml
+ end
+end
# load and merge in the environment-specific application config info
# if present, overriding base config parameters as specified
path = File.absolute_path('../../config/arvados-clients.yml', __FILE__)
-if File.exists?(path) then
+if File.exist?(path) then
cp_config = YAML.load_file(path)[ENV['RAILS_ENV']]
else
puts "Please create a\n #{path}\n file"
begin
# Get our local gitolite-admin repo up to snuff
- if not File.exists?(gitolite_admin) then
+ if not File.exist?(gitolite_admin) then
ensure_directory(gitolite_tmpdir, 0700)
Dir.chdir(gitolite_tmpdir)
`git clone #{gitolite_url}`
# load and merge in the environment-specific application config info
# if present, overriding base config parameters as specified
path = File.dirname(__FILE__) + '/config/arvados-clients.yml'
-if File.exists?(path) then
+if File.exist?(path) then
cp_config = YAML.load_file(path)[ENV['RAILS_ENV']]
else
puts "Please create a\n " + File.dirname(__FILE__) + "/config/arvados-clients.yml\n file"
begin
# Get our local gitolite-admin repo up to snuff
- if not File.exists?(gitolite_admin) then
+ if not File.exist?(gitolite_admin) then
ensure_directory(gitolite_tmpdir, 0700)
Dir.chdir(gitolite_tmpdir)
`git clone #{gitolite_url}`
FactoryGirl.define do
factory :user do
- ignore do
+ transient do
join_groups []
end
after :create do |user, evaluator|
manifest_text: ". 73feffa4b7f6bb68e44cf984c85f6e88+3 0:3:baz\n"
name: collection_to_move_around
+# Note: collections(:expired_collection) fixture finder won't work
+# because it is not in default scope
expired_collection:
uuid: zzzzz-4zz18-mto52zx1s7sn3ih
portable_data_hash: 0b21a217243bfce5617fb9224b95bcb9+49
modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
modified_at: 2014-02-03T17:22:54Z
updated_at: 2014-02-03T17:22:54Z
- expires_at: 2001-01-01T00:00:00Z
+ is_trashed: true
+ trash_at: 2001-01-01T00:00:00Z
+ delete_at: 2038-01-01T00:00:00Z
manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:expired\n"
name: expired_collection
+trashed_on_next_sweep:
+ uuid: zzzzz-4zz18-4guozfh77ewd2f0
+ portable_data_hash: 0b21a217243bfce5617fb9224b95bcb9+49
+ owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ created_at: 2016-12-07T22:01:00.123456Z
+ modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+ modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+ modified_at: 2016-12-27T22:01:30.123456Z
+ updated_at: 2016-12-27T22:01:30.123456Z
+ is_trashed: false
+ trash_at: 2016-12-07T22:01:30.123456Z
+ delete_at: 2112-01-01T00:00:00Z
+ manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:expired\n"
+ name: trashed_on_next_sweep
+
+# Note: collections(:deleted_on_next_sweep) fixture finder won't work
+# because it is not in default scope
+deleted_on_next_sweep:
+ uuid: zzzzz-4zz18-3u1p5umicfpqszp
+ portable_data_hash: 0b21a217243bfce5617fb9224b95bcb9+49
+ owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ created_at: 2016-12-07T22:01:00.234567Z
+ modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+ modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+ modified_at: 2016-12-27T22:01:30.234567Z
+ updated_at: 2016-12-27T22:01:30.234567Z
+ is_trashed: true
+ trash_at: 2016-12-07T22:01:30.234567Z
+ delete_at: 2016-12-27T22:01:30.234567Z
+ manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:expired\n"
+ name: deleted_on_next_sweep
+
collection_expires_in_future:
uuid: zzzzz-4zz18-padkqo7yb8d9i3j
portable_data_hash: 0b21a217243bfce5617fb9224b95bcb9+49
modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
modified_at: 2014-02-03T17:22:54Z
updated_at: 2014-02-03T17:22:54Z
- expires_at: 2038-01-01T00:00:00Z
+ trash_at: 2038-01-01T00:00:00Z
+ delete_at: 2038-03-01T00:00:00Z
manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:expired\n"
name: collection_expires_in_future
output_path: test
command: ["echo", "hello"]
container_uuid: zzzzz-dz642-compltcontainer
+ log_uuid: zzzzz-4zz18-y9vne9npefyxh8g
+ output_uuid: zzzzz-4zz18-znfnqtbbv4spc3w
runtime_constraints:
vcpus: 1
ram: 123
dataclass: Collection
title: "Foo/bar pair"
description: "Provide a collection containing at least two files."
+
+workflow_with_input_defaults:
+ uuid: zzzzz-p5p6p-aox0k0ofxrystg2
+ owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
+ created_at: 2014-04-14 12:35:04 -0400
+ updated_at: 2014-04-14 12:35:04 -0400
+ modified_at: 2014-04-14 12:35:04 -0400
+ modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+ modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ name: Pipeline with default input specifications
+ components:
+ part-one:
+ script: foo
+ script_version: master
+ script_parameters:
+ ex_string:
+ required: true
+ dataclass: string
+ ex_string_def:
+ required: true
+ dataclass: string
+ default: hello-testing-123
\ No newline at end of file
inputBinding:
position: 1
outputs: []
+
+workflow_with_input_defaults:
+ uuid: zzzzz-7fd4e-validwithinput2
+ owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
+ name: Workflow with default input specifications
+ description: this workflow has inputs specified
+ created_at: <%= 1.minute.ago.to_s(:db) %>
+ definition: |
+ cwlVersion: v1.0
+ class: CommandLineTool
+ baseCommand:
+ - echo
+ inputs:
+ - type: string
+ id: ex_string
+ - type: string
+ id: ex_string_def
+ default: hello-testing-123
+ outputs: []
get :index, search_params
assert_response :success
got_tokens = JSON.parse(@response.body)['items']
- .map { |auth| auth['api_token'] }
+ .map { |a| a['api_token'] }
assert_equal(expected_tokens.sort, got_tokens.sort,
"wrong results for #{search_params.inspect}")
end
require 'test_helper'
class Arvados::V1::CollectionsControllerTest < ActionController::TestCase
+ include DbCurrentTime
PERM_TOKEN_RE = /\+A[[:xdigit:]]+@[[:xdigit:]]{8}\b/
[2**8, :success],
[2**18, 422],
].each do |description_size, expected_response|
- test "create collection with description size #{description_size}
+ # Descriptions are not part of search indexes. Skip until
+ # full-text search is implemented, at which point replace with a
+ # search in description.
+ skip "create collection with description size #{description_size}
and expect response #{expected_response}" do
- skip "(Descriptions are not part of search indexes. Skip until full-text search
- is implemented, at which point replace with a search in description.)"
-
authorize_with :active
description = 'here is a collection with a very large description'
assert_response 200
end
end
+
+ test 'get trashed collection with include_trash' do
+ uuid = 'zzzzz-4zz18-mto52zx1s7sn3ih' # expired_collection
+ authorize_with :active
+ get :show, {
+ id: uuid,
+ include_trash: true,
+ }
+ assert_response 200
+ end
+
+ test 'get trashed collection without include_trash' do
+ uuid = 'zzzzz-4zz18-mto52zx1s7sn3ih' # expired_collection
+ authorize_with :active
+ get :show, {
+ id: uuid,
+ }
+ assert_response 404
+ end
+
+ test 'trash collection using http DELETE verb' do
+ uuid = collections(:collection_owned_by_active).uuid
+ authorize_with :active
+ delete :destroy, {
+ id: uuid,
+ }
+ assert_response 200
+ c = Collection.unscoped.find_by_uuid(uuid)
+ assert_operator c.trash_at, :<, db_current_time
+ assert_equal c.delete_at, c.trash_at + Rails.configuration.blob_signature_ttl
+ end
+
+ test 'delete long-trashed collection immediately using http DELETE verb' do
+ uuid = 'zzzzz-4zz18-mto52zx1s7sn3ih' # expired_collection
+ authorize_with :active
+ delete :destroy, {
+ id: uuid,
+ }
+ assert_response 200
+ c = Collection.unscoped.find_by_uuid(uuid)
+ assert_operator c.trash_at, :<, db_current_time
+ assert_operator c.delete_at, :<, db_current_time
+ end
+
+ ['zzzzz-4zz18-mto52zx1s7sn3ih', # expired_collection
+ :empty_collection_name_in_active_user_home_project,
+ ].each do |fixture|
+ test "trash collection #{fixture} via trash action with grace period" do
+ if fixture.is_a? String
+ uuid = fixture
+ else
+ uuid = collections(fixture).uuid
+ end
+ authorize_with :active
+ time_before_trashing = db_current_time
+ post :trash, {
+ id: uuid,
+ }
+ assert_response 200
+ c = Collection.unscoped.find_by_uuid(uuid)
+ assert_operator c.trash_at, :<, db_current_time
+ assert_operator c.delete_at, :>=, time_before_trashing + Rails.configuration.default_trash_lifetime
+ end
+ end
end
filters: [['uuid', '@@', 'abcdef']],
}
assert_response 422
- assert_match /not supported/, json_response['errors'].join(' ')
+ assert_match(/not supported/, json_response['errors'].join(' '))
end
test 'difficult characters in full text search' do
filters: [['any', '@@', ['abc', 'def']]],
}
assert_response 422
- assert_match /not supported/, json_response['errors'].join(' ')
+ assert_match(/not supported/, json_response['errors'].join(' '))
end
test 'api responses provide timestamps with nanoseconds' do
%w(created_at modified_at).each do |attr|
# Pass fixtures with null timestamps.
next if item[attr].nil?
- assert_match /^\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d.\d{9}Z$/, item[attr]
+ assert_match(/^\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d.\d{9}Z$/, item[attr])
end
end
end
assert_equal 0, json_response['items_available']
end
- def check_project_contents_response
+ def check_project_contents_response disabled_kinds=[]
assert_response :success
assert_operator 2, :<=, json_response['items_available']
assert_operator 2, :<=, json_response['items'].count
kinds = json_response['items'].collect { |i| i['kind'] }.uniq
- expect_kinds = %w'arvados#group arvados#specimen arvados#pipelineTemplate arvados#job'
+ expect_kinds = %w'arvados#group arvados#specimen arvados#pipelineTemplate arvados#job' - disabled_kinds
assert_equal expect_kinds, (expect_kinds & kinds)
json_response['items'].each do |i|
"group#contents returned a non-project group")
end
end
+
+ disabled_kinds.each do |d|
+ assert_equal true, !kinds.include?(d)
+ end
end
test 'get group-owned objects' do
end
end
end
+
+ test 'get contents with jobs and pipeline instances disabled' do
+ Rails.configuration.disable_api_methods = ['jobs.index', 'pipeline_instances.index']
+
+ authorize_with :active
+ get :contents, {
+ id: groups(:aproject).uuid,
+ format: :json,
+ }
+ check_project_contents_response %w'arvados#pipelineInstance arvados#job'
+ end
end
'server should correct bogus cancelled_at ' +
job['cancelled_at'])
assert_equal(true,
- File.exists?(Rails.configuration.crunch_refresh_trigger),
+ File.exist?(Rails.configuration.crunch_refresh_trigger),
'trigger file should be created when job is cancelled')
end
assert_response 404
end
- test "retrieve all permissions using generic links index api" do
- skip "(not implemented)"
+ # not implemented
+ skip "retrieve all permissions using generic links index api" do
# Links.readable_by() does not return the full set of permission
# links that are visible to a user (i.e., all permission links
# whose head_uuid references an object for which the user has
end
test "get_all_permissions takes into account is_active flag" do
- r = nil
act_as_user users(:active) do
- r = Repository.create! name: 'active/testrepo'
+ Repository.create! name: 'active/testrepo'
end
act_as_system_user do
u = users(:active)
u = User.find_by_uuid(user_uuid)
if perms['can_read']
assert u.can? read: repo['uuid']
- assert_match /R/, perms['gitolite_permissions']
+ assert_match(/R/, perms['gitolite_permissions'])
else
- refute_match /R/, perms['gitolite_permissions']
+ refute_match(/R/, perms['gitolite_permissions'])
end
if perms['can_write']
assert u.can? write: repo['uuid']
- assert_match /RW\+/, perms['gitolite_permissions']
+ assert_match(/RW\+/, perms['gitolite_permissions'])
else
- refute_match /W/, perms['gitolite_permissions']
+ refute_match(/W/, perms['gitolite_permissions'])
end
if perms['can_manage']
assert u.can? manage: repo['uuid']
- assert_match /RW\+/, perms['gitolite_permissions']
+ assert_match(/RW\+/, perms['gitolite_permissions'])
end
end
end
get :index
assert_response :success
discovery_doc = JSON.parse(@response.body)
- assert_match /^[0-9a-f]+(-modified)?$/, discovery_doc['source_version']
+ assert_match(/^[0-9a-f]+(-modified)?$/, discovery_doc['source_version'])
end
test "discovery document overrides source_version with config" do
active_user = User.find_by_uuid(users(:active).uuid)
readable_groups = active_user.groups_i_can(:read)
- all_users_group = Group.all.collect(&:uuid).select { |g| g.match /-f+$/ }
+ all_users_group = Group.all.collect(&:uuid).select { |g| g.match(/-f+$/) }
refute_includes(readable_groups, all_users_group,
"active user can read All Users group after being deactivated")
assert_equal(false, active_user.is_invited,
end
def verify_num_links (original_links, expected_additional_links)
- links_now = Link.all
assert_equal expected_additional_links, Link.all.size-original_links.size,
"Expected #{expected_additional_links.inspect} more links"
end
def find_obj_in_resp (response_items, object_type, head_kind=nil)
return_obj = nil
- response_items
response_items.each { |x|
if !x
next
test "groups is an empty list by default" do
get_logins_for(:testvm2)
active_login = find_login(:active)
- perm = links(:active_can_login_to_testvm2)
assert_equal([], active_login["groups"])
end
end
group = Group.where(name: 'All users').select do |g|
- g[:uuid].match /-f+$/
+ g[:uuid].match(/-f+$/)
end.first
group_read_perms = Link.where(tail_uuid: uuid,
head_uuid: group[:uuid],
:filters => ['uuid', '=', 'ad02e37b6a7f45bbe2ead3c29a109b8a+54'].to_json
}, auth(:active)
assert_response 422
- assert_match /nvalid element.*not an array/, json_response['errors'].join(' ')
+ assert_match(/nvalid element.*not an array/, json_response['errors'].join(' '))
end
test "get index with invalid filters (unsearchable column) responds 422" do
:filters => [['this_column_does_not_exist', '=', 'bogus']].to_json
}, auth(:active)
assert_response 422
- assert_match /nvalid attribute/, json_response['errors'].join(' ')
+ assert_match(/nvalid attribute/, json_response['errors'].join(' '))
end
test "get index with invalid filters (invalid operator) responds 422" do
:filters => [['uuid', ':-(', 'displeased']].to_json
}, auth(:active)
assert_response 422
- assert_match /nvalid operator/, json_response['errors'].join(' ')
+ assert_match(/nvalid operator/, json_response['errors'].join(' '))
end
test "get index with invalid filters (invalid operand type) responds 422" do
:filters => [['uuid', '=', {foo: 'bar'}]].to_json
}, auth(:active)
assert_response 422
- assert_match /nvalid operand type/, json_response['errors'].join(' ')
+ assert_match(/nvalid operand type/, json_response['errors'].join(' '))
end
test "get index with where= (empty string)" do
:select => ['bogus'].to_json
}, auth(:active)
assert_response 422
- assert_match /Invalid attribute.*bogus/, json_response['errors'].join(' ')
+ assert_match(/Invalid attribute.*bogus/, json_response['errors'].join(' '))
end
test "get index with select= (invalid attribute type) responds 422" do
:select => [['bogus']].to_json
}, auth(:active)
assert_response 422
- assert_match /Invalid attribute.*bogus/, json_response['errors'].join(' ')
+ assert_match(/Invalid attribute.*bogus/, json_response['errors'].join(' '))
end
test "controller 404 response is json" do
assert_response :success
assert_equal true, json_response['manifest_text'].include?('file4_in_subdir4.txt')
- created = json_response
-
# search using the filename
search_using_full_text_search 'subdir2', 0
search_using_full_text_search 'subdir2:*', 1
class CollectionsApiPerformanceTest < ActionDispatch::IntegrationTest
include ManifestExamples
- test "crud cycle for a collection with a big manifest" do
- slow_test
+ slow_test "crud cycle for a collection with a big manifest" do
bigmanifest = time_block 'make example' do
make_manifest(streams: 100,
files_per_stream: 100,
end
end
- test "memory usage" do
- slow_test
+ slow_test "memory usage" do
hugemanifest = make_manifest(streams: 1,
files_per_stream: 2000,
blocks_per_file: 200,
def assert_no_cors_headers
response.headers.keys.each do |h|
- assert_no_match /^Access-Control-/i, h
+ assert_no_match(/^Access-Control-/i, h)
end
end
end
class DatabaseResetTest < ActionDispatch::IntegrationTest
self.use_transactional_fixtures = false
- test "reset fails when Rails.env != 'test'" do
- slow_test
+ slow_test "reset fails when Rails.env != 'test'" do
rails_env_was = Rails.env
begin
Rails.env = 'production'
assert_response 403
end
- test "database reset doesn't break basic CRUD operations" do
- slow_test
+ slow_test "database reset doesn't break basic CRUD operations" do
active_auth = auth(:active)
admin_auth = auth(:admin)
assert_response 404
end
- test "roll back database change" do
- slow_test
+ slow_test "roll back database change" do
active_auth = auth(:active)
admin_auth = auth(:admin)
end
end
+ test "select with default order" do
+ get "/arvados/v1/links", {format: :json, select: ['uuid']}, auth(:admin)
+ assert_response :success
+ uuids = json_response['items'].collect { |i| i['uuid'] }
+ assert_equal uuids, uuids.sort
+ end
+
def assert_link_classes_ascend(current_class, prev_class)
# Databases and Ruby don't always agree about string ordering with
# punctuation. If the strings aren't ascending normally, check
(repos.collect(&:name) +
vm_links.collect { |link| link.properties['username'] }
).each do |name|
- r = name.match /^(.{#{prefix.length}})(\d+)$/
+ r = name.match(/^(.{#{prefix.length}})(\d+)$/)
assert_not_nil r, "#{name.inspect} does not match {prefix}\\d+"
assert_equal(prefix, r[1],
"#{name.inspect} was not {#{prefix.inspect} plus digits}")
require 'test_helper'
-require 'websocket_runner'
require 'oj'
require 'database_cleaner'
DatabaseCleaner.clean
end
- def ws_helper (token = nil, timeout = true)
+ def self.startup
+ s = TCPServer.new('0.0.0.0', 0)
+ @@port = s.addr[1]
+ s.close
+ @@pidfile = "tmp/pids/passenger.#{@@port}.pid"
+ DatabaseCleaner.start
+    Dir.chdir(Rails.root) do
+ # Only passenger seems to be able to run the websockets server
+ # successfully.
+ _system('passenger', 'start', '-d',
+ "-p#{@@port}",
+ "--log-file", "/dev/stderr",
+ "--pid-file", @@pidfile)
+ timeout = Time.now.tv_sec + 10
+ begin
+ sleep 0.2
+ begin
+ server_pid = IO.read(@@pidfile).to_i
+          good_pid = (server_pid > 0) && (Process.kill(0, server_pid) rescue false)
+ rescue Errno::ENOENT
+ good_pid = false
+ end
+ end while (not good_pid) and (Time.now.tv_sec < timeout)
+ if not good_pid
+ raise RuntimeError, "could not find API server Rails pid"
+ end
+ STDERR.puts "Started websocket server on port #{@@port} with pid #{server_pid}"
+ end
+ end
+
+ def self.shutdown
+ Dir.chdir(Rails.root) do
+ _system('passenger', 'stop', "-p#{@@port}",
+ "--pid-file", @@pidfile)
+ end
+ # DatabaseCleaner leaves the database empty. Prefer to leave it full.
+ dc = DatabaseController.new
+ dc.define_singleton_method :render do |*args| end
+ dc.reset
+ end
+
+ def self._system(*cmd)
+ Bundler.with_clean_env do
+ env = {
+ 'ARVADOS_WEBSOCKETS' => 'ws-only',
+ 'RAILS_ENV' => 'test',
+ }
+ if not system(env, *cmd)
+ raise RuntimeError, "Command exited #{$?}: #{cmd.inspect}"
+ end
+ end
+ end
+
+ def ws_helper(token: nil, timeout: 8)
opened = false
close_status = nil
too_long = false
- EM.run {
+ EM.run do
if token
- ws = Faye::WebSocket::Client.new("ws://localhost:#{WEBSOCKET_PORT}/websocket?api_token=#{api_client_authorizations(token).api_token}")
+ ws = Faye::WebSocket::Client.new("ws://localhost:#{@@port}/websocket?api_token=#{api_client_authorizations(token).api_token}")
else
- ws = Faye::WebSocket::Client.new("ws://localhost:#{WEBSOCKET_PORT}/websocket")
+ ws = Faye::WebSocket::Client.new("ws://localhost:#{@@port}/websocket")
end
ws.on :open do |event|
opened = true
if timeout
- EM::Timer.new 8 do
+ EM::Timer.new(timeout) do
too_long = true if close_status.nil?
EM.stop_event_loop
end
end
end
+ ws.on :error do |event|
+ STDERR.puts "websocket client error: #{event.inspect}"
+ end
+
ws.on :close do |event|
close_status = [:close, event.code, event.reason]
EM.stop_event_loop
end
yield ws
- }
+ end
assert opened, "Should have opened web socket"
assert (not too_long), "Test took too long"
assert_equal 401, status
end
-
test "connect, subscribe and get response" do
status = nil
- ws_helper :active do |ws|
+ ws_helper(token: :active) do |ws|
ws.on :open do |event|
ws.send ({method: 'subscribe'}.to_json)
end
authorize_with :active
- ws_helper :active do |ws|
+ ws_helper(token: :active) do |ws|
ws.on :open do |event|
ws.send ({method: 'subscribe'}.to_json)
end
authorize_with :active
- ws_helper :active do |ws|
+ ws_helper(token: :active) do |ws|
ws.on :open do |event|
ws.send ({method: 'subscribe'}.to_json)
end
authorize_with :active
- ws_helper :active do |ws|
+ ws_helper(token: :active) do |ws|
ws.on :open do |event|
ws.send ({method: 'subscribe', filters: [['object_uuid', 'is_a', 'arvados#human']]}.to_json)
end
authorize_with :active
- ws_helper :active do |ws|
+ ws_helper(token: :active) do |ws|
ws.on :open do |event|
ws.send ({method: 'subscribe', filters: [['object_uuid', 'is_a', 'arvados#human']]}.to_json)
ws.send ({method: 'subscribe', filters: [['object_uuid', 'is_a', 'arvados#specimen']]}.to_json)
authorize_with :active
- ws_helper :active do |ws|
+ ws_helper(token: :active) do |ws|
ws.on :open do |event|
ws.send ({method: 'subscribe', filters: [['object_uuid', 'is_a', 'arvados#trait'], ['event_type', '=', 'update']]}.to_json)
end
test "connect, subscribe, ask events starting at seq num" do
state = 1
- human = nil
- human_ev_uuid = nil
authorize_with :active
l1 = nil
l2 = nil
- ws_helper :active do |ws|
+ ws_helper(token: :active) do |ws|
ws.on :open do |event|
ws.send ({method: 'subscribe', last_log_id: lastid}.to_json)
end
assert_equal expect_next_logs[1].object_uuid, l2
end
- test "connect, subscribe, get event, unsubscribe" do
- slow_test
+ slow_test "connect, subscribe, get event, unsubscribe" do
state = 1
spec = nil
spec_ev_uuid = nil
- filter_id = nil
authorize_with :active
- ws_helper :active, false do |ws|
+ ws_helper(token: :active, timeout: false) do |ws|
ws.on :open do |event|
ws.send ({method: 'subscribe'}.to_json)
EM::Timer.new 3 do
assert_equal spec.uuid, spec_ev_uuid
end
- test "connect, subscribe, get event, unsubscribe with filter" do
- slow_test
+ slow_test "connect, subscribe, get event, unsubscribe with filter" do
state = 1
spec = nil
spec_ev_uuid = nil
authorize_with :active
- ws_helper :active, false do |ws|
+ ws_helper(token: :active, timeout: false) do |ws|
ws.on :open do |event|
ws.send ({method: 'subscribe', filters: [['object_uuid', 'is_a', 'arvados#human']]}.to_json)
EM::Timer.new 6 do
end
- test "connect, subscribe, get event, try to unsubscribe with bogus filter" do
- slow_test
+ slow_test "connect, subscribe, get event, try to unsubscribe with bogus filter" do
state = 1
spec = nil
spec_ev_uuid = nil
authorize_with :active
- ws_helper :active do |ws|
+ ws_helper(token: :active) do |ws|
ws.on :open do |event|
ws.send ({method: 'subscribe'}.to_json)
end
assert_equal human.uuid, human_ev_uuid
end
-
-
- test "connected, not subscribed, no event" do
- slow_test
+ slow_test "connected, not subscribed, no event" do
authorize_with :active
- ws_helper :active, false do |ws|
+ ws_helper(token: :active, timeout: false) do |ws|
ws.on :open do |event|
EM::Timer.new 1 do
Specimen.create
end
end
- test "connected, not authorized to see event" do
- slow_test
+ slow_test "connected, not authorized to see event" do
state = 1
authorize_with :admin
- ws_helper :active, false do |ws|
+ ws_helper(token: :active, timeout: false) do |ws|
ws.on :open do |event|
ws.send ({method: 'subscribe'}.to_json)
test "connect, try bogus method" do
status = nil
- ws_helper :active do |ws|
+ ws_helper(token: :active) do |ws|
ws.on :open do |event|
ws.send ({method: 'frobnabble'}.to_json)
end
test "connect, missing method" do
status = nil
- ws_helper :active do |ws|
+ ws_helper(token: :active) do |ws|
ws.on :open do |event|
ws.send ({fizzbuzz: 'frobnabble'}.to_json)
end
test "connect, send malformed request" do
status = nil
- ws_helper :active do |ws|
+ ws_helper(token: :active) do |ws|
ws.on :open do |event|
ws.send '<XML4EVER></XML4EVER>'
end
authorize_with :active
- ws_helper :active do |ws|
+ ws_helper(token: :active) do |ws|
ws.on :open do |event|
(1..17).each do |i|
ws.send ({method: 'subscribe', filters: [['object_uuid', '=', i]]}.to_json)
end
- test "connect, subscribe, lots of events" do
- slow_test
+ slow_test "connect, subscribe, lots of events" do
state = 1
event_count = 0
log_start = Log.order(:id).last.id
authorize_with :active
- ws_helper :active, false do |ws|
+ ws_helper(token: :active, timeout: false) do |ws|
EM::Timer.new 45 do
# Needs a longer timeout than the default
ws.close
assert_equal 200, d["status"]
ActiveRecord::Base.transaction do
(1..202).each do
- spec = Specimen.create
+ Specimen.create
end
end
state = 2
test "connect, subscribe with invalid filter" do
state = 1
- human = nil
- human_ev_uuid = nil
authorize_with :active
- ws_helper :active do |ws|
+ ws_helper(token: :active) do |ws|
ws.on :open do |event|
# test that #6451 is fixed (invalid filter crashes websockets)
ws.send ({method: 'subscribe', filters: [['object_blarg', 'is_a', 'arvados#human']]}.to_json)
when 1
assert_equal 200, d["status"]
Specimen.create
- human = Human.create
+ Human.create
state = 2
when 2
assert_equal 500, d["status"]
require File.expand_path('../../config/environment', __FILE__)
require 'rails/test_help'
-require 'mocha/mini_test'
+require 'mocha'
module ArvadosTestSupport
def json_response
def restore_configuration
# Restore configuration settings changed during tests
$application_config.each do |k,v|
- if k.match /^[^.]*$/
+ if k.match(/^[^.]*$/)
Rails.configuration.send (k + '='), v
end
end
"HTTP_AUTHORIZATION" => "OAuth2 #{t}")
end
- def slow_test
- skip "RAILS_TEST_SHORT is set" unless (ENV['RAILS_TEST_SHORT'] || '').empty?
+ def self.skip_slow_tests?
+ !(ENV['RAILS_TEST_SHORT'] || '').empty?
end
+
+ def self.skip(*args, &block)
+ end
+
+ def self.slow_test(name, &block)
+ define_method(name, block) unless skip_slow_tests?
+ end
+
+ alias_method :skip, :omit
end
class ActionController::TestCase
super action, *args
end
end
+
+ def self.suite
+ s = super
+ def s.run(*args)
+ @test_case.startup()
+ begin
+ super
+ ensure
+ @test_case.shutdown()
+ end
+ end
+ s
+ end
+ def self.startup; end
+ def self.shutdown; end
end
class ActionDispatch::IntegrationTest
end
end
- test 'override with configuration' do
+ test 'override with configuration "foobar"' do
Rails.configuration.source_version = 'foobar'
assert_equal 'foobar', AppVersion.hash
+ end
+
+ test 'override with configuration false' do
Rails.configuration.source_version = false
assert_not_equal 'foobar', AppVersion.hash
end
test 'override with file' do
path = Rails.root.join 'git-commit.version'
- assert(!File.exists?(path),
+ assert(!File.exist?(path),
"Packaged version file found in source tree: #{path}")
begin
File.open(path, 'w') do |f|
end
end
+ test "full text search index exists on models" do
+ fts_tables = ["collections", "container_requests", "groups", "jobs",
+ "pipeline_instances", "pipeline_templates", "workflows"]
+ fts_tables.each do |table|
+ table_class = table.classify.constantize
+ if table_class.respond_to?('full_text_searchable_columns')
+ fts_index_columns = table_class.full_text_searchable_columns
+ index_columns = nil
+ indexes = ActiveRecord::Base.connection.indexes(table)
+ fts_index_by_columns = indexes.select do |index|
+ if index.columns.first.match(/to_tsvector/)
+ index_columns = index.columns.first.scan(/\((?<columns>[A-Za-z_]+)\,/).flatten!
+ index_columns.sort == fts_index_columns.sort
+ else
+ false
+ end
+ end
+ assert !fts_index_by_columns.empty?, "#{table} has no FTS index with columns #{fts_index_columns}. Instead found FTS index with columns #{index_columns}"
+ end
+ end
+ end
+
test "selectable_attributes includes database attributes" do
assert_includes(Job.selectable_attributes, "success")
end
ak2 = AuthorizedKey.new(name: "bar", public_key: TEST_KEY, authorized_user_uuid: u2.uuid)
refute ak2.valid?
refute ak2.save
- assert_match /already exists/, ak2.errors.full_messages.to_s
+ assert_match(/already exists/, ak2.errors.full_messages.to_s)
end
end
end
# "crrud" == "create read render update delete", not a typo
- test "crrud cycle for a collection with a big manifest)" do
- slow_test
+ slow_test "crrud cycle for a collection with a big manifest)" do
bigmanifest = time_block 'make example' do
make_manifest(streams: 100,
files_per_stream: 100,
c.signed_manifest_text
end
time_block 'sign + render' do
- resp = c.as_api_response(nil)
+ c.as_api_response(nil)
end
loc = Blob.sign_locator(Digest::MD5.hexdigest('foo') + '+3',
api_token: api_token(:active))
require 'test_helper'
+require 'sweep_trashed_collections'
class CollectionTest < ActiveSupport::TestCase
include DbCurrentTime
c = create_collection "f\xc8o", Encoding::UTF_8
assert !c.valid?
assert_equal [:manifest_text], c.errors.messages.keys
- assert_match /UTF-8/, c.errors.messages[:manifest_text].first
+ assert_match(/UTF-8/, c.errors.messages[:manifest_text].first)
end
end
c = create_collection "f\xc8o", Encoding::ASCII_8BIT
assert !c.valid?
assert_equal [:manifest_text], c.errors.messages.keys
- assert_match /UTF-8/, c.errors.messages[:manifest_text].first
+ assert_match(/UTF-8/, c.errors.messages[:manifest_text].first)
end
end
assert c.valid?
created_file_names = c.file_names
assert created_file_names
- assert_match /foo.txt/, c.file_names
+ assert_match(/foo.txt/, c.file_names)
c.update_attribute 'manifest_text', ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo2.txt\n"
assert_not_equal created_file_names, c.file_names
- assert_match /foo2.txt/, c.file_names
+ assert_match(/foo2.txt/, c.file_names)
end
end
assert c.valid?
assert c.file_names
- assert_match /veryverylongfilename0000000000001.txt/, c.file_names
- assert_match /veryverylongfilename0000000000002.txt/, c.file_names
+ assert_match(/veryverylongfilename0000000000001.txt/, c.file_names)
+ assert_match(/veryverylongfilename0000000000002.txt/, c.file_names)
if not allow_truncate
- assert_match /veryverylastfilename/, c.file_names
- assert_match /laststreamname/, c.file_names
+ assert_match(/veryverylastfilename/, c.file_names)
+ assert_match(/laststreamname/, c.file_names)
end
end
end
end
end
- test 'signature expiry does not exceed expires_at' do
+ test 'signature expiry does not exceed trash_at' do
act_as_user users(:active) do
t0 = db_current_time
c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n", name: 'foo')
- c.update_attributes! expires_at: (t0 + 1.hours)
+ c.update_attributes! trash_at: (t0 + 1.hours)
c.reload
sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text)[1].to_i
assert_operator sig_exp.to_i, :<=, (t0 + 1.hours).to_i
act_as_user users(:active) do
c = Collection.create!(manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n",
name: 'foo',
- expires_at: db_current_time + 1.years)
+ trash_at: db_current_time + 1.years)
sig_exp = /\+A[0-9a-f]{40}\@([0-9]+)/.match(c.signed_manifest_text)[1].to_i
expect_max_sig_exp = db_current_time.to_i + Rails.configuration.blob_signature_ttl
- assert_operator c.expires_at.to_i, :>, expect_max_sig_exp
+ assert_operator c.trash_at.to_i, :>, expect_max_sig_exp
assert_operator sig_exp.to_i, :<=, expect_max_sig_exp
end
end
uuid = c.uuid
# mark collection as expired
- c.update_attribute 'expires_at', Time.new.strftime("%Y-%m-%d")
+ c.update_attributes!(trash_at: Time.new.strftime("%Y-%m-%d"))
c = Collection.where(uuid: uuid)
assert_empty c, 'Should not be able to find expired collection'
end
end
+ test 'trash_at cannot be set too far in the past' do
+ act_as_user users(:active) do
+ t0 = db_current_time
+ c = Collection.create!(manifest_text: '', name: 'foo')
+ c.update_attributes! trash_at: (t0 - 2.weeks)
+ c.reload
+ assert_operator c.trash_at, :>, t0
+ end
+ end
+
+ [['trash-to-delete interval negative',
+ :collection_owned_by_active,
+ {trash_at: Time.now+2.weeks, delete_at: Time.now},
+ {state: :invalid}],
+ ['trash-to-delete interval too short',
+ :collection_owned_by_active,
+ {trash_at: Time.now+3.days, delete_at: Time.now+7.days},
+ {state: :invalid}],
+ ['trash-to-delete interval ok',
+ :collection_owned_by_active,
+ {trash_at: Time.now, delete_at: Time.now+15.days},
+ {state: :trash_now}],
+ ['trash-to-delete interval short, but far enough in future',
+ :collection_owned_by_active,
+ {trash_at: Time.now+13.days, delete_at: Time.now+15.days},
+ {state: :trash_future}],
+ ['trash by setting is_trashed bool',
+ :collection_owned_by_active,
+ {is_trashed: true},
+ {state: :trash_now}],
+ ['trash in future by setting just trash_at',
+ :collection_owned_by_active,
+ {trash_at: Time.now+1.week},
+ {state: :trash_future}],
+ ['trash in future by setting trash_at and delete_at',
+ :collection_owned_by_active,
+ {trash_at: Time.now+1.week, delete_at: Time.now+4.weeks},
+ {state: :trash_future}],
+ ['untrash by clearing is_trashed bool',
+ :expired_collection,
+ {is_trashed: false},
+ {state: :not_trash}],
+ ].each do |test_name, fixture_name, updates, expect|
+ test test_name do
+ act_as_user users(:active) do
+ min_exp = (db_current_time +
+ Rails.configuration.blob_signature_ttl.seconds)
+ if fixture_name == :expired_collection
+ # Fixture-finder shorthand doesn't find trashed collections
+ # because they're not in the default scope.
+ c = Collection.unscoped.find_by_uuid('zzzzz-4zz18-mto52zx1s7sn3ih')
+ else
+ c = collections(fixture_name)
+ end
+ updates_ok = c.update_attributes(updates)
+ expect_valid = expect[:state] != :invalid
+ assert_equal updates_ok, expect_valid, c.errors.full_messages.to_s
+ case expect[:state]
+ when :invalid
+ refute c.valid?
+ when :trash_now
+ assert c.is_trashed
+ assert_not_nil c.trash_at
+ assert_operator c.trash_at, :<=, db_current_time
+ assert_not_nil c.delete_at
+ assert_operator c.delete_at, :>=, min_exp
+ when :trash_future
+ refute c.is_trashed
+ assert_not_nil c.trash_at
+ assert_operator c.trash_at, :>, db_current_time
+ assert_not_nil c.delete_at
+ assert_operator c.delete_at, :>=, c.trash_at
+ # Currently this minimum interval is needed to prevent early
+ # garbage collection:
+ assert_operator c.delete_at, :>=, min_exp
+ when :not_trash
+ refute c.is_trashed
+ assert_nil c.trash_at
+ assert_nil c.delete_at
+ else
+ raise "bad expect[:state]==#{expect[:state].inspect} in test case"
+ end
+ end
+ end
+ end
+
+ test 'default trash interval > blob signature ttl' do
+ Rails.configuration.default_trash_lifetime = 86400 * 21 # 3 weeks
+ start = db_current_time
+ act_as_user users(:active) do
+ c = Collection.create!(manifest_text: '', name: 'foo')
+ c.update_attributes!(trash_at: start + 86400.seconds)
+ assert_operator c.delete_at, :>=, start + (86400*22).seconds
+ assert_operator c.delete_at, :<, start + (86400*22 + 30).seconds
+ c.destroy
+
+ c = Collection.create!(manifest_text: '', name: 'foo')
+ c.update_attributes!(is_trashed: true)
+ assert_operator c.delete_at, :>=, start + (86400*21).seconds
+ end
+ end
+
test "find_all_for_docker_image resolves names that look like hashes" do
coll_list = Collection.
find_all_for_docker_image('a' * 64, nil, [users(:active)])
assert_includes(coll_uuids, collections(:docker_image).uuid)
end
- test 'expires_at cannot be set too far in the past' do
+ test "move to trash in SweepTrashedCollections" do
+ c = collections(:trashed_on_next_sweep)
+ refute_empty Collection.where('uuid=? and is_trashed=false', c.uuid)
+ assert_raises(ActiveRecord::RecordNotUnique) do
+ act_as_user users(:active) do
+ Collection.create!(owner_uuid: c.owner_uuid,
+ name: c.name)
+ end
+ end
+ SweepTrashedCollections.sweep_now
+ c = Collection.unscoped.where('uuid=? and is_trashed=true', c.uuid).first
+ assert c
act_as_user users(:active) do
- t0 = db_current_time
- c = Collection.create!(manifest_text: '', name: 'foo')
- c.update_attributes! expires_at: (t0 - 2.weeks)
- c.reload
- assert_operator c.expires_at, :>, t0
+ assert Collection.create!(owner_uuid: c.owner_uuid,
+ name: c.name)
end
end
+
+ test "delete in SweepTrashedCollections" do
+ uuid = 'zzzzz-4zz18-3u1p5umicfpqszp' # deleted_on_next_sweep
+ assert_not_empty Collection.unscoped.where(uuid: uuid)
+ SweepTrashedCollections.sweep_now
+ assert_empty Collection.unscoped.where(uuid: uuid)
+ end
end
test 'find_commit_range does not bypass permissions' do
authorize_with :inactive
assert_raises ArgumentError do
- c = Commit.find_commit_range 'foo', nil, 'master', []
+ Commit.find_commit_range 'foo', nil, 'master', []
end
end
authorize_with :active
gitint = "git --git-dir #{Rails.configuration.git_internal_dir}"
IO.read("|#{gitint} tag -d testtag 2>/dev/null") # "no such tag", fine
- assert_match /^fatal: /, IO.read("|#{gitint} show testtag 2>&1")
+ assert_match(/^fatal: /, IO.read("|#{gitint} show testtag 2>&1"))
refute $?.success?
Commit.tag_in_internal_repository 'active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', 'testtag'
- assert_match /^commit 31ce37f/, IO.read("|#{gitint} show testtag")
+ assert_match(/^commit 31ce37f/, IO.read("|#{gitint} show testtag"))
assert $?.success?
end
Dir.mktmpdir do |touchdir|
# invalid input to maximum
a = Commit.find_commit_range('active/foo', nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", nil)
- assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
+ assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
assert_equal [], a
# invalid input to maximum
a = Commit.find_commit_range('active/foo', nil, "$(uname>#{touchdir}/uh_oh)", nil)
- assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
+ assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
assert_equal [], a
# invalid input to minimum
a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
- assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
+ assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
assert_equal [], a
# invalid input to minimum
a = Commit.find_commit_range('active/foo', "$(uname>#{touchdir}/uh_oh)", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
- assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
+ assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
assert_equal [], a
# invalid input to 'excludes'
# complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["4fe459abe02d9b365932b8f5dc419439ab4e2577 ; touch #{touchdir}/uh_oh"])
- assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
+ assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
assert_equal [], a
# invalid input to 'excludes'
# complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["$(uname>#{touchdir}/uh_oh)"])
- assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
+ assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
assert_equal [], a
end
end
test "Retry on container cancelled" do
set_user_from_auth :active
cr = create_minimal_req!(priority: 1, state: "Committed", container_count_max: 2)
+ cr2 = create_minimal_req!(priority: 1, state: "Committed", container_count_max: 2, command: ["echo", "baz"])
prev_container_uuid = cr.container_uuid
c = act_as_system_user do
end
cr.reload
+ cr2.reload
assert_equal "Committed", cr.state
assert_equal prev_container_uuid, cr.container_uuid
+ assert_not_equal cr2.container_uuid, cr.container_uuid
prev_container_uuid = cr.container_uuid
act_as_system_user do
end
cr.reload
+ cr2.reload
assert_equal "Committed", cr.state
assert_not_equal prev_container_uuid, cr.container_uuid
+ assert_not_equal cr2.container_uuid, cr.container_uuid
prev_container_uuid = cr.container_uuid
c = act_as_system_user do
end
cr.reload
+ cr2.reload
assert_equal "Final", cr.state
assert_equal prev_container_uuid, cr.container_uuid
+ assert_not_equal cr2.container_uuid, cr.container_uuid
+ end
+
+ test "Output collection name setting using output_name with name collision resolution" do
+ set_user_from_auth :active
+ output_name = collections(:foo_file).name
+
+ cr = create_minimal_req!(priority: 1,
+ state: ContainerRequest::Committed,
+ output_name: output_name)
+ act_as_system_user do
+ c = Container.find_by_uuid(cr.container_uuid)
+ c.update_attributes!(state: Container::Locked)
+ c.update_attributes!(state: Container::Running)
+ c.update_attributes!(state: Container::Complete,
+ exit_code: 0,
+ output: '1f4b0bc7583c2a7f9102c395f4ffc5e3+45',
+ log: 'fa7aeb5140e2848d39b416daeef4ffc5+45')
+ end
+ cr.save
+ assert_equal ContainerRequest::Final, cr.state
+ output_coll = Collection.find_by_uuid(cr.output_uuid)
+ # Make sure the resulting output collection name include the original name
+ # plus the date
+    assert_not_equal output_name, output_coll.name,
+                     "There shouldn't be more than one collection with the same owner and name '#{output_name}'"
+ assert output_coll.name.include?(output_name),
+ "New name should include original name"
+    assert_match(/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z/, output_coll.name,
+                 "New name should include ISO8601 date")
end
test "Finalize committed request when reusing a finished container" do
test "find_reusable method should select higher priority queued container" do
set_user_from_auth :active
common_attrs = REUSABLE_COMMON_ATTRS.merge({environment:{"var" => "queued"}})
- c_low_priority, _ = minimal_new(common_attrs.merge({priority:1}))
- c_high_priority, _ = minimal_new(common_attrs.merge({priority:2}))
+ c_low_priority, _ = minimal_new(common_attrs.merge({use_existing:false, priority:1}))
+ c_high_priority, _ = minimal_new(common_attrs.merge({use_existing:false, priority:2}))
+ assert_not_equal c_low_priority.uuid, c_high_priority.uuid
assert_equal Container::Queued, c_low_priority.state
assert_equal Container::Queued, c_high_priority.state
reused = Container.find_reusable(common_attrs)
output: '1f4b0bc7583c2a7f9102c395f4ffc5e3+45'
}
- c_older, _ = minimal_new(common_attrs)
- c_recent, _ = minimal_new(common_attrs)
+ c_older, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ c_recent, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ assert_not_equal c_older.uuid, c_recent.uuid
set_user_from_auth :dispatch1
c_older.update_attributes!({state: Container::Locked})
c_output1 = Container.create common_attrs
c_output2 = Container.create common_attrs
+ assert_not_equal c_output1.uuid, c_output2.uuid
cr = ContainerRequest.new common_attrs
cr.state = ContainerRequest::Committed
test "find_reusable method should select running container by start date" do
set_user_from_auth :active
common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "running"}})
- c_slower, _ = minimal_new(common_attrs)
- c_faster_started_first, _ = minimal_new(common_attrs)
- c_faster_started_second, _ = minimal_new(common_attrs)
+ c_slower, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ c_faster_started_first, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ c_faster_started_second, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ # Confirm the 3 container UUIDs are different.
+ assert_equal 3, [c_slower.uuid, c_faster_started_first.uuid, c_faster_started_second.uuid].uniq.length
set_user_from_auth :dispatch1
c_slower.update_attributes!({state: Container::Locked})
c_slower.update_attributes!({state: Container::Running,
test "find_reusable method should select running container by progress" do
set_user_from_auth :active
common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "running2"}})
- c_slower, _ = minimal_new(common_attrs)
- c_faster_started_first, _ = minimal_new(common_attrs)
- c_faster_started_second, _ = minimal_new(common_attrs)
+ c_slower, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ c_faster_started_first, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ c_faster_started_second, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ # Confirm the 3 container UUIDs are different.
+ assert_equal 3, [c_slower.uuid, c_faster_started_first.uuid, c_faster_started_second.uuid].uniq.length
set_user_from_auth :dispatch1
c_slower.update_attributes!({state: Container::Locked})
c_slower.update_attributes!({state: Container::Running,
test "find_reusable method should select locked container most likely to start sooner" do
set_user_from_auth :active
common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "locked"}})
- c_low_priority, _ = minimal_new(common_attrs)
- c_high_priority_older, _ = minimal_new(common_attrs)
- c_high_priority_newer, _ = minimal_new(common_attrs)
+ c_low_priority, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ c_high_priority_older, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ c_high_priority_newer, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ # Confirm the 3 container UUIDs are different.
+ assert_equal 3, [c_low_priority.uuid, c_high_priority_older.uuid, c_high_priority_newer.uuid].uniq.length
set_user_from_auth :dispatch1
c_low_priority.update_attributes!({state: Container::Locked,
priority: 1})
test "find_reusable method should select running over failed container" do
set_user_from_auth :active
common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "failed_vs_running"}})
- c_failed, _ = minimal_new(common_attrs)
- c_running, _ = minimal_new(common_attrs)
+ c_failed, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ c_running, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ assert_not_equal c_failed.uuid, c_running.uuid
set_user_from_auth :dispatch1
c_failed.update_attributes!({state: Container::Locked})
c_failed.update_attributes!({state: Container::Running})
test "find_reusable method should select complete over running container" do
set_user_from_auth :active
common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "completed_vs_running"}})
- c_completed, _ = minimal_new(common_attrs)
- c_running, _ = minimal_new(common_attrs)
+ c_completed, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ c_running, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ assert_not_equal c_completed.uuid, c_running.uuid
set_user_from_auth :dispatch1
c_completed.update_attributes!({state: Container::Locked})
c_completed.update_attributes!({state: Container::Running})
test "find_reusable method should select running over locked container" do
set_user_from_auth :active
common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "running_vs_locked"}})
- c_locked, _ = minimal_new(common_attrs)
- c_running, _ = minimal_new(common_attrs)
+ c_locked, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ c_running, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ assert_not_equal c_running.uuid, c_locked.uuid
set_user_from_auth :dispatch1
c_locked.update_attributes!({state: Container::Locked})
c_running.update_attributes!({state: Container::Locked})
test "find_reusable method should select locked over queued container" do
set_user_from_auth :active
common_attrs = REUSABLE_COMMON_ATTRS.merge({environment: {"var" => "running_vs_locked"}})
- c_locked, _ = minimal_new(common_attrs)
- c_queued, _ = minimal_new(common_attrs)
+ c_locked, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ c_queued, _ = minimal_new(common_attrs.merge({use_existing: false}))
+ assert_not_equal c_queued.uuid, c_locked.uuid
set_user_from_auth :dispatch1
c_locked.update_attributes!({state: Container::Locked})
reused = Container.find_reusable(common_attrs)
create_superuser_token active_user_token
end
assert_not_nil e
- assert_equal "Token already exists but is not a superuser token.", e.message
+ assert_equal "Token exists but is not a superuser token.", e.message
+ end
+
+ test "specified token has limited scope" do
+ active_user_token = api_client_authorizations("data_manager").api_token
+ e = assert_raises RuntimeError do
+ create_superuser_token active_user_token
+ end
+ assert_not_nil e
+ assert_match(/^Token exists but has limited scope/, e.message)
+ end
+
+ test "existing token has limited scope" do
+ active_user_token = api_client_authorizations("admin_vm").api_token
+ ApiClientAuthorization.
+ where(user_id: system_user.id).
+ update_all(scopes: ["GET /"])
+ fixture_tokens = ApiClientAuthorization.all.collect(&:api_token)
+ new_token = create_superuser_token
+ refute_includes(fixture_tokens, new_token)
end
end
test 'command line help' do
cmd = Rails.root.join('script/fail-jobs.rb').to_s
- assert_match /Options:.*--before=/m, File.popen([cmd, '--help']).read
+ assert_match(/Options:.*--before=/m, File.popen([cmd, '--help']).read)
end
protected
def assert_end_states
- @job.values.map &:reload
+ @job.values.map(&:reload)
assert_equal 'Failed', @job[:before_reboot].state
assert_equal false, @job[:before_reboot].running
assert_equal false, @job[:before_reboot].success
# Ensure valid_attrs doesn't produce errors -- otherwise we will
# not know whether errors reported below are actually caused by
# invalid_attrs.
- dummy = Job.create! job_attrs
+ Job.create! job_attrs
job = Job.create job_attrs(invalid_attrs)
assert_raises(ActiveRecord::RecordInvalid, ArgumentError,
parameters.each do |parameter|
expectations = parameter[2]
- if parameter[1] == 'use_current_user_uuid'
+ if 'use_current_user_uuid' == parameter[1]
parameter[1] = Thread.current[:user].uuid
end
assert_equal "Failed", job.state
end
+ test "admin user can cancel a running job despite lock" do
+ set_user_from_auth :active_trustedclient
+ job = Job.create! job_attrs
+ job.lock current_user.uuid
+ assert_equal Job::Running, job.state
+
+ set_user_from_auth :spectator
+ assert_raises do
+ job.update_attributes!(state: Job::Cancelled)
+ end
+
+ set_user_from_auth :admin
+ job.reload
+ assert_equal Job::Running, job.state
+ job.update_attributes!(state: Job::Cancelled)
+ assert_equal Job::Cancelled, job.state
+ end
+
test "verify job queue position" do
job1 = Job.create! job_attrs
assert_equal 'Queued', job1.state, "Incorrect job state for newly created job1"
}
assert_raises(ActiveRecord::RecordInvalid,
"created job with a collection uuid in script_parameters") do
- job = Job.create!(job_attrs(bad_params))
+ Job.create!(job_attrs(bad_params))
end
end
# appear too, but only if they are _not_ listed in known_logs
# (i.e., we do not make any assertions about logs not mentioned in
# either "known" or "expected".)
- result_ids = result.collect &:id
+ result_ids = result.collect(&:id)
expected_logs.each do |want|
assert_includes result_ids, logs(want).id
end
conffile = Rails.root.join 'tmp', 'compute65535.conf'
File.unlink conffile rescue nil
assert Node.dns_server_update 'compute65535', '127.0.0.1'
- assert_match /\"1\.0\.0\.127\.in-addr\.arpa\. IN PTR compute65535\.zzzzz\.arvadosapi\.com\"/, IO.read(conffile)
+ assert_match(/\"1\.0\.0\.127\.in-addr\.arpa\. IN PTR compute65535\.zzzzz\.arvadosapi\.com\"/, IO.read(conffile))
File.unlink conffile
end
test "create object with non-existent #{o_class} owner" do
assert_raises(ActiveRecord::RecordInvalid,
"create should fail with random owner_uuid") do
- i = Specimen.create!(owner_uuid: o_class.generate_uuid)
+ Specimen.create!(owner_uuid: o_class.generate_uuid)
end
i = Specimen.create(owner_uuid: o_class.generate_uuid)
o = eval ofixt
assert_equal(true, Specimen.where(owner_uuid: o.uuid).any?,
"need something to be owned by #{o.uuid} for this test")
- old_uuid = o.uuid
new_uuid = o.uuid.sub(/..........$/, rand(2**256).to_s(36)[0..9])
assert(!o.update_attributes(uuid: new_uuid),
"should not change uuid of #{ofixt} that owns objects")
sp_grp = Group.create!
sp = Specimen.create!(owner_uuid: sp_grp.uuid)
- manage_perm = Link.create!(link_class: 'permission',
- name: 'can_manage',
- tail_uuid: owner_grp.uuid,
- head_uuid: sp_grp.uuid)
+ Link.create!(link_class: 'permission',
+ name: 'can_manage',
+ tail_uuid: owner_grp.uuid,
+ head_uuid: sp_grp.uuid)
# active user owns owner_grp, which has can_manage permission on sp_grp
# user should be able to add permissions on sp.
head_uuid: sp.uuid,
link_class: 'permission',
name: 'can_write')
- test_uuid = test_perm.uuid
assert test_perm.save, "could not save new permission on target object"
assert test_perm.destroy, "could not delete new permission on target object"
end
- # TODO(twp): fix bug #3091, which should fix this test.
- test "can_manage permission on a non-group object" do
- skip
+ # bug #3091
+ skip "can_manage permission on a non-group object" do
set_user_from_auth :admin
ob = Specimen.create!
pi = PipelineInstance.find_by_uuid 'zzzzz-d1hrv-f4gneyn6br1xize'
assert_equal PipelineInstance::New, pi.state, 'expected state to be New after adding component with input'
assert_equal pi.components.size, 1, 'expected one component'
+ assert_nil pi.started_at, 'expected started_at to be nil on new pipeline instance'
+ assert_nil pi.finished_at, 'expected finished_at to be nil on new pipeline instance'
# add a component with no input not required
component = {'script_parameters' => {"input_not_provided" => {"required" => false}}}
pi.save
pi = PipelineInstance.find_by_uuid 'zzzzz-d1hrv-f4gneyn6br1xize'
assert_equal PipelineInstance::RunningOnServer, pi.state, 'expected state to be RunningOnServer after updating state to RunningOnServer'
+ assert_not_nil pi.started_at, 'expected started_at to have a value on a running pipeline instance'
+ assert_nil pi.finished_at, 'expected finished_at to be nil on a running pipeline instance'
pi.state = PipelineInstance::Paused
pi.save
pi.save
pi = PipelineInstance.find_by_uuid 'zzzzz-d1hrv-f4gneyn6br1xize'
assert_equal PipelineInstance::Complete, pi.state, 'expected state to be Complete after updating state to Complete'
+ assert_not_nil pi.started_at, 'expected started_at to have a value on a completed pipeline instance'
+ assert_not_nil pi.finished_at, 'expected finished_at to have a value on a completed pipeline instance'
pi.state = 'bogus'
pi.save
pi.save
pi = PipelineInstance.find_by_uuid 'zzzzz-d1hrv-f4gneyn6br1xize'
assert_equal PipelineInstance::Failed, pi.state, 'expected state to be Failed after updating state to Failed'
+ assert_not_nil pi.started_at, 'expected started_at to have a value on a failed pipeline instance'
+ assert_not_nil pi.finished_at, 'expected finished_at to have a value on a failed pipeline instance'
end
test "update attributes for pipeline with two components" do
component2 = {'script_parameters' => {"something_else" => "xxxad4b39ca5a924e481008009d94e32+210", "input_missing" => {"required" => true}}}
pi.components['first'] = component1
pi.components['second'] = component2
- components = pi.components
Thread.current[:user] = users(:admin)
pi.update_attribute 'components', pi.components
updated_name = updated_src_collection.name
assert_equal true, updated_name.include?(src_collection.name)
- match = updated_name.match /^test collection.*salvaged data at (.*)\)$/
+ match = updated_name.match(/^test collection.*salvaged data at (.*)\)$/)
assert_not_nil match
assert_not_nil match[1]
assert_empty updated_src_collection.manifest_text
# match[1] is the uuid of the new collection created from src_collection's salvaged data
# use this to get the new collection and verify
new_collection = Collection.find_by_uuid match[1]
- match = new_collection.name.match /^salvaged from (.*),.*/
+ match = new_collection.name.match(/^salvaged from (.*),.*/)
assert_not_nil match
assert_equal src_collection.uuid, match[1]
end
test "salvage collection with no uuid required argument" do
- e = assert_raises RuntimeError do
+ assert_raises RuntimeError do
salvage_collection nil
end
end
e = assert_raises RuntimeError do
salvage_collection collections('user_agreement').uuid
end
- assert_match /Error during arv-put: pid \d+ exit \d+ \(cmd was \"arv-put .*\"\)/, e.message
+ assert_match(/Error during arv-put: pid \d+ exit \d+ \(cmd was \"arv-put .*\"\)/, e.message)
end
# This test uses BAD_MANIFEST, which has the following flaws:
updated_name = updated_src_collection.name
assert_equal true, updated_name.include?(src_collection.name)
- match = updated_name.match /^test collection.*salvaged data at (.*)\)$/
+ match = updated_name.match(/^test collection.*salvaged data at (.*)\)$/)
assert_not_nil match
assert_not_nil match[1]
assert_empty updated_src_collection.manifest_text
# match[1] is the uuid of the new collection created from src_collection's salvaged data
# use this to get the new collection and verify
new_collection = Collection.find_by_uuid match[1]
- match = new_collection.name.match /^salvaged from (.*),.*/
+ match = new_collection.name.match(/^salvaged from (.*),.*/)
assert_not_nil match
assert_equal src_collection.uuid, match[1]
# verify the new collection's manifest includes the bad locators
test "admin can't clear username when user owns repositories" do
set_user_from_auth :admin
user = users(:active)
- start_username = user.username
user.username = nil
assert_not_allowed { user.save }
refute_empty(user.errors[:username])
assert @uninvited_user.can? :write=>"#{@uninvited_user.uuid}"
assert @uninvited_user.can? :manage=>"#{@uninvited_user.uuid}"
- assert @uninvited_user.groups_i_can(:read).size == 1, "inactive and uninvited user can only read anonymous user group"
- assert @uninvited_user.groups_i_can(:read).first.ends_with? 'anonymouspublic' , "inactive and uninvited user can only read anonymous user group"
- assert @uninvited_user.groups_i_can(:write).size == 0, "inactive and uninvited user should not be able write to any groups"
- assert @uninvited_user.groups_i_can(:manage).size == 0, "inactive and uninvited user should not be able manage any groups"
+ assert_equal(@uninvited_user.groups_i_can(:read).sort,
+ [@uninvited_user.uuid, groups(:anonymous_group).uuid].sort)
+ assert_equal(@uninvited_user.groups_i_can(:write),
+ [@uninvited_user.uuid])
+ assert_equal(@uninvited_user.groups_i_can(:manage),
+ [@uninvited_user.uuid])
end
test "find user method checks" do
+++ /dev/null
-require 'bundler'
-require 'socket'
-
-$ARV_API_SERVER_DIR = File.expand_path('../..', __FILE__)
-
-s = TCPServer.new('0.0.0.0', 0)
-WEBSOCKET_PORT = s.addr[1]
-s.close
-SERVER_PID_PATH = "tmp/pids/passenger.#{WEBSOCKET_PORT}.pid"
-
-class WebsocketTestRunner < MiniTest::Unit
- def _system(*cmd)
- Bundler.with_clean_env do
- if not system({'ARVADOS_WEBSOCKETS' => 'ws-only', 'RAILS_ENV' => 'test'}, *cmd)
- raise RuntimeError, "Command failed with exit status #{$?}: #{cmd.inspect}"
- end
- end
- end
-
- def _run(args=[])
- server_pid = Dir.chdir($ARV_API_SERVER_DIR) do |apidir|
- # Only passenger seems to be able to run the websockets server successfully.
- _system('passenger', 'start', '-d', "-p#{WEBSOCKET_PORT}")
- timeout = Time.now.tv_sec + 10
- begin
- sleep 0.2
- begin
- server_pid = IO.read(SERVER_PID_PATH).to_i
- good_pid = (server_pid > 0) and (Process.kill(0, pid) rescue false)
- rescue Errno::ENOENT
- good_pid = false
- end
- end while (not good_pid) and (Time.now.tv_sec < timeout)
- if not good_pid
- raise RuntimeError, "could not find API server Rails pid"
- end
- server_pid
- end
- begin
- super(args)
- ensure
- Dir.chdir($ARV_API_SERVER_DIR) do
- _system('passenger', 'stop', "-p#{WEBSOCKET_PORT}")
- end
- # DatabaseCleaner leaves the database empty. Prefer to leave it full.
- dc = DatabaseController.new
- dc.define_singleton_method :render do |*args| end
- dc.reset
- end
- end
-end
-
-MiniTest::Unit.runner = WebsocketTestRunner.new
et = 'add'
else:
et = 'remove'
- if ev['properties']['new_attributes']['expires_at'] is not None:
+ if ev['properties']['new_attributes']['trash_at'] is not None:
et = 'remove'
self.evqueue.put((self.project, et, ev['object_uuid']))
b, _ := ioutil.ReadAll(stdoutReader)
stdoutReader.Close()
stdoutChan <- b
+ close(stdoutChan)
}()
stderrChan := make(chan []byte)
b, _ := ioutil.ReadAll(stderrReader)
stderrReader.Close()
stderrChan <- b
+ close(stderrChan)
}()
// Send a tiny script on stdin to execute the crunch-run command
io.WriteString(stdinWriter, execScript(append(crunchRunCommand, container.UUID)))
stdinWriter.Close()
- err = cmd.Wait()
-
stdoutMsg := <-stdoutChan
stderrmsg := <-stderrChan
- close(stdoutChan)
- close(stderrChan)
+ err = cmd.Wait()
if err != nil {
submitErr = fmt.Errorf("Container submission failed: %v: %v (stderr: %q)", cmd.Args, err, stderrmsg)
// Mutex between squeue sync and running sbatch or scancel.
squeueUpdater.SlurmLock.Lock()
- err := scancelCmd(container).Run()
+ cmd := scancelCmd(container)
+ msg, err := cmd.CombinedOutput()
squeueUpdater.SlurmLock.Unlock()
if err != nil {
- log.Printf("Error stopping container %s with scancel: %v",
- container.UUID, err)
+ log.Printf("Error stopping container %s with %v %v: %v %v",
+ container.UUID, cmd.Path, cmd.Args, err, string(msg))
if squeueUpdater.CheckSqueue(container.UUID) {
log.Printf("Container %s is still in squeue after scancel.",
container.UUID)
return exec.Command("echo")
}
- container := s.integrationTest(c, func() *exec.Cmd { return exec.Command("echo", "zzzzz-dz642-queuedcontainer") },
+ container := s.integrationTest(c,
+ func() *exec.Cmd { return exec.Command("echo", "zzzzz-dz642-queuedcontainer") },
[]string(nil),
func(dispatcher *dispatch.Dispatcher, container arvados.Container) {
dispatcher.UpdateState(container.UUID, dispatch.Running)
}(squeueCmd)
squeueCmd = newSqueueCmd
- // There should be no queued containers now
+ // There should be one queued container
params := arvadosclient.Dict{
"filters": [][]string{{"state", "=", "Queued"}},
}
import (
"bufio"
+ "io"
+ "io/ioutil"
"log"
"os/exec"
"sync"
log.Printf("Error creating stdout pipe for squeue: %v", err)
return
}
+
+ stderrReader, err := cmd.StderrPipe()
+ if err != nil {
+ log.Printf("Error creating stderr pipe for squeue: %v", err)
+ return
+ }
+
err = cmd.Start()
if err != nil {
log.Printf("Error running squeue: %v", err)
return
}
+
+ stderrChan := make(chan []byte)
+ go func() {
+ b, _ := ioutil.ReadAll(stderrReader)
+ stderrChan <- b
+ close(stderrChan)
+ }()
+
scanner := bufio.NewScanner(sq)
for scanner.Scan() {
newSqueueContents = append(newSqueueContents, scanner.Text())
}
- if err := scanner.Err(); err != nil {
- cmd.Wait()
- log.Printf("Error reading from squeue pipe: %v", err)
- return
- }
+ io.Copy(ioutil.Discard, sq)
+
+ stderrmsg := <-stderrChan
err = cmd.Wait()
+
+ if scanner.Err() != nil {
+ log.Printf("Error reading from squeue pipe: %v", err)
+ }
if err != nil {
- log.Printf("Error running squeue: %v", err)
- return
+ log.Printf("Error running %v %v: %v %q", cmd.Path, cmd.Args, err, string(stderrmsg))
}
- squeue.squeueCond.L.Lock()
- squeue.squeueContents = newSqueueContents
- squeue.squeueCond.Broadcast()
- squeue.squeueCond.L.Unlock()
+ if scanner.Err() == nil && err == nil {
+ squeue.squeueCond.L.Lock()
+ squeue.squeueContents = newSqueueContents
+ squeue.squeueCond.Broadcast()
+ squeue.squeueCond.L.Unlock()
+ }
}
// CheckSqueue checks if a given container UUID is in the slurm queue. This
collectionPaths := []string{}
runner.Binds = nil
+ needCertMount := true
for bind, mnt := range runner.Container.Mounts {
if bind == "stdout" {
return fmt.Errorf("Stdout path does not start with OutputPath: %s, %s", mnt.Path, prefix)
}
}
+ if bind == "/etc/arvados/ca-certificates.crt" {
+ needCertMount = false
+ }
switch {
case mnt.Kind == "collection":
return fmt.Errorf("Output path does not correspond to a writable mount point")
}
+ if wantAPI := runner.Container.RuntimeConstraints.API; needCertMount && wantAPI != nil && *wantAPI {
+ for _, certfile := range arvadosclient.CertFiles {
+ _, err := os.Stat(certfile)
+ if err == nil {
+ runner.Binds = append(runner.Binds, fmt.Sprintf("%s:/etc/arvados/ca-certificates.crt:ro", certfile))
+ break
+ }
+ }
+ }
+
if pdhOnly {
arvMountCmd = append(arvMountCmd, "--mount-by-pdh", "by_id")
} else {
err = runner.ArvClient.Create("collections",
arvadosclient.Dict{
"collection": arvadosclient.Dict{
- "expires_at": time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
+ "trash_at": time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
"name": "output for " + runner.Container.UUID,
"manifest_text": manifestText}},
&response)
err = runner.ArvClient.Create("collections",
arvadosclient.Dict{
"collection": arvadosclient.Dict{
- "expires_at": time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
+ "trash_at": time.Now().Add(runner.trashLifetime).Format(time.RFC3339),
"name": "logs for " + runner.Container.UUID,
"manifest_text": mt}},
&response)
func (runner *ContainerRunner) UpdateContainerFinal() error {
update := arvadosclient.Dict{}
update["state"] = runner.finalState
+ if runner.LogsPDH != nil {
+ update["log"] = *runner.LogsPDH
+ }
if runner.finalState == "Complete" {
- if runner.LogsPDH != nil {
- update["log"] = *runner.LogsPDH
- }
if runner.ExitCode != nil {
update["exit_code"] = *runner.ExitCode
}
checkErr(err)
if runner.finalState == "Queued" {
+ runner.CrunchLog.Close()
runner.UpdateContainerFinal()
return
}
// check for and/or load image
err = runner.LoadImage()
if err != nil {
+ runner.finalState = "Cancelled"
err = fmt.Errorf("While loading container image: %v", err)
return
}
// set up FUSE mount and binds
err = runner.SetupMounts()
if err != nil {
+ runner.finalState = "Cancelled"
err = fmt.Errorf("While setting up mounts: %v", err)
return
}
cgroupRoot := flag.String("cgroup-root", "/sys/fs/cgroup", "path to sysfs cgroup tree")
cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup (ignored if -cgroup-parent-subsystem is used)")
cgroupParentSubsystem := flag.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container")
+ caCertsPath := flag.String("ca-certs", "", "Path to TLS root certificates")
flag.Parse()
containerId := flag.Arg(0)
+ if *caCertsPath != "" {
+ arvadosclient.CertFiles = []string{*caCertsPath}
+ }
+
api, err := arvadosclient.MakeArvadosClient()
if err != nil {
log.Fatalf("%s: %v", containerId, err)
return nil, nil
}
+func stubCert(temp string) string {
+ path := temp + "/ca-certificates.crt"
+ crt, _ := os.Create(path)
+ crt.Close()
+ arvadosclient.CertFiles = []string{path}
+ return path
+}
+
func (s *TestSuite) TestSetupMounts(c *C) {
api := &ArvTestClient{}
kc := &KeepTestClient{}
am := &ArvMountCmdLine{}
cr.RunArvMount = am.ArvMountTest
- realTemp, err := ioutil.TempDir("", "crunchrun_test-")
+ realTemp, err := ioutil.TempDir("", "crunchrun_test1-")
+ c.Assert(err, IsNil)
+ certTemp, err := ioutil.TempDir("", "crunchrun_test2-")
c.Assert(err, IsNil)
+ stubCertPath := stubCert(certTemp)
+
defer os.RemoveAll(realTemp)
+ defer os.RemoveAll(certTemp)
i := 0
cr.MkTempDir = func(_ string, prefix string) (string, error) {
checkEmpty()
}
+ {
+ i = 0
+ cr.Container.Mounts = make(map[string]arvados.Mount)
+ cr.Container.Mounts["/tmp"] = arvados.Mount{Kind: "tmp"}
+ cr.OutputPath = "/tmp"
+
+ apiflag := true
+ cr.Container.RuntimeConstraints.API = &apiflag
+
+ err := cr.SetupMounts()
+ c.Check(err, IsNil)
+ c.Check(am.Cmd, DeepEquals, []string{"--foreground", "--allow-other", "--read-write", "--mount-by-pdh", "by_id", realTemp + "/keep1"})
+ c.Check(cr.Binds, DeepEquals, []string{realTemp + "/2:/tmp", stubCertPath + ":/etc/arvados/ca-certificates.crt:ro"})
+ cr.CleanupDirs()
+ checkEmpty()
+
+ apiflag = false
+ }
+
{
i = 0
cr.Container.Mounts = map[string]arvados.Mount{
const MaxLogLine = 1 << 14 // Child stderr lines >16KiB will be split
+var (
+ signalOnDeadPPID int = 15
+ ppidCheckInterval = time.Second
+)
+
func main() {
reporter := crunchstat.Reporter{
Logger: log.New(os.Stderr, "crunchstat: ", 0),
flag.StringVar(&reporter.CgroupRoot, "cgroup-root", "", "Root of cgroup tree")
flag.StringVar(&reporter.CgroupParent, "cgroup-parent", "", "Name of container parent under cgroup")
flag.StringVar(&reporter.CIDFile, "cgroup-cid", "", "Path to container id file")
+ flag.IntVar(&signalOnDeadPPID, "signal-on-dead-ppid", signalOnDeadPPID, "Signal to send child if crunchstat's parent process disappears (0 to disable)")
+ flag.DurationVar(&ppidCheckInterval, "ppid-check-interval", ppidCheckInterval, "Time between checks for parent process disappearance")
pollMsec := flag.Int64("poll", 1000, "Reporting interval, in milliseconds")
flag.Parse()
if reporter.CgroupRoot == "" {
reporter.Logger.Fatal("error: must provide -cgroup-root")
+ } else if signalOnDeadPPID < 0 {
+ reporter.Logger.Fatalf("-signal-on-dead-ppid=%d is invalid (use a positive signal number, or 0 to disable)", signalOnDeadPPID)
}
reporter.PollPeriod = time.Duration(*pollMsec) * time.Millisecond
signal.Notify(sigChan, syscall.SIGTERM)
signal.Notify(sigChan, syscall.SIGINT)
+ // Kill our child proc if our parent process disappears
+ if signalOnDeadPPID != 0 {
+ go sendSignalOnDeadPPID(ppidCheckInterval, signalOnDeadPPID, os.Getppid(), cmd, logger)
+ }
+
// Funnel stderr through our channel
stderr_pipe, err := cmd.StderrPipe()
if err != nil {
return cmd.Wait()
}
+func sendSignalOnDeadPPID(intvl time.Duration, signum, ppidOrig int, cmd *exec.Cmd, logger *log.Logger) {
+ ticker := time.NewTicker(intvl)
+ for range ticker.C {
+ ppid := os.Getppid()
+ if ppid == ppidOrig {
+ continue
+ }
+ if cmd.Process == nil {
+ // Child process isn't running yet
+ continue
+ }
+ logger.Printf("notice: crunchstat ppid changed from %d to %d -- killing child pid %d with signal %d", ppidOrig, ppid, cmd.Process.Pid, signum)
+ err := cmd.Process.Signal(syscall.Signal(signum))
+ if err != nil {
+ logger.Printf("error: sending signal: %s", err)
+ continue
+ }
+ ticker.Stop()
+ break
+ }
+}
+
func copyPipeToChildLog(in io.ReadCloser, logger *log.Logger) {
reader := bufio.NewReaderSize(in, MaxLogLine)
var prefix string
import (
"bufio"
"bytes"
+ "fmt"
"io"
+ "io/ioutil"
"log"
"math/rand"
+ "os"
+ "os/exec"
+ "sync"
+ "syscall"
"testing"
"time"
)
logger := log.New(w, "", 0)
return logger, bufio.NewReader(r)
}
+
+func TestSignalOnDeadPPID(t *testing.T) {
+ if !testDeadParent(t, 0) {
+ t.Fatal("child should still be alive after parent dies")
+ }
+ if testDeadParent(t, 15) {
+ t.Fatal("child should have been killed when parent died")
+ }
+}
+
+// testDeadParent returns true if crunchstat's child proc is still
+// alive after its parent dies.
+func testDeadParent(t *testing.T, signum int) bool {
+ var err error
+ var bin, childlockfile, parentlockfile *os.File
+ for _, f := range []**os.File{&bin, &childlockfile, &parentlockfile} {
+ *f, err = ioutil.TempFile("", "crunchstat_")
+ if err != nil {
+ t.Fatal(err)
+ }
+ defer (*f).Close()
+ defer os.Remove((*f).Name())
+ }
+
+ bin.Close()
+ err = exec.Command("go", "build", "-o", bin.Name()).Run()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ err = syscall.Flock(int(parentlockfile.Fd()), syscall.LOCK_EX)
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ cmd := exec.Command("bash", "-c", `
+set -e
+"$BINFILE" -cgroup-root=/none -ppid-check-interval=10ms -signal-on-dead-ppid="$SIGNUM" bash -c '
+ set -e
+ unlock() {
+ flock --unlock "$CHILDLOCKFD"
+ kill %1
+ }
+ trap unlock TERM
+ flock --exclusive "$CHILDLOCKFD"
+ echo -n "$$" > "$CHILDLOCKFILE"
+ flock --unlock "$PARENTLOCKFD"
+ sleep 20 </dev/null >/dev/null 2>/dev/null &
+ wait %1
+ unlock
+' &
+
+# wait for inner bash to start, to ensure $BINFILE has seen this bash proc as its initial PPID
+flock --exclusive "$PARENTLOCKFILE" true
+`)
+ cmd.Env = append(os.Environ(),
+ "SIGNUM="+fmt.Sprintf("%d", signum),
+ "PARENTLOCKFD=3",
+ "PARENTLOCKFILE="+parentlockfile.Name(),
+ "CHILDLOCKFD=4",
+ "CHILDLOCKFILE="+childlockfile.Name(),
+ "BINFILE="+bin.Name())
+ cmd.ExtraFiles = []*os.File{parentlockfile, childlockfile}
+ stderr, err := cmd.StderrPipe()
+ if err != nil {
+ t.Fatal(err)
+ }
+ stdout, err := cmd.StdoutPipe()
+ if err != nil {
+ t.Fatal(err)
+ }
+ cmd.Start()
+ defer cmd.Wait()
+
+ var wg sync.WaitGroup
+ wg.Add(2)
+ defer wg.Wait()
+ for _, rdr := range []io.ReadCloser{stderr, stdout} {
+ go func(rdr io.ReadCloser) {
+ defer wg.Done()
+ buf := make([]byte, 1024)
+ for {
+ n, err := rdr.Read(buf)
+ if n > 0 {
+ t.Logf("%s", buf[:n])
+ }
+ if err != nil {
+ return
+ }
+ }
+ }(rdr)
+ }
+
+ // Wait until inner bash process releases parentlockfile
+ // (which means it has locked childlockfile and written its
+ // PID)
+ err = exec.Command("flock", "--exclusive", parentlockfile.Name(), "true").Run()
+ if err != nil {
+ t.Fatal(err)
+ }
+
+ childDone := make(chan bool)
+ go func() {
+ // Notify the main thread when the inner bash process
+ // releases its lock on childlockfile (which means
+ // either its sleep process ended or it received a
+ // TERM signal).
+ t0 := time.Now()
+ err = exec.Command("flock", "--exclusive", childlockfile.Name(), "true").Run()
+ if err != nil {
+ t.Fatal(err)
+ }
+ t.Logf("child done after %s", time.Since(t0))
+ close(childDone)
+ }()
+
+ select {
+ case <-time.After(500 * time.Millisecond):
+ // Inner bash process is still alive after the timeout
+ // period. Kill it now, so our stdout and stderr pipes
+ // can finish and we don't leave a mess of child procs
+ // behind.
+ buf, err := ioutil.ReadFile(childlockfile.Name())
+ if err != nil {
+ t.Fatal(err)
+ }
+ var childPID int
+ _, err = fmt.Sscanf(string(buf), "%d", &childPID)
+ if err != nil {
+ t.Fatal(err)
+ }
+ child, err := os.FindProcess(childPID)
+ if err != nil {
+ t.Fatal(err)
+ }
+ child.Signal(syscall.Signal(15))
+ return true
+
+ case <-childDone:
+ // Inner bash process ended soon after its grandparent
+ // ended.
+ return false
+ }
+}
+++ /dev/null
-// Deals with parsing Collection responses from API Server.
-
-package collection
-
-import (
- "flag"
- "fmt"
- "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
- "git.curoverse.com/arvados.git/sdk/go/blockdigest"
- "git.curoverse.com/arvados.git/sdk/go/logger"
- "git.curoverse.com/arvados.git/sdk/go/manifest"
- "git.curoverse.com/arvados.git/sdk/go/util"
- "log"
- "os"
- "runtime/pprof"
- "time"
-)
-
-var (
- HeapProfileFilename string
-)
-
-// Collection representation
-type Collection struct {
- UUID string
- OwnerUUID string
- ReplicationLevel int
- BlockDigestToSize map[blockdigest.BlockDigest]int
- TotalSize int
-}
-
-// ReadCollections holds information about collections from API server
-type ReadCollections struct {
- ReadAllCollections bool
- UUIDToCollection map[string]Collection
- OwnerToCollectionSize map[string]int
- BlockToDesiredReplication map[blockdigest.DigestWithSize]int
- CollectionUUIDToIndex map[string]int
- CollectionIndexToUUID []string
- BlockToCollectionIndices map[blockdigest.DigestWithSize][]int
-}
-
-// GetCollectionsParams params
-type GetCollectionsParams struct {
- Client *arvadosclient.ArvadosClient
- Logger *logger.Logger
- BatchSize int
-}
-
-// SdkCollectionInfo holds collection info from api
-type SdkCollectionInfo struct {
- UUID string `json:"uuid"`
- OwnerUUID string `json:"owner_uuid"`
- ReplicationDesired int `json:"replication_desired"`
- ModifiedAt time.Time `json:"modified_at"`
- ManifestText string `json:"manifest_text"`
-}
-
-// SdkCollectionList lists collections from api
-type SdkCollectionList struct {
- ItemsAvailable int `json:"items_available"`
- Items []SdkCollectionInfo `json:"items"`
-}
-
-func init() {
- flag.StringVar(&HeapProfileFilename,
- "heap-profile",
- "",
- "File to write the heap profiles to. Leave blank to skip profiling.")
-}
-
-// WriteHeapProfile writes the heap profile to a file for later review.
-// Since a file is expected to only contain a single heap profile this
-// function overwrites the previously written profile, so it is safe
-// to call multiple times in a single run.
-// Otherwise we would see cumulative numbers as explained here:
-// https://groups.google.com/d/msg/golang-nuts/ZyHciRglQYc/2nh4Ndu2fZcJ
-func WriteHeapProfile() error {
- if HeapProfileFilename != "" {
- heapProfile, err := os.Create(HeapProfileFilename)
- if err != nil {
- return err
- }
-
- defer heapProfile.Close()
-
- err = pprof.WriteHeapProfile(heapProfile)
- return err
- }
-
- return nil
-}
-
-// GetCollectionsAndSummarize gets collections from api and summarizes
-func GetCollectionsAndSummarize(params GetCollectionsParams) (results ReadCollections, err error) {
- results, err = GetCollections(params)
- if err != nil {
- return
- }
-
- results.Summarize(params.Logger)
-
- log.Printf("Uuid to Size used: %v", results.OwnerToCollectionSize)
- log.Printf("Read and processed %d collections",
- len(results.UUIDToCollection))
-
- // TODO(misha): Add a "readonly" flag. If we're in readonly mode,
- // lots of behaviors can become warnings (and obviously we can't
- // write anything).
- // if !readCollections.ReadAllCollections {
- // log.Fatalf("Did not read all collections")
- // }
-
- return
-}
-
-// GetCollections gets collections from api
-func GetCollections(params GetCollectionsParams) (results ReadCollections, err error) {
- if ¶ms.Client == nil {
- err = fmt.Errorf("params.Client passed to GetCollections() should " +
- "contain a valid ArvadosClient, but instead it is nil.")
- return
- }
-
- fieldsWanted := []string{"manifest_text",
- "owner_uuid",
- "uuid",
- "replication_desired",
- "modified_at"}
-
- sdkParams := arvadosclient.Dict{
- "select": fieldsWanted,
- "order": []string{"modified_at ASC", "uuid ASC"},
- "filters": [][]string{{"modified_at", ">=", "1900-01-01T00:00:00Z"}},
- "offset": 0}
-
- if params.BatchSize > 0 {
- sdkParams["limit"] = params.BatchSize
- }
-
- var defaultReplicationLevel int
- {
- var value interface{}
- value, err = params.Client.Discovery("defaultCollectionReplication")
- if err != nil {
- return
- }
-
- defaultReplicationLevel = int(value.(float64))
- if defaultReplicationLevel <= 0 {
- err = fmt.Errorf("Default collection replication returned by arvados SDK "+
- "should be a positive integer but instead it was %d.",
- defaultReplicationLevel)
- return
- }
- }
-
- initialNumberOfCollectionsAvailable, err :=
- util.NumberItemsAvailable(params.Client, "collections")
- if err != nil {
- return
- }
- // Include a 1% margin for collections added while we're reading so
- // that we don't have to grow the map in most cases.
- maxExpectedCollections := int(
- float64(initialNumberOfCollectionsAvailable) * 1.01)
- results.UUIDToCollection = make(map[string]Collection, maxExpectedCollections)
-
- if params.Logger != nil {
- params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
- collectionInfo := logger.GetOrCreateMap(p, "collection_info")
- collectionInfo["num_collections_at_start"] = initialNumberOfCollectionsAvailable
- collectionInfo["batch_size"] = params.BatchSize
- collectionInfo["default_replication_level"] = defaultReplicationLevel
- })
- }
-
- // These values are just for getting the loop to run the first time,
- // afterwards they'll be set to real values.
- remainingCollections := 1
- var totalCollections int
- var previousTotalCollections int
- for remainingCollections > 0 {
- // We're still finding new collections
-
- // Write the heap profile for examining memory usage
- err = WriteHeapProfile()
- if err != nil {
- return
- }
-
- // Get next batch of collections.
- var collections SdkCollectionList
- err = params.Client.List("collections", sdkParams, &collections)
- if err != nil {
- return
- }
- batchCollections := len(collections.Items)
-
- // We must always have at least one collection in the batch
- if batchCollections < 1 {
- err = fmt.Errorf("API query returned no collections for %+v", sdkParams)
- return
- }
-
- // Update count of remaining collections
- remainingCollections = collections.ItemsAvailable - sdkParams["offset"].(int) - batchCollections
-
- // Process collection and update our date filter.
- latestModificationDate, maxManifestSize, totalManifestSize, err := ProcessCollections(params.Logger,
- collections.Items,
- defaultReplicationLevel,
- results.UUIDToCollection)
- if err != nil {
- return results, err
- }
- if sdkParams["filters"].([][]string)[0][2] != latestModificationDate.Format(time.RFC3339) {
- sdkParams["filters"].([][]string)[0][2] = latestModificationDate.Format(time.RFC3339)
- sdkParams["offset"] = 0
- } else {
- sdkParams["offset"] = sdkParams["offset"].(int) + batchCollections
- }
-
- // update counts
- previousTotalCollections = totalCollections
- totalCollections = len(results.UUIDToCollection)
-
- log.Printf("%d collections read, %d (%d new) in last batch, "+
- "%d remaining, "+
- "%s latest modified date, %.0f %d %d avg,max,total manifest size",
- totalCollections,
- batchCollections,
- totalCollections-previousTotalCollections,
- remainingCollections,
- sdkParams["filters"].([][]string)[0][2],
- float32(totalManifestSize)/float32(totalCollections),
- maxManifestSize, totalManifestSize)
-
- if params.Logger != nil {
- params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
- collectionInfo := logger.GetOrCreateMap(p, "collection_info")
- collectionInfo["collections_read"] = totalCollections
- collectionInfo["latest_modified_date_seen"] = sdkParams["filters"].([][]string)[0][2]
- collectionInfo["total_manifest_size"] = totalManifestSize
- collectionInfo["max_manifest_size"] = maxManifestSize
- })
- }
- }
-
- // Make one final API request to verify that we have processed all collections available up to the latest modification date
- var collections SdkCollectionList
- sdkParams["filters"].([][]string)[0][1] = "<="
- sdkParams["limit"] = 0
- err = params.Client.List("collections", sdkParams, &collections)
- if err != nil {
- return
- }
- finalNumberOfCollectionsAvailable, err :=
- util.NumberItemsAvailable(params.Client, "collections")
- if err != nil {
- return
- }
- if totalCollections < finalNumberOfCollectionsAvailable {
- err = fmt.Errorf("API server indicates a total of %d collections "+
- "available up to %v, but we only retrieved %d. "+
- "Refusing to continue as this could indicate an "+
- "otherwise undetected failure.",
- finalNumberOfCollectionsAvailable,
- sdkParams["filters"].([][]string)[0][2],
- totalCollections)
- return
- }
-
- // Write the heap profile for examining memory usage
- err = WriteHeapProfile()
-
- return
-}
-
-// StrCopy returns a newly allocated string.
-// It is useful to copy slices so that the garbage collector can reuse
-// the memory of the longer strings they came from.
-func StrCopy(s string) string {
- return string([]byte(s))
-}
-
-// ProcessCollections read from api server
-func ProcessCollections(arvLogger *logger.Logger,
- receivedCollections []SdkCollectionInfo,
- defaultReplicationLevel int,
- UUIDToCollection map[string]Collection,
-) (
- latestModificationDate time.Time,
- maxManifestSize, totalManifestSize uint64,
- err error,
-) {
- for _, sdkCollection := range receivedCollections {
- collection := Collection{UUID: StrCopy(sdkCollection.UUID),
- OwnerUUID: StrCopy(sdkCollection.OwnerUUID),
- ReplicationLevel: sdkCollection.ReplicationDesired,
- BlockDigestToSize: make(map[blockdigest.BlockDigest]int)}
-
- if sdkCollection.ModifiedAt.IsZero() {
- err = fmt.Errorf(
- "Arvados SDK collection returned with unexpected zero "+
- "modification date. This probably means that either we failed to "+
- "parse the modification date or the API server has changed how "+
- "it returns modification dates: %+v",
- collection)
- return
- }
-
- if sdkCollection.ModifiedAt.After(latestModificationDate) {
- latestModificationDate = sdkCollection.ModifiedAt
- }
-
- if collection.ReplicationLevel == 0 {
- collection.ReplicationLevel = defaultReplicationLevel
- }
-
- manifest := manifest.Manifest{Text: sdkCollection.ManifestText}
- manifestSize := uint64(len(sdkCollection.ManifestText))
-
- if _, alreadySeen := UUIDToCollection[collection.UUID]; !alreadySeen {
- totalManifestSize += manifestSize
- }
- if manifestSize > maxManifestSize {
- maxManifestSize = manifestSize
- }
-
- blockChannel := manifest.BlockIterWithDuplicates()
- for block := range blockChannel {
- if storedSize, stored := collection.BlockDigestToSize[block.Digest]; stored && storedSize != block.Size {
- log.Printf(
- "Collection %s contains multiple sizes (%d and %d) for block %s",
- collection.UUID,
- storedSize,
- block.Size,
- block.Digest)
- }
- collection.BlockDigestToSize[block.Digest] = block.Size
- }
- if manifest.Err != nil {
- err = manifest.Err
- return
- }
-
- collection.TotalSize = 0
- for _, size := range collection.BlockDigestToSize {
- collection.TotalSize += size
- }
- UUIDToCollection[collection.UUID] = collection
-
- // Clear out all the manifest strings that we don't need anymore.
- // These hopefully form the bulk of our memory usage.
- manifest.Text = ""
- sdkCollection.ManifestText = ""
- }
-
- return
-}
-
-// Summarize the collections read
-func (readCollections *ReadCollections) Summarize(arvLogger *logger.Logger) {
- readCollections.OwnerToCollectionSize = make(map[string]int)
- readCollections.BlockToDesiredReplication = make(map[blockdigest.DigestWithSize]int)
- numCollections := len(readCollections.UUIDToCollection)
- readCollections.CollectionUUIDToIndex = make(map[string]int, numCollections)
- readCollections.CollectionIndexToUUID = make([]string, 0, numCollections)
- readCollections.BlockToCollectionIndices = make(map[blockdigest.DigestWithSize][]int)
-
- for _, coll := range readCollections.UUIDToCollection {
- collectionIndex := len(readCollections.CollectionIndexToUUID)
- readCollections.CollectionIndexToUUID =
- append(readCollections.CollectionIndexToUUID, coll.UUID)
- readCollections.CollectionUUIDToIndex[coll.UUID] = collectionIndex
-
- readCollections.OwnerToCollectionSize[coll.OwnerUUID] =
- readCollections.OwnerToCollectionSize[coll.OwnerUUID] + coll.TotalSize
-
- for block, size := range coll.BlockDigestToSize {
- locator := blockdigest.DigestWithSize{Digest: block, Size: uint32(size)}
- readCollections.BlockToCollectionIndices[locator] =
- append(readCollections.BlockToCollectionIndices[locator],
- collectionIndex)
- storedReplication := readCollections.BlockToDesiredReplication[locator]
- if coll.ReplicationLevel > storedReplication {
- readCollections.BlockToDesiredReplication[locator] =
- coll.ReplicationLevel
- }
- }
- }
-
- if arvLogger != nil {
- arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
- collectionInfo := logger.GetOrCreateMap(p, "collection_info")
- // Since maps are shallow copied, we run a risk of concurrent
- // updates here. By copying results.OwnerToCollectionSize into
- // the log, we're assuming that it won't be updated.
- collectionInfo["owner_to_collection_size"] =
- readCollections.OwnerToCollectionSize
- collectionInfo["distinct_blocks_named"] =
- len(readCollections.BlockToDesiredReplication)
- })
- }
-
- return
-}
+++ /dev/null
-package collection
-
-import (
- "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
- "git.curoverse.com/arvados.git/sdk/go/arvadostest"
- "git.curoverse.com/arvados.git/sdk/go/blockdigest"
- . "gopkg.in/check.v1"
- "net/http"
- "net/http/httptest"
- "testing"
-)
-
-// Gocheck boilerplate
-func Test(t *testing.T) {
- TestingT(t)
-}
-
-type MySuite struct{}
-
-var _ = Suite(&MySuite{})
-
-// This captures the result we expect from
-// ReadCollections.Summarize(). Because CollectionUUIDToIndex is
-// indeterminate, we replace BlockToCollectionIndices with
-// BlockToCollectionUuids.
-type ExpectedSummary struct {
- OwnerToCollectionSize map[string]int
- BlockToDesiredReplication map[blockdigest.DigestWithSize]int
- BlockToCollectionUuids map[blockdigest.DigestWithSize][]string
-}
-
-func CompareSummarizedReadCollections(c *C,
- summarized ReadCollections,
- expected ExpectedSummary) {
-
- c.Assert(summarized.OwnerToCollectionSize, DeepEquals,
- expected.OwnerToCollectionSize)
-
- c.Assert(summarized.BlockToDesiredReplication, DeepEquals,
- expected.BlockToDesiredReplication)
-
- summarizedBlockToCollectionUuids :=
- make(map[blockdigest.DigestWithSize]map[string]struct{})
- for digest, indices := range summarized.BlockToCollectionIndices {
- uuidSet := make(map[string]struct{})
- summarizedBlockToCollectionUuids[digest] = uuidSet
- for _, index := range indices {
- uuidSet[summarized.CollectionIndexToUUID[index]] = struct{}{}
- }
- }
-
- expectedBlockToCollectionUuids :=
- make(map[blockdigest.DigestWithSize]map[string]struct{})
- for digest, uuidSlice := range expected.BlockToCollectionUuids {
- uuidSet := make(map[string]struct{})
- expectedBlockToCollectionUuids[digest] = uuidSet
- for _, uuid := range uuidSlice {
- uuidSet[uuid] = struct{}{}
- }
- }
-
- c.Assert(summarizedBlockToCollectionUuids, DeepEquals,
- expectedBlockToCollectionUuids)
-}
-
-func (s *MySuite) TestSummarizeSimple(checker *C) {
- rc := MakeTestReadCollections([]TestCollectionSpec{{
- ReplicationLevel: 5,
- Blocks: []int{1, 2},
- }})
-
- rc.Summarize(nil)
-
- c := rc.UUIDToCollection["col0"]
-
- blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
- blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
-
- expected := ExpectedSummary{
- OwnerToCollectionSize: map[string]int{c.OwnerUUID: c.TotalSize},
- BlockToDesiredReplication: map[blockdigest.DigestWithSize]int{blockDigest1: 5, blockDigest2: 5},
- BlockToCollectionUuids: map[blockdigest.DigestWithSize][]string{blockDigest1: {c.UUID}, blockDigest2: {c.UUID}},
- }
-
- CompareSummarizedReadCollections(checker, rc, expected)
-}
-
-func (s *MySuite) TestSummarizeOverlapping(checker *C) {
- rc := MakeTestReadCollections([]TestCollectionSpec{
- {
- ReplicationLevel: 5,
- Blocks: []int{1, 2},
- },
- {
- ReplicationLevel: 8,
- Blocks: []int{2, 3},
- },
- })
-
- rc.Summarize(nil)
-
- c0 := rc.UUIDToCollection["col0"]
- c1 := rc.UUIDToCollection["col1"]
-
- blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
- blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
- blockDigest3 := blockdigest.MakeTestDigestWithSize(3)
-
- expected := ExpectedSummary{
- OwnerToCollectionSize: map[string]int{
- c0.OwnerUUID: c0.TotalSize,
- c1.OwnerUUID: c1.TotalSize,
- },
- BlockToDesiredReplication: map[blockdigest.DigestWithSize]int{
- blockDigest1: 5,
- blockDigest2: 8,
- blockDigest3: 8,
- },
- BlockToCollectionUuids: map[blockdigest.DigestWithSize][]string{
- blockDigest1: {c0.UUID},
- blockDigest2: {c0.UUID, c1.UUID},
- blockDigest3: {c1.UUID},
- },
- }
-
- CompareSummarizedReadCollections(checker, rc, expected)
-}
-
-type APITestData struct {
- // path and response map
- responses map[string]arvadostest.StubResponse
-
- // expected error, if any
- expectedError string
-}
-
-func (s *MySuite) TestGetCollectionsAndSummarize_DiscoveryError(c *C) {
- testGetCollectionsAndSummarize(c,
- APITestData{
- responses: make(map[string]arvadostest.StubResponse),
- expectedError: "arvados API server error: 500.*",
- })
-}
-
-func (s *MySuite) TestGetCollectionsAndSummarize_ApiErrorGetCollections(c *C) {
- respMap := make(map[string]arvadostest.StubResponse)
- respMap["/discovery/v1/apis/arvados/v1/rest"] = arvadostest.StubResponse{200, `{"defaultCollectionReplication":2}`}
- respMap["/arvados/v1/collections"] = arvadostest.StubResponse{-1, ``}
-
- testGetCollectionsAndSummarize(c,
- APITestData{
- responses: respMap,
- expectedError: "arvados API server error: 302.*",
- })
-}
-
-func (s *MySuite) TestGetCollectionsAndSummarize_GetCollectionsBadStreamName(c *C) {
- respMap := make(map[string]arvadostest.StubResponse)
- respMap["/discovery/v1/apis/arvados/v1/rest"] = arvadostest.StubResponse{200, `{"defaultCollectionReplication":2}`}
- respMap["/arvados/v1/collections"] = arvadostest.StubResponse{200, `{"items_available":1,"items":[{"modified_at":"2015-11-24T15:04:05Z","manifest_text":"badstreamname"}]}`}
-
- testGetCollectionsAndSummarize(c,
- APITestData{
- responses: respMap,
- expectedError: "Invalid stream name: badstreamname",
- })
-}
-
-func (s *MySuite) TestGetCollectionsAndSummarize_GetCollectionsBadFileToken(c *C) {
- respMap := make(map[string]arvadostest.StubResponse)
- respMap["/discovery/v1/apis/arvados/v1/rest"] = arvadostest.StubResponse{200, `{"defaultCollectionReplication":2}`}
- respMap["/arvados/v1/collections"] = arvadostest.StubResponse{200, `{"items_available":1,"items":[{"modified_at":"2015-11-24T15:04:05Z","manifest_text":"./goodstream acbd18db4cc2f85cedef654fccc4a4d8+3 0:1:file1.txt file2.txt"}]}`}
-
- testGetCollectionsAndSummarize(c,
- APITestData{
- responses: respMap,
- expectedError: "Invalid file token: file2.txt",
- })
-}
-
-func testGetCollectionsAndSummarize(c *C, testData APITestData) {
- apiStub := arvadostest.ServerStub{testData.responses}
-
- api := httptest.NewServer(&apiStub)
- defer api.Close()
-
- arv := &arvadosclient.ArvadosClient{
- Scheme: "http",
- ApiServer: api.URL[7:],
- ApiToken: "abc123",
- Client: &http.Client{Transport: &http.Transport{}},
- }
-
- // GetCollectionsAndSummarize
- _, err := GetCollectionsAndSummarize(GetCollectionsParams{arv, nil, 10})
-
- if testData.expectedError == "" {
- c.Assert(err, IsNil)
- } else {
- c.Assert(err, ErrorMatches, testData.expectedError)
- }
-}
+++ /dev/null
-// Code used for testing only.
-
-package collection
-
-import (
- "fmt"
- "git.curoverse.com/arvados.git/sdk/go/blockdigest"
-)
-
-// TestCollectionSpec with test blocks and desired replication level
-type TestCollectionSpec struct {
- // The desired replication level
- ReplicationLevel int
- // Blocks this contains, represented by ints. Ints repeated will
- // still only represent one block
- Blocks []int
-}
-
-// MakeTestReadCollections creates a ReadCollections object for testing
-// based on the give specs. Only the ReadAllCollections and UUIDToCollection
-// fields are populated. To populate other fields call rc.Summarize().
-func MakeTestReadCollections(specs []TestCollectionSpec) (rc ReadCollections) {
- rc = ReadCollections{
- ReadAllCollections: true,
- UUIDToCollection: map[string]Collection{},
- }
-
- for i, spec := range specs {
- c := Collection{
- UUID: fmt.Sprintf("col%d", i),
- OwnerUUID: fmt.Sprintf("owner%d", i),
- ReplicationLevel: spec.ReplicationLevel,
- BlockDigestToSize: map[blockdigest.BlockDigest]int{},
- }
- rc.UUIDToCollection[c.UUID] = c
- for _, j := range spec.Blocks {
- c.BlockDigestToSize[blockdigest.MakeTestBlockDigest(j)] = j
- }
- // We compute the size in a separate loop because the value
- // computed in the above loop would be invalid if c.Blocks
- // contained duplicates.
- for _, size := range c.BlockDigestToSize {
- c.TotalSize += size
- }
- }
- return
-}
-
-// CollectionIndicesForTesting returns a slice giving the collection
-// index of each collection that was passed in to MakeTestReadCollections.
-// rc.Summarize() must be called before this method, since Summarize()
-// assigns an index to each collection.
-func (rc ReadCollections) CollectionIndicesForTesting() (indices []int) {
- // TODO(misha): Assert that rc.Summarize() has been called.
- numCollections := len(rc.CollectionIndexToUUID)
- indices = make([]int, numCollections)
- for i := 0; i < numCollections; i++ {
- indices[i] = rc.CollectionUUIDToIndex[fmt.Sprintf("col%d", i)]
- }
- return
-}
+++ /dev/null
-/* Keep Datamanager. Responsible for checking on and reporting on Keep Storage */
-
-package main
-
-import (
- "errors"
- "flag"
- "fmt"
- "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
- "git.curoverse.com/arvados.git/sdk/go/keepclient"
- "git.curoverse.com/arvados.git/sdk/go/logger"
- "git.curoverse.com/arvados.git/sdk/go/util"
- "git.curoverse.com/arvados.git/services/datamanager/collection"
- "git.curoverse.com/arvados.git/services/datamanager/keep"
- "git.curoverse.com/arvados.git/services/datamanager/loggerutil"
- "git.curoverse.com/arvados.git/services/datamanager/summary"
- "log"
- "time"
-)
-
-var (
- logEventTypePrefix string
- logFrequencySeconds int
- minutesBetweenRuns int
- collectionBatchSize int
- dryRun bool
-)
-
-func init() {
- flag.StringVar(&logEventTypePrefix,
- "log-event-type-prefix",
- "experimental-data-manager",
- "Prefix to use in the event_type of our arvados log entries. Set to empty to turn off logging")
- flag.IntVar(&logFrequencySeconds,
- "log-frequency-seconds",
- 20,
- "How frequently we'll write log entries in seconds.")
- flag.IntVar(&minutesBetweenRuns,
- "minutes-between-runs",
- 0,
- "How many minutes we wait between data manager runs. 0 means run once and exit.")
- flag.IntVar(&collectionBatchSize,
- "collection-batch-size",
- 1000,
- "How many collections to request in each batch.")
- flag.BoolVar(&dryRun,
- "dry-run",
- false,
- "Perform a dry run. Log how many blocks would be deleted/moved, but do not issue any changes to keepstore.")
-}
-
-func main() {
- flag.Parse()
-
- if minutesBetweenRuns == 0 {
- arv, err := arvadosclient.MakeArvadosClient()
- if err != nil {
- loggerutil.FatalWithMessage(arvLogger, fmt.Sprintf("Error making arvados client: %v", err))
- }
- err = singlerun(arv)
- if err != nil {
- loggerutil.FatalWithMessage(arvLogger, fmt.Sprintf("singlerun: %v", err))
- }
- } else {
- waitTime := time.Minute * time.Duration(minutesBetweenRuns)
- for {
- log.Println("Beginning Run")
- arv, err := arvadosclient.MakeArvadosClient()
- if err != nil {
- loggerutil.FatalWithMessage(arvLogger, fmt.Sprintf("Error making arvados client: %v", err))
- }
- err = singlerun(arv)
- if err != nil {
- log.Printf("singlerun: %v", err)
- }
- log.Printf("Sleeping for %d minutes", minutesBetweenRuns)
- time.Sleep(waitTime)
- }
- }
-}
-
-var arvLogger *logger.Logger
-
-func singlerun(arv *arvadosclient.ArvadosClient) error {
- var err error
- if isAdmin, err := util.UserIsAdmin(arv); err != nil {
- return errors.New("Error verifying admin token: " + err.Error())
- } else if !isAdmin {
- return errors.New("Current user is not an admin. Datamanager requires a privileged token.")
- }
-
- if logEventTypePrefix != "" {
- arvLogger, err = logger.NewLogger(logger.LoggerParams{
- Client: arv,
- EventTypePrefix: logEventTypePrefix,
- WriteInterval: time.Second * time.Duration(logFrequencySeconds)})
- }
-
- loggerutil.LogRunInfo(arvLogger)
- if arvLogger != nil {
- arvLogger.AddWriteHook(loggerutil.LogMemoryAlloc)
- }
-
- var (
- dataFetcher summary.DataFetcher
- readCollections collection.ReadCollections
- keepServerInfo keep.ReadServers
- )
-
- if summary.ShouldReadData() {
- dataFetcher = summary.ReadData
- } else {
- dataFetcher = BuildDataFetcher(arv)
- }
-
- err = dataFetcher(arvLogger, &readCollections, &keepServerInfo)
- if err != nil {
- return err
- }
-
- err = summary.MaybeWriteData(arvLogger, readCollections, keepServerInfo)
- if err != nil {
- return err
- }
-
- buckets := summary.BucketReplication(readCollections, keepServerInfo)
- bucketCounts := buckets.Counts()
-
- replicationSummary := buckets.SummarizeBuckets(readCollections)
- replicationCounts := replicationSummary.ComputeCounts()
-
- log.Printf("Blocks In Collections: %d, "+
- "\nBlocks In Keep: %d.",
- len(readCollections.BlockToDesiredReplication),
- len(keepServerInfo.BlockToServers))
- log.Println(replicationCounts.PrettyPrint())
-
- log.Printf("Blocks Histogram:")
- for _, rlbss := range bucketCounts {
- log.Printf("%+v: %10d",
- rlbss.Levels,
- rlbss.Count)
- }
-
- kc, err := keepclient.MakeKeepClient(arv)
- if err != nil {
- return fmt.Errorf("Error setting up keep client %v", err.Error())
- }
-
- // Log that we're finished. We force the recording, since go will
- // not wait for the write timer before exiting.
- if arvLogger != nil {
- defer arvLogger.FinalUpdate(func(p map[string]interface{}, e map[string]interface{}) {
- summaryInfo := logger.GetOrCreateMap(p, "summary_info")
- summaryInfo["block_replication_counts"] = bucketCounts
- summaryInfo["replication_summary"] = replicationCounts
- p["summary_info"] = summaryInfo
-
- p["run_info"].(map[string]interface{})["finished_at"] = time.Now()
- })
- }
-
- pullServers := summary.ComputePullServers(kc,
- &keepServerInfo,
- readCollections.BlockToDesiredReplication,
- replicationSummary.UnderReplicatedBlocks)
-
- pullLists := summary.BuildPullLists(pullServers)
-
- trashLists, trashErr := summary.BuildTrashLists(kc,
- &keepServerInfo,
- replicationSummary.KeepBlocksNotInCollections)
-
- err = summary.WritePullLists(arvLogger, pullLists, dryRun)
- if err != nil {
- return err
- }
-
- if trashErr != nil {
- return err
- }
- keep.SendTrashLists(arvLogger, kc, trashLists, dryRun)
-
- return nil
-}
-
-// BuildDataFetcher returns a data fetcher that fetches data from remote servers.
-func BuildDataFetcher(arv *arvadosclient.ArvadosClient) summary.DataFetcher {
- return func(
- arvLogger *logger.Logger,
- readCollections *collection.ReadCollections,
- keepServerInfo *keep.ReadServers,
- ) error {
- collDone := make(chan struct{})
- var collErr error
- go func() {
- *readCollections, collErr = collection.GetCollectionsAndSummarize(
- collection.GetCollectionsParams{
- Client: arv,
- Logger: arvLogger,
- BatchSize: collectionBatchSize})
- collDone <- struct{}{}
- }()
-
- var keepErr error
- *keepServerInfo, keepErr = keep.GetKeepServersAndSummarize(
- keep.GetKeepServersParams{
- Client: arv,
- Logger: arvLogger,
- Limit: 1000})
-
- <-collDone
-
- // Return a nil error only if both parts succeeded.
- if collErr != nil {
- return collErr
- }
- return keepErr
- }
-}
+++ /dev/null
-package main
-
-import (
- "encoding/json"
- "fmt"
- "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
- "git.curoverse.com/arvados.git/sdk/go/arvadostest"
- "git.curoverse.com/arvados.git/sdk/go/keepclient"
- "git.curoverse.com/arvados.git/services/datamanager/collection"
- "git.curoverse.com/arvados.git/services/datamanager/summary"
- "io/ioutil"
- "net/http"
- "os"
- "os/exec"
- "path"
- "regexp"
- "strings"
- "testing"
- "time"
-)
-
-var arv *arvadosclient.ArvadosClient
-var keepClient *keepclient.KeepClient
-var keepServers []string
-
-func SetupDataManagerTest(t *testing.T) {
- os.Setenv("ARVADOS_API_HOST_INSECURE", "true")
-
- // start api and keep servers
- arvadostest.ResetEnv()
- arvadostest.StartAPI()
- arvadostest.StartKeep(2, false)
-
- var err error
- arv, err = arvadosclient.MakeArvadosClient()
- if err != nil {
- t.Fatalf("Error making arvados client: %s", err)
- }
- arv.ApiToken = arvadostest.DataManagerToken
-
- // keep client
- keepClient = &keepclient.KeepClient{
- Arvados: arv,
- Want_replicas: 2,
- Client: &http.Client{},
- }
-
- // discover keep services
- if err = keepClient.DiscoverKeepServers(); err != nil {
- t.Fatalf("Error discovering keep services: %s", err)
- }
- keepServers = []string{}
- for _, host := range keepClient.LocalRoots() {
- keepServers = append(keepServers, host)
- }
-}
-
-func TearDownDataManagerTest(t *testing.T) {
- arvadostest.StopKeep(2)
- arvadostest.StopAPI()
- summary.WriteDataTo = ""
- collection.HeapProfileFilename = ""
-}
-
-func putBlock(t *testing.T, data string) string {
- locator, _, err := keepClient.PutB([]byte(data))
- if err != nil {
- t.Fatalf("Error putting test data for %s %s %v", data, locator, err)
- }
- if locator == "" {
- t.Fatalf("No locator found after putting test data")
- }
-
- splits := strings.Split(locator, "+")
- return splits[0] + "+" + splits[1]
-}
-
-func getBlock(t *testing.T, locator string, data string) {
- reader, blocklen, _, err := keepClient.Get(locator)
- if err != nil {
- t.Fatalf("Error getting test data in setup for %s %s %v", data, locator, err)
- }
- if reader == nil {
- t.Fatalf("No reader found after putting test data")
- }
- if blocklen != int64(len(data)) {
- t.Fatalf("blocklen %d did not match data len %d", blocklen, len(data))
- }
-
- all, err := ioutil.ReadAll(reader)
- if string(all) != data {
- t.Fatalf("Data read %s did not match expected data %s", string(all), data)
- }
-}
-
-// Create a collection using arv-put
-func createCollection(t *testing.T, data string) string {
- tempfile, err := ioutil.TempFile(os.TempDir(), "temp-test-file")
- defer os.Remove(tempfile.Name())
-
- _, err = tempfile.Write([]byte(data))
- if err != nil {
- t.Fatalf("Error writing to tempfile %v", err)
- }
-
- // arv-put
- output, err := exec.Command("arv-put", "--use-filename", "test.txt", tempfile.Name()).Output()
- if err != nil {
- t.Fatalf("Error running arv-put %s", err)
- }
-
- uuid := string(output[0:27]) // trim terminating char
- return uuid
-}
-
-// Get collection locator
-var locatorMatcher = regexp.MustCompile(`^([0-9a-f]{32})\+(\d*)(.*)$`)
-
-func getFirstLocatorFromCollection(t *testing.T, uuid string) string {
- manifest := getCollection(t, uuid)["manifest_text"].(string)
-
- locator := strings.Split(manifest, " ")[1]
- match := locatorMatcher.FindStringSubmatch(locator)
- if match == nil {
- t.Fatalf("No locator found in collection manifest %s", manifest)
- }
-
- return match[1] + "+" + match[2]
-}
-
-func switchToken(t string) func() {
- orig := arv.ApiToken
- restore := func() {
- arv.ApiToken = orig
- }
- arv.ApiToken = t
- return restore
-}
-
-func getCollection(t *testing.T, uuid string) Dict {
- defer switchToken(arvadostest.AdminToken)()
-
- getback := make(Dict)
- err := arv.Get("collections", uuid, nil, &getback)
- if err != nil {
- t.Fatalf("Error getting collection %s", err)
- }
- if getback["uuid"] != uuid {
- t.Fatalf("Get collection uuid did not match original: $s, result: $s", uuid, getback["uuid"])
- }
-
- return getback
-}
-
-func updateCollection(t *testing.T, uuid string, paramName string, paramValue string) {
- defer switchToken(arvadostest.AdminToken)()
-
- err := arv.Update("collections", uuid, arvadosclient.Dict{
- "collection": arvadosclient.Dict{
- paramName: paramValue,
- },
- }, &arvadosclient.Dict{})
-
- if err != nil {
- t.Fatalf("Error updating collection %s", err)
- }
-}
-
-type Dict map[string]interface{}
-
-func deleteCollection(t *testing.T, uuid string) {
- defer switchToken(arvadostest.AdminToken)()
-
- getback := make(Dict)
- err := arv.Delete("collections", uuid, nil, &getback)
- if err != nil {
- t.Fatalf("Error deleting collection %s", err)
- }
- if getback["uuid"] != uuid {
- t.Fatalf("Delete collection uuid did not match original: $s, result: $s", uuid, getback["uuid"])
- }
-}
-
-func dataManagerSingleRun(t *testing.T) {
- err := singlerun(arv)
- if err != nil {
- t.Fatalf("Error during singlerun %s", err)
- }
-}
-
-func getBlockIndexesForServer(t *testing.T, i int) []string {
- var indexes []string
-
- path := keepServers[i] + "/index"
- client := http.Client{}
- req, err := http.NewRequest("GET", path, nil)
- req.Header.Add("Authorization", "OAuth2 "+arvadostest.DataManagerToken)
- req.Header.Add("Content-Type", "application/octet-stream")
- resp, err := client.Do(req)
- defer resp.Body.Close()
-
- if err != nil {
- t.Fatalf("Error during %s %s", path, err)
- }
-
- body, err := ioutil.ReadAll(resp.Body)
- if err != nil {
- t.Fatalf("Error reading response from %s %s", path, err)
- }
-
- lines := strings.Split(string(body), "\n")
- for _, line := range lines {
- indexes = append(indexes, strings.Split(line, " ")...)
- }
-
- return indexes
-}
-
-func getBlockIndexes(t *testing.T) [][]string {
- var indexes [][]string
-
- for i := 0; i < len(keepServers); i++ {
- indexes = append(indexes, getBlockIndexesForServer(t, i))
- }
- return indexes
-}
-
-func verifyBlocks(t *testing.T, notExpected []string, expected []string, minReplication int) {
- blocks := getBlockIndexes(t)
-
- for _, block := range notExpected {
- for _, idx := range blocks {
- if valueInArray(block, idx) {
- t.Fatalf("Found unexpected block %s", block)
- }
- }
- }
-
- for _, block := range expected {
- nFound := 0
- for _, idx := range blocks {
- if valueInArray(block, idx) {
- nFound++
- }
- }
- if nFound < minReplication {
- t.Fatalf("Found %d replicas of block %s, expected >= %d", nFound, block, minReplication)
- }
- }
-}
-
-func valueInArray(value string, list []string) bool {
- for _, v := range list {
- if value == v {
- return true
- }
- }
- return false
-}
-
-// Test env uses two keep volumes. The volume names can be found by reading the files
-// ARVADOS_HOME/tmp/keep0.volume and ARVADOS_HOME/tmp/keep1.volume
-//
-// The keep volumes are of the dir structure: volumeN/subdir/locator
-func backdateBlocks(t *testing.T, oldUnusedBlockLocators []string) {
- // First get rid of any size hints in the locators
- var trimmedBlockLocators []string
- for _, block := range oldUnusedBlockLocators {
- trimmedBlockLocators = append(trimmedBlockLocators, strings.Split(block, "+")[0])
- }
-
- // Get the working dir so that we can read keep{n}.volume files
- wd, err := os.Getwd()
- if err != nil {
- t.Fatalf("Error getting working dir %s", err)
- }
-
- // Now cycle through the two keep volumes
- oldTime := time.Now().AddDate(0, -2, 0)
- for i := 0; i < 2; i++ {
- filename := fmt.Sprintf("%s/../../tmp/keep%d.volume", wd, i)
- volumeDir, err := ioutil.ReadFile(filename)
- if err != nil {
- t.Fatalf("Error reading keep volume file %s %s", filename, err)
- }
-
- // Read the keep volume dir structure
- volumeContents, err := ioutil.ReadDir(string(volumeDir))
- if err != nil {
- t.Fatalf("Error reading keep dir %s %s", string(volumeDir), err)
- }
-
- // Read each subdir for each of the keep volume dir
- for _, subdir := range volumeContents {
- subdirName := fmt.Sprintf("%s/%s", volumeDir, subdir.Name())
- subdirContents, err := ioutil.ReadDir(string(subdirName))
- if err != nil {
- t.Fatalf("Error reading keep dir %s %s", string(subdirName), err)
- }
-
- // Now we got to the files. The files are names are the block locators
- for _, fileInfo := range subdirContents {
- blockName := fileInfo.Name()
- myname := fmt.Sprintf("%s/%s", subdirName, blockName)
- if valueInArray(blockName, trimmedBlockLocators) {
- err = os.Chtimes(myname, oldTime, oldTime)
- }
- }
- }
- }
-}
-
-func getStatus(t *testing.T, path string) interface{} {
- client := http.Client{}
- req, err := http.NewRequest("GET", path, nil)
- req.Header.Add("Authorization", "OAuth2 "+arvadostest.DataManagerToken)
- req.Header.Add("Content-Type", "application/octet-stream")
- resp, err := client.Do(req)
- if err != nil {
- t.Fatalf("Error during %s %s", path, err)
- }
- defer resp.Body.Close()
-
- var s interface{}
- json.NewDecoder(resp.Body).Decode(&s)
-
- return s
-}
-
-// Wait until PullQueue and TrashQueue are empty on all keepServers.
-func waitUntilQueuesFinishWork(t *testing.T) {
- for _, ks := range keepServers {
- for done := false; !done; {
- time.Sleep(100 * time.Millisecond)
- s := getStatus(t, ks+"/status.json")
- for _, qName := range []string{"PullQueue", "TrashQueue"} {
- qStatus := s.(map[string]interface{})[qName].(map[string]interface{})
- if qStatus["Queued"].(float64)+qStatus["InProgress"].(float64) == 0 {
- done = true
- }
- }
- }
- }
-}
-
-// Create some blocks and backdate some of them.
-// Also create some collections and delete some of them.
-// Verify block indexes.
-func TestPutAndGetBlocks(t *testing.T) {
- defer TearDownDataManagerTest(t)
- SetupDataManagerTest(t)
-
- // Put some blocks which will be backdated later on
- // The first one will also be used in a collection and hence should not be deleted when datamanager runs.
- // The rest will be old and unreferenced and hence should be deleted when datamanager runs.
- var oldUnusedBlockLocators []string
- oldUnusedBlockData := "this block will have older mtime"
- for i := 0; i < 5; i++ {
- oldUnusedBlockLocators = append(oldUnusedBlockLocators, putBlock(t, fmt.Sprintf("%s%d", oldUnusedBlockData, i)))
- }
- for i := 0; i < 5; i++ {
- getBlock(t, oldUnusedBlockLocators[i], fmt.Sprintf("%s%d", oldUnusedBlockData, i))
- }
-
- // The rest will be old and unreferenced and hence should be deleted when datamanager runs.
- oldUsedBlockData := "this collection block will have older mtime"
- oldUsedBlockLocator := putBlock(t, oldUsedBlockData)
- getBlock(t, oldUsedBlockLocator, oldUsedBlockData)
-
- // Put some more blocks which will not be backdated; hence they are still new, but not in any collection.
- // Hence, even though unreferenced, these should not be deleted when datamanager runs.
- var newBlockLocators []string
- newBlockData := "this block is newer"
- for i := 0; i < 5; i++ {
- newBlockLocators = append(newBlockLocators, putBlock(t, fmt.Sprintf("%s%d", newBlockData, i)))
- }
- for i := 0; i < 5; i++ {
- getBlock(t, newBlockLocators[i], fmt.Sprintf("%s%d", newBlockData, i))
- }
-
- // Create a collection that would be deleted later on
- toBeDeletedCollectionUUID := createCollection(t, "some data for collection creation")
- toBeDeletedCollectionLocator := getFirstLocatorFromCollection(t, toBeDeletedCollectionUUID)
-
- // Create another collection that has the same data as the one of the old blocks
- oldUsedBlockCollectionUUID := createCollection(t, oldUsedBlockData)
- oldUsedBlockCollectionLocator := getFirstLocatorFromCollection(t, oldUsedBlockCollectionUUID)
- if oldUsedBlockCollectionLocator != oldUsedBlockLocator {
- t.Fatalf("Locator of the collection with the same data as old block is different %s", oldUsedBlockCollectionLocator)
- }
-
- // Create another collection whose replication level will be changed
- replicationCollectionUUID := createCollection(t, "replication level on this collection will be reduced")
- replicationCollectionLocator := getFirstLocatorFromCollection(t, replicationCollectionUUID)
-
- // Create two collections with same data; one will be deleted later on
- dataForTwoCollections := "one of these collections will be deleted"
- oneOfTwoWithSameDataUUID := createCollection(t, dataForTwoCollections)
- oneOfTwoWithSameDataLocator := getFirstLocatorFromCollection(t, oneOfTwoWithSameDataUUID)
- secondOfTwoWithSameDataUUID := createCollection(t, dataForTwoCollections)
- secondOfTwoWithSameDataLocator := getFirstLocatorFromCollection(t, secondOfTwoWithSameDataUUID)
- if oneOfTwoWithSameDataLocator != secondOfTwoWithSameDataLocator {
- t.Fatalf("Locators for both these collections expected to be same: %s %s", oneOfTwoWithSameDataLocator, secondOfTwoWithSameDataLocator)
- }
-
- // create collection with empty manifest text
- emptyBlockLocator := putBlock(t, "")
- emptyCollection := createCollection(t, "")
-
- // Verify blocks before doing any backdating / deleting.
- var expected []string
- expected = append(expected, oldUnusedBlockLocators...)
- expected = append(expected, newBlockLocators...)
- expected = append(expected, toBeDeletedCollectionLocator)
- expected = append(expected, replicationCollectionLocator)
- expected = append(expected, oneOfTwoWithSameDataLocator)
- expected = append(expected, secondOfTwoWithSameDataLocator)
- expected = append(expected, emptyBlockLocator)
-
- verifyBlocks(t, nil, expected, 2)
-
- // Run datamanager in singlerun mode
- dataManagerSingleRun(t)
- waitUntilQueuesFinishWork(t)
-
- verifyBlocks(t, nil, expected, 2)
-
- // Backdate the to-be old blocks and delete the collections
- backdateBlocks(t, oldUnusedBlockLocators)
- deleteCollection(t, toBeDeletedCollectionUUID)
- deleteCollection(t, secondOfTwoWithSameDataUUID)
- backdateBlocks(t, []string{emptyBlockLocator})
- deleteCollection(t, emptyCollection)
-
- // Run data manager again
- dataManagerSingleRun(t)
- waitUntilQueuesFinishWork(t)
-
- // Get block indexes and verify that all backdated blocks except the first one used in collection are not included.
- expected = expected[:0]
- expected = append(expected, oldUsedBlockLocator)
- expected = append(expected, newBlockLocators...)
- expected = append(expected, toBeDeletedCollectionLocator)
- expected = append(expected, oneOfTwoWithSameDataLocator)
- expected = append(expected, secondOfTwoWithSameDataLocator)
- expected = append(expected, emptyBlockLocator) // even when unreferenced, this remains
-
- verifyBlocks(t, oldUnusedBlockLocators, expected, 2)
-
- // Reduce desired replication on replicationCollectionUUID
- // collection, and verify that Data Manager does not reduce
- // actual replication any further than that. (It might not
- // reduce actual replication at all; that's OK for this test.)
-
- // Reduce desired replication level.
- updateCollection(t, replicationCollectionUUID, "replication_desired", "1")
- collection := getCollection(t, replicationCollectionUUID)
- if collection["replication_desired"].(interface{}) != float64(1) {
- t.Fatalf("After update replication_desired is not 1; instead it is %v", collection["replication_desired"])
- }
-
- // Verify data is currently overreplicated.
- verifyBlocks(t, nil, []string{replicationCollectionLocator}, 2)
-
- // Run data manager again
- dataManagerSingleRun(t)
- waitUntilQueuesFinishWork(t)
-
- // Verify data is not underreplicated.
- verifyBlocks(t, nil, []string{replicationCollectionLocator}, 1)
-
- // Verify *other* collections' data is not underreplicated.
- verifyBlocks(t, oldUnusedBlockLocators, expected, 2)
-}
-
-func TestDatamanagerSingleRunRepeatedly(t *testing.T) {
- defer TearDownDataManagerTest(t)
- SetupDataManagerTest(t)
-
- for i := 0; i < 10; i++ {
- err := singlerun(arv)
- if err != nil {
- t.Fatalf("Got an error during datamanager singlerun: %v", err)
- }
- }
-}
-
-func TestGetStatusRepeatedly(t *testing.T) {
- defer TearDownDataManagerTest(t)
- SetupDataManagerTest(t)
-
- for i := 0; i < 10; i++ {
- for j := 0; j < 2; j++ {
- s := getStatus(t, keepServers[j]+"/status.json")
-
- var pullQueueStatus interface{}
- pullQueueStatus = s.(map[string]interface{})["PullQueue"]
- var trashQueueStatus interface{}
- trashQueueStatus = s.(map[string]interface{})["TrashQueue"]
-
- if pullQueueStatus.(map[string]interface{})["Queued"] == nil ||
- pullQueueStatus.(map[string]interface{})["InProgress"] == nil ||
- trashQueueStatus.(map[string]interface{})["Queued"] == nil ||
- trashQueueStatus.(map[string]interface{})["InProgress"] == nil {
- t.Fatalf("PullQueue and TrashQueue status not found")
- }
-
- time.Sleep(100 * time.Millisecond)
- }
- }
-}
-
-func TestRunDatamanagerWithBogusServer(t *testing.T) {
- defer TearDownDataManagerTest(t)
- SetupDataManagerTest(t)
-
- arv.ApiServer = "bogus-server"
-
- err := singlerun(arv)
- if err == nil {
- t.Fatalf("Expected error during singlerun with bogus server")
- }
-}
-
-func TestRunDatamanagerAsNonAdminUser(t *testing.T) {
- defer TearDownDataManagerTest(t)
- SetupDataManagerTest(t)
-
- arv.ApiToken = arvadostest.ActiveToken
-
- err := singlerun(arv)
- if err == nil {
- t.Fatalf("Expected error during singlerun as non-admin user")
- }
-}
-
-func TestPutAndGetBlocks_NoErrorDuringSingleRun(t *testing.T) {
- testOldBlocksNotDeletedOnDataManagerError(t, "", "", false, false)
-}
-
-func TestPutAndGetBlocks_ErrorDuringGetCollectionsBadWriteTo(t *testing.T) {
- badpath, err := arvadostest.CreateBadPath()
- if err != nil {
- t.Fatalf(err.Error())
- }
- defer func() {
- err = arvadostest.DestroyBadPath(badpath)
- if err != nil {
- t.Fatalf(err.Error())
- }
- }()
- testOldBlocksNotDeletedOnDataManagerError(t, path.Join(badpath, "writetofile"), "", true, true)
-}
-
-func TestPutAndGetBlocks_ErrorDuringGetCollectionsBadHeapProfileFilename(t *testing.T) {
- badpath, err := arvadostest.CreateBadPath()
- if err != nil {
- t.Fatalf(err.Error())
- }
- defer func() {
- err = arvadostest.DestroyBadPath(badpath)
- if err != nil {
- t.Fatalf(err.Error())
- }
- }()
- testOldBlocksNotDeletedOnDataManagerError(t, "", path.Join(badpath, "heapprofilefile"), true, true)
-}
-
-// Create some blocks and backdate some of them.
-// Run datamanager while producing an error condition.
-// Verify that the blocks are hence not deleted.
-func testOldBlocksNotDeletedOnDataManagerError(t *testing.T, writeDataTo string, heapProfileFile string, expectError bool, expectOldBlocks bool) {
- defer TearDownDataManagerTest(t)
- SetupDataManagerTest(t)
-
- // Put some blocks and backdate them.
- var oldUnusedBlockLocators []string
- oldUnusedBlockData := "this block will have older mtime"
- for i := 0; i < 5; i++ {
- oldUnusedBlockLocators = append(oldUnusedBlockLocators, putBlock(t, fmt.Sprintf("%s%d", oldUnusedBlockData, i)))
- }
- backdateBlocks(t, oldUnusedBlockLocators)
-
- // Run data manager
- summary.WriteDataTo = writeDataTo
- collection.HeapProfileFilename = heapProfileFile
-
- err := singlerun(arv)
- if !expectError {
- if err != nil {
- t.Fatalf("Got an error during datamanager singlerun: %v", err)
- }
- } else {
- if err == nil {
- t.Fatalf("Expected error during datamanager singlerun")
- }
- }
- waitUntilQueuesFinishWork(t)
-
- // Get block indexes and verify that all backdated blocks are not/deleted as expected
- if expectOldBlocks {
- verifyBlocks(t, nil, oldUnusedBlockLocators, 2)
- } else {
- verifyBlocks(t, oldUnusedBlockLocators, nil, 2)
- }
-}
-
-// Create a collection with multiple streams and blocks
-func createMultiStreamBlockCollection(t *testing.T, data string, numStreams, numBlocks int) (string, []string) {
- defer switchToken(arvadostest.AdminToken)()
-
- manifest := ""
- locators := make(map[string]bool)
- for s := 0; s < numStreams; s++ {
- manifest += fmt.Sprintf("./stream%d ", s)
- for b := 0; b < numBlocks; b++ {
- locator, _, err := keepClient.PutB([]byte(fmt.Sprintf("%s in stream %d and block %d", data, s, b)))
- if err != nil {
- t.Fatalf("Error creating block %d in stream %d: %v", b, s, err)
- }
- locators[strings.Split(locator, "+A")[0]] = true
- manifest += locator + " "
- }
- manifest += "0:1:dummyfile.txt\n"
- }
-
- collection := make(Dict)
- err := arv.Create("collections",
- arvadosclient.Dict{"collection": arvadosclient.Dict{"manifest_text": manifest}},
- &collection)
-
- if err != nil {
- t.Fatalf("Error creating collection %v", err)
- }
-
- var locs []string
- for k := range locators {
- locs = append(locs, k)
- }
-
- return collection["uuid"].(string), locs
-}
-
-// Create collection with multiple streams and blocks; backdate the blocks and but do not delete the collection.
-// Also, create stray block and backdate it.
-// After datamanager run: expect blocks from the collection, but not the stray block.
-func TestManifestWithMultipleStreamsAndBlocks(t *testing.T) {
- testManifestWithMultipleStreamsAndBlocks(t, 100, 10, "", false)
-}
-
-// Same test as TestManifestWithMultipleStreamsAndBlocks with an additional
-// keepstore of a service type other than "disk". Only the "disk" type services
-// will be indexed by datamanager and hence should work the same way.
-func TestManifestWithMultipleStreamsAndBlocks_WithOneUnsupportedKeepServer(t *testing.T) {
- testManifestWithMultipleStreamsAndBlocks(t, 2, 2, "testblobstore", false)
-}
-
-// Test datamanager with dry-run. Expect no block to be deleted.
-func TestManifestWithMultipleStreamsAndBlocks_DryRun(t *testing.T) {
- testManifestWithMultipleStreamsAndBlocks(t, 2, 2, "", true)
-}
-
-func testManifestWithMultipleStreamsAndBlocks(t *testing.T, numStreams, numBlocks int, createExtraKeepServerWithType string, isDryRun bool) {
- defer TearDownDataManagerTest(t)
- SetupDataManagerTest(t)
-
- // create collection whose blocks will be backdated
- collectionWithOldBlocks, oldBlocks := createMultiStreamBlockCollection(t, "old block", numStreams, numBlocks)
- if collectionWithOldBlocks == "" {
- t.Fatalf("Failed to create collection with %d blocks", numStreams*numBlocks)
- }
- if len(oldBlocks) != numStreams*numBlocks {
- t.Fatalf("Not all blocks are created: expected %v, found %v", 1000, len(oldBlocks))
- }
-
- // create a stray block that will be backdated
- strayOldBlock := putBlock(t, "this stray block is old")
-
- expected := []string{strayOldBlock}
- expected = append(expected, oldBlocks...)
- verifyBlocks(t, nil, expected, 2)
-
- // Backdate old blocks; but the collection still references these blocks
- backdateBlocks(t, oldBlocks)
-
- // also backdate the stray old block
- backdateBlocks(t, []string{strayOldBlock})
-
- // If requested, create an extra keepserver with the given type
- // This should be ignored during indexing and hence not change the datamanager outcome
- var extraKeepServerUUID string
- if createExtraKeepServerWithType != "" {
- extraKeepServerUUID = addExtraKeepServer(t, createExtraKeepServerWithType)
- defer deleteExtraKeepServer(extraKeepServerUUID)
- }
-
- // run datamanager
- dryRun = isDryRun
- dataManagerSingleRun(t)
-
- if dryRun {
- // verify that all blocks, including strayOldBlock, are still to be found
- verifyBlocks(t, nil, expected, 2)
- } else {
- // verify that strayOldBlock is not to be found, but the collections blocks are still there
- verifyBlocks(t, []string{strayOldBlock}, oldBlocks, 2)
- }
-}
-
-// Add one more keepstore with the given service type
-func addExtraKeepServer(t *testing.T, serviceType string) string {
- defer switchToken(arvadostest.AdminToken)()
-
- extraKeepService := make(arvadosclient.Dict)
- err := arv.Create("keep_services",
- arvadosclient.Dict{"keep_service": arvadosclient.Dict{
- "service_host": "localhost",
- "service_port": "21321",
- "service_ssl_flag": false,
- "service_type": serviceType}},
- &extraKeepService)
- if err != nil {
- t.Fatal(err)
- }
-
- return extraKeepService["uuid"].(string)
-}
-
-func deleteExtraKeepServer(uuid string) {
- defer switchToken(arvadostest.AdminToken)()
- arv.Delete("keep_services", uuid, nil, nil)
-}
+++ /dev/null
-#! /usr/bin/env python
-
-import arvados
-
-import argparse
-import cgi
-import csv
-import json
-import logging
-import math
-import pprint
-import re
-import threading
-import urllib2
-
-from BaseHTTPServer import HTTPServer, BaseHTTPRequestHandler
-from collections import defaultdict, Counter
-from functools import partial
-from operator import itemgetter
-from SocketServer import ThreadingMixIn
-
-arv = arvados.api('v1')
-
-# Adapted from http://stackoverflow.com/questions/4180980/formatting-data-quantity-capacity-as-string
-byteunits = ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB')
-def fileSizeFormat(value):
- exponent = 0 if value == 0 else int(math.log(value, 1024))
- return "%7.2f %-3s" % (float(value) / pow(1024, exponent),
- byteunits[exponent])
-
-def percentageFloor(x):
- """ Returns a float which is the input rounded down to the neared 0.01.
-
-e.g. precentageFloor(0.941354) = 0.94
-"""
- return math.floor(x*100) / 100.0
-
-
-def byteSizeFromValidUuid(valid_uuid):
- return int(valid_uuid.split('+')[1])
-
-class maxdict(dict):
- """A dictionary that holds the largest value entered for each key."""
- def addValue(self, key, value):
- dict.__setitem__(self, key, max(dict.get(self, key), value))
- def addValues(self, kv_pairs):
- for key,value in kv_pairs:
- self.addValue(key, value)
- def addDict(self, d):
- self.addValues(d.items())
-
-class CollectionInfo:
- DEFAULT_PERSISTER_REPLICATION_LEVEL=2
- all_by_uuid = {}
-
- def __init__(self, uuid):
- if CollectionInfo.all_by_uuid.has_key(uuid):
- raise ValueError('Collection for uuid "%s" already exists.' % uuid)
- self.uuid = uuid
- self.block_uuids = set() # uuids of keep blocks in this collection
- self.reader_uuids = set() # uuids of users who can read this collection
- self.persister_uuids = set() # uuids of users who want this collection saved
- # map from user uuid to replication level they desire
- self.persister_replication = maxdict()
-
- # The whole api response in case we need anything else later.
- self.api_response = []
- CollectionInfo.all_by_uuid[uuid] = self
-
- def byteSize(self):
- return sum(map(byteSizeFromValidUuid, self.block_uuids))
-
- def __str__(self):
- return ('CollectionInfo uuid: %s\n'
- ' %d block(s) containing %s\n'
- ' reader_uuids: %s\n'
- ' persister_replication: %s' %
- (self.uuid,
- len(self.block_uuids),
- fileSizeFormat(self.byteSize()),
- pprint.pformat(self.reader_uuids, indent = 15),
- pprint.pformat(self.persister_replication, indent = 15)))
-
- @staticmethod
- def get(uuid):
- if not CollectionInfo.all_by_uuid.has_key(uuid):
- CollectionInfo(uuid)
- return CollectionInfo.all_by_uuid[uuid]
-
-
-def extractUuid(candidate):
- """ Returns a canonical (hash+size) uuid from a valid uuid, or None if candidate is not a valid uuid."""
- match = re.match('([0-9a-fA-F]{32}\+[0-9]+)(\+[^+]+)*$', candidate)
- return match and match.group(1)
-
-def checkUserIsAdmin():
- current_user = arv.users().current().execute()
-
- if not current_user['is_admin']:
- log.warning('Current user %s (%s - %s) does not have '
- 'admin access and will not see much of the data.',
- current_user['full_name'],
- current_user['email'],
- current_user['uuid'])
- if args.require_admin_user:
- log.critical('Exiting, rerun with --no-require-admin-user '
- 'if you wish to continue.')
- exit(1)
-
-def buildCollectionsList():
- if args.uuid:
- return [args.uuid,]
- else:
- collections_list_response = arv.collections().list(limit=args.max_api_results).execute()
-
- print ('Returned %d of %d collections.' %
- (len(collections_list_response['items']),
- collections_list_response['items_available']))
-
- return [item['uuid'] for item in collections_list_response['items']]
-
-
-def readCollections(collection_uuids):
- for collection_uuid in collection_uuids:
- collection_block_uuids = set()
- collection_response = arv.collections().get(uuid=collection_uuid).execute()
- collection_info = CollectionInfo.get(collection_uuid)
- collection_info.api_response = collection_response
- manifest_lines = collection_response['manifest_text'].split('\n')
-
- if args.verbose:
- print 'Manifest text for %s:' % collection_uuid
- pprint.pprint(manifest_lines)
-
- for manifest_line in manifest_lines:
- if manifest_line:
- manifest_tokens = manifest_line.split(' ')
- if args.verbose:
- print 'manifest tokens: ' + pprint.pformat(manifest_tokens)
- stream_name = manifest_tokens[0]
-
- line_block_uuids = set(filter(None,
- [extractUuid(candidate)
- for candidate in manifest_tokens[1:]]))
- collection_info.block_uuids.update(line_block_uuids)
-
- # file_tokens = [token
- # for token in manifest_tokens[1:]
- # if extractUuid(token) is None]
-
- # # Sort file tokens by start position in case they aren't already
- # file_tokens.sort(key=lambda file_token: int(file_token.split(':')[0]))
-
- # if args.verbose:
- # print 'line_block_uuids: ' + pprint.pformat(line_block_uuids)
- # print 'file_tokens: ' + pprint.pformat(file_tokens)
-
-
-def readLinks():
- link_classes = set()
-
- for collection_uuid,collection_info in CollectionInfo.all_by_uuid.items():
- # TODO(misha): We may not be seing all the links, but since items
- # available does not return an accurate number, I don't knos how
- # to confirm that we saw all of them.
- collection_links_response = arv.links().list(where={'head_uuid':collection_uuid}).execute()
- link_classes.update([link['link_class'] for link in collection_links_response['items']])
- for link in collection_links_response['items']:
- if link['link_class'] == 'permission':
- collection_info.reader_uuids.add(link['tail_uuid'])
- elif link['link_class'] == 'resources':
- replication_level = link['properties'].get(
- 'replication',
- CollectionInfo.DEFAULT_PERSISTER_REPLICATION_LEVEL)
- collection_info.persister_replication.addValue(
- link['tail_uuid'],
- replication_level)
- collection_info.persister_uuids.add(link['tail_uuid'])
-
- print 'Found the following link classes:'
- pprint.pprint(link_classes)
-
-def reportMostPopularCollections():
- most_popular_collections = sorted(
- CollectionInfo.all_by_uuid.values(),
- key=lambda info: len(info.reader_uuids) + 10 * len(info.persister_replication),
- reverse=True)[:10]
-
- print 'Most popular Collections:'
- for collection_info in most_popular_collections:
- print collection_info
-
-
-def buildMaps():
- for collection_uuid,collection_info in CollectionInfo.all_by_uuid.items():
- # Add the block holding the manifest itself for all calculations
- block_uuids = collection_info.block_uuids.union([collection_uuid,])
- for block_uuid in block_uuids:
- block_to_collections[block_uuid].add(collection_uuid)
- block_to_readers[block_uuid].update(collection_info.reader_uuids)
- block_to_persisters[block_uuid].update(collection_info.persister_uuids)
- block_to_persister_replication[block_uuid].addDict(
- collection_info.persister_replication)
- for reader_uuid in collection_info.reader_uuids:
- reader_to_collections[reader_uuid].add(collection_uuid)
- reader_to_blocks[reader_uuid].update(block_uuids)
- for persister_uuid in collection_info.persister_uuids:
- persister_to_collections[persister_uuid].add(collection_uuid)
- persister_to_blocks[persister_uuid].update(block_uuids)
-
-
-def itemsByValueLength(original):
- return sorted(original.items(),
- key=lambda item:len(item[1]),
- reverse=True)
-
-
-def reportBusiestUsers():
- busiest_readers = itemsByValueLength(reader_to_collections)
- print 'The busiest readers are:'
- for reader,collections in busiest_readers:
- print '%s reading %d collections.' % (reader, len(collections))
- busiest_persisters = itemsByValueLength(persister_to_collections)
- print 'The busiest persisters are:'
- for persister,collections in busiest_persisters:
- print '%s reading %d collections.' % (persister, len(collections))
-
-
-def blockDiskUsage(block_uuid):
- """Returns the disk usage of a block given its uuid.
-
- Will return 0 before reading the contents of the keep servers.
- """
- return byteSizeFromValidUuid(block_uuid) * block_to_replication[block_uuid]
-
-def blockPersistedUsage(user_uuid, block_uuid):
- return (byteSizeFromValidUuid(block_uuid) *
- block_to_persister_replication[block_uuid].get(user_uuid, 0))
-
-memo_computeWeightedReplicationCosts = {}
-def computeWeightedReplicationCosts(replication_levels):
- """Computes the relative cost of varied replication levels.
-
- replication_levels: a tuple of integers representing the desired
- replication level. If n users want a replication level of x then x
- should appear n times in replication_levels.
-
- Returns a dictionary from replication level to cost.
-
- The basic thinking is that the cost of replicating at level x should
- be shared by everyone who wants replication of level x or higher.
-
- For example, if we have two users who want 1 copy, one user who
- wants 3 copies and two users who want 6 copies:
- the input would be [1, 1, 3, 6, 6] (or any permutation)
-
- The cost of the first copy is shared by all 5 users, so they each
- pay 1 copy / 5 users = 0.2.
- The cost of the second and third copies shared by 3 users, so they
- each pay 2 copies / 3 users = 0.67 (plus the above costs)
- The cost of the fourth, fifth and sixth copies is shared by two
- users, so they each pay 3 copies / 2 users = 1.5 (plus the above costs)
-
- Here are some other examples:
- computeWeightedReplicationCosts([1,]) -> {1:1.0}
- computeWeightedReplicationCosts([2,]) -> {2:2.0}
- computeWeightedReplicationCosts([1,1]) -> {1:0.5}
- computeWeightedReplicationCosts([2,2]) -> {1:1.0}
- computeWeightedReplicationCosts([1,2]) -> {1:0.5,2:1.5}
- computeWeightedReplicationCosts([1,3]) -> {1:0.5,2:2.5}
- computeWeightedReplicationCosts([1,3,6,6,10]) -> {1:0.2,3:0.7,6:1.7,10:5.7}
- """
- replication_level_counts = sorted(Counter(replication_levels).items())
-
- memo_key = str(replication_level_counts)
-
- if not memo_key in memo_computeWeightedReplicationCosts:
- last_level = 0
- current_cost = 0
- total_interested = float(sum(map(itemgetter(1), replication_level_counts)))
- cost_for_level = {}
- for replication_level, count in replication_level_counts:
- copies_added = replication_level - last_level
- # compute marginal cost from last level and add it to the last cost
- current_cost += copies_added / total_interested
- cost_for_level[replication_level] = current_cost
- # update invariants
- last_level = replication_level
- total_interested -= count
- memo_computeWeightedReplicationCosts[memo_key] = cost_for_level
-
- return memo_computeWeightedReplicationCosts[memo_key]
-
-def blockPersistedWeightedUsage(user_uuid, block_uuid):
- persister_replication_for_block = block_to_persister_replication[block_uuid]
- user_replication = persister_replication_for_block[user_uuid]
- return (
- byteSizeFromValidUuid(block_uuid) *
- computeWeightedReplicationCosts(
- persister_replication_for_block.values())[user_replication])
-
-
-def computeUserStorageUsage():
- for user, blocks in reader_to_blocks.items():
- user_to_usage[user][UNWEIGHTED_READ_SIZE_COL] = sum(map(
- byteSizeFromValidUuid,
- blocks))
- user_to_usage[user][WEIGHTED_READ_SIZE_COL] = sum(map(
- lambda block_uuid:(float(byteSizeFromValidUuid(block_uuid))/
- len(block_to_readers[block_uuid])),
- blocks))
- for user, blocks in persister_to_blocks.items():
- user_to_usage[user][UNWEIGHTED_PERSIST_SIZE_COL] = sum(map(
- partial(blockPersistedUsage, user),
- blocks))
- user_to_usage[user][WEIGHTED_PERSIST_SIZE_COL] = sum(map(
- partial(blockPersistedWeightedUsage, user),
- blocks))
-
-def printUserStorageUsage():
- print ('user: unweighted readable block size, weighted readable block size, '
- 'unweighted persisted block size, weighted persisted block size:')
- for user, usage in user_to_usage.items():
- print ('%s: %s %s %s %s' %
- (user,
- fileSizeFormat(usage[UNWEIGHTED_READ_SIZE_COL]),
- fileSizeFormat(usage[WEIGHTED_READ_SIZE_COL]),
- fileSizeFormat(usage[UNWEIGHTED_PERSIST_SIZE_COL]),
- fileSizeFormat(usage[WEIGHTED_PERSIST_SIZE_COL])))
-
-def logUserStorageUsage():
- for user, usage in user_to_usage.items():
- body = {}
- # user could actually represent a user or a group. We don't set
- # the object_type field since we don't know which we have.
- body['object_uuid'] = user
- body['event_type'] = args.user_storage_log_event_type
- properties = {}
- properties['read_collections_total_bytes'] = usage[UNWEIGHTED_READ_SIZE_COL]
- properties['read_collections_weighted_bytes'] = (
- usage[WEIGHTED_READ_SIZE_COL])
- properties['persisted_collections_total_bytes'] = (
- usage[UNWEIGHTED_PERSIST_SIZE_COL])
- properties['persisted_collections_weighted_bytes'] = (
- usage[WEIGHTED_PERSIST_SIZE_COL])
- body['properties'] = properties
- # TODO(misha): Confirm that this will throw an exception if it
- # fails to create the log entry.
- arv.logs().create(body=body).execute()
-
-def getKeepServers():
- response = arv.keep_disks().list().execute()
- return [[keep_server['service_host'], keep_server['service_port']]
- for keep_server in response['items']]
-
-
-def getKeepBlocks(keep_servers):
- blocks = []
- for host,port in keep_servers:
- response = urllib2.urlopen('http://%s:%d/index' % (host, port))
- server_blocks = [line.split(' ')
- for line in response.read().split('\n')
- if line]
- server_blocks = [(block_id, int(mtime))
- for block_id, mtime in server_blocks]
- blocks.append(server_blocks)
- return blocks
-
-def getKeepStats(keep_servers):
- MOUNT_COLUMN = 5
- TOTAL_COLUMN = 1
- FREE_COLUMN = 3
- DISK_BLOCK_SIZE = 1024
- stats = []
- for host,port in keep_servers:
- response = urllib2.urlopen('http://%s:%d/status.json' % (host, port))
-
- parsed_json = json.load(response)
- df_entries = [line.split()
- for line in parsed_json['df'].split('\n')
- if line]
- keep_volumes = [columns
- for columns in df_entries
- if 'keep' in columns[MOUNT_COLUMN]]
- total_space = DISK_BLOCK_SIZE*sum(map(int,map(itemgetter(TOTAL_COLUMN),
- keep_volumes)))
- free_space = DISK_BLOCK_SIZE*sum(map(int,map(itemgetter(FREE_COLUMN),
- keep_volumes)))
- stats.append([total_space, free_space])
- return stats
-
-
-def computeReplication(keep_blocks):
- for server_blocks in keep_blocks:
- for block_uuid, _ in server_blocks:
- block_to_replication[block_uuid] += 1
- log.debug('Seeing the following replication levels among blocks: %s',
- str(set(block_to_replication.values())))
-
-
-def computeGarbageCollectionCandidates():
- for server_blocks in keep_blocks:
- block_to_latest_mtime.addValues(server_blocks)
- empty_set = set()
- garbage_collection_priority = sorted(
- [(block,mtime)
- for block,mtime in block_to_latest_mtime.items()
- if len(block_to_persisters.get(block,empty_set)) == 0],
- key = itemgetter(1))
- global garbage_collection_report
- garbage_collection_report = []
- cumulative_disk_size = 0
- for block,mtime in garbage_collection_priority:
- disk_size = blockDiskUsage(block)
- cumulative_disk_size += disk_size
- garbage_collection_report.append(
- (block,
- mtime,
- disk_size,
- cumulative_disk_size,
- float(free_keep_space + cumulative_disk_size)/total_keep_space))
-
- print 'The oldest Garbage Collection Candidates: '
- pprint.pprint(garbage_collection_report[:20])
-
-
-def outputGarbageCollectionReport(filename):
- with open(filename, 'wb') as csvfile:
- gcwriter = csv.writer(csvfile)
- gcwriter.writerow(['block uuid', 'latest mtime', 'disk size',
- 'cumulative size', 'disk free'])
- for line in garbage_collection_report:
- gcwriter.writerow(line)
-
-def computeGarbageCollectionHistogram():
- # TODO(misha): Modify this to allow users to specify the number of
- # histogram buckets through a flag.
- histogram = []
- last_percentage = -1
- for _,mtime,_,_,disk_free in garbage_collection_report:
- curr_percentage = percentageFloor(disk_free)
- if curr_percentage > last_percentage:
- histogram.append( (mtime, curr_percentage) )
- last_percentage = curr_percentage
-
- log.info('Garbage collection histogram is: %s', histogram)
-
- return histogram
-
-
-def logGarbageCollectionHistogram():
- body = {}
- # TODO(misha): Decide whether we should specify an object_uuid in
- # the body and if so, which uuid to use.
- body['event_type'] = args.block_age_free_space_histogram_log_event_type
- properties = {}
- properties['histogram'] = garbage_collection_histogram
- body['properties'] = properties
- # TODO(misha): Confirm that this will throw an exception if it
- # fails to create the log entry.
- arv.logs().create(body=body).execute()
-
-
-def detectReplicationProblems():
- blocks_not_in_any_collections.update(
- set(block_to_replication.keys()).difference(block_to_collections.keys()))
- underreplicated_persisted_blocks.update(
- [uuid
- for uuid, persister_replication in block_to_persister_replication.items()
- if len(persister_replication) > 0 and
- block_to_replication[uuid] < max(persister_replication.values())])
- overreplicated_persisted_blocks.update(
- [uuid
- for uuid, persister_replication in block_to_persister_replication.items()
- if len(persister_replication) > 0 and
- block_to_replication[uuid] > max(persister_replication.values())])
-
- log.info('Found %d blocks not in any collections, e.g. %s...',
- len(blocks_not_in_any_collections),
- ','.join(list(blocks_not_in_any_collections)[:5]))
- log.info('Found %d underreplicated blocks, e.g. %s...',
- len(underreplicated_persisted_blocks),
- ','.join(list(underreplicated_persisted_blocks)[:5]))
- log.info('Found %d overreplicated blocks, e.g. %s...',
- len(overreplicated_persisted_blocks),
- ','.join(list(overreplicated_persisted_blocks)[:5]))
-
- # TODO:
- # Read blocks sorted by mtime
- # Cache window vs % free space
- # Collections which candidates will appear in
- # Youngest underreplicated read blocks that appear in collections.
- # Report Collections that have blocks which are missing from (or
- # underreplicated in) keep.
-
-
-# This is the main flow here
-
-parser = argparse.ArgumentParser(description='Report on keep disks.')
-"""The command line argument parser we use.
-
-We only use it in the __main__ block, but leave it outside the block
-in case another package wants to use it or customize it by specifying
-it as a parent to their commandline parser.
-"""
-parser.add_argument('-m',
- '--max-api-results',
- type=int,
- default=5000,
- help=('The max results to get at once.'))
-parser.add_argument('-p',
- '--port',
- type=int,
- default=9090,
- help=('The port number to serve on. 0 means no server.'))
-parser.add_argument('-v',
- '--verbose',
- help='increase output verbosity',
- action='store_true')
-parser.add_argument('-u',
- '--uuid',
- help='uuid of specific collection to process')
-parser.add_argument('--require-admin-user',
- action='store_true',
- default=True,
- help='Fail if the user is not an admin [default]')
-parser.add_argument('--no-require-admin-user',
- dest='require_admin_user',
- action='store_false',
- help=('Allow users without admin permissions with '
- 'only a warning.'))
-parser.add_argument('--log-to-workbench',
- action='store_true',
- default=False,
- help='Log findings to workbench')
-parser.add_argument('--no-log-to-workbench',
- dest='log_to_workbench',
- action='store_false',
- help='Don\'t log findings to workbench [default]')
-parser.add_argument('--user-storage-log-event-type',
- default='user-storage-report',
- help=('The event type to set when logging user '
- 'storage usage to workbench.'))
-parser.add_argument('--block-age-free-space-histogram-log-event-type',
- default='block-age-free-space-histogram',
- help=('The event type to set when logging user '
- 'storage usage to workbench.'))
-parser.add_argument('--garbage-collection-file',
- default='',
- help=('The file to write a garbage collection report, or '
- 'leave empty for no report.'))
-
-args = None
-
-# TODO(misha): Think about moving some of this to the __main__ block.
-log = logging.getLogger('arvados.services.datamanager')
-stderr_handler = logging.StreamHandler()
-log.setLevel(logging.INFO)
-stderr_handler.setFormatter(
- logging.Formatter('%(asctime)-15s %(levelname)-8s %(message)s'))
-log.addHandler(stderr_handler)
-
-# Global Data - don't try this at home
-collection_uuids = []
-
-# These maps all map from uuids to a set of uuids
-block_to_collections = defaultdict(set) # keep blocks
-reader_to_collections = defaultdict(set) # collection(s) for which the user has read access
-persister_to_collections = defaultdict(set) # collection(s) which the user has persisted
-block_to_readers = defaultdict(set)
-block_to_persisters = defaultdict(set)
-block_to_persister_replication = defaultdict(maxdict)
-reader_to_blocks = defaultdict(set)
-persister_to_blocks = defaultdict(set)
-
-UNWEIGHTED_READ_SIZE_COL = 0
-WEIGHTED_READ_SIZE_COL = 1
-UNWEIGHTED_PERSIST_SIZE_COL = 2
-WEIGHTED_PERSIST_SIZE_COL = 3
-NUM_COLS = 4
-user_to_usage = defaultdict(lambda : [0,]*NUM_COLS)
-
-keep_servers = []
-keep_blocks = []
-keep_stats = []
-total_keep_space = 0
-free_keep_space = 0
-
-block_to_replication = defaultdict(lambda: 0)
-block_to_latest_mtime = maxdict()
-
-garbage_collection_report = []
-"""A list of non-persisted blocks, sorted by increasing mtime
-
-Each entry is of the form (block uuid, latest mtime, disk size,
-cumulative size)
-
-* block uuid: The id of the block we want to delete
-* latest mtime: The latest mtime of the block across all keep servers.
-* disk size: The total disk space used by this block (block size
-multiplied by current replication level)
-* cumulative disk size: The sum of this block's disk size and all the
-blocks listed above it
-* disk free: The proportion of our disk space that would be free if we
-deleted this block and all the above. So this is (free disk space +
-cumulative disk size) / total disk capacity
-"""
-
-garbage_collection_histogram = []
-""" Shows the tradeoff of keep block age vs keep disk free space.
-
-Each entry is of the form (mtime, Disk Proportion).
-
-An entry of the form (1388747781, 0.52) means that if we deleted the
-oldest non-presisted blocks until we had 52% of the disk free, then
-all blocks with an mtime greater than 1388747781 would be preserved.
-"""
-
-# Stuff to report on
-blocks_not_in_any_collections = set()
-underreplicated_persisted_blocks = set()
-overreplicated_persisted_blocks = set()
-
-all_data_loaded = False
-
-def loadAllData():
- checkUserIsAdmin()
-
- log.info('Building Collection List')
- global collection_uuids
- collection_uuids = filter(None, [extractUuid(candidate)
- for candidate in buildCollectionsList()])
-
- log.info('Reading Collections')
- readCollections(collection_uuids)
-
- if args.verbose:
- pprint.pprint(CollectionInfo.all_by_uuid)
-
- log.info('Reading Links')
- readLinks()
-
- reportMostPopularCollections()
-
- log.info('Building Maps')
- buildMaps()
-
- reportBusiestUsers()
-
- log.info('Getting Keep Servers')
- global keep_servers
- keep_servers = getKeepServers()
-
- print keep_servers
-
- log.info('Getting Blocks from each Keep Server.')
- global keep_blocks
- keep_blocks = getKeepBlocks(keep_servers)
-
- log.info('Getting Stats from each Keep Server.')
- global keep_stats, total_keep_space, free_keep_space
- keep_stats = getKeepStats(keep_servers)
-
- total_keep_space = sum(map(itemgetter(0), keep_stats))
- free_keep_space = sum(map(itemgetter(1), keep_stats))
-
- # TODO(misha): Delete this hack when the keep servers are fixed!
- # This hack deals with the fact that keep servers report each other's disks.
- total_keep_space /= len(keep_stats)
- free_keep_space /= len(keep_stats)
-
- log.info('Total disk space: %s, Free disk space: %s (%d%%).' %
- (fileSizeFormat(total_keep_space),
- fileSizeFormat(free_keep_space),
- 100*free_keep_space/total_keep_space))
-
- computeReplication(keep_blocks)
-
- log.info('average replication level is %f',
- (float(sum(block_to_replication.values())) /
- len(block_to_replication)))
-
- computeGarbageCollectionCandidates()
-
- if args.garbage_collection_file:
- log.info('Writing garbage Collection report to %s',
- args.garbage_collection_file)
- outputGarbageCollectionReport(args.garbage_collection_file)
-
- global garbage_collection_histogram
- garbage_collection_histogram = computeGarbageCollectionHistogram()
-
- if args.log_to_workbench:
- logGarbageCollectionHistogram()
-
- detectReplicationProblems()
-
- computeUserStorageUsage()
- printUserStorageUsage()
- if args.log_to_workbench:
- logUserStorageUsage()
-
- global all_data_loaded
- all_data_loaded = True
-
-
-class DataManagerHandler(BaseHTTPRequestHandler):
- USER_PATH = 'user'
- COLLECTION_PATH = 'collection'
- BLOCK_PATH = 'block'
-
- def userLink(self, uuid):
- return ('<A HREF="/%(path)s/%(uuid)s">%(uuid)s</A>' %
- {'uuid': uuid,
- 'path': DataManagerHandler.USER_PATH})
-
- def collectionLink(self, uuid):
- return ('<A HREF="/%(path)s/%(uuid)s">%(uuid)s</A>' %
- {'uuid': uuid,
- 'path': DataManagerHandler.COLLECTION_PATH})
-
- def blockLink(self, uuid):
- return ('<A HREF="/%(path)s/%(uuid)s">%(uuid)s</A>' %
- {'uuid': uuid,
- 'path': DataManagerHandler.BLOCK_PATH})
-
- def writeTop(self, title):
- self.wfile.write('<HTML><HEAD><TITLE>%s</TITLE></HEAD>\n<BODY>' % title)
-
- def writeBottom(self):
- self.wfile.write('</BODY></HTML>\n')
-
- def writeHomePage(self):
- self.send_response(200)
- self.end_headers()
- self.writeTop('Home')
- self.wfile.write('<TABLE>')
- self.wfile.write('<TR><TH>user'
- '<TH>unweighted readable block size'
- '<TH>weighted readable block size'
- '<TH>unweighted persisted block size'
- '<TH>weighted persisted block size</TR>\n')
- for user, usage in user_to_usage.items():
- self.wfile.write('<TR><TD>%s<TD>%s<TD>%s<TD>%s<TD>%s</TR>\n' %
- (self.userLink(user),
- fileSizeFormat(usage[UNWEIGHTED_READ_SIZE_COL]),
- fileSizeFormat(usage[WEIGHTED_READ_SIZE_COL]),
- fileSizeFormat(usage[UNWEIGHTED_PERSIST_SIZE_COL]),
- fileSizeFormat(usage[WEIGHTED_PERSIST_SIZE_COL])))
- self.wfile.write('</TABLE>\n')
- self.writeBottom()
-
- def userExists(self, uuid):
- # Currently this will return false for a user who exists but
- # doesn't appear on any manifests.
- # TODO(misha): Figure out if we need to fix this.
- return user_to_usage.has_key(uuid)
-
- def writeUserPage(self, uuid):
- if not self.userExists(uuid):
- self.send_error(404,
- 'User (%s) Not Found.' % cgi.escape(uuid, quote=False))
- else:
- # Here we assume that since a user exists, they don't need to be
- # html escaped.
- self.send_response(200)
- self.end_headers()
- self.writeTop('User %s' % uuid)
- self.wfile.write('<TABLE>')
- self.wfile.write('<TR><TH>user'
- '<TH>unweighted readable block size'
- '<TH>weighted readable block size'
- '<TH>unweighted persisted block size'
- '<TH>weighted persisted block size</TR>\n')
- usage = user_to_usage[uuid]
- self.wfile.write('<TR><TD>%s<TD>%s<TD>%s<TD>%s<TD>%s</TR>\n' %
- (self.userLink(uuid),
- fileSizeFormat(usage[UNWEIGHTED_READ_SIZE_COL]),
- fileSizeFormat(usage[WEIGHTED_READ_SIZE_COL]),
- fileSizeFormat(usage[UNWEIGHTED_PERSIST_SIZE_COL]),
- fileSizeFormat(usage[WEIGHTED_PERSIST_SIZE_COL])))
- self.wfile.write('</TABLE>\n')
- self.wfile.write('<P>Persisting Collections: %s\n' %
- ', '.join(map(self.collectionLink,
- persister_to_collections[uuid])))
- self.wfile.write('<P>Reading Collections: %s\n' %
- ', '.join(map(self.collectionLink,
- reader_to_collections[uuid])))
- self.writeBottom()
-
- def collectionExists(self, uuid):
- return CollectionInfo.all_by_uuid.has_key(uuid)
-
- def writeCollectionPage(self, uuid):
- if not self.collectionExists(uuid):
- self.send_error(404,
- 'Collection (%s) Not Found.' % cgi.escape(uuid, quote=False))
- else:
- collection = CollectionInfo.get(uuid)
- # Here we assume that since a collection exists, its id doesn't
- # need to be html escaped.
- self.send_response(200)
- self.end_headers()
- self.writeTop('Collection %s' % uuid)
- self.wfile.write('<H1>Collection %s</H1>\n' % uuid)
- self.wfile.write('<P>Total size %s (not factoring in replication).\n' %
- fileSizeFormat(collection.byteSize()))
- self.wfile.write('<P>Readers: %s\n' %
- ', '.join(map(self.userLink, collection.reader_uuids)))
-
- if len(collection.persister_replication) == 0:
- self.wfile.write('<P>No persisters\n')
- else:
- replication_to_users = defaultdict(set)
- for user,replication in collection.persister_replication.items():
- replication_to_users[replication].add(user)
- replication_levels = sorted(replication_to_users.keys())
-
- self.wfile.write('<P>%d persisters in %d replication level(s) maxing '
- 'out at %dx replication:\n' %
- (len(collection.persister_replication),
- len(replication_levels),
- replication_levels[-1]))
-
- # TODO(misha): This code is used twice, let's move it to a method.
- self.wfile.write('<TABLE><TR><TH>%s</TR>\n' %
- '<TH>'.join(['Replication Level ' + str(x)
- for x in replication_levels]))
- self.wfile.write('<TR>\n')
- for replication_level in replication_levels:
- users = replication_to_users[replication_level]
- self.wfile.write('<TD valign="top">%s\n' % '<BR>\n'.join(
- map(self.userLink, users)))
- self.wfile.write('</TR></TABLE>\n')
-
- replication_to_blocks = defaultdict(set)
- for block in collection.block_uuids:
- replication_to_blocks[block_to_replication[block]].add(block)
- replication_levels = sorted(replication_to_blocks.keys())
- self.wfile.write('<P>%d blocks in %d replication level(s):\n' %
- (len(collection.block_uuids), len(replication_levels)))
- self.wfile.write('<TABLE><TR><TH>%s</TR>\n' %
- '<TH>'.join(['Replication Level ' + str(x)
- for x in replication_levels]))
- self.wfile.write('<TR>\n')
- for replication_level in replication_levels:
- blocks = replication_to_blocks[replication_level]
- self.wfile.write('<TD valign="top">%s\n' % '<BR>\n'.join(blocks))
- self.wfile.write('</TR></TABLE>\n')
-
-
- def do_GET(self):
- if not all_data_loaded:
- self.send_error(503,
- 'Sorry, but I am still loading all the data I need.')
- else:
- # Removing leading '/' and process request path
- split_path = self.path[1:].split('/')
- request_type = split_path[0]
- log.debug('path (%s) split as %s with request_type %s' % (self.path,
- split_path,
- request_type))
- if request_type == '':
- self.writeHomePage()
- elif request_type == DataManagerHandler.USER_PATH:
- self.writeUserPage(split_path[1])
- elif request_type == DataManagerHandler.COLLECTION_PATH:
- self.writeCollectionPage(split_path[1])
- else:
- self.send_error(404, 'Unrecognized request path.')
- return
-
-class ThreadedHTTPServer(ThreadingMixIn, HTTPServer):
- """Handle requests in a separate thread."""
-
-
-if __name__ == '__main__':
- args = parser.parse_args()
-
- if args.port == 0:
- loadAllData()
- else:
- loader = threading.Thread(target = loadAllData, name = 'loader')
- loader.start()
-
- server = ThreadedHTTPServer(('localhost', args.port), DataManagerHandler)
- server.serve_forever()
+++ /dev/null
-#! /usr/bin/env python
-
-import datamanager
-import unittest
-
-class TestComputeWeightedReplicationCosts(unittest.TestCase):
- def test_obvious(self):
- self.assertEqual(datamanager.computeWeightedReplicationCosts([1,]),
- {1:1.0})
-
- def test_simple(self):
- self.assertEqual(datamanager.computeWeightedReplicationCosts([2,]),
- {2:2.0})
-
- def test_even_split(self):
- self.assertEqual(datamanager.computeWeightedReplicationCosts([1,1]),
- {1:0.5})
-
- def test_even_split_bigger(self):
- self.assertEqual(datamanager.computeWeightedReplicationCosts([2,2]),
- {2:1.0})
-
- def test_uneven_split(self):
- self.assertEqual(datamanager.computeWeightedReplicationCosts([1,2]),
- {1:0.5, 2:1.5})
-
- def test_uneven_split_bigger(self):
- self.assertEqual(datamanager.computeWeightedReplicationCosts([1,3]),
- {1:0.5, 3:2.5})
-
- def test_uneven_split_jumble(self):
- self.assertEqual(datamanager.computeWeightedReplicationCosts([1,3,6,6,10]),
- {1:0.2, 3:0.7, 6:1.7, 10:5.7})
-
- def test_documentation_example(self):
- self.assertEqual(datamanager.computeWeightedReplicationCosts([1,1,3,6,6]),
- {1:0.2, 3: 0.2 + 2.0 / 3, 6: 0.2 + 2.0 / 3 + 1.5})
-
-
-if __name__ == '__main__':
- unittest.main()
+++ /dev/null
-/* Deals with getting Keep Server blocks from API Server and Keep Servers. */
-
-package keep
-
-import (
- "bufio"
- "encoding/json"
- "errors"
- "flag"
- "fmt"
- "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
- "git.curoverse.com/arvados.git/sdk/go/blockdigest"
- "git.curoverse.com/arvados.git/sdk/go/keepclient"
- "git.curoverse.com/arvados.git/sdk/go/logger"
- "io"
- "io/ioutil"
- "log"
- "net/http"
- "strconv"
- "strings"
- "time"
-)
-
-// ServerAddress struct
-type ServerAddress struct {
- SSL bool `json:"service_ssl_flag"`
- Host string `json:"service_host"`
- Port int `json:"service_port"`
- UUID string `json:"uuid"`
- ServiceType string `json:"service_type"`
-}
-
-// BlockInfo is info about a particular block returned by the server
-type BlockInfo struct {
- Digest blockdigest.DigestWithSize
- Mtime int64 // TODO(misha): Replace this with a timestamp.
-}
-
-// BlockServerInfo is info about a specified block given by a server
-type BlockServerInfo struct {
- ServerIndex int
- Mtime int64 // TODO(misha): Replace this with a timestamp.
-}
-
-// ServerContents struct
-type ServerContents struct {
- BlockDigestToInfo map[blockdigest.DigestWithSize]BlockInfo
-}
-
-// ServerResponse struct
-type ServerResponse struct {
- Address ServerAddress
- Contents ServerContents
- Err error
-}
-
-// ReadServers struct
-type ReadServers struct {
- ReadAllServers bool
- KeepServerIndexToAddress []ServerAddress
- KeepServerAddressToIndex map[ServerAddress]int
- ServerToContents map[ServerAddress]ServerContents
- BlockToServers map[blockdigest.DigestWithSize][]BlockServerInfo
- BlockReplicationCounts map[int]int
-}
-
-// GetKeepServersParams struct
-type GetKeepServersParams struct {
- Client *arvadosclient.ArvadosClient
- Logger *logger.Logger
- Limit int
-}
-
-// ServiceList consists of the addresses of all the available kee servers
-type ServiceList struct {
- ItemsAvailable int `json:"items_available"`
- KeepServers []ServerAddress `json:"items"`
-}
-
-var serviceType string
-
-func init() {
- flag.StringVar(&serviceType,
- "service-type",
- "disk",
- "Operate only on keep_services with the specified service_type, ignoring all others.")
-}
-
-// String
-// TODO(misha): Change this to include the UUID as well.
-func (s ServerAddress) String() string {
- return s.URL()
-}
-
-// URL of the keep server
-func (s ServerAddress) URL() string {
- if s.SSL {
- return fmt.Sprintf("https://%s:%d", s.Host, s.Port)
- }
- return fmt.Sprintf("http://%s:%d", s.Host, s.Port)
-}
-
-// GetKeepServersAndSummarize gets keep servers from api
-func GetKeepServersAndSummarize(params GetKeepServersParams) (results ReadServers, err error) {
- results, err = GetKeepServers(params)
- if err != nil {
- return
- }
- log.Printf("Returned %d keep disks", len(results.ServerToContents))
-
- results.Summarize(params.Logger)
- log.Printf("Replication level distribution: %v",
- results.BlockReplicationCounts)
-
- return
-}
-
-// GetKeepServers from api server
-func GetKeepServers(params GetKeepServersParams) (results ReadServers, err error) {
- sdkParams := arvadosclient.Dict{
- "filters": [][]string{{"service_type", "!=", "proxy"}},
- }
- if params.Limit > 0 {
- sdkParams["limit"] = params.Limit
- }
-
- var sdkResponse ServiceList
- err = params.Client.List("keep_services", sdkParams, &sdkResponse)
-
- if err != nil {
- return
- }
-
- var keepServers []ServerAddress
- for _, server := range sdkResponse.KeepServers {
- if server.ServiceType == serviceType {
- keepServers = append(keepServers, server)
- } else {
- log.Printf("Skipping keep_service %q because its service_type %q does not match -service-type=%q", server, server.ServiceType, serviceType)
- }
- }
-
- if len(keepServers) == 0 {
- return results, fmt.Errorf("Found no keepservices with the service type %v", serviceType)
- }
-
- if params.Logger != nil {
- params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
- keepInfo := logger.GetOrCreateMap(p, "keep_info")
- keepInfo["num_keep_servers_available"] = sdkResponse.ItemsAvailable
- keepInfo["num_keep_servers_received"] = len(sdkResponse.KeepServers)
- keepInfo["keep_servers"] = sdkResponse.KeepServers
- keepInfo["indexable_keep_servers"] = keepServers
- })
- }
-
- log.Printf("Received keep services list: %+v", sdkResponse)
-
- if len(sdkResponse.KeepServers) < sdkResponse.ItemsAvailable {
- return results, fmt.Errorf("Did not receive all available keep servers: %+v", sdkResponse)
- }
-
- results.KeepServerIndexToAddress = keepServers
- results.KeepServerAddressToIndex = make(map[ServerAddress]int)
- for i, address := range results.KeepServerIndexToAddress {
- results.KeepServerAddressToIndex[address] = i
- }
-
- log.Printf("Got Server Addresses: %v", results)
-
- // Send off all the index requests concurrently
- responseChan := make(chan ServerResponse)
- for _, keepServer := range results.KeepServerIndexToAddress {
- // The above keepsServer variable is reused for each iteration, so
- // it would be shared across all goroutines. This would result in
- // us querying one server n times instead of n different servers
- // as we intended. To avoid this we add it as an explicit
- // parameter which gets copied. This bug and solution is described
- // in https://golang.org/doc/effective_go.html#channels
- go func(keepServer ServerAddress) {
- responseChan <- GetServerContents(params.Logger,
- keepServer,
- params.Client)
- }(keepServer)
- }
-
- results.ServerToContents = make(map[ServerAddress]ServerContents)
- results.BlockToServers = make(map[blockdigest.DigestWithSize][]BlockServerInfo)
-
- // Read all the responses
- for i := range results.KeepServerIndexToAddress {
- _ = i // Here to prevent go from complaining.
- response := <-responseChan
-
- // Check if there were any errors during GetServerContents
- if response.Err != nil {
- return results, response.Err
- }
-
- log.Printf("Received channel response from %v containing %d files",
- response.Address,
- len(response.Contents.BlockDigestToInfo))
- results.ServerToContents[response.Address] = response.Contents
- serverIndex := results.KeepServerAddressToIndex[response.Address]
- for _, blockInfo := range response.Contents.BlockDigestToInfo {
- results.BlockToServers[blockInfo.Digest] = append(
- results.BlockToServers[blockInfo.Digest],
- BlockServerInfo{ServerIndex: serverIndex,
- Mtime: blockInfo.Mtime})
- }
- }
- return
-}
-
-// GetServerContents of the keep server
-func GetServerContents(arvLogger *logger.Logger,
- keepServer ServerAddress,
- arv *arvadosclient.ArvadosClient) (response ServerResponse) {
-
- err := GetServerStatus(arvLogger, keepServer, arv)
- if err != nil {
- response.Err = err
- return
- }
-
- req, err := CreateIndexRequest(arvLogger, keepServer, arv)
- if err != nil {
- response.Err = err
- return
- }
-
- resp, err := arv.Client.Do(req)
- if err != nil {
- response.Err = err
- return
- }
-
- response, err = ReadServerResponse(arvLogger, keepServer, resp)
- if err != nil {
- response.Err = err
- return
- }
-
- return
-}
-
-// GetServerStatus get keep server status by invoking /status.json
-func GetServerStatus(arvLogger *logger.Logger,
- keepServer ServerAddress,
- arv *arvadosclient.ArvadosClient) error {
- url := fmt.Sprintf("http://%s:%d/status.json",
- keepServer.Host,
- keepServer.Port)
-
- if arvLogger != nil {
- now := time.Now()
- arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
- keepInfo := logger.GetOrCreateMap(p, "keep_info")
- serverInfo := make(map[string]interface{})
- serverInfo["status_request_sent_at"] = now
- serverInfo["host"] = keepServer.Host
- serverInfo["port"] = keepServer.Port
-
- keepInfo[keepServer.UUID] = serverInfo
- })
- }
-
- resp, err := arv.Client.Get(url)
- if err != nil {
- return fmt.Errorf("Error getting keep status from %s: %v", url, err)
- } else if resp.StatusCode != 200 {
- return fmt.Errorf("Received error code %d in response to request "+
- "for %s status: %s",
- resp.StatusCode, url, resp.Status)
- }
-
- var keepStatus map[string]interface{}
- decoder := json.NewDecoder(resp.Body)
- decoder.UseNumber()
- err = decoder.Decode(&keepStatus)
- if err != nil {
- return fmt.Errorf("Error decoding keep status from %s: %v", url, err)
- }
-
- if arvLogger != nil {
- now := time.Now()
- arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
- keepInfo := logger.GetOrCreateMap(p, "keep_info")
- serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
- serverInfo["status_response_processed_at"] = now
- serverInfo["status"] = keepStatus
- })
- }
-
- return nil
-}
-
-// CreateIndexRequest to the keep server
-func CreateIndexRequest(arvLogger *logger.Logger,
- keepServer ServerAddress,
- arv *arvadosclient.ArvadosClient) (req *http.Request, err error) {
- url := fmt.Sprintf("http://%s:%d/index", keepServer.Host, keepServer.Port)
- log.Println("About to fetch keep server contents from " + url)
-
- if arvLogger != nil {
- now := time.Now()
- arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
- keepInfo := logger.GetOrCreateMap(p, "keep_info")
- serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
- serverInfo["index_request_sent_at"] = now
- })
- }
-
- req, err = http.NewRequest("GET", url, nil)
- if err != nil {
- return req, fmt.Errorf("Error building http request for %s: %v", url, err)
- }
-
- req.Header.Add("Authorization", "OAuth2 "+arv.ApiToken)
- return req, err
-}
-
-// ReadServerResponse reads reasponse from keep server
-func ReadServerResponse(arvLogger *logger.Logger,
- keepServer ServerAddress,
- resp *http.Response) (response ServerResponse, err error) {
-
- if resp.StatusCode != 200 {
- return response, fmt.Errorf("Received error code %d in response to index request for %s: %s",
- resp.StatusCode, keepServer.String(), resp.Status)
- }
-
- if arvLogger != nil {
- now := time.Now()
- arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
- keepInfo := logger.GetOrCreateMap(p, "keep_info")
- serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
- serverInfo["index_response_received_at"] = now
- })
- }
-
- response.Address = keepServer
- response.Contents.BlockDigestToInfo =
- make(map[blockdigest.DigestWithSize]BlockInfo)
- reader := bufio.NewReader(resp.Body)
- numLines, numDuplicates, numSizeDisagreements := 0, 0, 0
- for {
- numLines++
- line, err := reader.ReadString('\n')
- if err == io.EOF {
- return response, fmt.Errorf("Index from %s truncated at line %d",
- keepServer.String(), numLines)
- } else if err != nil {
- return response, fmt.Errorf("Error reading index response from %s at line %d: %v",
- keepServer.String(), numLines, err)
- }
- if line == "\n" {
- if _, err := reader.Peek(1); err == nil {
- extra, _ := reader.ReadString('\n')
- return response, fmt.Errorf("Index from %s had trailing data at line %d after EOF marker: %s",
- keepServer.String(), numLines+1, extra)
- } else if err != io.EOF {
- return response, fmt.Errorf("Index from %s had read error after EOF marker at line %d: %v",
- keepServer.String(), numLines, err)
- }
- numLines--
- break
- }
- blockInfo, err := parseBlockInfoFromIndexLine(line)
- if err != nil {
- return response, fmt.Errorf("Error parsing BlockInfo from index line "+
- "received from %s: %v",
- keepServer.String(),
- err)
- }
-
- if storedBlock, ok := response.Contents.BlockDigestToInfo[blockInfo.Digest]; ok {
- // This server returned multiple lines containing the same block digest.
- numDuplicates++
- // Keep the block that's newer.
- if storedBlock.Mtime < blockInfo.Mtime {
- response.Contents.BlockDigestToInfo[blockInfo.Digest] = blockInfo
- }
- } else {
- response.Contents.BlockDigestToInfo[blockInfo.Digest] = blockInfo
- }
- }
-
- log.Printf("%s index contained %d lines with %d duplicates with "+
- "%d size disagreements",
- keepServer.String(),
- numLines,
- numDuplicates,
- numSizeDisagreements)
-
- if arvLogger != nil {
- now := time.Now()
- arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
- keepInfo := logger.GetOrCreateMap(p, "keep_info")
- serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
-
- serverInfo["processing_finished_at"] = now
- serverInfo["lines_received"] = numLines
- serverInfo["duplicates_seen"] = numDuplicates
- serverInfo["size_disagreements_seen"] = numSizeDisagreements
- })
- }
- resp.Body.Close()
- return
-}
-
-func parseBlockInfoFromIndexLine(indexLine string) (blockInfo BlockInfo, err error) {
- tokens := strings.Fields(indexLine)
- if len(tokens) != 2 {
- err = fmt.Errorf("Expected 2 tokens per line but received a "+
- "line containing %#q instead.",
- tokens)
- }
-
- var locator blockdigest.BlockLocator
- if locator, err = blockdigest.ParseBlockLocator(tokens[0]); err != nil {
- err = fmt.Errorf("%v Received error while parsing line \"%#q\"",
- err, indexLine)
- return
- }
- if len(locator.Hints) > 0 {
- err = fmt.Errorf("Block locator in index line should not contain hints "+
- "but it does: %#q",
- locator)
- return
- }
-
- var ns int64
- ns, err = strconv.ParseInt(tokens[1], 10, 64)
- if err != nil {
- return
- }
- if ns < 1e12 {
- // An old version of keepstore is giving us timestamps
- // in seconds instead of nanoseconds. (This threshold
- // correctly handles all times between 1970-01-02 and
- // 33658-09-27.)
- ns = ns * 1e9
- }
- blockInfo.Mtime = ns
- blockInfo.Digest = blockdigest.DigestWithSize{
- Digest: locator.Digest,
- Size: uint32(locator.Size),
- }
- return
-}
-
-// Summarize results from keep server
-func (readServers *ReadServers) Summarize(arvLogger *logger.Logger) {
- readServers.BlockReplicationCounts = make(map[int]int)
- for _, infos := range readServers.BlockToServers {
- replication := len(infos)
- readServers.BlockReplicationCounts[replication]++
- }
-
- if arvLogger != nil {
- arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
- keepInfo := logger.GetOrCreateMap(p, "keep_info")
- keepInfo["distinct_blocks_stored"] = len(readServers.BlockToServers)
- })
- }
-}
-
-// TrashRequest struct
-type TrashRequest struct {
- Locator string `json:"locator"`
- BlockMtime int64 `json:"block_mtime"`
-}
-
-// TrashList is an array of TrashRequest objects
-type TrashList []TrashRequest
-
-// SendTrashLists to trash queue
-func SendTrashLists(arvLogger *logger.Logger, kc *keepclient.KeepClient, spl map[string]TrashList, dryRun bool) (errs []error) {
- count := 0
- barrier := make(chan error)
-
- client := kc.Client
-
- for url, v := range spl {
- if arvLogger != nil {
- // We need a local variable because Update doesn't call our mutator func until later,
- // when our list variable might have been reused by the next loop iteration.
- url := url
- trashLen := len(v)
- arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
- trashListInfo := logger.GetOrCreateMap(p, "trash_list_len")
- trashListInfo[url] = trashLen
- })
- }
-
- if dryRun {
- log.Printf("dry run, not sending trash list to service %s with %d blocks", url, len(v))
- continue
- }
-
- count++
- log.Printf("Sending trash list to %v", url)
-
- go (func(url string, v TrashList) {
- pipeReader, pipeWriter := io.Pipe()
- go (func() {
- enc := json.NewEncoder(pipeWriter)
- enc.Encode(v)
- pipeWriter.Close()
- })()
-
- req, err := http.NewRequest("PUT", fmt.Sprintf("%s/trash", url), pipeReader)
- if err != nil {
- log.Printf("Error creating trash list request for %v error: %v", url, err.Error())
- barrier <- err
- return
- }
-
- req.Header.Add("Authorization", "OAuth2 "+kc.Arvados.ApiToken)
-
- // Make the request
- var resp *http.Response
- if resp, err = client.Do(req); err != nil {
- log.Printf("Error sending trash list to %v error: %v", url, err.Error())
- barrier <- err
- return
- }
-
- log.Printf("Sent trash list to %v: response was HTTP %v", url, resp.Status)
-
- io.Copy(ioutil.Discard, resp.Body)
- resp.Body.Close()
-
- if resp.StatusCode != 200 {
- barrier <- errors.New(fmt.Sprintf("Got HTTP code %v", resp.StatusCode))
- } else {
- barrier <- nil
- }
- })(url, v)
- }
-
- for i := 0; i < count; i++ {
- b := <-barrier
- if b != nil {
- errs = append(errs, b)
- }
- }
-
- return errs
-}
+++ /dev/null
-package keep
-
-import (
- "encoding/json"
- "fmt"
- "net"
- "net/http"
- "net/http/httptest"
- "net/url"
- "strconv"
- "strings"
- "testing"
-
- "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
- "git.curoverse.com/arvados.git/sdk/go/arvadostest"
- "git.curoverse.com/arvados.git/sdk/go/blockdigest"
- "git.curoverse.com/arvados.git/sdk/go/keepclient"
-
- . "gopkg.in/check.v1"
-)
-
-// Gocheck boilerplate
-func Test(t *testing.T) {
- TestingT(t)
-}
-
-type KeepSuite struct{}
-
-var _ = Suite(&KeepSuite{})
-
-type TestHandler struct {
- request TrashList
-}
-
-func (ts *TestHandler) ServeHTTP(writer http.ResponseWriter, req *http.Request) {
- r := json.NewDecoder(req.Body)
- r.Decode(&ts.request)
-}
-
-func (s *KeepSuite) TestSendTrashLists(c *C) {
- th := TestHandler{}
- server := httptest.NewServer(&th)
- defer server.Close()
-
- tl := map[string]TrashList{
- server.URL: {TrashRequest{"000000000000000000000000deadbeef", 99}}}
-
- arv := &arvadosclient.ArvadosClient{ApiToken: "abc123"}
- kc := keepclient.KeepClient{Arvados: arv, Client: &http.Client{}}
- kc.SetServiceRoots(map[string]string{"xxxx": server.URL},
- map[string]string{"xxxx": server.URL},
- map[string]string{})
-
- err := SendTrashLists(nil, &kc, tl, false)
-
- c.Check(err, IsNil)
-
- c.Check(th.request,
- DeepEquals,
- tl[server.URL])
-
-}
-
-type TestHandlerError struct {
-}
-
-func (tse *TestHandlerError) ServeHTTP(writer http.ResponseWriter, req *http.Request) {
- http.Error(writer, "I'm a teapot", 418)
-}
-
-func sendTrashListError(c *C, server *httptest.Server) {
- tl := map[string]TrashList{
- server.URL: {TrashRequest{"000000000000000000000000deadbeef", 99}}}
-
- arv := &arvadosclient.ArvadosClient{ApiToken: "abc123"}
- kc := keepclient.KeepClient{Arvados: arv, Client: &http.Client{}}
- kc.SetServiceRoots(map[string]string{"xxxx": server.URL},
- map[string]string{"xxxx": server.URL},
- map[string]string{})
-
- err := SendTrashLists(nil, &kc, tl, false)
-
- c.Check(err, NotNil)
- c.Check(err[0], NotNil)
-}
-
-func (s *KeepSuite) TestSendTrashListErrorResponse(c *C) {
- server := httptest.NewServer(&TestHandlerError{})
- sendTrashListError(c, server)
- defer server.Close()
-}
-
-func (s *KeepSuite) TestSendTrashListUnreachable(c *C) {
- sendTrashListError(c, httptest.NewUnstartedServer(&TestHandler{}))
-}
-
-type APITestData struct {
- numServers int
- serverType string
- statusCode int
-}
-
-func (s *KeepSuite) TestGetKeepServers_UnsupportedServiceType(c *C) {
- testGetKeepServersFromAPI(c, APITestData{1, "notadisk", 200}, "Found no keepservices with the service type disk")
-}
-
-func (s *KeepSuite) TestGetKeepServers_ReceivedTooFewServers(c *C) {
- testGetKeepServersFromAPI(c, APITestData{2, "disk", 200}, "Did not receive all available keep servers")
-}
-
-func (s *KeepSuite) TestGetKeepServers_ServerError(c *C) {
- testGetKeepServersFromAPI(c, APITestData{-1, "disk", -1}, "arvados API server error")
-}
-
-func testGetKeepServersFromAPI(c *C, testData APITestData, expectedError string) {
- keepServers := ServiceList{
- ItemsAvailable: testData.numServers,
- KeepServers: []ServerAddress{{
- SSL: false,
- Host: "example.com",
- Port: 12345,
- UUID: "abcdefg",
- ServiceType: testData.serverType,
- }},
- }
-
- ksJSON, _ := json.Marshal(keepServers)
- apiStubResponses := make(map[string]arvadostest.StubResponse)
- apiStubResponses["/arvados/v1/keep_services"] = arvadostest.StubResponse{testData.statusCode, string(ksJSON)}
- apiStub := arvadostest.ServerStub{apiStubResponses}
-
- api := httptest.NewServer(&apiStub)
- defer api.Close()
-
- arv := &arvadosclient.ArvadosClient{
- Scheme: "http",
- ApiServer: api.URL[7:],
- ApiToken: "abc123",
- Client: &http.Client{Transport: &http.Transport{}},
- }
-
- kc := keepclient.KeepClient{Arvados: arv, Client: &http.Client{}}
- kc.SetServiceRoots(map[string]string{"xxxx": "http://example.com:23456"},
- map[string]string{"xxxx": "http://example.com:23456"},
- map[string]string{})
-
- params := GetKeepServersParams{
- Client: arv,
- Logger: nil,
- Limit: 10,
- }
-
- _, err := GetKeepServersAndSummarize(params)
- c.Assert(err, NotNil)
- c.Assert(err, ErrorMatches, fmt.Sprintf(".*%s.*", expectedError))
-}
-
-type KeepServerTestData struct {
- // handle /status.json
- statusStatusCode int
-
- // handle /index
- indexStatusCode int
- indexResponseBody string
-
- // expected error, if any
- expectedError string
-}
-
-func (s *KeepSuite) TestGetKeepServers_ErrorGettingKeepServerStatus(c *C) {
- testGetKeepServersAndSummarize(c, KeepServerTestData{500, 200, "ok",
- ".*http://.* 500 Internal Server Error"})
-}
-
-func (s *KeepSuite) TestGetKeepServers_GettingIndex(c *C) {
- testGetKeepServersAndSummarize(c, KeepServerTestData{200, -1, "notok",
- ".*redirect-loop.*"})
-}
-
-func (s *KeepSuite) TestGetKeepServers_ErrorReadServerResponse(c *C) {
- testGetKeepServersAndSummarize(c, KeepServerTestData{200, 500, "notok",
- ".*http://.* 500 Internal Server Error"})
-}
-
-func (s *KeepSuite) TestGetKeepServers_ReadServerResponseTuncatedAtLineOne(c *C) {
- testGetKeepServersAndSummarize(c, KeepServerTestData{200, 200,
- "notterminatedwithnewline", "Index from http://.* truncated at line 1"})
-}
-
-func (s *KeepSuite) TestGetKeepServers_InvalidBlockLocatorPattern(c *C) {
- testGetKeepServersAndSummarize(c, KeepServerTestData{200, 200, "testing\n",
- "Error parsing BlockInfo from index line.*"})
-}
-
-func (s *KeepSuite) TestGetKeepServers_ReadServerResponseEmpty(c *C) {
- testGetKeepServersAndSummarize(c, KeepServerTestData{200, 200, "\n", ""})
-}
-
-func (s *KeepSuite) TestGetKeepServers_ReadServerResponseWithTwoBlocks(c *C) {
- testGetKeepServersAndSummarize(c, KeepServerTestData{200, 200,
- "51752ba076e461ec9ec1d27400a08548+20 1447526361\na048cc05c02ba1ee43ad071274b9e547+52 1447526362\n\n", ""})
-}
-
-func testGetKeepServersAndSummarize(c *C, testData KeepServerTestData) {
- ksStubResponses := make(map[string]arvadostest.StubResponse)
- ksStubResponses["/status.json"] = arvadostest.StubResponse{testData.statusStatusCode, string(`{}`)}
- ksStubResponses["/index"] = arvadostest.StubResponse{testData.indexStatusCode, testData.indexResponseBody}
- ksStub := arvadostest.ServerStub{ksStubResponses}
- ks := httptest.NewServer(&ksStub)
- defer ks.Close()
-
- ksURL, err := url.Parse(ks.URL)
- c.Check(err, IsNil)
- ksHost, port, err := net.SplitHostPort(ksURL.Host)
- ksPort, err := strconv.Atoi(port)
- c.Check(err, IsNil)
-
- servers_list := ServiceList{
- ItemsAvailable: 1,
- KeepServers: []ServerAddress{{
- SSL: false,
- Host: ksHost,
- Port: ksPort,
- UUID: "abcdefg",
- ServiceType: "disk",
- }},
- }
- ksJSON, _ := json.Marshal(servers_list)
- apiStubResponses := make(map[string]arvadostest.StubResponse)
- apiStubResponses["/arvados/v1/keep_services"] = arvadostest.StubResponse{200, string(ksJSON)}
- apiStub := arvadostest.ServerStub{apiStubResponses}
-
- api := httptest.NewServer(&apiStub)
- defer api.Close()
-
- arv := &arvadosclient.ArvadosClient{
- Scheme: "http",
- ApiServer: api.URL[7:],
- ApiToken: "abc123",
- Client: &http.Client{Transport: &http.Transport{}},
- }
-
- kc := keepclient.KeepClient{Arvados: arv, Client: &http.Client{}}
- kc.SetServiceRoots(map[string]string{"xxxx": ks.URL},
- map[string]string{"xxxx": ks.URL},
- map[string]string{})
-
- params := GetKeepServersParams{
- Client: arv,
- Logger: nil,
- Limit: 10,
- }
-
- // GetKeepServersAndSummarize
- results, err := GetKeepServersAndSummarize(params)
-
- if testData.expectedError == "" {
- c.Assert(err, IsNil)
- c.Assert(results, NotNil)
-
- blockToServers := results.BlockToServers
-
- blockLocators := strings.Split(testData.indexResponseBody, "\n")
- for _, loc := range blockLocators {
- locator := strings.Split(loc, " ")[0]
- if locator != "" {
- blockLocator, err := blockdigest.ParseBlockLocator(locator)
- c.Assert(err, IsNil)
-
- blockDigestWithSize := blockdigest.DigestWithSize{blockLocator.Digest, uint32(blockLocator.Size)}
- blockServerInfo := blockToServers[blockDigestWithSize]
- c.Assert(blockServerInfo[0].Mtime, NotNil)
- }
- }
- } else {
- c.Assert(err, ErrorMatches, testData.expectedError)
- }
-}
+++ /dev/null
-/* Datamanager-specific logging methods. */
-
-package loggerutil
-
-import (
- "git.curoverse.com/arvados.git/sdk/go/logger"
- "log"
- "os"
- "runtime"
- "time"
-)
-
-// Useful to call at the beginning of execution to log info about the
-// current run.
-func LogRunInfo(arvLogger *logger.Logger) {
- if arvLogger != nil {
- now := time.Now()
- arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
- runInfo := logger.GetOrCreateMap(p, "run_info")
- runInfo["started_at"] = now
- runInfo["args"] = os.Args
- hostname, err := os.Hostname()
- if err != nil {
- runInfo["hostname_error"] = err.Error()
- } else {
- runInfo["hostname"] = hostname
- }
- runInfo["pid"] = os.Getpid()
- })
- }
-}
-
-// A LogMutator that records the current memory usage. This is most useful as a logger write hook.
-func LogMemoryAlloc(p map[string]interface{}, e map[string]interface{}) {
- runInfo := logger.GetOrCreateMap(p, "run_info")
- var memStats runtime.MemStats
- runtime.ReadMemStats(&memStats)
- runInfo["memory_bytes_in_use"] = memStats.Alloc
- runInfo["memory_bytes_reserved"] = memStats.Sys
-}
-
-func FatalWithMessage(arvLogger *logger.Logger, message string) {
- if arvLogger != nil {
- arvLogger.FinalUpdate(func(p map[string]interface{}, e map[string]interface{}) {
- p["FATAL"] = message
- runInfo := logger.GetOrCreateMap(p, "run_info")
- runInfo["finished_at"] = time.Now()
- })
- }
-
- log.Fatalf(message)
-}
+++ /dev/null
-/* Ensures that we only have one copy of each unique string. This is
-/* not designed for concurrent access. */
-
-package summary
-
-// This code should probably be moved somewhere more universal.
-
-// CanonicalString struct
-type CanonicalString struct {
- m map[string]string
-}
-
-// Get a CanonicalString
-func (cs *CanonicalString) Get(s string) (r string) {
- if cs.m == nil {
- cs.m = make(map[string]string)
- }
- value, found := cs.m[s]
- if found {
- return value
- }
-
- // s may be a substring of a much larger string.
- // If we store s, it will prevent that larger string from getting
- // garbage collected.
- // If this is something you worry about you should change this code
- // to make an explict copy of s using a byte array.
- cs.m[s] = s
- return s
-}
+++ /dev/null
-// Handles writing data to and reading data from disk to speed up development.
-
-package summary
-
-import (
- "encoding/gob"
- "flag"
- "fmt"
- "git.curoverse.com/arvados.git/sdk/go/logger"
- "git.curoverse.com/arvados.git/services/datamanager/collection"
- "git.curoverse.com/arvados.git/services/datamanager/keep"
- "log"
- "os"
-)
-
-// Used to locally cache data read from servers to reduce execution
-// time when developing. Not for use in production.
-type serializedData struct {
- ReadCollections collection.ReadCollections
- KeepServerInfo keep.ReadServers
-}
-
-var (
- WriteDataTo string
- readDataFrom string
-)
-
-// DataFetcher to fetch data from keep servers
-type DataFetcher func(arvLogger *logger.Logger,
- readCollections *collection.ReadCollections,
- keepServerInfo *keep.ReadServers) error
-
-func init() {
- flag.StringVar(&WriteDataTo,
- "write-data-to",
- "",
- "Write summary of data received to this file. Used for development only.")
- flag.StringVar(&readDataFrom,
- "read-data-from",
- "",
- "Avoid network i/o and read summary data from this file instead. Used for development only.")
-}
-
-// MaybeWriteData writes data we've read to a file.
-//
-// This is useful for development, so that we don't need to read all
-// our data from the network every time we tweak something.
-//
-// This should not be used outside of development, since you'll be
-// working with stale data.
-func MaybeWriteData(arvLogger *logger.Logger,
- readCollections collection.ReadCollections,
- keepServerInfo keep.ReadServers) error {
- if WriteDataTo == "" {
- return nil
- }
- summaryFile, err := os.Create(WriteDataTo)
- if err != nil {
- return err
- }
- defer summaryFile.Close()
-
- enc := gob.NewEncoder(summaryFile)
- data := serializedData{
- ReadCollections: readCollections,
- KeepServerInfo: keepServerInfo}
- err = enc.Encode(data)
- if err != nil {
- return err
- }
- log.Printf("Wrote summary data to: %s", WriteDataTo)
- return nil
-}
-
-// ShouldReadData should not be used outside of development
-func ShouldReadData() bool {
- return readDataFrom != ""
-}
-
-// ReadData reads data that we've written to a file.
-//
-// This is useful for development, so that we don't need to read all
-// our data from the network every time we tweak something.
-//
-// This should not be used outside of development, since you'll be
-// working with stale data.
-func ReadData(arvLogger *logger.Logger,
- readCollections *collection.ReadCollections,
- keepServerInfo *keep.ReadServers) error {
- if readDataFrom == "" {
- return fmt.Errorf("ReadData() called with empty filename.")
- }
- summaryFile, err := os.Open(readDataFrom)
- if err != nil {
- return err
- }
- defer summaryFile.Close()
-
- dec := gob.NewDecoder(summaryFile)
- data := serializedData{}
- err = dec.Decode(&data)
- if err != nil {
- return err
- }
-
- // re-summarize data, so that we can update our summarizing
- // functions without needing to do all our network i/o
- data.ReadCollections.Summarize(arvLogger)
- data.KeepServerInfo.Summarize(arvLogger)
-
- *readCollections = data.ReadCollections
- *keepServerInfo = data.KeepServerInfo
- log.Printf("Read summary data from: %s", readDataFrom)
- return nil
-}
+++ /dev/null
-// Code for generating pull lists as described in https://arvados.org/projects/arvados/wiki/Keep_Design_Doc#Pull-List
-
-package summary
-
-import (
- "encoding/json"
- "fmt"
- "git.curoverse.com/arvados.git/sdk/go/blockdigest"
- "git.curoverse.com/arvados.git/sdk/go/keepclient"
- "git.curoverse.com/arvados.git/sdk/go/logger"
- "git.curoverse.com/arvados.git/services/datamanager/keep"
- "log"
- "os"
- "strings"
-)
-
-// Locator is a block digest
-type Locator blockdigest.DigestWithSize
-
-// MarshalJSON encoding
-func (l Locator) MarshalJSON() ([]byte, error) {
- return []byte("\"" + blockdigest.DigestWithSize(l).String() + "\""), nil
-}
-
-// PullRequest represents one entry in the Pull List
-type PullRequest struct {
- Locator Locator `json:"locator"`
- Servers []string `json:"servers"`
-}
-
-// PullList for a particular server
-type PullList []PullRequest
-
-// PullListByLocator implements sort.Interface for PullList based on
-// the Digest.
-type PullListByLocator PullList
-
-func (a PullListByLocator) Len() int { return len(a) }
-func (a PullListByLocator) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
-func (a PullListByLocator) Less(i, j int) bool {
- di, dj := a[i].Locator.Digest, a[j].Locator.Digest
- if di.H < dj.H {
- return true
- } else if di.H == dj.H {
- if di.L < dj.L {
- return true
- } else if di.L == dj.L {
- return a[i].Locator.Size < a[j].Locator.Size
- }
- }
- return false
-}
-
-// PullServers struct
-// For a given under-replicated block, this structure represents which
-// servers should pull the specified block and which servers they can
-// pull it from.
-type PullServers struct {
- To []string // Servers that should pull the specified block
- From []string // Servers that already contain the specified block
-}
-
-// ComputePullServers creates a map from block locator to PullServers
-// with one entry for each under-replicated block.
-//
-// This method ignores zero-replica blocks since there are no servers
-// to pull them from, so callers should feel free to omit them, but
-// this function will ignore them if they are provided.
-func ComputePullServers(kc *keepclient.KeepClient,
- keepServerInfo *keep.ReadServers,
- blockToDesiredReplication map[blockdigest.DigestWithSize]int,
- underReplicated BlockSet) (m map[Locator]PullServers) {
- m = map[Locator]PullServers{}
- // We use CanonicalString to avoid filling memory with duplicate
- // copies of the same string.
- var cs CanonicalString
-
- // Servers that are writeable
- writableServers := map[string]struct{}{}
- for _, url := range kc.WritableLocalRoots() {
- writableServers[cs.Get(url)] = struct{}{}
- }
-
- for block := range underReplicated {
- serversStoringBlock := keepServerInfo.BlockToServers[block]
- numCopies := len(serversStoringBlock)
- numCopiesMissing := blockToDesiredReplication[block] - numCopies
- if numCopiesMissing > 0 {
- // We expect this to always be true, since the block was listed
- // in underReplicated.
-
- if numCopies > 0 {
- // Not much we can do with blocks with no copies.
-
- // A server's host-port string appears as a key in this map
- // iff it contains the block.
- serverHasBlock := map[string]struct{}{}
- for _, info := range serversStoringBlock {
- sa := keepServerInfo.KeepServerIndexToAddress[info.ServerIndex]
- serverHasBlock[cs.Get(sa.URL())] = struct{}{}
- }
-
- roots := keepclient.NewRootSorter(kc.LocalRoots(),
- block.String()).GetSortedRoots()
-
- l := Locator(block)
- m[l] = CreatePullServers(cs, serverHasBlock, writableServers,
- roots, numCopiesMissing)
- }
- }
- }
- return m
-}
-
-// CreatePullServers creates a pull list in which the To and From
-// fields preserve the ordering of sorted servers and the contents
-// are all canonical strings.
-func CreatePullServers(cs CanonicalString,
- serverHasBlock map[string]struct{},
- writableServers map[string]struct{},
- sortedServers []string,
- maxToFields int) (ps PullServers) {
-
- ps = PullServers{
- To: make([]string, 0, maxToFields),
- From: make([]string, 0, len(serverHasBlock)),
- }
-
- for _, host := range sortedServers {
- // Strip the protocol portion of the url.
- // Use the canonical copy of the string to avoid memory waste.
- server := cs.Get(host)
- _, hasBlock := serverHasBlock[server]
- if hasBlock {
- // The from field should include the protocol.
- ps.From = append(ps.From, cs.Get(host))
- } else if len(ps.To) < maxToFields {
- _, writable := writableServers[host]
- if writable {
- ps.To = append(ps.To, server)
- }
- }
- }
-
- return
-}
-
-// RemoveProtocolPrefix strips the protocol prefix from a url.
-func RemoveProtocolPrefix(url string) string {
- return url[(strings.LastIndex(url, "/") + 1):]
-}
-
-// BuildPullLists produces a PullList for each keep server.
-func BuildPullLists(lps map[Locator]PullServers) (spl map[string]PullList) {
- spl = map[string]PullList{}
- // We don't worry about canonicalizing our strings here, because we
- // assume lps was created by ComputePullServers() which already
- // canonicalized the strings for us.
- for locator, pullServers := range lps {
- for _, destination := range pullServers.To {
- pullList, pullListExists := spl[destination]
- if !pullListExists {
- pullList = PullList{}
- }
- spl[destination] = append(pullList,
- PullRequest{Locator: locator, Servers: pullServers.From})
- }
- }
- return
-}
-
-// WritePullLists writes each pull list to a file.
-// The filename is based on the hostname.
-//
-// This is just a hack for prototyping, it is not expected to be used
-// in production.
-func WritePullLists(arvLogger *logger.Logger,
- pullLists map[string]PullList,
- dryRun bool) error {
- r := strings.NewReplacer(":", ".")
-
- for host, list := range pullLists {
- if arvLogger != nil {
- // We need a local variable because Update doesn't call our mutator func until later,
- // when our list variable might have been reused by the next loop iteration.
- host := host
- listLen := len(list)
- arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
- pullListInfo := logger.GetOrCreateMap(p, "pull_list_len")
- pullListInfo[host] = listLen
- })
- }
-
- if dryRun {
- log.Print("dry run, not sending pull list to service %s with %d blocks", host, len(list))
- continue
- }
-
- filename := fmt.Sprintf("pull_list.%s", r.Replace(RemoveProtocolPrefix(host)))
- pullListFile, err := os.Create(filename)
- if err != nil {
- return err
- }
- defer pullListFile.Close()
-
- enc := json.NewEncoder(pullListFile)
- err = enc.Encode(list)
- if err != nil {
- return err
- }
- log.Printf("Wrote pull list to %s.", filename)
- }
-
- return nil
-}
+++ /dev/null
-package summary
-
-import (
- "encoding/json"
- "git.curoverse.com/arvados.git/sdk/go/blockdigest"
- . "gopkg.in/check.v1"
- "sort"
- "testing"
-)
-
-// Gocheck boilerplate
-func TestPullLists(t *testing.T) {
- TestingT(t)
-}
-
-type PullSuite struct{}
-
-var _ = Suite(&PullSuite{})
-
-// Helper method to declare string sets more succinctly
-// Could be placed somewhere more general.
-func stringSet(slice ...string) (m map[string]struct{}) {
- m = map[string]struct{}{}
- for _, s := range slice {
- m[s] = struct{}{}
- }
- return
-}
-
-func (s *PullSuite) TestPullListPrintsJSONCorrectly(c *C) {
- pl := PullList{PullRequest{
- Locator: Locator(blockdigest.MakeTestDigestSpecifySize(0xBadBeef, 56789)),
- Servers: []string{"keep0.qr1hi.arvadosapi.com:25107",
- "keep1.qr1hi.arvadosapi.com:25108"}}}
-
- b, err := json.Marshal(pl)
- c.Assert(err, IsNil)
- expectedOutput := `[{"locator":"0000000000000000000000000badbeef+56789",` +
- `"servers":["keep0.qr1hi.arvadosapi.com:25107",` +
- `"keep1.qr1hi.arvadosapi.com:25108"]}]`
- c.Check(string(b), Equals, expectedOutput)
-}
-
-func (s *PullSuite) TestCreatePullServers(c *C) {
- var cs CanonicalString
- c.Check(
- CreatePullServers(cs,
- stringSet(),
- stringSet(),
- []string{},
- 5),
- DeepEquals,
- PullServers{To: []string{}, From: []string{}})
-
- c.Check(
- CreatePullServers(cs,
- stringSet("https://keep0:25107", "https://keep1:25108"),
- stringSet(),
- []string{},
- 5),
- DeepEquals,
- PullServers{To: []string{}, From: []string{}})
-
- c.Check(
- CreatePullServers(cs,
- stringSet("https://keep0:25107", "https://keep1:25108"),
- stringSet("https://keep0:25107"),
- []string{"https://keep0:25107"},
- 5),
- DeepEquals,
- PullServers{To: []string{}, From: []string{"https://keep0:25107"}})
-
- c.Check(
- CreatePullServers(cs,
- stringSet("https://keep0:25107", "https://keep1:25108"),
- stringSet("https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"),
- []string{"https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"},
- 5),
- DeepEquals,
- PullServers{To: []string{"https://keep3:25110", "https://keep2:25109"},
- From: []string{"https://keep1:25108", "https://keep0:25107"}})
-
- c.Check(
- CreatePullServers(cs,
- stringSet("https://keep0:25107", "https://keep1:25108"),
- stringSet("https://keep3:25110", "https://keep1:25108", "https://keep0:25107"),
- []string{"https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"},
- 5),
- DeepEquals,
- PullServers{To: []string{"https://keep3:25110"},
- From: []string{"https://keep1:25108", "https://keep0:25107"}})
-
- c.Check(
- CreatePullServers(cs,
- stringSet("https://keep0:25107", "https://keep1:25108"),
- stringSet("https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"),
- []string{"https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"},
- 1),
- DeepEquals,
- PullServers{To: []string{"https://keep3:25110"},
- From: []string{"https://keep1:25108", "https://keep0:25107"}})
-
- c.Check(
- CreatePullServers(cs,
- stringSet("https://keep0:25107", "https://keep1:25108"),
- stringSet("https://keep3:25110", "https://keep2:25109",
- "https://keep1:25108", "https://keep0:25107"),
- []string{"https://keep3:25110", "https://keep2:25109",
- "https://keep1:25108", "https://keep0:25107"},
- 1),
- DeepEquals,
- PullServers{To: []string{"https://keep3:25110"},
- From: []string{"https://keep1:25108", "https://keep0:25107"}})
-
- c.Check(
- CreatePullServers(cs,
- stringSet("https://keep0:25107", "https://keep1:25108"),
- stringSet("https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"),
- []string{"https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"},
- 0),
- DeepEquals,
- PullServers{To: []string{},
- From: []string{"https://keep1:25108", "https://keep0:25107"}})
-}
-
-// Checks whether two pull list maps are equal. Since pull lists are
-// ordered arbitrarily, we need to sort them by digest before
-// comparing them for deep equality.
-type pullListMapEqualsChecker struct {
- *CheckerInfo
-}
-
-func (c *pullListMapEqualsChecker) Check(params []interface{}, names []string) (result bool, error string) {
- obtained, ok := params[0].(map[string]PullList)
- if !ok {
- return false, "First parameter is not a PullList map"
- }
- expected, ok := params[1].(map[string]PullList)
- if !ok {
- return false, "Second parameter is not a PullList map"
- }
-
- for _, v := range obtained {
- sort.Sort(PullListByLocator(v))
- }
- for _, v := range expected {
- sort.Sort(PullListByLocator(v))
- }
-
- return DeepEquals.Check(params, names)
-}
-
-var PullListMapEquals Checker = &pullListMapEqualsChecker{&CheckerInfo{
- Name: "PullListMapEquals",
- Params: []string{"obtained", "expected"},
-}}
-
-func (s *PullSuite) TestBuildPullLists(c *C) {
- c.Check(
- BuildPullLists(map[Locator]PullServers{}),
- PullListMapEquals,
- map[string]PullList{})
-
- locator1 := Locator{Digest: blockdigest.MakeTestBlockDigest(0xBadBeef)}
- c.Check(
- BuildPullLists(map[Locator]PullServers{
- locator1: {To: []string{}, From: []string{}}}),
- PullListMapEquals,
- map[string]PullList{})
-
- c.Check(
- BuildPullLists(map[Locator]PullServers{
- locator1: {To: []string{}, From: []string{"f1", "f2"}}}),
- PullListMapEquals,
- map[string]PullList{})
-
- c.Check(
- BuildPullLists(map[Locator]PullServers{
- locator1: {To: []string{"t1"}, From: []string{"f1", "f2"}}}),
- PullListMapEquals,
- map[string]PullList{
- "t1": {PullRequest{locator1, []string{"f1", "f2"}}}})
-
- c.Check(
- BuildPullLists(map[Locator]PullServers{
- locator1: {To: []string{"t1"}, From: []string{}}}),
- PullListMapEquals,
- map[string]PullList{"t1": {
- PullRequest{locator1, []string{}}}})
-
- c.Check(
- BuildPullLists(map[Locator]PullServers{
- locator1: {
- To: []string{"t1", "t2"},
- From: []string{"f1", "f2"},
- }}),
- PullListMapEquals,
- map[string]PullList{
- "t1": {PullRequest{locator1, []string{"f1", "f2"}}},
- "t2": {PullRequest{locator1, []string{"f1", "f2"}}},
- })
-
- locator2 := Locator{Digest: blockdigest.MakeTestBlockDigest(0xCabbed)}
- c.Check(
- BuildPullLists(map[Locator]PullServers{
- locator1: {To: []string{"t1"}, From: []string{"f1", "f2"}},
- locator2: {To: []string{"t2"}, From: []string{"f3", "f4"}}}),
- PullListMapEquals,
- map[string]PullList{
- "t1": {PullRequest{locator1, []string{"f1", "f2"}}},
- "t2": {PullRequest{locator2, []string{"f3", "f4"}}},
- })
-
- c.Check(
- BuildPullLists(map[Locator]PullServers{
- locator1: {
- To: []string{"t1"},
- From: []string{"f1", "f2"}},
- locator2: {
- To: []string{"t2", "t1"},
- From: []string{"f3", "f4"}},
- }),
- PullListMapEquals,
- map[string]PullList{
- "t1": {
- PullRequest{locator1, []string{"f1", "f2"}},
- PullRequest{locator2, []string{"f3", "f4"}},
- },
- "t2": {
- PullRequest{locator2, []string{"f3", "f4"}},
- },
- })
-
- locator3 := Locator{Digest: blockdigest.MakeTestBlockDigest(0xDeadBeef)}
- locator4 := Locator{Digest: blockdigest.MakeTestBlockDigest(0xFedBeef)}
- c.Check(
- BuildPullLists(map[Locator]PullServers{
- locator1: {
- To: []string{"t1"},
- From: []string{"f1", "f2"}},
- locator2: {
- To: []string{"t2", "t1"},
- From: []string{"f3", "f4"}},
- locator3: {
- To: []string{"t3", "t2", "t1"},
- From: []string{"f4", "f5"}},
- locator4: {
- To: []string{"t4", "t3", "t2", "t1"},
- From: []string{"f1", "f5"}},
- }),
- PullListMapEquals,
- map[string]PullList{
- "t1": {
- PullRequest{locator1, []string{"f1", "f2"}},
- PullRequest{locator2, []string{"f3", "f4"}},
- PullRequest{locator3, []string{"f4", "f5"}},
- PullRequest{locator4, []string{"f1", "f5"}},
- },
- "t2": {
- PullRequest{locator2, []string{"f3", "f4"}},
- PullRequest{locator3, []string{"f4", "f5"}},
- PullRequest{locator4, []string{"f1", "f5"}},
- },
- "t3": {
- PullRequest{locator3, []string{"f4", "f5"}},
- PullRequest{locator4, []string{"f1", "f5"}},
- },
- "t4": {
- PullRequest{locator4, []string{"f1", "f5"}},
- },
- })
-}
+++ /dev/null
-// Summarizes Collection Data and Keep Server Contents.
-
-package summary
-
-// TODO(misha): Check size of blocks as well as their digest.
-
-import (
- "fmt"
- "git.curoverse.com/arvados.git/sdk/go/blockdigest"
- "git.curoverse.com/arvados.git/services/datamanager/collection"
- "git.curoverse.com/arvados.git/services/datamanager/keep"
- "sort"
-)
-
-// BlockSet is a map of blocks
-type BlockSet map[blockdigest.DigestWithSize]struct{}
-
-// Insert adds a single block to the set.
-func (bs BlockSet) Insert(digest blockdigest.DigestWithSize) {
- bs[digest] = struct{}{}
-}
-
-// Union adds a set of blocks to the set.
-func (bs BlockSet) Union(obs BlockSet) {
- for k, v := range obs {
- bs[k] = v
- }
-}
-
-// CollectionIndexSet is used to save space. To convert to and from
-// the uuid, use collection.ReadCollections' fields
-// CollectionIndexToUUID and CollectionUUIDToIndex.
-type CollectionIndexSet map[int]struct{}
-
-// Insert adds a single collection to the set. The collection is specified by
-// its index.
-func (cis CollectionIndexSet) Insert(collectionIndex int) {
- cis[collectionIndex] = struct{}{}
-}
-
-// ToCollectionIndexSet gets block to collection indices
-func (bs BlockSet) ToCollectionIndexSet(
- readCollections collection.ReadCollections,
- collectionIndexSet *CollectionIndexSet) {
- for block := range bs {
- for _, collectionIndex := range readCollections.BlockToCollectionIndices[block] {
- collectionIndexSet.Insert(collectionIndex)
- }
- }
-}
-
-// ReplicationLevels struct
-// Keeps track of the requested and actual replication levels.
-// Currently this is only used for blocks but could easily be used for
-// collections as well.
-type ReplicationLevels struct {
- // The requested replication level.
- // For Blocks this is the maximum replication level among all the
- // collections this block belongs to.
- Requested int
-
- // The actual number of keep servers this is on.
- Actual int
-}
-
-// ReplicationLevelBlockSetMap maps from replication levels to their blocks.
-type ReplicationLevelBlockSetMap map[ReplicationLevels]BlockSet
-
-// ReplicationLevelBlockCount is an individual entry from ReplicationLevelBlockSetMap
-// which only reports the number of blocks, not which blocks.
-type ReplicationLevelBlockCount struct {
- Levels ReplicationLevels
- Count int
-}
-
-// ReplicationLevelBlockSetSlice is an ordered list of ReplicationLevelBlockCount useful for reporting.
-type ReplicationLevelBlockSetSlice []ReplicationLevelBlockCount
-
-// ReplicationSummary sturct
-type ReplicationSummary struct {
- CollectionBlocksNotInKeep BlockSet
- UnderReplicatedBlocks BlockSet
- OverReplicatedBlocks BlockSet
- CorrectlyReplicatedBlocks BlockSet
- KeepBlocksNotInCollections BlockSet
-
- CollectionsNotFullyInKeep CollectionIndexSet
- UnderReplicatedCollections CollectionIndexSet
- OverReplicatedCollections CollectionIndexSet
- CorrectlyReplicatedCollections CollectionIndexSet
-}
-
-// ReplicationSummaryCounts struct counts the elements in each set in ReplicationSummary.
-type ReplicationSummaryCounts struct {
- CollectionBlocksNotInKeep int
- UnderReplicatedBlocks int
- OverReplicatedBlocks int
- CorrectlyReplicatedBlocks int
- KeepBlocksNotInCollections int
- CollectionsNotFullyInKeep int
- UnderReplicatedCollections int
- OverReplicatedCollections int
- CorrectlyReplicatedCollections int
-}
-
-// GetOrCreate gets the BlockSet for a given set of ReplicationLevels,
-// creating it if it doesn't already exist.
-func (rlbs ReplicationLevelBlockSetMap) GetOrCreate(
- repLevels ReplicationLevels) (bs BlockSet) {
- bs, exists := rlbs[repLevels]
- if !exists {
- bs = make(BlockSet)
- rlbs[repLevels] = bs
- }
- return
-}
-
-// Insert adds a block to the set for a given replication level.
-func (rlbs ReplicationLevelBlockSetMap) Insert(
- repLevels ReplicationLevels,
- block blockdigest.DigestWithSize) {
- rlbs.GetOrCreate(repLevels).Insert(block)
-}
-
-// Union adds a set of blocks to the set for a given replication level.
-func (rlbs ReplicationLevelBlockSetMap) Union(
- repLevels ReplicationLevels,
- bs BlockSet) {
- rlbs.GetOrCreate(repLevels).Union(bs)
-}
-
-// Counts outputs a sorted list of ReplicationLevelBlockCounts.
-func (rlbs ReplicationLevelBlockSetMap) Counts() (
- sorted ReplicationLevelBlockSetSlice) {
- sorted = make(ReplicationLevelBlockSetSlice, len(rlbs))
- i := 0
- for levels, set := range rlbs {
- sorted[i] = ReplicationLevelBlockCount{Levels: levels, Count: len(set)}
- i++
- }
- sort.Sort(sorted)
- return
-}
-
-// Implemented to meet sort.Interface
-func (rlbss ReplicationLevelBlockSetSlice) Len() int {
- return len(rlbss)
-}
-
-// Implemented to meet sort.Interface
-func (rlbss ReplicationLevelBlockSetSlice) Less(i, j int) bool {
- return rlbss[i].Levels.Requested < rlbss[j].Levels.Requested ||
- (rlbss[i].Levels.Requested == rlbss[j].Levels.Requested &&
- rlbss[i].Levels.Actual < rlbss[j].Levels.Actual)
-}
-
-// Implemented to meet sort.Interface
-func (rlbss ReplicationLevelBlockSetSlice) Swap(i, j int) {
- rlbss[i], rlbss[j] = rlbss[j], rlbss[i]
-}
-
-// ComputeCounts returns ReplicationSummaryCounts
-func (rs ReplicationSummary) ComputeCounts() (rsc ReplicationSummaryCounts) {
- // TODO(misha): Consider rewriting this method to iterate through
- // the fields using reflection, instead of explictily listing the
- // fields as we do now.
- rsc.CollectionBlocksNotInKeep = len(rs.CollectionBlocksNotInKeep)
- rsc.UnderReplicatedBlocks = len(rs.UnderReplicatedBlocks)
- rsc.OverReplicatedBlocks = len(rs.OverReplicatedBlocks)
- rsc.CorrectlyReplicatedBlocks = len(rs.CorrectlyReplicatedBlocks)
- rsc.KeepBlocksNotInCollections = len(rs.KeepBlocksNotInCollections)
- rsc.CollectionsNotFullyInKeep = len(rs.CollectionsNotFullyInKeep)
- rsc.UnderReplicatedCollections = len(rs.UnderReplicatedCollections)
- rsc.OverReplicatedCollections = len(rs.OverReplicatedCollections)
- rsc.CorrectlyReplicatedCollections = len(rs.CorrectlyReplicatedCollections)
- return rsc
-}
-
-// PrettyPrint ReplicationSummaryCounts
-func (rsc ReplicationSummaryCounts) PrettyPrint() string {
- return fmt.Sprintf("Replication Block Counts:"+
- "\n Missing From Keep: %d, "+
- "\n Under Replicated: %d, "+
- "\n Over Replicated: %d, "+
- "\n Replicated Just Right: %d, "+
- "\n Not In Any Collection: %d. "+
- "\nReplication Collection Counts:"+
- "\n Missing From Keep: %d, "+
- "\n Under Replicated: %d, "+
- "\n Over Replicated: %d, "+
- "\n Replicated Just Right: %d.",
- rsc.CollectionBlocksNotInKeep,
- rsc.UnderReplicatedBlocks,
- rsc.OverReplicatedBlocks,
- rsc.CorrectlyReplicatedBlocks,
- rsc.KeepBlocksNotInCollections,
- rsc.CollectionsNotFullyInKeep,
- rsc.UnderReplicatedCollections,
- rsc.OverReplicatedCollections,
- rsc.CorrectlyReplicatedCollections)
-}
-
-// BucketReplication returns ReplicationLevelBlockSetMap
-func BucketReplication(readCollections collection.ReadCollections,
- keepServerInfo keep.ReadServers) (rlbs ReplicationLevelBlockSetMap) {
- rlbs = make(ReplicationLevelBlockSetMap)
-
- for block, requestedReplication := range readCollections.BlockToDesiredReplication {
- rlbs.Insert(
- ReplicationLevels{
- Requested: requestedReplication,
- Actual: len(keepServerInfo.BlockToServers[block])},
- block)
- }
-
- for block, servers := range keepServerInfo.BlockToServers {
- if 0 == readCollections.BlockToDesiredReplication[block] {
- rlbs.Insert(
- ReplicationLevels{Requested: 0, Actual: len(servers)},
- block)
- }
- }
- return
-}
-
-// SummarizeBuckets reads collections and summarizes
-func (rlbs ReplicationLevelBlockSetMap) SummarizeBuckets(
- readCollections collection.ReadCollections) (
- rs ReplicationSummary) {
- rs.CollectionBlocksNotInKeep = make(BlockSet)
- rs.UnderReplicatedBlocks = make(BlockSet)
- rs.OverReplicatedBlocks = make(BlockSet)
- rs.CorrectlyReplicatedBlocks = make(BlockSet)
- rs.KeepBlocksNotInCollections = make(BlockSet)
-
- rs.CollectionsNotFullyInKeep = make(CollectionIndexSet)
- rs.UnderReplicatedCollections = make(CollectionIndexSet)
- rs.OverReplicatedCollections = make(CollectionIndexSet)
- rs.CorrectlyReplicatedCollections = make(CollectionIndexSet)
-
- for levels, bs := range rlbs {
- if levels.Actual == 0 {
- rs.CollectionBlocksNotInKeep.Union(bs)
- } else if levels.Requested == 0 {
- rs.KeepBlocksNotInCollections.Union(bs)
- } else if levels.Actual < levels.Requested {
- rs.UnderReplicatedBlocks.Union(bs)
- } else if levels.Actual > levels.Requested {
- rs.OverReplicatedBlocks.Union(bs)
- } else {
- rs.CorrectlyReplicatedBlocks.Union(bs)
- }
- }
-
- rs.CollectionBlocksNotInKeep.ToCollectionIndexSet(readCollections,
- &rs.CollectionsNotFullyInKeep)
- // Since different collections can specify different replication
- // levels, the fact that a block is under-replicated does not imply
- // that all collections that it belongs to are under-replicated, but
- // we'll ignore that for now.
- // TODO(misha): Fix this and report the correct set of collections.
- rs.UnderReplicatedBlocks.ToCollectionIndexSet(readCollections,
- &rs.UnderReplicatedCollections)
- rs.OverReplicatedBlocks.ToCollectionIndexSet(readCollections,
- &rs.OverReplicatedCollections)
-
- for i := range readCollections.CollectionIndexToUUID {
- if _, notInKeep := rs.CollectionsNotFullyInKeep[i]; notInKeep {
- } else if _, underReplicated := rs.UnderReplicatedCollections[i]; underReplicated {
- } else if _, overReplicated := rs.OverReplicatedCollections[i]; overReplicated {
- } else {
- rs.CorrectlyReplicatedCollections.Insert(i)
- }
- }
-
- return
-}
+++ /dev/null
-package summary
-
-import (
- "git.curoverse.com/arvados.git/sdk/go/blockdigest"
- "git.curoverse.com/arvados.git/services/datamanager/collection"
- "git.curoverse.com/arvados.git/services/datamanager/keep"
- "reflect"
- "sort"
- "testing"
-)
-
-func BlockSetFromSlice(digests []int) (bs BlockSet) {
- bs = make(BlockSet)
- for _, digest := range digests {
- bs.Insert(blockdigest.MakeTestDigestWithSize(digest))
- }
- return
-}
-
-func CollectionIndexSetFromSlice(indices []int) (cis CollectionIndexSet) {
- cis = make(CollectionIndexSet)
- for _, index := range indices {
- cis.Insert(index)
- }
- return
-}
-
-func (cis CollectionIndexSet) ToSlice() (ints []int) {
- ints = make([]int, len(cis))
- i := 0
- for collectionIndex := range cis {
- ints[i] = collectionIndex
- i++
- }
- sort.Ints(ints)
- return
-}
-
-// Helper method to meet interface expected by older tests.
-func SummarizeReplication(readCollections collection.ReadCollections,
- keepServerInfo keep.ReadServers) (rs ReplicationSummary) {
- return BucketReplication(readCollections, keepServerInfo).
- SummarizeBuckets(readCollections)
-}
-
-// Takes a map from block digest to replication level and represents
-// it in a keep.ReadServers structure.
-func SpecifyReplication(digestToReplication map[int]int) (rs keep.ReadServers) {
- rs.BlockToServers = make(map[blockdigest.DigestWithSize][]keep.BlockServerInfo)
- for digest, replication := range digestToReplication {
- rs.BlockToServers[blockdigest.MakeTestDigestWithSize(digest)] =
- make([]keep.BlockServerInfo, replication)
- }
- return
-}
-
-// Verifies that
-// blocks.ToCollectionIndexSet(rc.BlockToCollectionIndices) returns
-// expectedCollections.
-func VerifyToCollectionIndexSet(
- t *testing.T,
- blocks []int,
- blockToCollectionIndices map[int][]int,
- expectedCollections []int) {
-
- expected := CollectionIndexSetFromSlice(expectedCollections)
-
- rc := collection.ReadCollections{
- BlockToCollectionIndices: map[blockdigest.DigestWithSize][]int{},
- }
- for digest, indices := range blockToCollectionIndices {
- rc.BlockToCollectionIndices[blockdigest.MakeTestDigestWithSize(digest)] = indices
- }
-
- returned := make(CollectionIndexSet)
- BlockSetFromSlice(blocks).ToCollectionIndexSet(rc, &returned)
-
- if !reflect.DeepEqual(returned, expected) {
- t.Errorf("Expected %v.ToCollectionIndexSet(%v) to return \n %v \n but instead received \n %v",
- blocks,
- blockToCollectionIndices,
- expectedCollections,
- returned.ToSlice())
- }
-}
-
-func TestToCollectionIndexSet(t *testing.T) {
- VerifyToCollectionIndexSet(t, []int{6}, map[int][]int{6: {0}}, []int{0})
- VerifyToCollectionIndexSet(t, []int{4}, map[int][]int{4: {1}}, []int{1})
- VerifyToCollectionIndexSet(t, []int{4}, map[int][]int{4: {1, 9}}, []int{1, 9})
- VerifyToCollectionIndexSet(t, []int{5, 6},
- map[int][]int{5: {2, 3}, 6: {3, 4}},
- []int{2, 3, 4})
- VerifyToCollectionIndexSet(t, []int{5, 6},
- map[int][]int{5: {8}, 6: {4}},
- []int{4, 8})
- VerifyToCollectionIndexSet(t, []int{6}, map[int][]int{5: {0}}, []int{})
-}
-
-func TestSimpleSummary(t *testing.T) {
- rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
- {ReplicationLevel: 1, Blocks: []int{1, 2}},
- })
- rc.Summarize(nil)
- cIndex := rc.CollectionIndicesForTesting()
-
- keepInfo := SpecifyReplication(map[int]int{1: 1, 2: 1})
-
- expectedSummary := ReplicationSummary{
- CollectionBlocksNotInKeep: BlockSet{},
- UnderReplicatedBlocks: BlockSet{},
- OverReplicatedBlocks: BlockSet{},
- CorrectlyReplicatedBlocks: BlockSetFromSlice([]int{1, 2}),
- KeepBlocksNotInCollections: BlockSet{},
-
- CollectionsNotFullyInKeep: CollectionIndexSet{},
- UnderReplicatedCollections: CollectionIndexSet{},
- OverReplicatedCollections: CollectionIndexSet{},
- CorrectlyReplicatedCollections: CollectionIndexSetFromSlice([]int{cIndex[0]}),
- }
-
- returnedSummary := SummarizeReplication(rc, keepInfo)
-
- if !reflect.DeepEqual(returnedSummary, expectedSummary) {
- t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v", expectedSummary, returnedSummary)
- }
-}
-
-func TestMissingBlock(t *testing.T) {
- rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
- {ReplicationLevel: 1, Blocks: []int{1, 2}},
- })
- rc.Summarize(nil)
- cIndex := rc.CollectionIndicesForTesting()
-
- keepInfo := SpecifyReplication(map[int]int{1: 1})
-
- expectedSummary := ReplicationSummary{
- CollectionBlocksNotInKeep: BlockSetFromSlice([]int{2}),
- UnderReplicatedBlocks: BlockSet{},
- OverReplicatedBlocks: BlockSet{},
- CorrectlyReplicatedBlocks: BlockSetFromSlice([]int{1}),
- KeepBlocksNotInCollections: BlockSet{},
-
- CollectionsNotFullyInKeep: CollectionIndexSetFromSlice([]int{cIndex[0]}),
- UnderReplicatedCollections: CollectionIndexSet{},
- OverReplicatedCollections: CollectionIndexSet{},
- CorrectlyReplicatedCollections: CollectionIndexSet{},
- }
-
- returnedSummary := SummarizeReplication(rc, keepInfo)
-
- if !reflect.DeepEqual(returnedSummary, expectedSummary) {
- t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v",
- expectedSummary,
- returnedSummary)
- }
-}
-
-func TestUnderAndOverReplicatedBlocks(t *testing.T) {
- rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
- {ReplicationLevel: 2, Blocks: []int{1, 2}},
- })
- rc.Summarize(nil)
- cIndex := rc.CollectionIndicesForTesting()
-
- keepInfo := SpecifyReplication(map[int]int{1: 1, 2: 3})
-
- expectedSummary := ReplicationSummary{
- CollectionBlocksNotInKeep: BlockSet{},
- UnderReplicatedBlocks: BlockSetFromSlice([]int{1}),
- OverReplicatedBlocks: BlockSetFromSlice([]int{2}),
- CorrectlyReplicatedBlocks: BlockSet{},
- KeepBlocksNotInCollections: BlockSet{},
-
- CollectionsNotFullyInKeep: CollectionIndexSet{},
- UnderReplicatedCollections: CollectionIndexSetFromSlice([]int{cIndex[0]}),
- OverReplicatedCollections: CollectionIndexSetFromSlice([]int{cIndex[0]}),
- CorrectlyReplicatedCollections: CollectionIndexSet{},
- }
-
- returnedSummary := SummarizeReplication(rc, keepInfo)
-
- if !reflect.DeepEqual(returnedSummary, expectedSummary) {
- t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v",
- expectedSummary,
- returnedSummary)
- }
-}
-
-func TestMixedReplication(t *testing.T) {
- rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
- {ReplicationLevel: 1, Blocks: []int{1, 2}},
- {ReplicationLevel: 1, Blocks: []int{3, 4}},
- {ReplicationLevel: 2, Blocks: []int{5, 6}},
- })
- rc.Summarize(nil)
- cIndex := rc.CollectionIndicesForTesting()
-
- keepInfo := SpecifyReplication(map[int]int{1: 1, 2: 1, 3: 1, 5: 1, 6: 3, 7: 2})
-
- expectedSummary := ReplicationSummary{
- CollectionBlocksNotInKeep: BlockSetFromSlice([]int{4}),
- UnderReplicatedBlocks: BlockSetFromSlice([]int{5}),
- OverReplicatedBlocks: BlockSetFromSlice([]int{6}),
- CorrectlyReplicatedBlocks: BlockSetFromSlice([]int{1, 2, 3}),
- KeepBlocksNotInCollections: BlockSetFromSlice([]int{7}),
-
- CollectionsNotFullyInKeep: CollectionIndexSetFromSlice([]int{cIndex[1]}),
- UnderReplicatedCollections: CollectionIndexSetFromSlice([]int{cIndex[2]}),
- OverReplicatedCollections: CollectionIndexSetFromSlice([]int{cIndex[2]}),
- CorrectlyReplicatedCollections: CollectionIndexSetFromSlice([]int{cIndex[0]}),
- }
-
- returnedSummary := SummarizeReplication(rc, keepInfo)
-
- if !reflect.DeepEqual(returnedSummary, expectedSummary) {
- t.Fatalf("Expected returnedSummary to look like: \n%+v but instead it is: \n%+v. Index to UUID is %v. BlockToCollectionIndices is %v.", expectedSummary, returnedSummary, rc.CollectionIndexToUUID, rc.BlockToCollectionIndices)
- }
-}
+++ /dev/null
-// Code for generating trash lists
-
-package summary
-
-import (
- "errors"
- "fmt"
- "git.curoverse.com/arvados.git/sdk/go/keepclient"
- "git.curoverse.com/arvados.git/services/datamanager/keep"
- "time"
-)
-
-// BuildTrashLists builds list of blocks to be sent to trash queue
-func BuildTrashLists(kc *keepclient.KeepClient,
- keepServerInfo *keep.ReadServers,
- keepBlocksNotInCollections BlockSet) (m map[string]keep.TrashList, err error) {
-
- // Servers that are writeable
- writableServers := map[string]struct{}{}
- for _, url := range kc.WritableLocalRoots() {
- writableServers[url] = struct{}{}
- }
-
- _ttl, err := kc.Arvados.Discovery("blobSignatureTtl")
- if err != nil {
- return nil, errors.New(fmt.Sprintf("Failed to get blobSignatureTtl, can't build trash lists: %v", err))
- }
-
- ttl := int64(_ttl.(float64))
-
- // expire unreferenced blocks more than "ttl" seconds old.
- expiry := time.Now().UTC().UnixNano() - ttl*1e9
-
- return buildTrashListsInternal(writableServers, keepServerInfo, expiry, keepBlocksNotInCollections), nil
-}
-
-func buildTrashListsInternal(writableServers map[string]struct{},
- keepServerInfo *keep.ReadServers,
- expiry int64,
- keepBlocksNotInCollections BlockSet) (m map[string]keep.TrashList) {
-
- m = make(map[string]keep.TrashList)
-
- for block := range keepBlocksNotInCollections {
- for _, blockOnServer := range keepServerInfo.BlockToServers[block] {
- if blockOnServer.Mtime >= expiry {
- continue
- }
-
- // block is older than expire cutoff
- srv := keepServerInfo.KeepServerIndexToAddress[blockOnServer.ServerIndex].String()
-
- if _, writable := writableServers[srv]; !writable {
- continue
- }
-
- m[srv] = append(m[srv], keep.TrashRequest{Locator: block.Digest.String(), BlockMtime: blockOnServer.Mtime})
- }
- }
- return
-
-}
+++ /dev/null
-package summary
-
-import (
- "git.curoverse.com/arvados.git/sdk/go/blockdigest"
- "git.curoverse.com/arvados.git/services/datamanager/keep"
- . "gopkg.in/check.v1"
- "testing"
-)
-
-// Gocheck boilerplate
-func TestTrash(t *testing.T) {
- TestingT(t)
-}
-
-type TrashSuite struct{}
-
-var _ = Suite(&TrashSuite{})
-
-func (s *TrashSuite) TestBuildTrashLists(c *C) {
- var sv0 = keep.ServerAddress{Host: "keep0.example.com", Port: 80}
- var sv1 = keep.ServerAddress{Host: "keep1.example.com", Port: 80}
-
- var block0 = blockdigest.MakeTestDigestWithSize(0xdeadbeef)
- var block1 = blockdigest.MakeTestDigestWithSize(0xfedbeef)
-
- var keepServerInfo = keep.ReadServers{
- KeepServerIndexToAddress: []keep.ServerAddress{sv0, sv1},
- BlockToServers: map[blockdigest.DigestWithSize][]keep.BlockServerInfo{
- block0: {
- {0, 99},
- {1, 101}},
- block1: {
- {0, 99},
- {1, 101}}}}
-
- // only block0 is in delete set
- var bs = make(BlockSet)
- bs[block0] = struct{}{}
-
- // Test trash list where only sv0 is on writable list.
- c.Check(buildTrashListsInternal(
- map[string]struct{}{
- sv0.URL(): {}},
- &keepServerInfo,
- 110,
- bs),
- DeepEquals,
- map[string]keep.TrashList{
- "http://keep0.example.com:80": {keep.TrashRequest{"000000000000000000000000deadbeef", 99}}})
-
- // Test trash list where both sv0 and sv1 are on writable list.
- c.Check(buildTrashListsInternal(
- map[string]struct{}{
- sv0.URL(): {},
- sv1.URL(): {}},
- &keepServerInfo,
- 110,
- bs),
- DeepEquals,
- map[string]keep.TrashList{
- "http://keep0.example.com:80": {keep.TrashRequest{"000000000000000000000000deadbeef", 99}},
- "http://keep1.example.com:80": {keep.TrashRequest{"000000000000000000000000deadbeef", 101}}})
-
- // Test trash list where only block on sv0 is expired
- c.Check(buildTrashListsInternal(
- map[string]struct{}{
- sv0.URL(): {},
- sv1.URL(): {}},
- &keepServerInfo,
- 100,
- bs),
- DeepEquals,
- map[string]keep.TrashList{
- "http://keep0.example.com:80": {keep.TrashRequest{"000000000000000000000000deadbeef", 99}}})
-
-}
# unlimited to avoid deadlocks, see https://arvados.org/issues/3198#note-43 for
# details.
-llfuse.capi._notify_queue = Queue.Queue()
+if hasattr(llfuse, 'capi'):
+ # llfuse < 0.42
+ llfuse.capi._notify_queue = Queue.Queue()
+else:
+ # llfuse >= 0.42
+ llfuse._notify_queue = Queue.Queue()
from fusedir import sanitize_filename, Directory, CollectionDirectory, TmpCollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase
from fusefile import StringFile, FuseArvadosFile
@catch_exceptions
def destroy(self):
- with llfuse.lock:
- self._shutdown_started.set()
- if self.events:
- self.events.close()
- self.events = None
+ self._shutdown_started.set()
+ if self.events:
+ self.events.close()
+ self.events = None
+ if llfuse.lock.acquire():
+ # llfuse < 0.42
+ self.inodes.clear()
+ llfuse.lock.release()
+ else:
+ # llfuse >= 0.42
self.inodes.clear()
def access(self, inode, mode, ctx):
if 'event_type' not in ev:
return
with llfuse.lock:
+ new_attrs = (ev.get("properties") or {}).get("new_attributes") or {}
+ pdh = new_attrs.get("portable_data_hash")
+ # new_attributes.modified_at currently lacks
+ # subsecond precision (see #6347) so use event_at
+ # which should always be the same.
+ stamp = ev.get("event_at")
+
for item in self.inodes.inode_cache.find_by_uuid(ev["object_uuid"]):
item.invalidate()
- if ev["object_kind"] == "arvados#collection":
- new_attr = (ev.get("properties") and
- ev["properties"].get("new_attributes") and
- ev["properties"]["new_attributes"])
-
- # new_attributes.modified_at currently lacks
- # subsecond precision (see #6347) so use event_at
- # which should always be the same.
- record_version = (
- (ev["event_at"], new_attr["portable_data_hash"])
- if new_attr else None)
-
- item.update(to_record_version=record_version)
+ if stamp and pdh and ev.get("object_kind") == "arvados#collection":
+ item.update(to_record_version=(stamp, pdh))
else:
item.update()
- oldowner = (
- ev.get("properties") and
- ev["properties"].get("old_attributes") and
- ev["properties"]["old_attributes"].get("owner_uuid"))
- newowner = ev["object_owner_uuid"]
+ oldowner = ((ev.get("properties") or {}).get("old_attributes") or {}).get("owner_uuid")
+ newowner = ev.get("object_owner_uuid")
for parent in (
self.inodes.inode_cache.find_by_uuid(oldowner) +
self.inodes.inode_cache.find_by_uuid(newowner)):
parent.invalidate()
parent.update()
-
@catch_exceptions
- def getattr(self, inode):
+ def getattr(self, inode, ctx=None):
if inode not in self.inodes:
raise llfuse.FUSEError(errno.ENOENT)
entry.st_blksize = 512
entry.st_blocks = (entry.st_size/512)+1
- entry.st_atime = int(e.atime())
- entry.st_mtime = int(e.mtime())
- entry.st_ctime = int(e.mtime())
+ if hasattr(entry, 'st_atime_ns'):
+ # llfuse >= 0.42
+ entry.st_atime_ns = int(e.atime() * 1000000000)
+ entry.st_mtime_ns = int(e.mtime() * 1000000000)
+ entry.st_ctime_ns = int(e.mtime() * 1000000000)
+ else:
+ # llfuse < 0.42
+        entry.st_atime = int(e.atime())
+        entry.st_mtime = int(e.mtime())
+        entry.st_ctime = int(e.mtime())
return entry
@catch_exceptions
- def setattr(self, inode, attr):
+ def setattr(self, inode, attr, fields=None, fh=None, ctx=None):
entry = self.getattr(inode)
- e = self.inodes[inode]
+ if fh is not None and fh in self._filehandles:
+ handle = self._filehandles[fh]
+ e = handle.obj
+ else:
+ e = self.inodes[inode]
- if attr.st_size is not None and isinstance(e, FuseArvadosFile):
+ if fields is None:
+ # llfuse < 0.42
+ update_size = attr.st_size is not None
+ else:
+ # llfuse >= 0.42
+ update_size = fields.update_size
+ if update_size and isinstance(e, FuseArvadosFile):
with llfuse.lock_released:
e.arvfile.truncate(attr.st_size)
entry.st_size = e.arvfile.size()
return entry
@catch_exceptions
- def lookup(self, parent_inode, name):
+ def lookup(self, parent_inode, name, ctx=None):
name = unicode(name, self.inodes.encoding)
inode = None
self.inodes.del_entry(ent)
@catch_exceptions
- def open(self, inode, flags):
+ def open(self, inode, flags, ctx=None):
if inode in self.inodes:
p = self.inodes[inode]
else:
self.release(fh)
@catch_exceptions
- def opendir(self, inode):
+ def opendir(self, inode, ctx=None):
_logger.debug("arv-mount opendir: inode %i", inode)
if inode in self.inodes:
e += 1
@catch_exceptions
- def statfs(self):
+ def statfs(self, ctx=None):
st = llfuse.StatvfsData()
st.f_bsize = 128 * 1024
st.f_blocks = 0
return p
@catch_exceptions
- def create(self, inode_parent, name, mode, flags, ctx):
+ def create(self, inode_parent, name, mode, flags, ctx=None):
_logger.debug("arv-mount create: parent_inode %i '%s' %o", inode_parent, name, mode)
p = self._check_writable(inode_parent)
return (fh, self.getattr(f.inode))
@catch_exceptions
- def mkdir(self, inode_parent, name, mode, ctx):
+ def mkdir(self, inode_parent, name, mode, ctx=None):
_logger.debug("arv-mount mkdir: parent_inode %i '%s' %o", inode_parent, name, mode)
p = self._check_writable(inode_parent)
return self.getattr(d.inode)
@catch_exceptions
- def unlink(self, inode_parent, name):
+ def unlink(self, inode_parent, name, ctx=None):
_logger.debug("arv-mount unlink: parent_inode %i '%s'", inode_parent, name)
p = self._check_writable(inode_parent)
p.unlink(name)
@catch_exceptions
- def rmdir(self, inode_parent, name):
+ def rmdir(self, inode_parent, name, ctx=None):
_logger.debug("arv-mount rmdir: parent_inode %i '%s'", inode_parent, name)
p = self._check_writable(inode_parent)
p.rmdir(name)
@catch_exceptions
- def rename(self, inode_parent_old, name_old, inode_parent_new, name_new):
+ def rename(self, inode_parent_old, name_old, inode_parent_new, name_new, ctx=None):
_logger.debug("arv-mount rename: old_parent_inode %i '%s' new_parent_inode %i '%s'", inode_parent_old, name_old, inode_parent_new, name_new)
src = self._check_writable(inode_parent_old)
dest = self._check_writable(inode_parent_new)
--- /dev/null
+import pkg_resources
+
+__version__ = pkg_resources.require('arvados_fuse')[0].version
import arvados.commands._util as arv_cmd
from arvados_fuse import crunchstat
from arvados_fuse import *
+from arvados_fuse._version import __version__
class ArgumentParser(argparse.ArgumentParser):
def __init__(self):
mountpoint before --exec, or mark the end of your --exec arguments
with "--".
""")
+ self.add_argument('--version', action='version',
+ version="%s %s" % (sys.argv[0], __version__),
+ help='Print version and exit.')
self.add_argument('mountpoint', type=str, help="""Mount point.""")
self.add_argument('--allow-other', action='store_true',
help="""Let other users read the mount""")
return self
def __exit__(self, exc_type, exc_value, traceback):
+ if self.operations.events:
+ self.operations.events.close(timeout=self.args.unmount_timeout)
subprocess.call(["fusermount", "-u", "-z", self.args.mountpoint])
self.llfuse_thread.join(timeout=self.args.unmount_timeout)
if self.llfuse_thread.is_alive():
'arvados-python-client >= 0.1.20151118035730',
'llfuse==0.41.1',
'python-daemon',
- 'ciso8601'
+ 'ciso8601',
+ 'setuptools'
],
test_suite='tests',
tests_require=['pbr<1.7.0', 'mock>=1.0', 'PyYAML'],
def tearDown(self):
if self.llfuse_thread:
+ if self.operations.events:
+ self.operations.events.close(timeout=10)
subprocess.call(["fusermount", "-u", "-z", self.mounttmp])
t0 = time.time()
self.llfuse_thread.join(timeout=10)
import arvados_fuse.command
import contextlib
import functools
+import io
import json
import llfuse
import logging
ent = ent[p]
return ent
+ @contextlib.contextmanager
+ def stderrMatches(self, stderr):
+ orig, sys.stderr = sys.stderr, stderr
+ try:
+ yield
+ finally:
+ sys.stderr = orig
+
def check_ent_type(self, cls, *path):
ent = self.lookup(self.mnt, *path)
self.assertEqual(ent.__class__, cls)
run_test_server.fixture('users')['active']['uuid'])
self.assertEqual(True, self.mnt.listen_for_events)
+ def test_version_argument(self):
+ orig, sys.stderr = sys.stderr, io.BytesIO()
+ with self.assertRaises(SystemExit):
+ args = arvados_fuse.command.ArgumentParser().parse_args(['--version'])
+ self.assertRegexpMatches(sys.stderr.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
+ sys.stderr = orig
+
@noexit
@mock.patch('arvados.events.subscribe')
def test_disable_event_listening(self, mock_subscribe):
# check mtime on template
st = os.stat(pipeline_template_path)
- self.assertEqual(st.st_mtime, 1397493304)
+ try:
+ mtime = st.st_mtime_ns / 1000000000
+ except AttributeError:
+ mtime = st.st_mtime
+ self.assertEqual(mtime, 1397493304)
# check mtime on collection
st = os.stat(os.path.join(
self.mounttmp,
'FUSE User',
'collection #1 owned by FUSE'))
- self.assertEqual(st.st_mtime, 1391448174)
+ try:
+ mtime = st.st_mtime_ns / 1000000000
+ except AttributeError:
+ mtime = st.st_mtime
+ self.assertEqual(mtime, 1391448174)
class FuseHomeTest(MountTestBase):
q.put(mockedCurl)
q.put(pycurl.Curl())
q.put(pycurl.Curl())
- with mock.patch('arvados.keep.KeepClient.KeepService._get_user_agent', side_effect=lambda: q.get(block=None)):
+ with mock.patch('arvados.keep.KeepClient.KeepService._get_user_agent', side_effect=q.get_nowait):
self.pool_test(os.path.join(self.mnt, 'zzz'))
self.assertTrue(mockedCurl.perform.called)
@staticmethod
}
if len(errs) > 0 {
// Some other goroutine encountered an
- // error -- any futher effort here
+ // error -- any further effort here
// will be wasted.
return
}
Logger: s.logger(c),
}
s.stub.serveCurrentUserAdmin()
- s.stub.serveFooBarFileCollections()
+ collReqs := s.stub.serveFooBarFileCollections()
s.stub.serveFourDiskKeepServices()
s.stub.serveKeepstoreIndexFoo4Bar1()
trashReqs := s.stub.serveKeepstoreTrash()
var bal Balancer
_, err := bal.Run(s.config, opts)
c.Check(err, check.IsNil)
+ for _, req := range collReqs.reqs {
+ c.Check(req.Form.Get("include_trash"), check.Equals, "true")
+ }
c.Check(trashReqs.Count(), check.Equals, 0)
c.Check(pullReqs.Count(), check.Equals, 0)
stats := bal.getStatistics()
progress = func(_, _ int) {}
}
- expectCount, err := countCollections(c, arvados.ResourceListParams{})
+ expectCount, err := countCollections(c, arvados.ResourceListParams{
+ IncludeTrash: true,
+ })
if err != nil {
return err
}
limit = 1<<31 - 1
}
params := arvados.ResourceListParams{
- Limit: &limit,
- Order: "modified_at, uuid",
- Select: []string{"uuid", "unsigned_manifest_text", "modified_at", "portable_data_hash", "replication_desired"},
+ Limit: &limit,
+ Order: "modified_at, uuid",
+ Select: []string{"uuid", "unsigned_manifest_text", "modified_at", "portable_data_hash", "replication_desired"},
+ IncludeTrash: true,
}
var last arvados.Collection
var filterTime time.Time
}
progress(callCount, expectCount)
- if checkCount, err := countCollections(c, arvados.ResourceListParams{Filters: []arvados.Filter{{
- Attr: "modified_at",
- Operator: "<=",
- Operand: filterTime}}}); err != nil {
+ if checkCount, err := countCollections(c, arvados.ResourceListParams{
+ Filters: []arvados.Filter{{
+ Attr: "modified_at",
+ Operator: "<=",
+ Operand: filterTime}},
+ IncludeTrash: true,
+ }); err != nil {
return err
} else if callCount < checkCount {
return fmt.Errorf("Retrieved %d collections with modtime <= T=%q, but server now reports there are %d collections with modtime <= T", callCount, filterTime, checkCount)
[Service]
Type=simple
-ExecStart=/usr/bin/keep-balance -config /etc/arvados/keep-balance/keep-balance.yml -commit-pulls -commit-trash
+ExecStart=/usr/bin/keep-balance -commit-pulls -commit-trash
Restart=always
RestartSec=10s
"git.curoverse.com/arvados.git/sdk/go/config"
)
+const defaultConfigPath = "/etc/arvados/keep-balance/keep-balance.yml"
+
// Config specifies site configuration, like API credentials and the
// choice of which servers are to be balanced.
//
var config Config
var runOptions RunOptions
- configPath := flag.String("config", "",
+ configPath := flag.String("config", defaultConfigPath,
"`path` of JSON or YAML configuration file")
serviceListPath := flag.String("config.KeepServiceList", "",
"`path` of JSON or YAML file with list of keep services to balance, as given by \"arv keep_service list\" "+
flag.Usage = usage
flag.Parse()
- if *configPath == "" {
- log.Fatal("You must specify a config file (see `keep-balance -help`)")
- }
mustReadConfig(&config, *configPath)
if *serviceListPath != "" {
mustReadConfig(&config.KeepServiceList, *serviceListPath)
positions (according to the rendezvous hash algorithm) so clients find
them faster.
-Usage: keep-balance -config path/to/keep-balance.yml [options]
+Usage: keep-balance [options]
Options:
`)
RequestTimeout arvados.Duration
azClient storage.Client
- bsClient storage.BlobStorageClient
+ bsClient *azureBlobClient
}
// Examples implements VolumeWithExamples.
v.azClient.HTTPClient = &http.Client{
Timeout: time.Duration(v.RequestTimeout),
}
- v.bsClient = v.azClient.GetBlobService()
+ bs := v.azClient.GetBlobService()
+ v.bsClient = &azureBlobClient{
+ client: &bs,
+ }
ok, err := v.bsClient.ContainerExists(v.ContainerName)
if err != nil {
}
var deadline time.Time
haveDeadline := false
- size, err := v.get(loc, buf)
+ size, err := v.get(ctx, loc, buf)
for err == nil && size == 0 && loc != "d41d8cd98f00b204e9800998ecf8427e" {
// Seeing a brand new empty block probably means we're
// in a race with CreateBlob, which under the hood
} else if time.Now().After(deadline) {
break
}
- time.Sleep(azureWriteRacePollTime)
- size, err = v.get(loc, buf)
+ select {
+ case <-ctx.Done():
+ return 0, ctx.Err()
+ case <-time.After(azureWriteRacePollTime):
+ }
+ size, err = v.get(ctx, loc, buf)
}
if haveDeadline {
log.Printf("Race ended with size==%d", size)
return size, err
}
-func (v *AzureBlobVolume) get(loc string, buf []byte) (int, error) {
+func (v *AzureBlobVolume) get(ctx context.Context, loc string, buf []byte) (int, error) {
+ ctx, cancel := context.WithCancel(ctx)
+ defer cancel()
expectSize := len(buf)
if azureMaxGetBytes < BlockSize {
// Unfortunately the handler doesn't tell us how long the blob
// We'll update this actualSize if/when we get the last piece.
actualSize := -1
pieces := (expectSize + azureMaxGetBytes - 1) / azureMaxGetBytes
- errors := make([]error, pieces)
+ errors := make(chan error, pieces)
var wg sync.WaitGroup
wg.Add(pieces)
for p := 0; p < pieces; p++ {
+ // Each goroutine retrieves one piece. If we hit an
+ // error, it is sent to the errors chan so get() can
+ // return it -- but only if the error happens before
+ // ctx is done. This way, if ctx is done before we hit
+ // any other error (e.g., requesting client has hung
+ // up), we return the original ctx.Err() instead of
+ // the secondary errors from the transfers that got
+ // interrupted as a result.
go func(p int) {
defer wg.Done()
startPos := p * azureMaxGetBytes
}
var rdr io.ReadCloser
var err error
- if startPos == 0 && endPos == expectSize {
- rdr, err = v.bsClient.GetBlob(v.ContainerName, loc)
- } else {
- rdr, err = v.bsClient.GetBlobRange(v.ContainerName, loc, fmt.Sprintf("%d-%d", startPos, endPos-1), nil)
+ gotRdr := make(chan struct{})
+ go func() {
+ defer close(gotRdr)
+ if startPos == 0 && endPos == expectSize {
+ rdr, err = v.bsClient.GetBlob(v.ContainerName, loc)
+ } else {
+ rdr, err = v.bsClient.GetBlobRange(v.ContainerName, loc, fmt.Sprintf("%d-%d", startPos, endPos-1), nil)
+ }
+ }()
+ select {
+ case <-ctx.Done():
+ go func() {
+ <-gotRdr
+ if err == nil {
+ rdr.Close()
+ }
+ }()
+ return
+ case <-gotRdr:
}
if err != nil {
- errors[p] = err
+ errors <- err
+ cancel()
return
}
- defer rdr.Close()
+ go func() {
+ // Close the reader when the client
+ // hangs up or another piece fails
+ // (possibly interrupting ReadFull())
+ // or when all pieces succeed and
+ // get() returns.
+ <-ctx.Done()
+ rdr.Close()
+ }()
n, err := io.ReadFull(rdr, buf[startPos:endPos])
if pieces == 1 && (err == io.ErrUnexpectedEOF || err == io.EOF) {
// If we don't know the actual size,
// and just tried reading 64 MiB, it's
// normal to encounter EOF.
} else if err != nil {
- errors[p] = err
+ if ctx.Err() == nil {
+ errors <- err
+ }
+ cancel()
+ return
}
if p == pieces-1 {
actualSize = startPos + n
}(p)
}
wg.Wait()
- for _, err := range errors {
- if err != nil {
- return 0, v.translateError(err)
- }
+ close(errors)
+ if len(errors) > 0 {
+ return 0, v.translateError(<-errors)
+ }
+ if ctx.Err() != nil {
+ return 0, ctx.Err()
}
return actualSize, nil
}
if trashed {
return os.ErrNotExist
}
- rdr, err := v.bsClient.GetBlob(v.ContainerName, loc)
+ var rdr io.ReadCloser
+ gotRdr := make(chan struct{})
+ go func() {
+ defer close(gotRdr)
+ rdr, err = v.bsClient.GetBlob(v.ContainerName, loc)
+ }()
+ select {
+ case <-ctx.Done():
+ go func() {
+ <-gotRdr
+ if err == nil {
+ rdr.Close()
+ }
+ }()
+ return ctx.Err()
+ case <-gotRdr:
+ }
if err != nil {
return v.translateError(err)
}
if v.ReadOnly {
return MethodDisabledError
}
- return v.bsClient.CreateBlockBlobFromReader(v.ContainerName, loc, uint64(len(block)), bytes.NewReader(block), nil)
+ // Send the block data through a pipe, so that (if we need to)
+ // we can close the pipe early and abandon our
+ // CreateBlockBlobFromReader() goroutine, without worrying
+ // about CreateBlockBlobFromReader() accessing our block
+ // buffer after we release it.
+ bufr, bufw := io.Pipe()
+ go func() {
+ io.Copy(bufw, bytes.NewReader(block))
+ bufw.Close()
+ }()
+ errChan := make(chan error)
+ go func() {
+ errChan <- v.bsClient.CreateBlockBlobFromReader(v.ContainerName, loc, uint64(len(block)), bufr, nil)
+ }()
+ select {
+ case <-ctx.Done():
+ theConfig.debugLogf("%s: taking CreateBlockBlobFromReader's input away: %s", v, ctx.Err())
+		// Our io.Copy() goroutine might be stuck in a pipe
+		// Write(), waiting for a reader. If so, un-stick it. This means
+ // CreateBlockBlobFromReader will get corrupt data,
+ // but that's OK: the size won't match, so the write
+ // will fail.
+ go io.Copy(ioutil.Discard, bufr)
+ // CloseWithError() will return once pending I/O is done.
+ bufw.CloseWithError(ctx.Err())
+ theConfig.debugLogf("%s: abandoning CreateBlockBlobFromReader goroutine", v)
+ return ctx.Err()
+ case err := <-errChan:
+ return err
+ }
}
// Touch updates the last-modified property of a block blob.
log.Printf("EmptyTrash stats for %v: Deleted %v bytes in %v blocks. Remaining in trash: %v bytes in %v blocks.", v.String(), bytesDeleted, blocksDeleted, bytesInTrash-bytesDeleted, blocksInTrash-blocksDeleted)
}
+
+// InternalStats returns Azure container I/O and API call counters.
+func (v *AzureBlobVolume) InternalStats() interface{} {
+ return &v.bsClient.stats
+}
+
+type azureBlobStats struct {
+ statsTicker
+ Ops uint64
+ GetOps uint64
+ GetRangeOps uint64
+ GetMetadataOps uint64
+ GetPropertiesOps uint64
+ CreateOps uint64
+ SetMetadataOps uint64
+ DelOps uint64
+ ListOps uint64
+}
+
+func (s *azureBlobStats) TickErr(err error) {
+ if err == nil {
+ return
+ }
+ errType := fmt.Sprintf("%T", err)
+ if err, ok := err.(storage.AzureStorageServiceError); ok {
+ errType = errType + fmt.Sprintf(" %d (%s)", err.StatusCode, err.Code)
+ }
+ log.Printf("errType %T, err %s", err, err)
+ s.statsTicker.TickErr(err, errType)
+}
+
+// azureBlobClient wraps storage.BlobStorageClient in order to count
+// I/O and API usage stats.
+type azureBlobClient struct {
+ client *storage.BlobStorageClient
+ stats azureBlobStats
+}
+
+func (c *azureBlobClient) ContainerExists(cname string) (bool, error) {
+ c.stats.Tick(&c.stats.Ops)
+ ok, err := c.client.ContainerExists(cname)
+ c.stats.TickErr(err)
+ return ok, err
+}
+
+func (c *azureBlobClient) GetBlobMetadata(cname, bname string) (map[string]string, error) {
+ c.stats.Tick(&c.stats.Ops, &c.stats.GetMetadataOps)
+ m, err := c.client.GetBlobMetadata(cname, bname)
+ c.stats.TickErr(err)
+ return m, err
+}
+
+func (c *azureBlobClient) GetBlobProperties(cname, bname string) (*storage.BlobProperties, error) {
+ c.stats.Tick(&c.stats.Ops, &c.stats.GetPropertiesOps)
+ p, err := c.client.GetBlobProperties(cname, bname)
+ c.stats.TickErr(err)
+ return p, err
+}
+
+func (c *azureBlobClient) GetBlob(cname, bname string) (io.ReadCloser, error) {
+ c.stats.Tick(&c.stats.Ops, &c.stats.GetOps)
+ rdr, err := c.client.GetBlob(cname, bname)
+ c.stats.TickErr(err)
+ return NewCountingReader(rdr, c.stats.TickInBytes), err
+}
+
+func (c *azureBlobClient) GetBlobRange(cname, bname, byterange string, hdrs map[string]string) (io.ReadCloser, error) {
+ c.stats.Tick(&c.stats.Ops, &c.stats.GetRangeOps)
+ rdr, err := c.client.GetBlobRange(cname, bname, byterange, hdrs)
+ c.stats.TickErr(err)
+ return NewCountingReader(rdr, c.stats.TickInBytes), err
+}
+
+func (c *azureBlobClient) CreateBlockBlobFromReader(cname, bname string, size uint64, rdr io.Reader, hdrs map[string]string) error {
+ c.stats.Tick(&c.stats.Ops, &c.stats.CreateOps)
+ rdr = NewCountingReader(rdr, c.stats.TickOutBytes)
+ err := c.client.CreateBlockBlobFromReader(cname, bname, size, rdr, hdrs)
+ c.stats.TickErr(err)
+ return err
+}
+
+func (c *azureBlobClient) SetBlobMetadata(cname, bname string, m, hdrs map[string]string) error {
+ c.stats.Tick(&c.stats.Ops, &c.stats.SetMetadataOps)
+ err := c.client.SetBlobMetadata(cname, bname, m, hdrs)
+ c.stats.TickErr(err)
+ return err
+}
+
+func (c *azureBlobClient) ListBlobs(cname string, params storage.ListBlobsParameters) (storage.BlobListResponse, error) {
+ c.stats.Tick(&c.stats.Ops, &c.stats.ListOps)
+ resp, err := c.client.ListBlobs(cname, params)
+ c.stats.TickErr(err)
+ return resp, err
+}
+
+func (c *azureBlobClient) DeleteBlob(cname, bname string, hdrs map[string]string) error {
+ c.stats.Tick(&c.stats.Ops, &c.stats.DelOps)
+ err := c.client.DeleteBlob(cname, bname, hdrs)
+ c.stats.TickErr(err)
+ return err
+}
"context"
"crypto/md5"
"encoding/base64"
+ "encoding/json"
"encoding/xml"
"flag"
"fmt"
log "github.com/Sirupsen/logrus"
"github.com/curoverse/azure-sdk-for-go/storage"
+ check "gopkg.in/check.v1"
)
const (
- // The same fake credentials used by Microsoft's Azure emulator
- emulatorAccountName = "devstoreaccount1"
- emulatorAccountKey = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
+ // This cannot be the fake account name "devstoreaccount1"
+ // used by Microsoft's Azure emulator: the Azure SDK
+ // recognizes that magic string and changes its behavior to
+ // cater to the Azure SDK's own test suite.
+ fakeAccountName = "fakeAccountName"
+ fakeAccountKey = "Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw=="
)
var azureTestContainer string
// Connect to stub instead of real Azure storage service
stubURLBase := strings.Split(azStub.URL, "://")[1]
var err error
- if azClient, err = storage.NewClient(emulatorAccountName, emulatorAccountKey, stubURLBase, storage.DefaultAPIVersion, false); err != nil {
+ if azClient, err = storage.NewClient(fakeAccountName, fakeAccountKey, stubURLBase, storage.DefaultAPIVersion, false); err != nil {
t.Fatal(err)
}
container = "fakecontainername"
}
}
+ bs := azClient.GetBlobService()
v := &AzureBlobVolume{
ContainerName: container,
ReadOnly: readonly,
AzureReplication: replication,
azClient: azClient,
- bsClient: azClient.GetBlobService(),
+ bsClient: &azureBlobClient{client: &bs},
}
return &TestableAzureBlobVolume{
}
}
+var _ = check.Suite(&StubbedAzureBlobSuite{})
+
+type StubbedAzureBlobSuite struct {
+ volume *TestableAzureBlobVolume
+ origHTTPTransport http.RoundTripper
+}
+
+func (s *StubbedAzureBlobSuite) SetUpTest(c *check.C) {
+ s.origHTTPTransport = http.DefaultTransport
+ http.DefaultTransport = &http.Transport{
+ Dial: (&azStubDialer{}).Dial,
+ }
+ azureWriteRaceInterval = time.Millisecond
+ azureWriteRacePollTime = time.Nanosecond
+
+ s.volume = NewTestableAzureBlobVolume(c, false, 3)
+}
+
+func (s *StubbedAzureBlobSuite) TearDownTest(c *check.C) {
+ s.volume.Teardown()
+ http.DefaultTransport = s.origHTTPTransport
+}
+
func TestAzureBlobVolumeWithGeneric(t *testing.T) {
defer func(t http.RoundTripper) {
http.DefaultTransport = t
}
gotHash := fmt.Sprintf("%x", md5.Sum(gotData))
if gotLen != size {
- t.Error("length mismatch: got %d != %d", gotLen, size)
+ t.Errorf("length mismatch: got %d != %d", gotLen, size)
}
if gotHash != hash {
- t.Error("hash mismatch: got %s != %s", gotHash, hash)
+ t.Errorf("hash mismatch: got %s != %s", gotHash, hash)
}
}
}
}
}
+func TestAzureBlobVolumeContextCancelGet(t *testing.T) {
+ testAzureBlobVolumeContextCancel(t, func(ctx context.Context, v *TestableAzureBlobVolume) error {
+ v.PutRaw(TestHash, TestBlock)
+ _, err := v.Get(ctx, TestHash, make([]byte, BlockSize))
+ return err
+ })
+}
+
+func TestAzureBlobVolumeContextCancelPut(t *testing.T) {
+ testAzureBlobVolumeContextCancel(t, func(ctx context.Context, v *TestableAzureBlobVolume) error {
+ return v.Put(ctx, TestHash, make([]byte, BlockSize))
+ })
+}
+
+func TestAzureBlobVolumeContextCancelCompare(t *testing.T) {
+ testAzureBlobVolumeContextCancel(t, func(ctx context.Context, v *TestableAzureBlobVolume) error {
+ v.PutRaw(TestHash, TestBlock)
+ return v.Compare(ctx, TestHash, TestBlock2)
+ })
+}
+
+func testAzureBlobVolumeContextCancel(t *testing.T, testFunc func(context.Context, *TestableAzureBlobVolume) error) {
+ defer func(t http.RoundTripper) {
+ http.DefaultTransport = t
+ }(http.DefaultTransport)
+ http.DefaultTransport = &http.Transport{
+ Dial: (&azStubDialer{}).Dial,
+ }
+
+ v := NewTestableAzureBlobVolume(t, false, 3)
+ defer v.Teardown()
+ v.azHandler.race = make(chan chan struct{})
+
+ ctx, cancel := context.WithCancel(context.Background())
+ allDone := make(chan struct{})
+ go func() {
+ defer close(allDone)
+ err := testFunc(ctx, v)
+ if err != context.Canceled {
+ t.Errorf("got %T %q, expected %q", err, err, context.Canceled)
+ }
+ }()
+ releaseHandler := make(chan struct{})
+ select {
+ case <-allDone:
+ t.Error("testFunc finished without waiting for v.azHandler.race")
+ case <-time.After(10 * time.Second):
+ t.Error("timed out waiting to enter handler")
+ case v.azHandler.race <- releaseHandler:
+ }
+
+ cancel()
+
+ select {
+ case <-time.After(10 * time.Second):
+ t.Error("timed out waiting to cancel")
+ case <-allDone:
+ }
+
+ go func() {
+ <-releaseHandler
+ }()
+}
+
+func (s *StubbedAzureBlobSuite) TestStats(c *check.C) {
+ stats := func() string {
+ buf, err := json.Marshal(s.volume.InternalStats())
+ c.Check(err, check.IsNil)
+ return string(buf)
+ }
+
+ c.Check(stats(), check.Matches, `.*"Ops":0,.*`)
+ c.Check(stats(), check.Matches, `.*"Errors":0,.*`)
+
+ loc := "acbd18db4cc2f85cedef654fccc4a4d8"
+ _, err := s.volume.Get(context.Background(), loc, make([]byte, 3))
+ c.Check(err, check.NotNil)
+ c.Check(stats(), check.Matches, `.*"Ops":[^0],.*`)
+ c.Check(stats(), check.Matches, `.*"Errors":[^0],.*`)
+ c.Check(stats(), check.Matches, `.*"storage\.AzureStorageServiceError 404 \(404 Not Found\)":[^0].*`)
+ c.Check(stats(), check.Matches, `.*"InBytes":0,.*`)
+
+ err = s.volume.Put(context.Background(), loc, []byte("foo"))
+ c.Check(err, check.IsNil)
+ c.Check(stats(), check.Matches, `.*"OutBytes":3,.*`)
+ c.Check(stats(), check.Matches, `.*"CreateOps":1,.*`)
+
+ _, err = s.volume.Get(context.Background(), loc, make([]byte, 3))
+ c.Check(err, check.IsNil)
+ _, err = s.volume.Get(context.Background(), loc, make([]byte, 3))
+ c.Check(err, check.IsNil)
+ c.Check(stats(), check.Matches, `.*"InBytes":6,.*`)
+}
+
func (v *TestableAzureBlobVolume) PutRaw(locator string, data []byte) {
v.azHandler.PutRaw(v.ContainerName, locator, data)
}
"sync"
"time"
+ "git.curoverse.com/arvados.git/sdk/go/httpserver"
log "github.com/Sirupsen/logrus"
)
-// MakeRESTRouter returns a new mux.Router that forwards all Keep
-// requests to the appropriate handlers.
-//
-func MakeRESTRouter() *mux.Router {
+type router struct {
+ *mux.Router
+ limiter httpserver.RequestCounter
+}
+
+// MakeRESTRouter returns a new router that forwards all Keep requests
+// to the appropriate handlers.
+func MakeRESTRouter() *router {
rest := mux.NewRouter()
+ rtr := &router{Router: rest}
rest.HandleFunc(
`/{hash:[0-9a-f]{32}}`, GetBlockHandler).Methods("GET", "HEAD")
rest.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, IndexHandler).Methods("GET", "HEAD")
// Internals/debugging info (runtime.MemStats)
- rest.HandleFunc(`/debug.json`, DebugHandler).Methods("GET", "HEAD")
+ rest.HandleFunc(`/debug.json`, rtr.DebugHandler).Methods("GET", "HEAD")
// List volumes: path, device number, bytes used/avail.
- rest.HandleFunc(`/status.json`, StatusHandler).Methods("GET", "HEAD")
+ rest.HandleFunc(`/status.json`, rtr.StatusHandler).Methods("GET", "HEAD")
// Replace the current pull queue.
rest.HandleFunc(`/pull`, PullHandler).Methods("PUT")
// 400 Bad Request.
rest.NotFoundHandler = http.HandlerFunc(BadRequestHandler)
- return rest
+ return rtr
}
// BadRequestHandler is a HandleFunc to address bad requests.
// NodeStatus struct
type NodeStatus struct {
- Volumes []*volumeStatusEnt
- BufferPool PoolStatus
- PullQueue WorkQueueStatus
- TrashQueue WorkQueueStatus
+ Volumes []*volumeStatusEnt
+ BufferPool PoolStatus
+ PullQueue WorkQueueStatus
+ TrashQueue WorkQueueStatus
+ RequestsCurrent int
+ RequestsMax int
}
var st NodeStatus
var stLock sync.Mutex
// DebugHandler addresses /debug.json requests.
-func DebugHandler(resp http.ResponseWriter, req *http.Request) {
+func (rtr *router) DebugHandler(resp http.ResponseWriter, req *http.Request) {
type debugStats struct {
MemStats runtime.MemStats
}
}
// StatusHandler addresses /status.json requests.
-func StatusHandler(resp http.ResponseWriter, req *http.Request) {
+func (rtr *router) StatusHandler(resp http.ResponseWriter, req *http.Request) {
stLock.Lock()
- readNodeStatus(&st)
+ rtr.readNodeStatus(&st)
jstat, err := json.Marshal(&st)
stLock.Unlock()
if err == nil {
}
// populate the given NodeStatus struct with current values.
-func readNodeStatus(st *NodeStatus) {
+func (rtr *router) readNodeStatus(st *NodeStatus) {
vols := KeepVM.AllReadable()
if cap(st.Volumes) < len(vols) {
st.Volumes = make([]*volumeStatusEnt, len(vols))
st.BufferPool.Len = bufs.Len()
st.PullQueue = getWorkQueueStatus(pullq)
st.TrashQueue = getWorkQueueStatus(trashq)
+ if rtr.limiter != nil {
+ st.RequestsCurrent = rtr.limiter.Current()
+ st.RequestsMax = rtr.limiter.Max()
+ }
}
// return a WorkQueueStatus for the given queue. If q is nil (which
KeepVM = MakeRRVolumeManager(theConfig.Volumes)
// Middleware stack: logger, MaxRequests limiter, method handlers
- http.Handle("/", &LoggingRESTRouter{
- router: httpserver.NewRequestLimiter(theConfig.MaxRequests, MakeRESTRouter()),
- })
+ router := MakeRESTRouter()
+ limiter := httpserver.NewRequestLimiter(theConfig.MaxRequests, router)
+ router.limiter = limiter
+ http.Handle("/", &LoggingRESTRouter{router: limiter})
// Set up a TCP listener.
listener, err := net.Listen("tcp", theConfig.Listen)
import (
"context"
- "fmt"
"net/http"
"strings"
"time"
"git.curoverse.com/arvados.git/sdk/go/httpserver"
+ "git.curoverse.com/arvados.git/sdk/go/stats"
log "github.com/Sirupsen/logrus"
)
}
lgr.WithFields(log.Fields{
- "timeTotal": loggedDuration(tDone.Sub(tStart)),
- "timeToStatus": loggedDuration(resp.sentHdr.Sub(tStart)),
- "timeWriteBody": loggedDuration(tDone.Sub(resp.sentHdr)),
+ "timeTotal": stats.Duration(tDone.Sub(tStart)),
+ "timeToStatus": stats.Duration(resp.sentHdr.Sub(tStart)),
+ "timeWriteBody": stats.Duration(tDone.Sub(resp.sentHdr)),
"respStatusCode": resp.Status,
"respStatus": statusText,
"respBytes": resp.Length,
}).Info("response")
}
-
-type loggedDuration time.Duration
-
-// MarshalJSON formats a duration as a number of seconds, using
-// fixed-point notation with no more than 6 decimal places.
-func (d loggedDuration) MarshalJSON() ([]byte, error) {
- return []byte(d.String()), nil
-}
-
-// String formats a duration as a number of seconds, using
-// fixed-point notation with no more than 6 decimal places.
-func (d loggedDuration) String() string {
- return fmt.Sprintf("%.6f", time.Duration(d).Seconds())
-}
--- /dev/null
+package main
+
+import (
+ "bytes"
+ "context"
+ "io"
+ "io/ioutil"
+)
+
+// getWithPipe invokes br.ReadBlock and copies the resulting data
+// into buf. If ctx is done before all data is copied, getWithPipe
+// closes the pipe with an error, and returns early with an error.
+func getWithPipe(ctx context.Context, loc string, buf []byte, br BlockReader) (int, error) {
+ piper, pipew := io.Pipe()
+ go func() {
+ pipew.CloseWithError(br.ReadBlock(ctx, loc, pipew))
+ }()
+ done := make(chan struct{})
+ var size int
+ var err error
+ go func() {
+ size, err = io.ReadFull(piper, buf)
+ if err == io.EOF || err == io.ErrUnexpectedEOF {
+ err = nil
+ }
+ close(done)
+ }()
+ select {
+ case <-ctx.Done():
+ piper.CloseWithError(ctx.Err())
+ return 0, ctx.Err()
+ case <-done:
+ piper.Close()
+ return size, err
+ }
+}
+
+// putWithPipe invokes bw.WriteBlock with a new pipe, and copies data
+// from buf into the pipe. If ctx is done before all data is copied,
+// putWithPipe closes the pipe with an error, and returns early with
+// an error.
+func putWithPipe(ctx context.Context, loc string, buf []byte, bw BlockWriter) error {
+ piper, pipew := io.Pipe()
+ copyErr := make(chan error)
+ go func() {
+ _, err := io.Copy(pipew, bytes.NewReader(buf))
+ copyErr <- err
+ close(copyErr)
+ }()
+
+ putErr := make(chan error, 1)
+ go func() {
+ putErr <- bw.WriteBlock(ctx, loc, piper)
+ close(putErr)
+ }()
+
+ var err error
+ select {
+ case err = <-copyErr:
+ case err = <-putErr:
+ case <-ctx.Done():
+ err = ctx.Err()
+ }
+
+ // Ensure io.Copy goroutine isn't blocked writing to pipew
+ // (otherwise, io.Copy is still using buf so it isn't safe to
+ // return). This can cause pipew to receive corrupt data if
+ // err came from copyErr or ctx.Done() before the copy
+ // finished. That's OK, though: in that case err != nil, and
+// CloseWithError(err) ensures putter() will get an error from
+ // piper.Read() before seeing EOF.
+ go pipew.CloseWithError(err)
+ go io.Copy(ioutil.Discard, piper)
+ <-copyErr
+
+ // Note: io.Copy() is finished now, but putter() might still
+ // be running. If we encounter an error before putter()
+ // returns, we return right away without waiting for putter().
+
+ if err != nil {
+ return err
+ }
+ select {
+ case <-ctx.Done():
+ return ctx.Err()
+ case err = <-putErr:
+ return err
+ }
+}
"regexp"
"strings"
"sync"
- "sync/atomic"
"time"
"git.curoverse.com/arvados.git/sdk/go/arvados"
Prefix: "recent/" + prefix,
PageSize: v.IndexPageSize,
}
- v.bucket.stats.tick(&v.bucket.stats.Ops, &v.bucket.stats.ListOps)
- v.bucket.stats.tick(&v.bucket.stats.Ops, &v.bucket.stats.ListOps)
+ v.bucket.stats.Tick(&v.bucket.stats.Ops, &v.bucket.stats.ListOps)
+ v.bucket.stats.Tick(&v.bucket.stats.Ops, &v.bucket.stats.ListOps)
for data, recent := dataL.First(), recentL.First(); data != nil; data = dataL.Next() {
- v.bucket.stats.tick(&v.bucket.stats.Ops, &v.bucket.stats.ListOps)
+ v.bucket.stats.Tick(&v.bucket.stats.Ops, &v.bucket.stats.ListOps)
if data.Key >= "g" {
// Conveniently, "recent/*" and "trash/*" are
// lexically greater than all hex-encoded data
for recent != nil {
if cmp := strings.Compare(recent.Key[7:], data.Key); cmp < 0 {
recent = recentL.Next()
- v.bucket.stats.tick(&v.bucket.stats.Ops, &v.bucket.stats.ListOps)
+ v.bucket.stats.Tick(&v.bucket.stats.Ops, &v.bucket.stats.ListOps)
continue
} else if cmp == 0 {
stamp = recent
recent = recentL.Next()
- v.bucket.stats.tick(&v.bucket.stats.Ops, &v.bucket.stats.ListOps)
+ v.bucket.stats.Tick(&v.bucket.stats.Ops, &v.bucket.stats.ListOps)
break
} else {
// recent/X marker is missing: we'll
func (b *s3bucket) GetReader(path string) (io.ReadCloser, error) {
rdr, err := b.Bucket.GetReader(path)
- b.stats.tick(&b.stats.Ops, &b.stats.GetOps)
- b.stats.tickErr(err)
- return NewCountingReader(rdr, b.stats.tickInBytes), err
+ b.stats.Tick(&b.stats.Ops, &b.stats.GetOps)
+ b.stats.TickErr(err)
+ return NewCountingReader(rdr, b.stats.TickInBytes), err
}
func (b *s3bucket) Head(path string, headers map[string][]string) (*http.Response, error) {
resp, err := b.Bucket.Head(path, headers)
- b.stats.tick(&b.stats.Ops, &b.stats.HeadOps)
- b.stats.tickErr(err)
+ b.stats.Tick(&b.stats.Ops, &b.stats.HeadOps)
+ b.stats.TickErr(err)
return resp, err
}
func (b *s3bucket) PutReader(path string, r io.Reader, length int64, contType string, perm s3.ACL, options s3.Options) error {
- err := b.Bucket.PutReader(path, NewCountingReader(r, b.stats.tickOutBytes), length, contType, perm, options)
- b.stats.tick(&b.stats.Ops, &b.stats.PutOps)
- b.stats.tickErr(err)
+ err := b.Bucket.PutReader(path, NewCountingReader(r, b.stats.TickOutBytes), length, contType, perm, options)
+ b.stats.Tick(&b.stats.Ops, &b.stats.PutOps)
+ b.stats.TickErr(err)
return err
}
func (b *s3bucket) Put(path string, data []byte, contType string, perm s3.ACL, options s3.Options) error {
- err := b.Bucket.PutReader(path, NewCountingReader(bytes.NewBuffer(data), b.stats.tickOutBytes), int64(len(data)), contType, perm, options)
- b.stats.tick(&b.stats.Ops, &b.stats.PutOps)
- b.stats.tickErr(err)
+ err := b.Bucket.PutReader(path, NewCountingReader(bytes.NewBuffer(data), b.stats.TickOutBytes), int64(len(data)), contType, perm, options)
+ b.stats.Tick(&b.stats.Ops, &b.stats.PutOps)
+ b.stats.TickErr(err)
return err
}
func (b *s3bucket) Del(path string) error {
err := b.Bucket.Del(path)
- b.stats.tick(&b.stats.Ops, &b.stats.DelOps)
- b.stats.tickErr(err)
+ b.stats.Tick(&b.stats.Ops, &b.stats.DelOps)
+ b.stats.TickErr(err)
return err
}
type s3bucketStats struct {
- Errors uint64
- Ops uint64
- GetOps uint64
- PutOps uint64
- HeadOps uint64
- DelOps uint64
- ListOps uint64
- InBytes uint64
- OutBytes uint64
-
- ErrorCodes map[string]uint64 `json:",omitempty"`
-
- lock sync.Mutex
-}
-
-func (s *s3bucketStats) tickInBytes(n uint64) {
- atomic.AddUint64(&s.InBytes, n)
-}
-
-func (s *s3bucketStats) tickOutBytes(n uint64) {
- atomic.AddUint64(&s.OutBytes, n)
+ statsTicker
+ Ops uint64
+ GetOps uint64
+ PutOps uint64
+ HeadOps uint64
+ DelOps uint64
+ ListOps uint64
}
-func (s *s3bucketStats) tick(counters ...*uint64) {
- for _, counter := range counters {
- atomic.AddUint64(counter, 1)
- }
-}
-
-func (s *s3bucketStats) tickErr(err error) {
+func (s *s3bucketStats) TickErr(err error) {
if err == nil {
return
}
- atomic.AddUint64(&s.Errors, 1)
- errStr := fmt.Sprintf("%T", err)
+ errType := fmt.Sprintf("%T", err)
if err, ok := err.(*s3.Error); ok {
- errStr = errStr + fmt.Sprintf(" %d %s", err.StatusCode, err.Code)
- }
- s.lock.Lock()
- if s.ErrorCodes == nil {
- s.ErrorCodes = make(map[string]uint64)
+ errType = errType + fmt.Sprintf(" %d %s", err.StatusCode, err.Code)
}
- s.ErrorCodes[errStr]++
- s.lock.Unlock()
+ s.statsTicker.TickErr(err, errType)
}
"encoding/json"
"fmt"
"io/ioutil"
+ "net/http"
+ "net/http/httptest"
"os"
"time"
c.Check(stats(), check.Matches, `.*"InBytes":6,.*`)
}
+type blockingHandler struct {
+ requested chan *http.Request
+ unblock chan struct{}
+}
+
+func (h *blockingHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+ if h.requested != nil {
+ h.requested <- r
+ }
+ if h.unblock != nil {
+ <-h.unblock
+ }
+ http.Error(w, "nothing here", http.StatusNotFound)
+}
+
+func (s *StubbedS3Suite) TestGetContextCancel(c *check.C) {
+ loc := "acbd18db4cc2f85cedef654fccc4a4d8"
+ buf := make([]byte, 3)
+
+ s.testContextCancel(c, func(ctx context.Context, v *TestableS3Volume) error {
+ _, err := v.Get(ctx, loc, buf)
+ return err
+ })
+}
+
+func (s *StubbedS3Suite) TestCompareContextCancel(c *check.C) {
+ loc := "acbd18db4cc2f85cedef654fccc4a4d8"
+ buf := []byte("bar")
+
+ s.testContextCancel(c, func(ctx context.Context, v *TestableS3Volume) error {
+ return v.Compare(ctx, loc, buf)
+ })
+}
+
+func (s *StubbedS3Suite) TestPutContextCancel(c *check.C) {
+ loc := "acbd18db4cc2f85cedef654fccc4a4d8"
+ buf := []byte("foo")
+
+ s.testContextCancel(c, func(ctx context.Context, v *TestableS3Volume) error {
+ return v.Put(ctx, loc, buf)
+ })
+}
+
+func (s *StubbedS3Suite) testContextCancel(c *check.C, testFunc func(context.Context, *TestableS3Volume) error) {
+ handler := &blockingHandler{}
+ srv := httptest.NewServer(handler)
+ defer srv.Close()
+
+ v := s.newTestableVolume(c, 5*time.Minute, false, 2)
+ vol := *v.S3Volume
+ vol.Endpoint = srv.URL
+ v = &TestableS3Volume{S3Volume: &vol}
+ v.Start()
+
+ ctx, cancel := context.WithCancel(context.Background())
+
+ handler.requested = make(chan *http.Request)
+ handler.unblock = make(chan struct{})
+ defer close(handler.unblock)
+
+ doneFunc := make(chan struct{})
+ go func() {
+ err := testFunc(ctx, v)
+ c.Check(err, check.Equals, context.Canceled)
+ close(doneFunc)
+ }()
+
+ timeout := time.After(10 * time.Second)
+
+ // Wait for the stub server to receive a request, meaning
+ // Get() is waiting for an s3 operation.
+ select {
+ case <-timeout:
+ c.Fatal("timed out waiting for test func to call our handler")
+ case <-doneFunc:
+ c.Fatal("test func finished without even calling our handler!")
+ case <-handler.requested:
+ }
+
+ cancel()
+
+ select {
+ case <-timeout:
+ c.Fatal("timed out")
+ case <-doneFunc:
+ }
+}
+
func (s *StubbedS3Suite) TestBackendStates(c *check.C) {
defer func(tl, bs arvados.Duration) {
theConfig.TrashLifetime = tl
srv, err := s3test.NewServer(&s3test.Config{Clock: clock})
c.Assert(err, check.IsNil)
- tmp, err := ioutil.TempFile("", "keepstore")
- c.Assert(err, check.IsNil)
- defer os.Remove(tmp.Name())
- _, err = tmp.Write([]byte("xxx\n"))
- c.Assert(err, check.IsNil)
- c.Assert(tmp.Close(), check.IsNil)
-
v := &TestableS3Volume{
S3Volume: &S3Volume{
Bucket: TestBucketName,
- AccessKeyFile: tmp.Name(),
- SecretKeyFile: tmp.Name(),
Endpoint: srv.URL(),
Region: "test-region-1",
LocationConstraint: true,
ReadOnly: readonly,
IndexPageSize: 1000,
},
+ c: c,
server: srv,
serverClock: clock,
}
- c.Assert(v.Start(), check.IsNil)
+ v.Start()
err = v.bucket.PutBucket(s3.ACL("private"))
c.Assert(err, check.IsNil)
return v
}
+func (v *TestableS3Volume) Start() error {
+ tmp, err := ioutil.TempFile("", "keepstore")
+ v.c.Assert(err, check.IsNil)
+ defer os.Remove(tmp.Name())
+ _, err = tmp.Write([]byte("xxx\n"))
+ v.c.Assert(err, check.IsNil)
+ v.c.Assert(tmp.Close(), check.IsNil)
+
+ v.S3Volume.AccessKeyFile = tmp.Name()
+ v.S3Volume.SecretKeyFile = tmp.Name()
+
+ v.c.Assert(v.S3Volume.Start(), check.IsNil)
+ return nil
+}
+
// PutRaw skips the ContentMD5 test
func (v *TestableS3Volume) PutRaw(loc string, block []byte) {
err := v.bucket.Put(loc, block, "application/octet-stream", s3ACL, s3.Options{})
--- /dev/null
+package main
+
+import (
+ "sync"
+ "sync/atomic"
+)
+
+type statsTicker struct {
+ Errors uint64
+ InBytes uint64
+ OutBytes uint64
+
+ ErrorCodes map[string]uint64 `json:",omitempty"`
+ lock sync.Mutex
+}
+
+// Tick increments each of the given counters by 1 using
+// atomic.AddUint64.
+func (s *statsTicker) Tick(counters ...*uint64) {
+ for _, counter := range counters {
+ atomic.AddUint64(counter, 1)
+ }
+}
+
+// TickErr increments the overall error counter, as well as the
+// ErrorCodes entry for the given errType. If err is nil, TickErr is a
+// no-op.
+func (s *statsTicker) TickErr(err error, errType string) {
+ if err == nil {
+ return
+ }
+ s.Tick(&s.Errors)
+
+ s.lock.Lock()
+ if s.ErrorCodes == nil {
+ s.ErrorCodes = make(map[string]uint64)
+ }
+ s.ErrorCodes[errType]++
+ s.lock.Unlock()
+}
+
+// TickInBytes increments the incoming byte counter by n.
+func (s *statsTicker) TickInBytes(n uint64) {
+ atomic.AddUint64(&s.InBytes, n)
+}
+
+// TickOutBytes increments the outgoing byte counter by n.
+func (s *statsTicker) TickOutBytes(n uint64) {
+ atomic.AddUint64(&s.OutBytes, n)
+}
"time"
)
+type BlockWriter interface {
+	// WriteBlock reads all data from r and writes it to a backing
+	// store as "loc", returning a non-nil error if the write fails.
+ WriteBlock(ctx context.Context, loc string, r io.Reader) error
+}
+
+type BlockReader interface {
+ // ReadBlock retrieves data previously stored as "loc" and
+ // writes it to w.
+ ReadBlock(ctx context.Context, loc string, w io.Writer) error
+}
+
// A Volume is an interface representing a Keep back-end storage unit:
// for example, a single mounted disk, a RAID array, an Amazon S3 volume,
// etc.
// something to lock during IO, typically a sync.Mutex (or nil
// to skip locking)
locker sync.Locker
+
+ os osWithStats
}
// Examples implements VolumeWithExamples.
if v.DirectoryReplication == 0 {
v.DirectoryReplication = 1
}
- _, err := os.Stat(v.Root)
+ _, err := v.os.Stat(v.Root)
return err
}
return MethodDisabledError
}
p := v.blockPath(loc)
- f, err := os.OpenFile(p, os.O_RDWR|os.O_APPEND, 0644)
+ f, err := v.os.OpenFile(p, os.O_RDWR|os.O_APPEND, 0644)
if err != nil {
return err
}
defer f.Close()
- if v.locker != nil {
- v.locker.Lock()
- defer v.locker.Unlock()
+ if err := v.lock(context.TODO()); err != nil {
+ return err
}
- if e := lockfile(f); e != nil {
+ defer v.unlock()
+ if e := v.lockfile(f); e != nil {
return e
}
- defer unlockfile(f)
+ defer v.unlockfile(f)
ts := syscall.NsecToTimespec(time.Now().UnixNano())
- return syscall.UtimesNano(p, []syscall.Timespec{ts, ts})
+ v.os.stats.Tick(&v.os.stats.UtimesOps)
+ err = syscall.UtimesNano(p, []syscall.Timespec{ts, ts})
+ v.os.stats.TickErr(err)
+ return err
}
// Mtime returns the stored timestamp for the given locator.
func (v *UnixVolume) Mtime(loc string) (time.Time, error) {
p := v.blockPath(loc)
- fi, err := os.Stat(p)
+ fi, err := v.os.Stat(p)
if err != nil {
return time.Time{}, err
}
// Lock the locker (if one is in use), open the file for reading, and
// call the given function if and when the file is ready to read.
func (v *UnixVolume) getFunc(ctx context.Context, path string, fn func(io.Reader) error) error {
- if v.locker != nil {
- v.locker.Lock()
- defer v.locker.Unlock()
- }
- if ctx.Err() != nil {
- return ctx.Err()
+ if err := v.lock(ctx); err != nil {
+ return err
}
- f, err := os.Open(path)
+ defer v.unlock()
+ f, err := v.os.Open(path)
if err != nil {
return err
}
defer f.Close()
- return fn(f)
+ return fn(NewCountingReader(ioutil.NopCloser(f), v.os.stats.TickInBytes))
}
// stat is os.Stat() with some extra sanity checks.
func (v *UnixVolume) stat(path string) (os.FileInfo, error) {
- stat, err := os.Stat(path)
+ stat, err := v.os.Stat(path)
if err == nil {
if stat.Size() < 0 {
err = os.ErrInvalid
// Get retrieves a block, copies it to the given slice, and returns
// the number of bytes copied.
func (v *UnixVolume) Get(ctx context.Context, loc string, buf []byte) (int, error) {
+ return getWithPipe(ctx, loc, buf, v)
+}
+
+// ReadBlock implements BlockReader.
+func (v *UnixVolume) ReadBlock(ctx context.Context, loc string, w io.Writer) error {
path := v.blockPath(loc)
stat, err := v.stat(path)
if err != nil {
- return 0, v.translateError(err)
- }
- if stat.Size() > int64(len(buf)) {
- return 0, TooLongError
+ return v.translateError(err)
}
- var read int
- size := int(stat.Size())
- err = v.getFunc(ctx, path, func(rdr io.Reader) error {
- read, err = io.ReadFull(rdr, buf[:size])
+ return v.getFunc(ctx, path, func(rdr io.Reader) error {
+ n, err := io.Copy(w, rdr)
+ if err == nil && n != stat.Size() {
+ err = io.ErrUnexpectedEOF
+ }
return err
})
- return read, err
}
// Compare returns nil if Get(loc) would return the same content as
// returns a FullError. If the write fails due to some other error,
// that error is returned.
func (v *UnixVolume) Put(ctx context.Context, loc string, block []byte) error {
+ return putWithPipe(ctx, loc, block, v)
+}
+
+// WriteBlock implements BlockWriter.
+func (v *UnixVolume) WriteBlock(ctx context.Context, loc string, rdr io.Reader) error {
if v.ReadOnly {
return MethodDisabledError
}
return err
}
- tmpfile, tmperr := ioutil.TempFile(bdir, "tmp"+loc)
+ tmpfile, tmperr := v.os.TempFile(bdir, "tmp"+loc)
if tmperr != nil {
log.Printf("ioutil.TempFile(%s, tmp%s): %s", bdir, loc, tmperr)
return tmperr
}
+
bpath := v.blockPath(loc)
- if v.locker != nil {
- v.locker.Lock()
- defer v.locker.Unlock()
- }
- select {
- case <-ctx.Done():
- return ctx.Err()
- default:
+ if err := v.lock(ctx); err != nil {
+ return err
}
- if _, err := tmpfile.Write(block); err != nil {
+ defer v.unlock()
+ n, err := io.Copy(tmpfile, rdr)
+ v.os.stats.TickOutBytes(uint64(n))
+ if err != nil {
log.Printf("%s: writing to %s: %s\n", v, bpath, err)
tmpfile.Close()
- os.Remove(tmpfile.Name())
+ v.os.Remove(tmpfile.Name())
return err
}
if err := tmpfile.Close(); err != nil {
log.Printf("closing %s: %s\n", tmpfile.Name(), err)
- os.Remove(tmpfile.Name())
+ v.os.Remove(tmpfile.Name())
return err
}
-	if err := os.Rename(tmpfile.Name(), bpath); err != nil {
+	if err := v.os.Rename(tmpfile.Name(), bpath); err != nil {
 		log.Printf("rename %s %s: %s\n", tmpfile.Name(), bpath, err)
-		os.Remove(tmpfile.Name())
-		return err
+		// Clean up the temp file (best effort) but return the
+		// rename error itself: returning the Remove() result
+		// would mask the rename failure whenever cleanup
+		// succeeds.
+		v.os.Remove(tmpfile.Name())
+		return err
 	}
return nil
}
// current state, or nil if an error occurs.
//
func (v *UnixVolume) Status() *VolumeStatus {
- var fs syscall.Statfs_t
- var devnum uint64
-
- if fi, err := os.Stat(v.Root); err == nil {
- devnum = fi.Sys().(*syscall.Stat_t).Dev
- } else {
+ fi, err := v.os.Stat(v.Root)
+ if err != nil {
log.Printf("%s: os.Stat: %s\n", v, err)
return nil
}
+ devnum := fi.Sys().(*syscall.Stat_t).Dev
- err := syscall.Statfs(v.Root, &fs)
- if err != nil {
+ var fs syscall.Statfs_t
+ if err := syscall.Statfs(v.Root, &fs); err != nil {
log.Printf("%s: statfs: %s\n", v, err)
return nil
}
//
func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error {
var lastErr error
- rootdir, err := os.Open(v.Root)
+ rootdir, err := v.os.Open(v.Root)
if err != nil {
return err
}
defer rootdir.Close()
+ v.os.stats.Tick(&v.os.stats.ReaddirOps)
for {
names, err := rootdir.Readdirnames(1)
if err == io.EOF {
continue
}
blockdirpath := filepath.Join(v.Root, names[0])
- blockdir, err := os.Open(blockdirpath)
+ blockdir, err := v.os.Open(blockdirpath)
if err != nil {
log.Print("Error reading ", blockdirpath, ": ", err)
lastErr = err
continue
}
+ v.os.stats.Tick(&v.os.stats.ReaddirOps)
for {
fileInfo, err := blockdir.Readdir(1)
if err == io.EOF {
if v.ReadOnly {
return MethodDisabledError
}
- if v.locker != nil {
- v.locker.Lock()
- defer v.locker.Unlock()
+ if err := v.lock(context.TODO()); err != nil {
+ return err
}
+ defer v.unlock()
p := v.blockPath(loc)
- f, err := os.OpenFile(p, os.O_RDWR|os.O_APPEND, 0644)
+ f, err := v.os.OpenFile(p, os.O_RDWR|os.O_APPEND, 0644)
if err != nil {
return err
}
defer f.Close()
- if e := lockfile(f); e != nil {
+ if e := v.lockfile(f); e != nil {
return e
}
- defer unlockfile(f)
+ defer v.unlockfile(f)
// If the block has been PUT in the last blobSignatureTTL
// seconds, return success without removing the block. This
// protects data from garbage collection until it is no longer
// possible for clients to retrieve the unreferenced blocks
// anyway (because the permission signatures have expired).
- if fi, err := os.Stat(p); err != nil {
+ if fi, err := v.os.Stat(p); err != nil {
return err
} else if time.Since(fi.ModTime()) < time.Duration(theConfig.BlobSignatureTTL) {
return nil
}
if theConfig.TrashLifetime == 0 {
- return os.Remove(p)
+ return v.os.Remove(p)
}
- return os.Rename(p, fmt.Sprintf("%v.trash.%d", p, time.Now().Add(theConfig.TrashLifetime.Duration()).Unix()))
+ return v.os.Rename(p, fmt.Sprintf("%v.trash.%d", p, time.Now().Add(theConfig.TrashLifetime.Duration()).Unix()))
}
// Untrash moves block from trash back into store
return MethodDisabledError
}
+ v.os.stats.Tick(&v.os.stats.ReaddirOps)
files, err := ioutil.ReadDir(v.blockDir(loc))
if err != nil {
return err
for _, f := range files {
if strings.HasPrefix(f.Name(), prefix) {
foundTrash = true
- err = os.Rename(v.blockPath(f.Name()), v.blockPath(loc))
+ err = v.os.Rename(v.blockPath(f.Name()), v.blockPath(loc))
if err == nil {
break
}
return v.DirectoryReplication
}
+// InternalStats returns I/O and filesystem ops counters.
+func (v *UnixVolume) InternalStats() interface{} {
+ return &v.os.stats
+}
+
+// lock acquires the serialize lock, if one is in use. If ctx is done
+// before the lock is acquired, lock returns ctx.Err() instead of
+// acquiring the lock.
+func (v *UnixVolume) lock(ctx context.Context) error {
+	if v.locker == nil {
+		return nil
+	}
+	// Acquire the lock in a helper goroutine so we can stop
+	// waiting when ctx is cancelled first.
+	locked := make(chan struct{})
+	go func() {
+		v.locker.Lock()
+		close(locked)
+	}()
+	select {
+	case <-ctx.Done():
+		// The helper goroutine will still acquire the lock
+		// eventually; release it as soon as that happens so
+		// the locker is not left held forever.
+		go func() {
+			<-locked
+			v.locker.Unlock()
+		}()
+		return ctx.Err()
+	case <-locked:
+		return nil
+	}
+}
+
+// unlock releases the serialize lock, if one is in use. It must only
+// be called after a successful lock().
+func (v *UnixVolume) unlock() {
+	if v.locker == nil {
+		return
+	}
+	v.locker.Unlock()
+}
+
// lockfile and unlockfile use flock(2) to manage kernel file locks.
-func lockfile(f *os.File) error {
- return syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
+func (v *UnixVolume) lockfile(f *os.File) error {
+ v.os.stats.Tick(&v.os.stats.FlockOps)
+ err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
+ v.os.stats.TickErr(err)
+ return err
}
-func unlockfile(f *os.File) error {
- return syscall.Flock(int(f.Fd()), syscall.LOCK_UN)
+func (v *UnixVolume) unlockfile(f *os.File) error {
+ err := syscall.Flock(int(f.Fd()), syscall.LOCK_UN)
+ v.os.stats.TickErr(err)
+ return err
}
// Where appropriate, translate a more specific filesystem error to an
if deadline > time.Now().Unix() {
return nil
}
- err = os.Remove(path)
+ err = v.os.Remove(path)
if err != nil {
log.Printf("EmptyTrash: Remove %v: %v", path, err)
return nil
log.Printf("EmptyTrash stats for %v: Deleted %v bytes in %v blocks. Remaining in trash: %v bytes in %v blocks.", v.String(), bytesDeleted, blocksDeleted, bytesInTrash-bytesDeleted, blocksInTrash-blocksDeleted)
}
+
+// unixStats extends the generic statsTicker counters (bytes in/out,
+// error totals) with per-operation counters for the filesystem calls
+// a UnixVolume performs via osWithStats.
+type unixStats struct {
+	statsTicker
+	OpenOps uint64
+	StatOps uint64
+	FlockOps uint64
+	UtimesOps uint64
+	CreateOps uint64
+	RenameOps uint64
+	UnlinkOps uint64
+	ReaddirOps uint64
+}
+
+// TickErr counts err unless it is nil. Errors are categorized by
+// their concrete Go type name (e.g. "*os.PathError").
+func (s *unixStats) TickErr(err error) {
+	if err == nil {
+		return
+	}
+	s.statsTicker.TickErr(err, fmt.Sprintf("%T", err))
+}
+
+// osWithStats wraps os/ioutil filesystem calls, counting operations
+// and their errors in an embedded unixStats.
+type osWithStats struct {
+	stats unixStats
+}
+
+// Open is os.Open plus OpenOps and error accounting.
+func (o *osWithStats) Open(name string) (*os.File, error) {
+	o.stats.Tick(&o.stats.OpenOps)
+	f, err := os.Open(name)
+	o.stats.TickErr(err)
+	return f, err
+}
+
+// OpenFile is os.OpenFile plus OpenOps and error accounting.
+func (o *osWithStats) OpenFile(name string, flag int, perm os.FileMode) (*os.File, error) {
+	o.stats.Tick(&o.stats.OpenOps)
+	f, err := os.OpenFile(name, flag, perm)
+	o.stats.TickErr(err)
+	return f, err
+}
+
+// Remove is os.Remove plus UnlinkOps and error accounting.
+func (o *osWithStats) Remove(path string) error {
+	o.stats.Tick(&o.stats.UnlinkOps)
+	err := os.Remove(path)
+	o.stats.TickErr(err)
+	return err
+}
+
+// Rename is os.Rename plus RenameOps and error accounting.
+func (o *osWithStats) Rename(a, b string) error {
+	o.stats.Tick(&o.stats.RenameOps)
+	err := os.Rename(a, b)
+	o.stats.TickErr(err)
+	return err
+}
+
+// Stat is os.Stat plus StatOps and error accounting.
+func (o *osWithStats) Stat(path string) (os.FileInfo, error) {
+	o.stats.Tick(&o.stats.StatOps)
+	fi, err := os.Stat(path)
+	o.stats.TickErr(err)
+	return fi, err
+}
+
+// TempFile is ioutil.TempFile plus CreateOps and error accounting.
+func (o *osWithStats) TempFile(dir, base string) (*os.File, error) {
+	o.stats.Tick(&o.stats.CreateOps)
+	f, err := ioutil.TempFile(dir, base)
+	o.stats.TickErr(err)
+	return f, err
+}
import (
"bytes"
"context"
+ "encoding/json"
"errors"
"fmt"
"io"
"syscall"
"testing"
"time"
+
+ check "gopkg.in/check.v1"
)
type TestableUnixVolume struct {
}
}
-// TODO(twp): show that the underlying Read/Write operations executed
-// serially and not concurrently. The easiest way to do this is
-// probably to activate verbose or debug logging, capture log output
-// and examine it to confirm that Reads and Writes did not overlap.
-//
-// TODO(twp): a proper test of I/O serialization requires that a
-// second request start while the first one is still underway.
-// Guaranteeing that the test behaves this way requires some tricky
-// synchronization and mocking. For now we'll just launch a bunch of
-// requests simultaenously in goroutines and demonstrate that they
-// return accurate results.
+// TestUnixVolumeContextCancelPut verifies that a Put blocked on the
+// volume's serialize lock returns context.Canceled when its context
+// is cancelled before the lock becomes available.
+func TestUnixVolumeContextCancelPut(t *testing.T) {
+	v := NewTestableUnixVolume(t, true, false)
+	defer v.Teardown()
+	// Hold the serialize lock so Put blocks waiting for it.
+	v.locker.Lock()
+	ctx, cancel := context.WithCancel(context.Background())
+	go func() {
+		time.Sleep(50 * time.Millisecond)
+		cancel()
+		time.Sleep(50 * time.Millisecond)
+		// Release after cancellation so the background lock
+		// goroutine can acquire and release cleanly.
+		v.locker.Unlock()
+	}()
+	err := v.Put(ctx, TestHash, TestBlock)
+	if err != context.Canceled {
+		t.Errorf("Put() returned %s -- expected short read / canceled", err)
+	}
+}
+
+// TestUnixVolumeContextCancelGet verifies that a Get blocked reading
+// block data returns context.Canceled when its context is cancelled.
+func TestUnixVolumeContextCancelGet(t *testing.T) {
+	v := NewTestableUnixVolume(t, false, false)
+	defer v.Teardown()
+	bpath := v.blockPath(TestHash)
+	v.PutRaw(TestHash, TestBlock)
+	// Replace the block file with a FIFO that has no writer, so
+	// the read side blocks until the context is cancelled.
+	os.Remove(bpath)
+	err := syscall.Mkfifo(bpath, 0600)
+	if err != nil {
+		t.Fatalf("Mkfifo %s: %s", bpath, err)
+	}
+	defer os.Remove(bpath)
+	ctx, cancel := context.WithCancel(context.Background())
+	go func() {
+		time.Sleep(50 * time.Millisecond)
+		cancel()
+	}()
+	buf := make([]byte, len(TestBlock))
+	n, err := v.Get(ctx, TestHash, buf)
+	if n == len(TestBlock) || err != context.Canceled {
+		t.Errorf("Get() returned %d, %s -- expected short read / canceled", n, err)
+	}
+}
+
+// Register the suite with the gocheck runner.
+var _ = check.Suite(&UnixVolumeSuite{})
+
+// UnixVolumeSuite holds per-test state for gocheck-style UnixVolume
+// tests.
+type UnixVolumeSuite struct {
+	volume *TestableUnixVolume
+}
+
+// TearDownTest removes the test volume, if one was created, after
+// each test in the suite.
+func (s *UnixVolumeSuite) TearDownTest(c *check.C) {
+	if s.volume != nil {
+		s.volume.Teardown()
+	}
+}
+
+// TestStats checks that InternalStats() counters track the
+// filesystem operations performed by Get/Put/Touch/Compare/Trash.
+func (s *UnixVolumeSuite) TestStats(c *check.C) {
+	s.volume = NewTestableUnixVolume(c, false, false)
+	// stats renders the current counters as JSON so individual
+	// fields can be checked with regexps.
+	stats := func() string {
+		buf, err := json.Marshal(s.volume.InternalStats())
+		c.Check(err, check.IsNil)
+		return string(buf)
+	}
+
+	c.Check(stats(), check.Matches, `.*"StatOps":0,.*`)
+	c.Check(stats(), check.Matches, `.*"Errors":0,.*`)
+
+	// Get of a nonexistent block: a stat fails (*os.PathError),
+	// but nothing is opened or created and no data is read.
+	loc := "acbd18db4cc2f85cedef654fccc4a4d8"
+	_, err := s.volume.Get(context.Background(), loc, make([]byte, 3))
+	c.Check(err, check.NotNil)
+	c.Check(stats(), check.Matches, `.*"StatOps":[^0],.*`)
+	c.Check(stats(), check.Matches, `.*"Errors":[^0],.*`)
+	c.Check(stats(), check.Matches, `.*"\*os\.PathError":[^0].*`)
+	c.Check(stats(), check.Matches, `.*"InBytes":0,.*`)
+	c.Check(stats(), check.Matches, `.*"OpenOps":0,.*`)
+	c.Check(stats(), check.Matches, `.*"CreateOps":0,.*`)
+
+	// Put writes 3 bytes through a temp file: one create, no
+	// opens, no utimes.
+	err = s.volume.Put(context.Background(), loc, []byte("foo"))
+	c.Check(err, check.IsNil)
+	c.Check(stats(), check.Matches, `.*"OutBytes":3,.*`)
+	c.Check(stats(), check.Matches, `.*"CreateOps":1,.*`)
+	c.Check(stats(), check.Matches, `.*"OpenOps":0,.*`)
+	c.Check(stats(), check.Matches, `.*"UtimesOps":0,.*`)
+
+	// Touch opens the block file, flocks it, and updates its
+	// timestamps.
+	err = s.volume.Touch(loc)
+	c.Check(err, check.IsNil)
+	c.Check(stats(), check.Matches, `.*"FlockOps":1,.*`)
+	c.Check(stats(), check.Matches, `.*"OpenOps":1,.*`)
+	c.Check(stats(), check.Matches, `.*"UtimesOps":1,.*`)
+
+	// Get and Compare each read the 3-byte block (3+3 = 6 bytes
+	// in, two more opens).
+	_, err = s.volume.Get(context.Background(), loc, make([]byte, 3))
+	c.Check(err, check.IsNil)
+	err = s.volume.Compare(context.Background(), loc, []byte("foo"))
+	c.Check(err, check.IsNil)
+	c.Check(stats(), check.Matches, `.*"InBytes":6,.*`)
+	c.Check(stats(), check.Matches, `.*"OpenOps":3,.*`)
+
+	// Trash flocks the block file again.
+	err = s.volume.Trash(loc)
+	c.Check(err, check.IsNil)
+	c.Check(stats(), check.Matches, `.*"FlockOps":2,.*`)
+}
--- /dev/null
+import pkg_resources
+
+__version__ = pkg_resources.require('arvados-node-manager')[0].version
create_kwargs = create_kwargs.copy()
create_kwargs.setdefault('external_ip', None)
create_kwargs.setdefault('ex_metadata', {})
+ self._project = auth_kwargs.get("project")
super(ComputeNodeDriver, self).__init__(
auth_kwargs, list_kwargs, create_kwargs,
driver_class)
def _init_image(self, image_name):
return 'image', self.search_for(
- image_name, 'list_images', self._name_key)
+ image_name, 'list_images', self._name_key, ex_project=self._project)
def _init_network(self, network_name):
return 'ex_network', self.search_for(
import daemon
import pykka
+import libcloud
from . import config as nmconfig
from .baseactor import WatchdogActor
from .jobqueue import JobQueueMonitorActor, ServerCalculator
from .nodelist import ArvadosNodeListMonitorActor, CloudNodeListMonitorActor
from .timedcallback import TimedCallBackActor
+from ._version import __version__
node_daemon = None
parser = argparse.ArgumentParser(
prog='arvados-node-manager',
description="Dynamically allocate Arvados cloud compute nodes")
+ parser.add_argument(
+ '--version', action='version',
+ version="%s %s" % (sys.argv[0], __version__),
+ help='Print version and exit.')
parser.add_argument(
'--foreground', action='store_true', default=False,
help="Run in the foreground. Don't daemonize.")
for logger_name, sublevel in sublevels.iteritems():
sublogger = logging.getLogger(logger_name)
sublogger.setLevel(sublevel)
+ return root_logger
def build_server_calculator(config):
cloud_size_list = config.node_sizes(config.new_cloud_client().list_sizes())
signal.signal(sigcode, shutdown_signal)
try:
- setup_logging(config.get('Logging', 'file'), **config.log_levels())
+ root_logger = setup_logging(config.get('Logging', 'file'), **config.log_levels())
+ root_logger.info("%s %s, libcloud %s", sys.argv[0], __version__, libcloud.__version__)
node_setup, node_shutdown, node_update, node_monitor = \
config.dispatch_classes()
server_calculator = build_server_calculator(config)
'arvados-python-client>=0.1.20150206225333',
'pykka',
'python-daemon',
+ 'setuptools'
],
dependency_links = [
"https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.1.dev4.zip"
--- /dev/null
+#!/usr/bin/env python
+
+import io
+import os
+import sys
+import tempfile
+import unittest
+
+import arvnodeman.launcher as nodeman
+from . import testutil
+
+class ArvNodemArgumentsTestCase(unittest.TestCase):
+    """Tests for arvados-node-manager command-line argument handling."""
+
+    def run_nodeman(self, args):
+        # Invoke the launcher's main() directly with the given argv.
+        return nodeman.main(args)
+
+    def test_unsupported_arg(self):
+        # Unknown options make argparse exit with SystemExit.
+        with self.assertRaises(SystemExit):
+            self.run_nodeman(['-x=unknown'])
+
+    def test_version_argument(self):
+        # --version prints "<prog> <version>" and exits; argparse on
+        # Python 2 writes version text to stderr, so stdout stays
+        # empty and stderr holds an X.Y.Z version string.
+        err = io.BytesIO()
+        out = io.BytesIO()
+        with testutil.redirected_streams(stdout=out, stderr=err):
+            with self.assertRaises(SystemExit):
+                self.run_nodeman(['--version'])
+        self.assertEqual(out.getvalue(), '')
+        self.assertRegexpMatches(err.getvalue(), "[0-9]+\.[0-9]+\.[0-9]+")
from __future__ import absolute_import, print_function
+import contextlib
import datetime
+import mock
+import pykka
+import sys
import threading
import time
import libcloud.common.types as cloud_types
-import mock
-import pykka
from . import pykka_timeout
def ip_address_mock(last_octet):
return '10.20.30.{}'.format(last_octet)
+@contextlib.contextmanager
+def redirected_streams(stdout=None, stderr=None):
+    """Context manager: temporarily swap sys.stdout and/or sys.stderr.
+
+    A None argument leaves that stream unchanged.  The original
+    streams are restored on exit, even if the body raises.
+    """
+    orig_stdout, sys.stdout = sys.stdout, stdout or sys.stdout
+    orig_stderr, sys.stderr = sys.stderr, stderr or sys.stderr
+    try:
+        yield
+    finally:
+        sys.stdout = orig_stdout
+        sys.stderr = orig_stderr
+
+
class MockShutdownTimer(object):
def _set_state(self, is_open, next_opening):
self.window_open = lambda: is_open
--- /dev/null
+[Unit]
+Description=Arvados websocket server
+Documentation=https://doc.arvados.org/
+After=network.target
+AssertPathExists=/etc/arvados/ws/ws.yml
+
+[Service]
+Type=notify
+ExecStart=/usr/bin/arvados-ws
+Restart=always
+
+[Install]
+WantedBy=multi-user.target
--- /dev/null
+package main
+
+import (
+ "time"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+// wsConfig is the arvados-ws configuration, loaded from
+// /etc/arvados/ws/ws.yml (or the -config path).
+type wsConfig struct {
+	// Client identifies the Arvados API host used for permission
+	// checks.
+	Client arvados.Client
+	// Postgres holds libpq connection parameters for the database
+	// whose "logs" table is watched.
+	Postgres pgConfig
+	// Listen is the address:port the HTTP/websocket server binds.
+	Listen string
+	LogLevel string
+	LogFormat string
+
+	// PingTimeout: ping/write-deadline interval used by the
+	// websocket connection handler.
+	PingTimeout arvados.Duration
+	// Per-client and server-wide event queue capacities.
+	ClientEventQueue int
+	ServerEventQueue int
+}
+
+// defaultConfig returns the built-in defaults, used as the base
+// before the YAML config file is merged in.
+func defaultConfig() wsConfig {
+	return wsConfig{
+		Client: arvados.Client{
+			APIHost: "localhost:443",
+		},
+		Postgres: pgConfig{
+			"dbname": "arvados_production",
+			"user": "arvados",
+			"password": "xyzzy",
+			"host": "localhost",
+			"connect_timeout": "30",
+			"sslmode": "require",
+		},
+		LogLevel: "info",
+		LogFormat: "json",
+		PingTimeout: arvados.Duration(time.Minute),
+		ClientEventQueue: 64,
+		ServerEventQueue: 4,
+	}
+}
--- /dev/null
+// Arvados-ws exposes Arvados APIs (currently just one, the
+// cache-invalidation event feed at "ws://.../websocket") to
+// websocket clients.
+//
+// Installation
+//
+// See https://doc.arvados.org/install/install-ws.html.
+//
+// Developer info
+//
+// See https://dev.arvados.org/projects/arvados/wiki/Hacking_websocket_server.
+//
+// Usage
+//
+// arvados-ws [-config /etc/arvados/ws/ws.yml] [-dump-config]
+//
+// Minimal configuration
+//
+// Client:
+// APIHost: localhost:443
+// Listen: ":1234"
+// Postgres:
+// dbname: arvados_production
+// host: localhost
+// password: xyzzy
+// user: arvados
+//
+// Options
+//
+// -config path
+//
+// Load configuration from the given file instead of the default
+// /etc/arvados/ws/ws.yml
+//
+// -dump-config
+//
+// Print the loaded configuration to stdout and exit.
+//
+// Logs
+//
+// Logs are printed to stderr, formatted as JSON.
+//
+// A log is printed each time a client connects or disconnects.
+//
+// Enable additional logs by configuring:
+//
+// LogLevel: debug
+//
+// Runtime status
+//
+// GET /debug.json responds with debug stats.
+//
+// GET /status.json responds with health check results and
+// activity/usage metrics.
+package main
--- /dev/null
+package main
+
+import (
+ "database/sql"
+ "sync"
+ "time"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
+ "github.com/ghodss/yaml"
+)
+
+// eventSink is a subscription to an event stream.
+type eventSink interface {
+	// Channel returns the channel events are delivered on.
+	Channel() <-chan *event
+	// Stop cancels the subscription and closes the channel.
+	Stop()
+}
+
+// eventSource produces events and provides database access.
+type eventSource interface {
+	NewSink() eventSink
+	DB() *sql.DB
+}
+
+// event represents one change notification for the logs table.
+type event struct {
+	// LogID is the id of the logs table row.
+	LogID uint64
+	// Received is when the notification arrived; Ready is set
+	// once the row has been fetched and the event can be sent.
+	Received time.Time
+	Ready time.Time
+	// Serial is a sequence number assigned by the event source.
+	Serial uint64
+
+	db *sql.DB
+	// logRow/err memoize the single Detail() attempt; mtx
+	// serializes concurrent Detail() callers.
+	logRow *arvados.Log
+	err error
+	mtx sync.Mutex
+}
+
+// Detail returns the database row corresponding to the event. It can
+// be called safely from multiple goroutines. Only one attempt will be
+// made. If the database row cannot be retrieved, Detail returns nil.
+func (e *event) Detail() *arvados.Log {
+	e.mtx.Lock()
+	defer e.mtx.Unlock()
+	// A previous call already succeeded or failed; return the
+	// memoized result instead of querying again.
+	if e.logRow != nil || e.err != nil {
+		return e.logRow
+	}
+	var logRow arvados.Log
+	var propYAML []byte
+	e.err = e.db.QueryRow(`SELECT id, uuid, object_uuid, COALESCE(object_owner_uuid,''), COALESCE(event_type,''), event_at, created_at, properties FROM logs WHERE id = $1`, e.LogID).Scan(
+		&logRow.ID,
+		&logRow.UUID,
+		&logRow.ObjectUUID,
+		&logRow.ObjectOwnerUUID,
+		&logRow.EventType,
+		&logRow.EventAt,
+		&logRow.CreatedAt,
+		&propYAML)
+	if e.err != nil {
+		logger(nil).WithField("LogID", e.LogID).WithError(e.err).Error("QueryRow failed")
+		return nil
+	}
+	// The properties column is serialized YAML; decode it into
+	// the log row's Properties field.
+	e.err = yaml.Unmarshal(propYAML, &logRow.Properties)
+	if e.err != nil {
+		logger(nil).WithField("LogID", e.LogID).WithError(e.err).Error("yaml decode failed")
+		return nil
+	}
+	e.logRow = &logRow
+	return e.logRow
+}
--- /dev/null
+package main
+
+import (
+ "database/sql"
+ "strconv"
+ "strings"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "git.curoverse.com/arvados.git/sdk/go/stats"
+ "github.com/lib/pq"
+)
+
+// pgConfig is a set of libpq connection parameters (dbname, user,
+// password, host, ...).
+type pgConfig map[string]string
+
+// ConnectionString renders the parameters as a libpq keyword='value'
+// string, quoting each value and escaping backslashes and single
+// quotes inside it.
+func (c pgConfig) ConnectionString() string {
+	s := ""
+	for k, v := range c {
+		s += k
+		s += "='"
+		s += strings.Replace(
+			strings.Replace(v, `\`, `\\`, -1),
+			`'`, `\'`, -1)
+		s += "' "
+	}
+	return s
+}
+
+// pgEventSource listens for postgres NOTIFY events on the "logs"
+// channel and distributes them to subscribed sinks.
+type pgEventSource struct {
+	DataSource string
+	QueueSize int
+
+	db *sql.DB
+	pqListener *pq.Listener
+	// queue buffers events between the listener loop and the
+	// fan-out dispatcher (see run()).
+	queue chan *event
+	// sinks is the set of current subscribers; guarded by mtx.
+	sinks map[*pgEventSink]bool
+	setupOnce sync.Once
+	mtx sync.Mutex
+	shutdown chan error
+
+	// Counters/timings reported by DebugStatus().
+	lastQDelay time.Duration
+	eventsIn uint64
+	eventsOut uint64
+}
+
+var _ debugStatuser = (*pgEventSource)(nil)
+
+// setup opens the database and the postgres LISTEN connection, then
+// starts the run() loop. It is invoked exactly once via setupOnce.
+func (ps *pgEventSource) setup() {
+	ps.shutdown = make(chan error, 1)
+	ps.sinks = make(map[*pgEventSink]bool)
+
+	db, err := sql.Open("postgres", ps.DataSource)
+	if err != nil {
+		logger(nil).WithError(err).Fatal("sql.Open failed")
+	}
+	// Fail fast at startup if the database is unreachable.
+	if err = db.Ping(); err != nil {
+		logger(nil).WithError(err).Fatal("db.Ping failed")
+	}
+	ps.db = db
+
+	ps.pqListener = pq.NewListener(ps.DataSource, time.Second, time.Minute, func(ev pq.ListenerEventType, err error) {
+		if err != nil {
+			// Until we have a mechanism for catching up
+			// on missed events, we cannot recover from a
+			// dropped connection without breaking our
+			// promises to clients.
+			logger(nil).WithError(err).Error("listener problem")
+			ps.shutdown <- err
+		}
+	})
+	err = ps.pqListener.Listen("logs")
+	if err != nil {
+		logger(nil).WithError(err).Fatal("pq Listen failed")
+	}
+	logger(nil).Debug("pgEventSource listening")
+
+	go ps.run()
+}
+
+// run is the pgEventSource main loop: it receives NOTIFY payloads,
+// wraps each in an event with a serial number, and fans events out to
+// every attached sink. It returns when ps.shutdown delivers an error
+// or the listener's Notify channel closes.
+func (ps *pgEventSource) run() {
+	ps.queue = make(chan *event, ps.QueueSize)
+
+	// Dispatcher: take events off the queue in order and deliver
+	// each one to all current sinks.
+	go func() {
+		for e := range ps.queue {
+			// Wait for the "select ... from logs" call to
+			// finish. This limits max concurrent queries
+			// to ps.QueueSize. Without this, max
+			// concurrent queries would be bounded by
+			// client_count X client_queue_size.
+			e.Detail()
+
+			logger(nil).
+				WithField("serial", e.Serial).
+				WithField("detail", e.Detail()).
+				Debug("event ready")
+			e.Ready = time.Now()
+			ps.lastQDelay = e.Ready.Sub(e.Received)
+
+			// mtx guards the sinks map during fan-out.
+			ps.mtx.Lock()
+			atomic.AddUint64(&ps.eventsOut, uint64(len(ps.sinks)))
+			for sink := range ps.sinks {
+				sink.channel <- e
+			}
+			ps.mtx.Unlock()
+		}
+	}()
+
+	var serial uint64
+	// Ping the listener periodically so a dead connection is
+	// noticed even when no notifies are arriving.
+	ticker := time.NewTicker(time.Minute)
+	defer ticker.Stop()
+	for {
+		select {
+		case err, ok := <-ps.shutdown:
+			if ok {
+				logger(nil).WithError(err).Info("shutdown")
+			}
+			close(ps.queue)
+			return
+
+		case <-ticker.C:
+			logger(nil).Debug("listener ping")
+			ps.pqListener.Ping()
+
+		case pqEvent, ok := <-ps.pqListener.Notify:
+			if !ok {
+				close(ps.queue)
+				return
+			}
+			if pqEvent.Channel != "logs" {
+				continue
+			}
+			// The notify payload is the logs table row id.
+			logID, err := strconv.ParseUint(pqEvent.Extra, 10, 64)
+			if err != nil {
+				logger(nil).WithField("pqEvent", pqEvent).Error("bad notify payload")
+				continue
+			}
+			serial++
+			e := &event{
+				LogID: logID,
+				Received: time.Now(),
+				Serial: serial,
+				db: ps.db,
+			}
+			logger(nil).WithField("event", e).Debug("incoming")
+			atomic.AddUint64(&ps.eventsIn, 1)
+			ps.queue <- e
+			// Start fetching the row now, in parallel with
+			// events already queued ahead of this one.
+			go e.Detail()
+		}
+	}
+}
+
+// NewSink subscribes to the event source. NewSink returns an
+// eventSink, whose Channel() method returns a channel: a pointer to
+// each subsequent event will be sent to that channel.
+//
+// The caller must ensure events are received from the sink channel as
+// quickly as possible because when one sink stops being ready, all
+// other sinks block.
+func (ps *pgEventSource) NewSink() eventSink {
+ ps.setupOnce.Do(ps.setup)
+ sink := &pgEventSink{
+ channel: make(chan *event, 1),
+ source: ps,
+ }
+ ps.mtx.Lock()
+ ps.sinks[sink] = true
+ ps.mtx.Unlock()
+ return sink
+}
+
+func (ps *pgEventSource) DB() *sql.DB {
+ ps.setupOnce.Do(ps.setup)
+ return ps.db
+}
+
+func (ps *pgEventSource) DebugStatus() interface{} {
+ ps.mtx.Lock()
+ defer ps.mtx.Unlock()
+ blocked := 0
+ for sink := range ps.sinks {
+ blocked += len(sink.channel)
+ }
+ return map[string]interface{}{
+ "EventsIn": atomic.LoadUint64(&ps.eventsIn),
+ "EventsOut": atomic.LoadUint64(&ps.eventsOut),
+ "Queue": len(ps.queue),
+ "QueueLimit": cap(ps.queue),
+ "QueueDelay": stats.Duration(ps.lastQDelay),
+ "Sinks": len(ps.sinks),
+ "SinksBlocked": blocked,
+ }
+}
+
+type pgEventSink struct {
+ channel chan *event
+ source *pgEventSource
+}
+
+func (sink *pgEventSink) Channel() <-chan *event {
+ return sink.channel
+}
+
+func (sink *pgEventSink) Stop() {
+ go func() {
+ // Ensure this sink cannot fill up and block the
+ // server-side queue (which otherwise could in turn
+ // block our mtx.Lock() here)
+ for _ = range sink.channel {
+ }
+ }()
+ sink.source.mtx.Lock()
+ delete(sink.source.sinks, sink)
+ sink.source.mtx.Unlock()
+ close(sink.channel)
+}
--- /dev/null
+package main
+
+import (
+ "context"
+ "io"
+ "sync"
+ "time"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
+ "git.curoverse.com/arvados.git/sdk/go/stats"
+)
+
+type handler struct {
+ Client arvados.Client
+ PingTimeout time.Duration
+ QueueSize int
+
+ mtx sync.Mutex
+ lastDelay map[chan interface{}]stats.Duration
+ setupOnce sync.Once
+}
+
+type handlerStats struct {
+ QueueDelayNs time.Duration
+ WriteDelayNs time.Duration
+ EventBytes uint64
+ EventCount uint64
+}
+
+func (h *handler) Handle(ws wsConn, eventSource eventSource, newSession func(wsConn, chan<- interface{}) (session, error)) (hStats handlerStats) {
+ h.setupOnce.Do(h.setup)
+
+ ctx, cancel := context.WithCancel(ws.Request().Context())
+ defer cancel()
+ log := logger(ctx)
+
+ incoming := eventSource.NewSink()
+ defer incoming.Stop()
+
+ queue := make(chan interface{}, h.QueueSize)
+ h.mtx.Lock()
+ h.lastDelay[queue] = 0
+ h.mtx.Unlock()
+ defer func() {
+ h.mtx.Lock()
+ delete(h.lastDelay, queue)
+ h.mtx.Unlock()
+ }()
+
+ sess, err := newSession(ws, queue)
+ if err != nil {
+ log.WithError(err).Error("newSession failed")
+ return
+ }
+
+ // Receive websocket frames from the client and pass them to
+ // sess.Receive().
+ go func() {
+ buf := make([]byte, 2<<20)
+ for {
+ select {
+ case <-ctx.Done():
+ return
+ default:
+ }
+ ws.SetReadDeadline(time.Now().Add(24 * 365 * time.Hour))
+ n, err := ws.Read(buf)
+ buf := buf[:n]
+ log.WithField("frame", string(buf[:n])).Debug("received frame")
+ if err == nil && n == cap(buf) {
+ err = errFrameTooBig
+ }
+ if err != nil {
+ if err != io.EOF {
+ log.WithError(err).Info("read error")
+ }
+ cancel()
+ return
+ }
+ err = sess.Receive(buf)
+ if err != nil {
+ log.WithError(err).Error("sess.Receive() failed")
+ cancel()
+ return
+ }
+ }
+ }()
+
+ // Take items from the outgoing queue, serialize them using
+ // sess.EventMessage() as needed, and send them to the client
+ // as websocket frames.
+ go func() {
+ for {
+ var ok bool
+ var data interface{}
+ select {
+ case <-ctx.Done():
+ return
+ case data, ok = <-queue:
+ if !ok {
+ return
+ }
+ }
+ var e *event
+ var buf []byte
+ var err error
+ log := log
+
+ switch data := data.(type) {
+ case []byte:
+ buf = data
+ case *event:
+ e = data
+ log = log.WithField("serial", e.Serial)
+ buf, err = sess.EventMessage(e)
+ if err != nil {
+ log.WithError(err).Error("EventMessage failed")
+ cancel()
+ break
+ } else if len(buf) == 0 {
+ log.Debug("skip")
+ continue
+ }
+ default:
+ log.WithField("data", data).Error("bad object in client queue")
+ continue
+ }
+
+ log.WithField("frame", string(buf)).Debug("send event")
+ ws.SetWriteDeadline(time.Now().Add(h.PingTimeout))
+ t0 := time.Now()
+ _, err = ws.Write(buf)
+ if err != nil {
+ log.WithError(err).Error("write failed")
+ cancel()
+ break
+ }
+ log.Debug("sent")
+
+ if e != nil {
+ hStats.QueueDelayNs += t0.Sub(e.Ready)
+ h.mtx.Lock()
+ h.lastDelay[queue] = stats.Duration(time.Since(e.Ready))
+ h.mtx.Unlock()
+ }
+ hStats.WriteDelayNs += time.Since(t0)
+ hStats.EventBytes += uint64(len(buf))
+ hStats.EventCount++
+ }
+ }()
+
+ // Filter incoming events against the current subscription
+ // list, and forward matching events to the outgoing message
+ // queue. Close the queue and return when the request context
+ // is done/cancelled or the incoming event stream ends. Shut
+ // down the handler if the outgoing queue fills up.
+ go func() {
+ ticker := time.NewTicker(h.PingTimeout)
+ defer ticker.Stop()
+
+ for {
+ select {
+ case <-ctx.Done():
+ return
+ case <-ticker.C:
+ // If the outgoing queue is empty,
+ // send an empty message. This can
+ // help detect a disconnected network
+ // socket, and prevent an idle socket
+ // from being closed.
+ if len(queue) == 0 {
+ select {
+ case queue <- []byte(`{}`):
+ default:
+ }
+ }
+ continue
+ case e, ok := <-incoming.Channel():
+ if !ok {
+ cancel()
+ return
+ }
+ if !sess.Filter(e) {
+ continue
+ }
+ select {
+ case queue <- e:
+ default:
+ log.WithError(errQueueFull).Error("terminate")
+ cancel()
+ return
+ }
+ }
+ }
+ }()
+
+ <-ctx.Done()
+ return
+}
+
+func (h *handler) DebugStatus() interface{} {
+ h.mtx.Lock()
+ defer h.mtx.Unlock()
+
+ var s struct {
+ QueueCount int
+ QueueMin int
+ QueueMax int
+ QueueTotal uint64
+ QueueDelayMin stats.Duration
+ QueueDelayMax stats.Duration
+ }
+ for q, lastDelay := range h.lastDelay {
+ s.QueueCount++
+ n := len(q)
+ s.QueueTotal += uint64(n)
+ if s.QueueMax < n {
+ s.QueueMax = n
+ }
+ if s.QueueMin > n || s.QueueCount == 1 {
+ s.QueueMin = n
+ }
+ if (s.QueueDelayMin > lastDelay || s.QueueDelayMin == 0) && lastDelay > 0 {
+ s.QueueDelayMin = lastDelay
+ }
+ if s.QueueDelayMax < lastDelay {
+ s.QueueDelayMax = lastDelay
+ }
+ }
+ return &s
+}
+
+func (h *handler) setup() {
+ h.lastDelay = make(map[chan interface{}]stats.Duration)
+}
--- /dev/null
+package main
+
+import (
+ "flag"
+ "fmt"
+ "net/http"
+ "time"
+
+ "git.curoverse.com/arvados.git/sdk/go/config"
+ "git.curoverse.com/arvados.git/sdk/go/ctxlog"
+ "github.com/coreos/go-systemd/daemon"
+)
+
+var logger = ctxlog.FromContext
+
+// main loads configuration, configures logging, wires up the event
+// source and router, and runs the websocket server until
+// ListenAndServe fails.
+func main() {
+	log := logger(nil)
+
+	configPath := flag.String("config", "/etc/arvados/ws/ws.yml", "`path` to config file")
+	dumpConfig := flag.Bool("dump-config", false, "show current configuration and exit")
+	cfg := defaultConfig()
+	flag.Parse()
+
+	err := config.LoadFile(&cfg, *configPath)
+	if err != nil {
+		log.Fatal(err)
+	}
+
+	ctxlog.SetLevel(cfg.LogLevel)
+	ctxlog.SetFormat(cfg.LogFormat)
+
+	// With -dump-config, print the effective configuration and
+	// exit without starting the server.
+	if *dumpConfig {
+		txt, err := config.Dump(&cfg)
+		if err != nil {
+			log.Fatal(err)
+		}
+		fmt.Print(string(txt))
+		return
+	}
+
+	log.Info("started")
+	eventSource := &pgEventSource{
+		DataSource: cfg.Postgres.ConnectionString(),
+		QueueSize: cfg.ServerEventQueue,
+	}
+	srv := &http.Server{
+		Addr: cfg.Listen,
+		ReadTimeout: time.Minute,
+		WriteTimeout: time.Minute,
+		MaxHeaderBytes: 1 << 20,
+		Handler: &router{
+			Config: &cfg,
+			eventSource: eventSource,
+			newPermChecker: func() permChecker { return newPermChecker(cfg.Client) },
+		},
+	}
+	// Bootstrap the eventSource by attaching a dummy subscriber
+	// and hanging up.
+	eventSource.NewSink().Stop()
+
+	// Tell systemd (Type=notify units) the service is ready.
+	if _, err := daemon.SdNotify(false, "READY=1"); err != nil {
+		log.WithError(err).Warn("error notifying init daemon")
+	}
+
+	log.WithField("Listen", srv.Addr).Info("listening")
+	log.Fatal(srv.ListenAndServe())
+}
--- /dev/null
+package main
+
+import (
+ "net/http"
+ "net/url"
+ "time"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+const (
+	// Cached permission results are trusted for at most this long.
+	maxPermCacheAge = time.Hour
+	// Entries younger than this are never evicted by tidy().
+	minPermCacheAge = 5 * time.Minute
+)
+
+// permChecker decides whether the currently installed token grants
+// read access to a given object UUID.
+type permChecker interface {
+	// SetToken installs the API token used by subsequent Check calls.
+	SetToken(token string)
+	// Check reports whether the object with the given UUID is
+	// readable with the current token.
+	Check(uuid string) (bool, error)
+}
+
+// newPermChecker returns a caching permChecker backed by the given
+// API client (passed by value, so the caller's copy is untouched).
+// The token is cleared here; callers must supply one via SetToken.
+func newPermChecker(ac arvados.Client) permChecker {
+	ac.AuthToken = ""
+	return &cachingPermChecker{
+		Client:     &ac,
+		cache:      make(map[string]cacheEnt),
+		maxCurrent: 16,
+	}
+}
+
+// cacheEnt is one cached permission result: when it was obtained,
+// and whether access was allowed.
+type cacheEnt struct {
+	time.Time
+	allowed bool
+}
+
+// cachingPermChecker implements permChecker by looking objects up
+// through the Arvados API and caching the results in memory.
+type cachingPermChecker struct {
+	*arvados.Client
+	cache      map[string]cacheEnt
+	maxCurrent int // high-water mark used by tidy() to pace eviction
+}
+
+// SetToken installs the API token used for subsequent Check calls.
+func (pc *cachingPermChecker) SetToken(token string) {
+	pc.Client.AuthToken = token
+}
+
+// Check reports whether the object with the given UUID is readable
+// using the checker's current token. Results are cached for up to
+// maxPermCacheAge. A 404 or 403 API response means "not allowed";
+// any other failure is returned as an error.
+func (pc *cachingPermChecker) Check(uuid string) (bool, error) {
+	// NOTE(review): this logs the full API token at debug level;
+	// consider redacting.
+	logger := logger(nil).
+		WithField("token", pc.Client.AuthToken).
+		WithField("uuid", uuid)
+	pc.tidy()
+	now := time.Now()
+	if perm, ok := pc.cache[uuid]; ok && now.Sub(perm.Time) < maxPermCacheAge {
+		logger.WithField("allowed", perm.allowed).Debug("cache hit")
+		return perm.allowed, nil
+	}
+	var buf map[string]interface{}
+	path, err := pc.PathForUUID("get", uuid)
+	if err != nil {
+		return false, err
+	}
+	err = pc.RequestAndDecode(&buf, "GET", path, nil, url.Values{
+		"select": {`["uuid"]`},
+	})
+
+	var allowed bool
+	if err == nil {
+		allowed = true
+	} else if txErr, ok := err.(*arvados.TransactionError); ok &&
+		(txErr.StatusCode == http.StatusNotFound || txErr.StatusCode == http.StatusForbidden) {
+		// 404 means "you aren't allowed to know whether this
+		// UUID exists"; some requests are expressly forbidden
+		// (403) for other reasons. Either way access is denied,
+		// and that is a valid (cacheable) answer, not an error.
+		//
+		// Bug fix: the original code dereferenced txErr in a
+		// second "else if" branch that was reachable when the
+		// type assertion had failed (txErr == nil), panicking on
+		// any non-TransactionError failure.
+		allowed = false
+	} else {
+		logger.WithError(err).Error("lookup error")
+		return false, err
+	}
+	logger.WithField("allowed", allowed).Debug("cache miss")
+	pc.cache[uuid] = cacheEnt{Time: now, allowed: allowed}
+	return allowed, nil
+}
+
+// tidy evicts cache entries older than minPermCacheAge, but only
+// once the cache has grown past twice its previous post-tidy size
+// (maxCurrent), so eviction cost is amortized across many Checks.
+func (pc *cachingPermChecker) tidy() {
+	if len(pc.cache) <= pc.maxCurrent*2 {
+		return
+	}
+	tooOld := time.Now().Add(-minPermCacheAge)
+	for uuid, t := range pc.cache {
+		if t.Before(tooOld) {
+			delete(pc.cache, uuid)
+		}
+	}
+	pc.maxCurrent = len(pc.cache)
+}
--- /dev/null
+package main
+
+import (
+ "encoding/json"
+ "io"
+ "net/http"
+ "strconv"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "git.curoverse.com/arvados.git/sdk/go/ctxlog"
+ "github.com/Sirupsen/logrus"
+ "golang.org/x/net/websocket"
+)
+
+// wsConn is the subset of the websocket connection API used by this
+// package: a bidirectional byte stream with read/write deadlines and
+// access to the originating HTTP request.
+type wsConn interface {
+	io.ReadWriter
+	Request() *http.Request
+	SetReadDeadline(time.Time) error
+	SetWriteDeadline(time.Time) error
+}
+
+// router dispatches incoming HTTP requests to the v0/v1 websocket
+// endpoints and the JSON status/debug endpoints.
+type router struct {
+	Config         *wsConfig
+	eventSource    eventSource
+	newPermChecker func() permChecker
+
+	handler   *handler
+	mux       *http.ServeMux
+	setupOnce sync.Once // guards one-time initialization in setup()
+
+	lastReqID  int64      // last request ID issued; see newReqID()
+	lastReqMtx sync.Mutex // guards lastReqID
+
+	status routerDebugStatus
+}
+
+// routerDebugStatus holds request counters. Fields are updated with
+// sync/atomic in ServeHTTP, so they must stay 64-bit-alignable.
+type routerDebugStatus struct {
+	ReqsReceived int64
+	ReqsActive   int64
+}
+
+// debugStatuser is implemented by components that can report
+// internal state for the /debug.json endpoint.
+type debugStatuser interface {
+	DebugStatus() interface{}
+}
+
+// setup wires up the outgoing-message handler and the HTTP mux. It
+// runs exactly once, via setupOnce, on the first ServeHTTP call.
+func (rtr *router) setup() {
+	rtr.handler = &handler{
+		PingTimeout: rtr.Config.PingTimeout.Duration(),
+		QueueSize:   rtr.Config.ClientEventQueue,
+	}
+	rtr.mux = http.NewServeMux()
+	rtr.mux.Handle("/websocket", rtr.makeServer(newSessionV0))
+	rtr.mux.Handle("/arvados/v1/events.ws", rtr.makeServer(newSessionV1))
+	rtr.mux.HandleFunc("/debug.json", jsonHandler(rtr.DebugStatus))
+	rtr.mux.HandleFunc("/status.json", jsonHandler(rtr.Status))
+}
+
+// makeServer returns a websocket.Server that runs one client
+// connection to completion: it builds a session with the given
+// factory, delegates the read/send loops to rtr.handler, and logs
+// connect/disconnect with elapsed time and transfer stats.
+func (rtr *router) makeServer(newSession sessionFactory) *websocket.Server {
+	return &websocket.Server{
+		Handshake: func(c *websocket.Config, r *http.Request) error {
+			// Accept the handshake unconditionally (no origin
+			// checks here).
+			return nil
+		},
+		Handler: websocket.Handler(func(ws *websocket.Conn) {
+			t0 := time.Now()
+			log := logger(ws.Request().Context())
+			log.Info("connected")
+
+			stats := rtr.handler.Handle(ws, rtr.eventSource,
+				func(ws wsConn, sendq chan<- interface{}) (session, error) {
+					return newSession(ws, sendq, rtr.eventSource.DB(), rtr.newPermChecker(), &rtr.Config.Client)
+				})
+
+			log.WithFields(logrus.Fields{
+				// time.Since is the idiomatic form of
+				// time.Now().Sub(t0).
+				"elapsed": time.Since(t0).Seconds(),
+				"stats":   stats,
+			}).Info("disconnect")
+			ws.Close()
+		}),
+	}
+}
+
+// newReqID returns a unique request ID: the current UnixNano clock
+// reading, bumped past the previously issued ID if the clock hasn't
+// advanced, encoded in base 36.
+func (rtr *router) newReqID() string {
+	rtr.lastReqMtx.Lock()
+	defer rtr.lastReqMtx.Unlock()
+	id := time.Now().UnixNano()
+	if id <= rtr.lastReqID {
+		id = rtr.lastReqID + 1
+	}
+	// Bug fix: record the ID we hand out. Without this store,
+	// lastReqID stayed zero and two requests arriving in the same
+	// nanosecond would receive identical "unique" IDs.
+	rtr.lastReqID = id
+	return strconv.FormatInt(id, 36)
+}
+
+// DebugStatus assembles the /debug.json payload from the router's
+// own counters, the outgoing-message handler, and -- when it
+// implements debugStatuser -- the event source.
+func (rtr *router) DebugStatus() interface{} {
+	s := map[string]interface{}{
+		"HTTP":     rtr.status,
+		"Outgoing": rtr.handler.DebugStatus(),
+	}
+	if es, ok := rtr.eventSource.(debugStatuser); ok {
+		s["EventSource"] = es.DebugStatus()
+	}
+	return s
+}
+
+// Status returns the /status.json payload: the current number of
+// active client requests.
+func (rtr *router) Status() interface{} {
+	active := atomic.LoadInt64(&rtr.status.ReqsActive)
+	return map[string]interface{}{"Clients": active}
+}
+
+// ServeHTTP attaches a unique RequestID to the request's logging
+// context, maintains the received/active counters, logs the request,
+// and hands it to the mux.
+func (rtr *router) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+	rtr.setupOnce.Do(rtr.setup)
+	atomic.AddInt64(&rtr.status.ReqsReceived, 1)
+	atomic.AddInt64(&rtr.status.ReqsActive, 1)
+	defer atomic.AddInt64(&rtr.status.ReqsActive, -1)
+
+	logger := logger(req.Context()).
+		WithField("RequestID", rtr.newReqID())
+	ctx := ctxlog.Context(req.Context(), logger)
+	req = req.WithContext(ctx)
+	logger.WithFields(logrus.Fields{
+		"remoteAddr":      req.RemoteAddr,
+		"reqForwardedFor": req.Header.Get("X-Forwarded-For"),
+	}).Info("accept request")
+	rtr.mux.ServeHTTP(resp, req)
+}
+
+// jsonHandler wraps fn in an http.HandlerFunc that serves fn's
+// return value as JSON. The value is marshalled into memory before
+// anything is written, so an encoding failure can still produce a
+// clean 500 response. (The original streamed with json.Encoder and
+// called http.Error after headers -- and possibly part of the body
+// -- had already been sent.)
+func jsonHandler(fn func() interface{}) http.HandlerFunc {
+	return func(resp http.ResponseWriter, req *http.Request) {
+		logger := logger(req.Context())
+		buf, err := json.Marshal(fn())
+		if err != nil {
+			msg := "encode failed"
+			logger.WithError(err).Error(msg)
+			http.Error(resp, msg, http.StatusInternalServerError)
+			return
+		}
+		resp.Header().Set("Content-Type", "application/json")
+		// json.Encoder terminated the value with a newline; keep
+		// that behavior for compatibility.
+		resp.Write(append(buf, '\n'))
+	}
+}
--- /dev/null
+package main
+
+import (
+ "database/sql"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+// session implements the protocol-specific behavior of one client
+// connection: interpreting incoming client messages, selecting which
+// events are relevant, and encoding events for transmission.
+type session interface {
+	// Receive processes a message received from the client. If a
+	// non-nil error is returned, the connection will be
+	// terminated.
+	Receive([]byte) error
+
+	// Filter returns true if the event should be queued for
+	// sending to the client. It should return as fast as
+	// possible, and must not block.
+	Filter(*event) bool
+
+	// EventMessage encodes the given event (from the front of the
+	// queue) into a form suitable to send to the client. If a
+	// non-nil error is returned, the connection is terminated. If
+	// the returned buffer is empty, nothing is sent to the client
+	// and the event is not counted in statistics.
+	//
+	// Unlike Filter, EventMessage can block without affecting
+	// other connections. If EventMessage is slow, additional
+	// incoming events will be queued. If the event queue fills
+	// up, the connection will be dropped.
+	EventMessage(*event) ([]byte, error)
+}
+
+// sessionFactory builds a session for a new client connection, given
+// the connection, the outgoing queue, a database handle, a
+// permission checker, and the cluster's API client configuration.
+type sessionFactory func(wsConn, chan<- interface{}, *sql.DB, permChecker, *arvados.Client) (session, error)
--- /dev/null
+package main
+
+import (
+ "database/sql"
+ "encoding/json"
+ "errors"
+ "sync"
+ "sync/atomic"
+ "time"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
+ "github.com/Sirupsen/logrus"
+)
+
+var (
+	errQueueFull   = errors.New("client queue full")
+	errFrameTooBig = errors.New("frame too big")
+
+	// Object attributes forwarded to clients from the
+	// old_attributes/new_attributes log properties (see
+	// EventMessage).
+	sendObjectAttributes = []string{"state", "name", "owner_uuid", "portable_data_hash"}
+
+	// Canned v0-protocol responses to subscribe requests.
+	v0subscribeOK   = []byte(`{"status":200}`)
+	v0subscribeFail = []byte(`{"status":400}`)
+)
+
+// v0session holds the state of one v0-protocol client connection.
+type v0session struct {
+	ac            *arvados.Client
+	ws            wsConn
+	sendq         chan<- interface{} // queue of outgoing messages/events
+	db            *sql.DB
+	permChecker   permChecker
+	subscriptions []v0subscribe // guarded by mtx
+	lastMsgID     uint64        // updated atomically in EventMessage
+	log           *logrus.Entry
+	mtx           sync.Mutex
+	setupOnce     sync.Once
+}
+
+// newSessionV0 returns a v0 session: a partial port of the Rails/puma
+// implementation, with just enough functionality to support Workbench
+// and arv-mount.
+//
+// The client's api_token is read from the request form data and
+// installed in the session's permission checker.
+func newSessionV0(ws wsConn, sendq chan<- interface{}, db *sql.DB, pc permChecker, ac *arvados.Client) (session, error) {
+	sess := &v0session{
+		sendq:       sendq,
+		ws:          ws,
+		db:          db,
+		ac:          ac,
+		permChecker: pc,
+		log:         logger(ws.Request().Context()),
+	}
+
+	err := ws.Request().ParseForm()
+	if err != nil {
+		sess.log.WithError(err).Error("ParseForm failed")
+		return nil, err
+	}
+	token := ws.Request().Form.Get("api_token")
+	sess.permChecker.SetToken(token)
+	// NOTE(review): this logs the full API token at debug level;
+	// consider redacting.
+	sess.log.WithField("token", token).Debug("set token")
+
+	return sess, nil
+}
+
+// Receive handles one incoming client message. The only supported
+// method is "subscribe": the subscription's filters are compiled, a
+// {"status":200} ack is queued, the subscription is recorded, and
+// backlogged events (last_log_id) are queued. Malformed messages and
+// unknown methods get a {"status":400} response. Receive always
+// returns nil, so the connection is never terminated from here.
+func (sess *v0session) Receive(buf []byte) error {
+	var sub v0subscribe
+	if err := json.Unmarshal(buf, &sub); err != nil {
+		sess.log.WithError(err).Info("invalid message from client")
+	} else if sub.Method == "subscribe" {
+		sub.prepare(sess)
+		sess.log.WithField("sub", sub).Debug("sub prepared")
+		sess.sendq <- v0subscribeOK
+		sess.mtx.Lock()
+		sess.subscriptions = append(sess.subscriptions, sub)
+		sess.mtx.Unlock()
+		// Send the backlog after the ack, outside the lock.
+		sub.sendOldEvents(sess)
+		return nil
+	} else {
+		sess.log.WithField("Method", sub.Method).Info("unknown method")
+	}
+	sess.sendq <- v0subscribeFail
+	return nil
+}
+
+// EventMessage encodes an event as a v0 notification message. It
+// returns (nil, nil) -- meaning "send nothing" -- when the log row
+// has no detail or when the client's token cannot read the logged
+// object.
+func (sess *v0session) EventMessage(e *event) ([]byte, error) {
+	detail := e.Detail()
+	if detail == nil {
+		return nil, nil
+	}
+
+	// Suppress the event entirely if the client can't read the
+	// object it refers to.
+	ok, err := sess.permChecker.Check(detail.ObjectUUID)
+	if err != nil || !ok {
+		return nil, err
+	}
+
+	kind, _ := sess.ac.KindForUUID(detail.ObjectUUID)
+	msg := map[string]interface{}{
+		"msgID":             atomic.AddUint64(&sess.lastMsgID, 1),
+		"id":                detail.ID,
+		"uuid":              detail.UUID,
+		"object_uuid":       detail.ObjectUUID,
+		"object_owner_uuid": detail.ObjectOwnerUUID,
+		"object_kind":       kind,
+		"event_type":        detail.EventType,
+		"event_at":          detail.EventAt,
+	}
+	if detail.Properties != nil && detail.Properties["text"] != nil {
+		// Events carrying a "text" property are forwarded with
+		// their properties intact.
+		msg["properties"] = detail.Properties
+	} else {
+		// Otherwise only the whitelisted attributes are copied
+		// out of old_attributes/new_attributes.
+		msgProps := map[string]map[string]interface{}{}
+		for _, ak := range []string{"old_attributes", "new_attributes"} {
+			eventAttrs, ok := detail.Properties[ak].(map[string]interface{})
+			if !ok {
+				continue
+			}
+			msgAttrs := map[string]interface{}{}
+			for _, k := range sendObjectAttributes {
+				if v, ok := eventAttrs[k]; ok {
+					msgAttrs[k] = v
+				}
+			}
+			msgProps[ak] = msgAttrs
+		}
+		msg["properties"] = msgProps
+	}
+	return json.Marshal(msg)
+}
+
+// Filter reports whether any of the session's subscriptions matches
+// the event. It only takes the session mutex, so it satisfies the
+// session interface's requirement that Filter must not block.
+func (sess *v0session) Filter(e *event) bool {
+	sess.mtx.Lock()
+	defer sess.mtx.Unlock()
+	matched := false
+	for i := range sess.subscriptions {
+		if sess.subscriptions[i].match(sess, e) {
+			matched = true
+			break
+		}
+	}
+	return matched
+}
+
+// sendOldEvents queues an event for each log row that is newer than
+// the subscription's last_log_id, was created within the last 10
+// minutes, and matches the subscription's filters.
+func (sub *v0subscribe) sendOldEvents(sess *v0session) {
+	if sub.LastLogID == 0 {
+		return
+	}
+	sess.log.WithField("LastLogID", sub.LastLogID).Debug("getOldEvents")
+	// Here we do a "select id" query and queue an event for every
+	// log since the given ID, then use (*event)Detail() to
+	// retrieve the whole row and decide whether to send it. This
+	// approach is very inefficient if the subscriber asks for
+	// last_log_id==1, even if the filters end up matching very
+	// few events.
+	//
+	// To mitigate this, filter on "created > 10 minutes ago" when
+	// retrieving the list of old event IDs to consider.
+	rows, err := sess.db.Query(
+		`SELECT id FROM logs WHERE id > $1 AND created_at > $2 ORDER BY id`,
+		sub.LastLogID,
+		time.Now().UTC().Add(-10*time.Minute).Format(time.RFC3339Nano))
+	if err != nil {
+		sess.log.WithError(err).Error("db.Query failed")
+		return
+	}
+	// Bug fix: release the result set/connection even when we
+	// return early (e.g., client disconnects mid-backlog). The
+	// original code only closed rows implicitly by exhausting
+	// them, leaking a DB connection on the early-return path.
+	defer rows.Close()
+	for rows.Next() {
+		var id uint64
+		err := rows.Scan(&id)
+		if err != nil {
+			sess.log.WithError(err).Error("row Scan failed")
+			continue
+		}
+		for len(sess.sendq)*2 > cap(sess.sendq) {
+			// Ugly... but if we fill up the whole client
+			// queue with a backlog of old events, a
+			// single new event will overflow it and
+			// terminate the connection, and then the
+			// client will probably reconnect and do the
+			// same thing all over again.
+			time.Sleep(100 * time.Millisecond)
+		}
+		now := time.Now()
+		e := &event{
+			LogID:    id,
+			Received: now,
+			Ready:    now,
+			db:       sess.db,
+		}
+		if sub.match(sess, e) {
+			select {
+			case sess.sendq <- e:
+			case <-sess.ws.Request().Context().Done():
+				return
+			}
+		}
+	}
+	if err := rows.Err(); err != nil {
+		sess.log.WithError(err).Error("db.Query failed")
+	}
+}
+
+// v0subscribe is a decoded v0-protocol "subscribe" message.
+type v0subscribe struct {
+	Method    string
+	Filters   []v0filter
+	LastLogID int64 `json:"last_log_id"`
+
+	// funcs holds the compiled filter predicates; see prepare().
+	funcs []func(*event) bool
+}
+
+// v0filter is one [column, operator, operand] filter clause.
+type v0filter [3]interface{}
+
+// match reports whether the event satisfies every compiled filter in
+// the subscription (a subscription with no filters matches every
+// event). An event whose log row cannot be fetched never matches.
+func (sub *v0subscribe) match(sess *v0session, e *event) bool {
+	log := sess.log.WithField("LogID", e.LogID)
+	detail := e.Detail()
+	if detail == nil {
+		log.Error("match failed, no detail")
+		return false
+	}
+	log = log.WithField("funcs", len(sub.funcs))
+	for i, f := range sub.funcs {
+		if !f(e) {
+			log.WithField("func", i).Debug("match failed")
+			return false
+		}
+	}
+	log.Debug("match passed")
+	return true
+}
+
+// prepare compiles the subscription's filter clauses into predicate
+// funcs. Only two clause forms are supported:
+//
+//	["event_type", "in", [<string>, ...]]
+//	["created_at", <op>, <RFC3339 time>]  with op in >=, <=, >, <, =
+//
+// Unrecognized or malformed clauses are silently skipped, i.e. they
+// do not restrict matching.
+func (sub *v0subscribe) prepare(sess *v0session) {
+	for _, f := range sub.Filters {
+		if len(f) != 3 {
+			continue
+		}
+		if col, ok := f[0].(string); ok && col == "event_type" {
+			op, ok := f[1].(string)
+			if !ok || op != "in" {
+				continue
+			}
+			arr, ok := f[2].([]interface{})
+			if !ok {
+				continue
+			}
+			// Keep only the string operands.
+			var strs []string
+			for _, s := range arr {
+				if s, ok := s.(string); ok {
+					strs = append(strs, s)
+				}
+			}
+			// Match if the event's type equals any listed string.
+			sub.funcs = append(sub.funcs, func(e *event) bool {
+				for _, s := range strs {
+					if s == e.Detail().EventType {
+						return true
+					}
+				}
+				return false
+			})
+		} else if ok && col == "created_at" {
+			op, ok := f[1].(string)
+			if !ok {
+				continue
+			}
+			tstr, ok := f[2].(string)
+			if !ok {
+				continue
+			}
+			t, err := time.Parse(time.RFC3339Nano, tstr)
+			if err != nil {
+				sess.log.WithField("data", tstr).WithError(err).Info("time.Parse failed")
+				continue
+			}
+			// Compile the comparison operator into a predicate
+			// over the log row's created_at.
+			var fn func(*event) bool
+			switch op {
+			case ">=":
+				fn = func(e *event) bool {
+					return !e.Detail().CreatedAt.Before(t)
+				}
+			case "<=":
+				fn = func(e *event) bool {
+					return !e.Detail().CreatedAt.After(t)
+				}
+			case ">":
+				fn = func(e *event) bool {
+					return e.Detail().CreatedAt.After(t)
+				}
+			case "<":
+				fn = func(e *event) bool {
+					return e.Detail().CreatedAt.Before(t)
+				}
+			case "=":
+				fn = func(e *event) bool {
+					return e.Detail().CreatedAt.Equal(t)
+				}
+			default:
+				sess.log.WithField("operator", op).Info("bogus operator")
+				continue
+			}
+			sub.funcs = append(sub.funcs, fn)
+		}
+	}
+}
--- /dev/null
+package main
+
+import (
+ "database/sql"
+ "errors"
+
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
+)
+
+// newSessionV1 returns a v1 session -- see
+// https://dev.arvados.org/projects/arvados/wiki/Websocket_server
+//
+// The v1 protocol is not implemented yet; every call fails.
+func newSessionV1(ws wsConn, sendq chan<- interface{}, db *sql.DB, pc permChecker, ac *arvados.Client) (session, error) {
+	err := errors.New("Not implemented")
+	return nil, err
+}
--- /dev/null
+#!/bin/bash
+# bash functions for managing Arvados tokens and other conveniences.
+
+# Usage text. The heredoc delimiter is unquoted, so $0/$HOME are
+# expanded at read time.
+read -rd "\000" helpmessage <<EOF
+$(basename $0): bash functions for managing Arvados tokens and other shortcuts.
+
+Syntax:
+ . $0 # activate for current shell
+ $0 --install # install into .bashrc
+
+arvswitch <name>
+ Set ARVADOS_API_HOST and ARVADOS_API_TOKEN in the current environment based on
+ $HOME/.config/arvados/<name>.conf
+ With no arguments, list available Arvados configurations.
+
+arvsave <name>
+ Save values of ARVADOS_API_HOST and ARVADOS_API_TOKEN in the current environment to
+ $HOME/.config/arvados/<name>.conf
+
+arvrm <name>
+ Delete $HOME/.config/arvados/<name>.conf
+
+arvboxswitch <name>
+ Set ARVBOX_CONTAINER to <name>
+ With no arguments, list available arvboxes.
+
+arvopen:
+ Open an Arvados uuid in web browser (http://curover.se)
+
+arvissue
+ Open an Arvados ticket in web browser (http://dev.arvados.org)
+
+EOF
+
+# --install appends a "source this file" line to ~/.bashrc (at most
+# once). When the script is executed rather than sourced into bash,
+# just print the help text.
+if [[ "$1" = "--install" ]] ; then
+  this=$(readlink -f $0)
+  if ! grep ". $this" ~/.bashrc >/dev/null ; then
+      echo ". $this" >> ~/.bashrc
+      echo "Installed into ~/.bashrc"
+  else
+      echo "Already installed in ~/.bashrc"
+  fi
+elif ! [[ $0 =~ bash$ ]] ; then
+  echo "$helpmessage"
+fi
+
+# Keep exported tokens out of shell history.
+HISTIGNORE=$HISTIGNORE:'export ARVADOS_API_TOKEN=*'
+
+# arvswitch <name>: load ARVADOS_* settings from
+# ~/.config/arvados/<name>.conf into the current environment. With no
+# argument, list the available configurations.
+arvswitch() {
+  if [[ -n "$1" ]] ; then
+    # Quote path expansions so $HOME (or a conf name) containing
+    # spaces doesn't break the tests/commands.
+    if [[ -f "$HOME/.config/arvados/$1.conf" ]] ; then
+      unset ARVADOS_API_HOST_INSECURE
+      for a in $(cat "$HOME/.config/arvados/$1.conf") ; do export "$a" ; done
+      echo "Switched to $1"
+    else
+      echo "$1 unknown"
+    fi
+  else
+    echo "Switch Arvados environment conf"
+    echo "Usage: arvswitch name"
+    echo "Available confs:" $((cd "$HOME/.config/arvados" && ls --indicator-style=none *.conf) | rev | cut -c6- | rev)
+  fi
+}
+
+arvsave() {
+ if [[ -n "$1" ]] ; then
+ touch $HOME/.config/arvados/$1.conf
+ chmod 0600 $HOME/.config/arvados/$1.conf
+ env | grep ARVADOS_ > $HOME/.config/arvados/$1.conf
+ else
+ echo "Save current Arvados environment variables to conf file"
+ echo "Usage: arvsave name"
+ fi
+}
+
+# arvrm <name>: delete ~/.config/arvados/<name>.conf.
+arvrm() {
+  if [[ -n "$1" ]] ; then
+    # Quote path expansions so a $HOME containing spaces works.
+    if [[ -f "$HOME/.config/arvados/$1.conf" ]] ; then
+      rm "$HOME/.config/arvados/$1.conf"
+    else
+      echo "$1 unknown"
+    fi
+  else
+    echo "Delete Arvados environment conf"
+    echo "Usage: arvrm name"
+  fi
+}
+
+# arvboxswitch <name>: point ARVBOX_CONTAINER at an existing arvbox
+# directory under ~/.arvbox. With no argument, show the current
+# container and list the available ones.
+arvboxswitch() {
+  if [[ -n "$1" ]] ; then
+    if [[ -d $HOME/.arvbox/$1 ]] ; then
+      export ARVBOX_CONTAINER=$1
+      echo "Arvbox switched to $1"
+    else
+      echo "$1 unknown"
+    fi
+  else
+    # Fall back to the default container name for display purposes.
+    if test -z "$ARVBOX_CONTAINER" ; then
+      ARVBOX_CONTAINER=arvbox
+    fi
+    echo "Switch Arvbox environment conf"
+    echo "Usage: arvboxswitch name"
+    echo "Your current container is: $ARVBOX_CONTAINER"
+    echo "Available confs:" $(cd $HOME/.arvbox && ls --indicator-style=none)
+  fi
+}
+
+# arvopen <uuid>: open an Arvados object in the default web browser
+# via the curover.se redirector.
+arvopen() {
+  if [[ -z "$1" ]] ; then
+    echo "Open Arvados uuid in browser"
+    echo "Usage: arvopen uuid"
+    return
+  fi
+  xdg-open https://curover.se/$1
+}
+
+# arvissue <number>: open an Arvados bug-tracker ticket in the
+# default web browser.
+arvissue() {
+  if [[ -z "$1" ]] ; then
+    echo "Open Arvados issue in browser"
+    echo "Usage: arvissue uuid"
+    return
+  fi
+  xdg-open https://dev.arvados.org/issues/$1
+}
tar -C /usr/local -xjf /tmp/$PJS.tar.bz2 && \
ln -s ../$PJS/bin/phantomjs /usr/local/bin/
+RUN pip install -U setuptools
+
ARG arvados_version
RUN echo arvados_version is git commit $arvados_version
fi
rm -rf tmp
+mkdir -p tmp/cache
bundle exec rake db:migrate
frozen=""
fi
if ! test -x bundle ; then
- gem install --no-document bundler
+ bundlergem=$(ls -r $GEM_HOME/cache/bundler-*.gem 2>/dev/null | head -n1 || true)
+ if test -n "$bundlergem" ; then
+ flock /var/lib/gems/gems.lock gem install --local --no-document $bundlergem
+ else
+ flock /var/lib/gems/gems.lock gem install --no-document bundler
+ fi
fi
if ! flock /var/lib/gems/gems.lock bundle install --path $GEM_HOME --local --no-deployment $frozen "$@" ; then
flock /var/lib/gems/gems.lock bundle install --path $GEM_HOME --no-deployment $frozen "$@"
pip_install() {
pushd /var/lib/pip
- for p in $(ls http*.tar.gz) $(ls http*.whl) $(ls http*.zip) ; do
+ for p in $(ls http*.tar.gz) $(ls http*.tar.bz2) $(ls http*.whl) $(ls http*.zip) ; do
if test -f $p ; then
ln -sf $p $(echo $p | sed 's/.*%2F\(.*\)/\1/')
fi
/var/lib/nginx /var/log/nginx /etc/ssl/private \
/var/lib/gopath /var/lib/pip
- mkdir -p /var/lib/gems/ruby/2.1.0
- chown arvbox:arvbox -R /var/lib/gems/ruby/2.1.0
+ mkdir -p /var/lib/gems/ruby
+ chown arvbox:arvbox -R /var/lib/gems/ruby
mkdir -p /tmp/crunch0 /tmp/crunch1
chown crunch:crunch -R /tmp/crunch0 /tmp/crunch1
export RAILS_ENV=development
run_bundler --without=development
-bundle exec passenger start --runtime-check-only --runtime-dir=/var/lib/passenger
+bundle exec passenger-config build-native-support
+bundle exec passenger-config install-standalone-runtime
if test "$1" = "--only-deps" ; then
exit
fi
exec bundle exec passenger start --port=${services[api]} \
- --runtime-dir=/var/lib/passenger \
--ssl --ssl-certificate=/var/lib/arvados/self-signed.pem \
--ssl-certificate-key=/var/lib/arvados/self-signed.key
fi
rm -rf tmp
+mkdir -p tmp/cache
bundle exec rake db:migrate