end
expose_action :combine_selected_files_into_collection do
- link_uuids, coll_ids = params["selection"].partition do |sel_s|
- ArvadosBase::resource_class_for_uuid(sel_s) == Link
- end
-
- unless link_uuids.empty?
- Link.select([:head_uuid]).where(uuid: link_uuids).each do |link|
- if ArvadosBase::resource_class_for_uuid(link.head_uuid) == Collection
- coll_ids << link.head_uuid
- end
- end
- end
-
- uuids = []
- pdhs = []
- source_paths = Hash.new { |hash, key| hash[key] = [] }
- coll_ids.each do |coll_id|
- if m = CollectionsHelper.match(coll_id)
- key = m[1] + m[2]
- pdhs << key
- source_paths[key] << m[4]
- elsif m = CollectionsHelper.match_uuid_with_optional_filepath(coll_id)
- key = m[1]
- uuids << key
- source_paths[key] << m[4]
- end
- end
-
- unless pdhs.empty?
- Collection.where(portable_data_hash: pdhs.uniq).
- select([:uuid, :portable_data_hash]).each do |coll|
- unless source_paths[coll.portable_data_hash].empty?
- uuids << coll.uuid
- source_paths[coll.uuid] = source_paths.delete(coll.portable_data_hash)
- end
- end
- end
+ uuids, source_paths = selected_collection_files params
new_coll = Arv::Collection.new
Collection.where(uuid: uuids.uniq).
end
end
+ # helper method to get the uuids and source file paths of the selected collection files
+ helper_method :selected_collection_files
+ def selected_collection_files params
+ link_uuids, coll_ids = params["selection"].partition do |sel_s|
+ ArvadosBase::resource_class_for_uuid(sel_s) == Link
+ end
+
+ unless link_uuids.empty?
+ Link.select([:head_uuid]).where(uuid: link_uuids).each do |link|
+ if ArvadosBase::resource_class_for_uuid(link.head_uuid) == Collection
+ coll_ids << link.head_uuid
+ end
+ end
+ end
+
+ uuids = []
+ pdhs = []
+ source_paths = Hash.new { |hash, key| hash[key] = [] }
+ coll_ids.each do |coll_id|
+ if m = CollectionsHelper.match(coll_id)
+ key = m[1] + m[2]
+ pdhs << key
+ source_paths[key] << m[4]
+ elsif m = CollectionsHelper.match_uuid_with_optional_filepath(coll_id)
+ key = m[1]
+ uuids << key
+ source_paths[key] << m[4]
+ end
+ end
+
+ unless pdhs.empty?
+ Collection.where(portable_data_hash: pdhs.uniq).
+ select([:uuid, :portable_data_hash]).each do |coll|
+ unless source_paths[coll.portable_data_hash].empty?
+ uuids << coll.uuid
+ source_paths[coll.uuid] = source_paths.delete(coll.portable_data_hash)
+ end
+ end
+ end
+
+ [uuids, source_paths]
+ end
+
def wiselinks_layout
'body'
end
require "arvados/keep"
+require "arvados/collection"
require "uri"
class CollectionsController < ApplicationController
sharing_popup
end
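+ # Remove the files selected in params["selection"] from this collection's manifest.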
+ def remove_selected_files
+ uuids, source_paths = selected_collection_files params
+
+ arv_coll = Arv::Collection.new(@object.manifest_text)
+ source_paths[uuids[0]].each do |p|
+ arv_coll.rm "."+p
+ end
+
+ if @object.update_attributes manifest_text: arv_coll.manifest_text
+ show
+ else
+ self.render_error status: 422
+ end
+ end
+
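+ # Handle file renames submitted as "rename-file-path:<old path>" => <new path>;
+ # any other update falls through to the default behavior.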
+ def update
+ updated_attr = params[:collection].each.select {|a| a[0].andand.start_with? 'rename-file-path:'}
+
+ if updated_attr.size > 0
+ # Is it a file rename?
+ file_path = updated_attr[0][0].split('rename-file-path:')[-1]
+
+ new_file_path = updated_attr[0][1]
+ if new_file_path.start_with?('./')
+ # looks good
+ elsif new_file_path.start_with?('/')
+ new_file_path = '.' + new_file_path
+ else
+ new_file_path = './' + new_file_path
+ end
+
+ arv_coll = Arv::Collection.new(@object.manifest_text)
+
+ if arv_coll.exist?(new_file_path)
+ @errors = 'Duplicate file path. Please use a different name.'
+ self.render_error status: 422
+ else
+ arv_coll.rename "./"+file_path, new_file_path
+
+ if @object.update_attributes manifest_text: arv_coll.manifest_text
+ show
+ else
+ self.render_error status: 422
+ end
+ end
+ else
+ # Not a file rename; use default
+ super
+ end
+ end
+
protected
def find_usable_token(token_list)
"data-placement" => "bottom",
"data-type" => input_type,
"data-title" => "Edit #{attr.to_s.gsub '_', ' '}",
- "data-name" => attr,
+ "data-name" => htmloptions['selection_name'] || attr,
"data-object-uuid" => object.uuid,
"data-toggle" => "manual",
- "data-value" => attrvalue,
+ "data-value" => htmloptions['data-value'] || attrvalue,
"id" => span_id,
:class => "editable #{is_textile?( object, attr ) ? 'editable-textile' : ''}"
}.merge(htmloptions).merge(ajax_options)
end
def editable_attributes
- %w(name description manifest_text)
+ %w(name description manifest_text filename)
end
def provenance
'data-selection-action' => 'combine-collections',
'data-toggle' => 'dropdown'
%></li>
+ <% if object.editable? %>
+ <li><%= link_to "Remove selected files", '#',
+ method: :post,
+ 'data-href' => url_for(controller: 'collections', action: :remove_selected_files),
+ 'data-selection-param-name' => 'selection[]',
+ 'data-selection-action' => 'remove-selected-files',
+ 'data-toggle' => 'dropdown'
+ %></li>
+ <% end %>
</ul>
</div>
<div class="btn-group btn-group-sm">
} %>
<span> </span>
<% end %>
+
+ <% if object.editable? %>
+ <%= link_to({controller: 'collections', action: 'remove_selected_files', id: object.uuid, selection: [object.portable_data_hash+'/'+file_path]}, method: :post, remote: true, data: {confirm: "Remove #{file_path}?", toggle: 'tooltip', placement: 'top'}, class: 'btn btn-sm btn-default btn-nodecorate', title: "Remove #{file_path}") do %>
+ <i class="fa fa-fw fa-trash-o"></i>
+ <% end %>
+ <% end %>
<% if CollectionsHelper::is_image(filename) %>
- <i class="fa fa-fw fa-bar-chart-o"></i> <%= filename %></div>
+ <i class="fa fa-fw fa-bar-chart-o"></i>
+ <% if object.editable? %>
+ <%= render_editable_attribute object, 'filename', filename, {'data-value' => file_path, 'data-toggle' => 'manual', 'selection_name' => 'rename-file-path:'+file_path}, {tiptitle: 'Edit path of this file (name or directory or both). If you use the same path as another file, it may be removed.'} %>
+ <% else %>
+ <%= filename %>
+ <% end %>
+ </div>
<div class="collection_files_inline">
<%= link_to(image_tag("#{url_for object}/#{file_path}"),
link_params.merge(disposition: 'inline'),
</div>
</div>
<% else %>
- <i class="fa fa-fw fa-file" href="<%=object.uuid%>/<%=file_path%>" ></i> <%= filename %></div>
+ <% if object.editable? %>
+ <i class="fa fa-fw fa-file"></i><%= render_editable_attribute object, 'filename', filename, {'data-value' => file_path, 'data-toggle' => 'manual', 'selection_name' => 'rename-file-path:'+file_path}, {tiptitle: 'Edit path of this file (name or directory or both). If you use the same path as another file, it may be removed.'} %>
+ <% else %>
+ <i class="fa fa-fw fa-file" href="<%=object.uuid%>/<%=file_path%>" ></i> <%= filename %>
+ <% end %>
+ </div>
</div>
<% end %>
</li>
post 'share', :on => :member
post 'unshare', :on => :member
get 'choose', on: :collection
+ post 'remove_selected_files', on: :member
end
get('/collections/download/:uuid/:reader_token/*file' => 'collections#show_file',
format: false)
collection = api_fixture('collections')['foo_file']
get :show, {id: collection['uuid']}, session_for(:active)
assert_includes @response.body, collection['name']
- assert_match /href="#{collection['uuid']}\/foo" ><\/i> foo</, @response.body
+ assert_match /not authorized to manage collection sharing links/, @response.body
end
test "No Upload tab on non-writable collection" do
assert_equal "https://download.example/c=#{id.sub '+', '-'}/_/w%20a%20z?api_token=#{tok}", @response.redirect_url
end
end
+
+ test "remove selected files from collection" do
+ use_token :active
+
+ # create a new collection to test; using existing collections will cause other tests to fail,
+ # and resetting fixtures after each test makes this test file take almost 4 times as long to run.
+ manifest_text = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1 0:0:file2\n./dir1 d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1 0:0:file2\n"
+
+ collection = Collection.create(manifest_text: manifest_text)
+ assert_includes(collection['manifest_text'], "0:0:file1")
+
+ # now remove all files named 'file1' from the collection
+ post :remove_selected_files, {
+ id: collection['uuid'],
+ selection: ["#{collection['uuid']}/file1",
+ "#{collection['uuid']}/dir1/file1"],
+ format: :json
+ }, session_for(:active)
+ assert_response :success
+
+ # verify no 'file1' in the updated collection
+ collection = Collection.select([:uuid, :manifest_text]).where(uuid: collection['uuid']).first
+ assert_not_includes(collection['manifest_text'], "0:0:file1")
+ assert_includes(collection['manifest_text'], "0:0:file2") # but other files still exist
+ end
+
+ test "remove all files from a subdir of a collection" do
+ use_token :active
+
+ # create a new collection to test
+ manifest_text = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1 0:0:file2\n./dir1 d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1 0:0:file2\n"
+
+ collection = Collection.create(manifest_text: manifest_text)
+ assert_includes(collection['manifest_text'], "0:0:file1")
+
+ # now remove all files from "dir1" subdir of the collection
+ post :remove_selected_files, {
+ id: collection['uuid'],
+ selection: ["#{collection['uuid']}/dir1/file1",
+ "#{collection['uuid']}/dir1/file2"],
+ format: :json
+ }, session_for(:active)
+ assert_response :success
+
+ # verify that "./dir1" no longer exists in this collection's manifest text
+ collection = Collection.select([:uuid, :manifest_text]).where(uuid: collection['uuid']).first
+ assert_match /. d41d8cd98f00b204e9800998ecf8427e\+0\+A(.*) 0:0:file1 0:0:file2\n$/, collection['manifest_text']
+ assert_not_includes(collection['manifest_text'], 'dir1')
+ end
+
+ test "rename file in a collection" do
+ use_token :active
+
+ # create a new collection to test
+ manifest_text = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1 0:0:file2\n./dir1 d41d8cd98f00b204e9800998ecf8427e+0 0:0:dir1file1 0:0:dir1file2\n"
+
+ collection = Collection.create(manifest_text: manifest_text)
+ assert_includes(collection['manifest_text'], "0:0:file1")
+
+ # rename 'file1' as 'file1renamed' and verify
+ post :update, {
+ id: collection['uuid'],
+ collection: {
+ 'rename-file-path:file1' => 'file1renamed'
+ },
+ format: :json
+ }, session_for(:active)
+ assert_response :success
+
+ collection = Collection.select([:uuid, :manifest_text]).where(uuid: collection['uuid']).first
+ assert_match /. d41d8cd98f00b204e9800998ecf8427e\+0\+A(.*) 0:0:file1renamed 0:0:file2\n.\/dir1 d41d8cd98f00b204e9800998ecf8427e\+0\+A(.*) 0:0:dir1file1 0:0:dir1file2\n$/, collection['manifest_text']
+
+ # now rename 'file2' such that it is moved into 'dir1'
+ @test_counter = 0
+ post :update, {
+ id: collection['uuid'],
+ collection: {
+ 'rename-file-path:file2' => 'dir1/file2'
+ },
+ format: :json
+ }, session_for(:active)
+ assert_response :success
+
+ collection = Collection.select([:uuid, :manifest_text]).where(uuid: collection['uuid']).first
+ assert_match /. d41d8cd98f00b204e9800998ecf8427e\+0\+A(.*) 0:0:file1renamed\n.\/dir1 d41d8cd98f00b204e9800998ecf8427e\+0\+A(.*) 0:0:dir1file1 0:0:dir1file2 0:0:file2\n$/, collection['manifest_text']
+
+ # now rename 'dir1/dir1file1' such that it is moved into a new subdir
+ @test_counter = 0
+ post :update, {
+ id: collection['uuid'],
+ collection: {
+ 'rename-file-path:dir1/dir1file1' => 'dir2/dir3/dir1file1moved'
+ },
+ format: :json
+ }, session_for(:active)
+ assert_response :success
+
+ collection = Collection.select([:uuid, :manifest_text]).where(uuid: collection['uuid']).first
+ assert_match /. d41d8cd98f00b204e9800998ecf8427e\+0\+A(.*) 0:0:file1renamed\n.\/dir1 d41d8cd98f00b204e9800998ecf8427e\+0\+A(.*) 0:0:dir1file2 0:0:file2\n.\/dir2\/dir3 d41d8cd98f00b204e9800998ecf8427e\+0\+A(.*) 0:0:dir1file1moved\n$/, collection['manifest_text']
+ end
+
+ test "renaming file with a duplicate name in same stream not allowed" do
+ use_token :active
+
+ # rename 'file2' as 'file1' and expect error
+ post :update, {
+ id: 'zzzzz-4zz18-pyw8yp9g3pr7irn',
+ collection: {
+ 'rename-file-path:file2' => 'file1'
+ },
+ format: :json
+ }, session_for(:active)
+ assert_response 422
+ assert_includes json_response['errors'], 'Duplicate file path'
+ end
+
+ test "renaming file with a duplicate name as another stream not allowed" do
+ use_token :active
+
+ # rename 'file1' as 'dir1/file1' and expect error
+ post :update, {
+ id: 'zzzzz-4zz18-pyw8yp9g3pr7irn',
+ collection: {
+ 'rename-file-path:file1' => 'dir1/file1'
+ },
+ format: :json
+ }, session_for(:active)
+ assert_response 422
+ assert_includes json_response['errors'], 'Duplicate file path'
+ end
end
# Make sure we're not still on the old collection page.
refute_match(%r{/collections/#{col['uuid']}}, page.current_url)
end
+
+ test "remove a file from collection using checkbox and dropdown option" do
+ visit page_with_token('active', '/collections/zzzzz-4zz18-a21ux3541sxa8sf')
+ assert(page.has_text?('file1'), 'file not found - file1')
+
+ # remove first file
+ input_files = page.all('input[type=checkbox]')
+ input_files[0].click
+
+ click_button 'Selection...'
+ within('.selection-action-container') do
+ click_link 'Remove selected files'
+ end
+
+ assert(page.has_no_text?('file1'), 'file found - file1')
+ assert(page.has_text?('file2'), 'file not found - file2')
+ end
+
+ test "remove a file in collection using trash icon" do
+ need_selenium 'to confirm remove'
+
+ visit page_with_token('active', '/collections/zzzzz-4zz18-a21ux3541sxa8sf')
+ assert(page.has_text?('file1'), 'file not found - file1')
+
+ first('.fa-trash-o').click
+ page.driver.browser.switch_to.alert.accept
+
+ assert(page.has_no_text?('file1'), 'file found - file1')
+ assert(page.has_text?('file2'), 'file not found - file2')
+ end
+
+ test "rename a file in collection" do
+ visit page_with_token('active', '/collections/zzzzz-4zz18-a21ux3541sxa8sf')
+
+ within('.collection_files') do
+ first('.fa-pencil').click
+ find('.editable-input input').set('file1renamed')
+ find('.editable-submit').click
+ end
+
+ assert(page.has_text?('file1renamed'), 'file not found - file1renamed')
+ end
+
+ test "remove/rename file options not presented if user cannot update a collection" do
+ # visit a publicly accessible collection as 'spectator'
+ visit page_with_token('spectator', '/collections/zzzzz-4zz18-uukreo9rbgwsujr')
+
+ click_button 'Selection'
+ within('.selection-action-container') do
+ assert_selector 'li', text: 'Create new collection with selected files'
+ assert_no_selector 'li', text: 'Remove selected files'
+ end
+
+ within('.collection_files') do
+ assert(page.has_text?('GNU_General_Public_License'), 'file not found - GNU_General_Public_License')
+ assert_nil first('.fa-pencil')
+ assert_nil first('.fa-trash-o')
+ end
+ end
end
debian8,ubuntu1204,ubuntu1404,centos7|llfuse|0.41.1|3|python|amd64
debian8,ubuntu1204,ubuntu1404,ubuntu1604,centos7|pycurl|7.19.5.3|3|python|amd64
debian8,ubuntu1204,ubuntu1404,ubuntu1604,centos7|pyyaml|3.12|2|python|amd64
-debian8,ubuntu1204,ubuntu1404,ubuntu1604,centos7|rdflib|4.2.1|2|python|all
+debian8,ubuntu1204,ubuntu1404,ubuntu1604,centos7|rdflib|4.2.2|2|python|all
debian8,ubuntu1204,ubuntu1404,centos7|shellescape|3.4.1|2|python|all
debian8,ubuntu1204,ubuntu1404,ubuntu1604,centos7|mistune|0.7.3|2|python|all
debian8,ubuntu1204,ubuntu1404,ubuntu1604,centos7|typing|3.5.3.0|2|python|all
centos7|daemon|2.1.1|2|python|all
centos7|pbr|0.11.1|2|python|all
centos7|pyparsing|2.1.10|2|python|all
-centos7|sparqlwrapper|1.8.0|2|python|all
-centos7|html5lib|0.9999999|2|python|all
centos7|keepalive|0.5|2|python|all
debian8,ubuntu1204,ubuntu1404,ubuntu1604,centos7|lockfile|0.12.2|2|python|all|--epoch 1
all|ruamel.yaml|0.13.7|2|python|amd64|--python-setup-py-arguments --single-version-externally-managed
# Install RVM
RUN apt-get update && \
- DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends curl ca-certificates && \
+ DEBIAN_FRONTEND=noninteractive apt-get -y install --no-install-recommends curl ca-certificates g++ && \
gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
curl -L https://get.rvm.io | bash -s stable && \
/usr/local/rvm/bin/rvm install 2.3 && \
- install/install-postgresql.html.textile.liquid
- install/install-sso.html.textile.liquid
- install/install-api-server.html.textile.liquid
+ - install/install-ws.html.textile.liquid
- install/install-arv-git-httpd.html.textile.liquid
- install/install-workbench-app.html.textile.liquid
- install/install-shell-server.html.textile.liquid
|cwd|string|Initial working directory, given as an absolute path (in the container) or a path relative to the WORKDIR given in the image's Dockerfile.|Required.|
|command|array of strings|Command to execute in the container.|Required. e.g., @["echo","hello"]@|
|output_path|string|Path to a directory or file inside the container that should be preserved as the container's output when it finishes. This path must be, or be inside, one of the mount targets. For best performance, point output_path to a writable collection mount. Also, see "Pre-populate output using Mount points":#pre-populate-output for details regarding optional output pre-population using mount points.|Required.|
+|output_name|string|Desired name for the output collection. If null, a name will be assigned automatically.||
+|output_ttl|integer|Desired lifetime for the output collection, in seconds. If zero, the output collection will not be deleted automatically.||
|priority|integer|Higher value means spend more resources on this container_request, i.e., go ahead of other queued containers, bring up more nodes etc.|Priority 0 means a container should not be run on behalf of this request. Clients are expected to submit container requests with zero priority in order to preview the container that will be used to satisfy it. Priority can be null if and only if state!="Committed".|
|expires_at|datetime|After this time, priority is considered to be zero.|Not yet implemented.|
|use_existing|boolean|If possible, use an existing (non-failed) container to satisfy the request instead of creating a new one.|Default is true|
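
+For example, a client might commit a container request that names its output collection and gives it a one-day lifetime. This is an abbreviated, illustrative request body (a real request also needs attributes such as @mounts@ and @runtime_constraints@):
+
+<notextile>
+<pre><code>{
+  "name": "hello world request",
+  "command": ["echo", "hello"],
+  "container_image": "arvados/jobs",
+  "cwd": "/tmp",
+  "output_path": "/out",
+  "output_name": "hello world output",
+  "output_ttl": 86400,
+  "priority": 1,
+  "state": "Committed"
+}
+</code></pre>
+</notextile>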
{% include 'notebox_end' %}
+h3. CrunchRunCommand: Using host networking for containers
+
+Older Linux kernels (prior to 3.18) have bugs in network namespace handling which can lead to compute node lockups. This is indicated by blocked kernel tasks in "Workqueue: netns cleanup_net". If you are experiencing this problem, as a workaround you can disable Docker's use of network namespaces across the cluster. Be aware that this reduces container isolation, which may be a security risk.
+
+<notextile>
+<pre><code class="userinput">Client:
+ APIHost: <b>zzzzz.arvadosapi.com</b>
+ AuthToken: <b>zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz</b>
+CrunchRunCommand:
+- <b>crunch-run</b>
+- <b>"-container-enable-networking=always"</b>
+- <b>"-container-network-mode=host"</b>
+</code></pre>
+</notextile>
+
h3. MinRetryPeriod: Rate-limit repeated attempts to start containers
If SLURM is unable to run a container, the dispatcher will submit it again after the next PollPeriod. If PollPeriod is very short, this can be excessive. If MinRetryPeriod is set, the dispatcher will avoid submitting the same container to SLURM more than once in the given time span.
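
+For example, to wait at least 30 seconds (an illustrative value) before resubmitting a given container:
+
+<notextile>
+<pre><code class="userinput">MinRetryPeriod: <b>30s</b>
+</code></pre>
+</notextile>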
</code></pre>
</notextile>
-h2(#set_up). Set up Web servers
+h2(#set_up). Set up Nginx and Passenger
-For best performance, we recommend you use Nginx as your Web server front-end, with a Passenger backend for the main API server and a Puma backend for API server Websockets. To do that:
+The Nginx server will serve API requests using Passenger. It will also be used to proxy SSL requests to other services which are covered later in this guide.
-<notextile>
-<ol>
-<li><a href="https://www.phusionpassenger.com/library/walkthroughs/deploy/ruby/ownserver/nginx/oss/install_passenger_main.html">Install Nginx and Phusion Passenger</a>.</li>
-
-<li><p>Install runit to supervise the Puma daemon. {% include 'install_runit' %}<notextile></p></li>
-
-<li><p>Install the script below as the run script for the Puma service, modifying it as directed by the comments.</p>
-
-<pre><code>#!/bin/bash
-
-set -e
-exec 2>&1
-
-# Uncomment the line below if you're using RVM.
-#source /etc/profile.d/rvm.sh
-
-envdir="`pwd`/env"
-mkdir -p "$envdir"
-echo ws-only > "$envdir/ARVADOS_WEBSOCKETS"
-
-cd /var/www/arvados-api/current
-echo "Starting puma in `pwd`"
-
-# Change arguments below to match your deployment, "webserver-user" and
-# "webserver-group" should be changed to the user and group of the web server
-# process. This is typically "www-data:www-data" on Debian systems by default,
-# other systems may use different defaults such the name of the web server
-# software (for example, "nginx:nginx").
-exec chpst -m 1073741824 -u webserver-user:webserver-group -e "$envdir" \
- bundle exec puma -t 0:512 -e production -b tcp://127.0.0.1:8100
-</code></pre>
-</li>
+First, "Install Nginx and Phusion Passenger":https://www.phusionpassenger.com/library/walkthroughs/deploy/ruby/ownserver/nginx/oss/install_passenger_main.html.
-<li><p>Edit the http section of your Nginx configuration to run the Passenger server, and act as a front-end for both it and Puma. You might add a block like the following, adding SSL and logging parameters to taste:</p>
+Edit the http section of your Nginx configuration to run the Passenger server, and serve SSL requests. Add a block like the following, adding SSL and logging parameters to taste:
+<notextile>
<pre><code>server {
listen 127.0.0.1:8000;
server_name localhost-api;
server 127.0.0.1:8000 fail_timeout=10s;
}
-upstream websockets {
- # The address below must match the one specified in puma's -b option.
- server 127.0.0.1:8100 fail_timeout=10s;
-}
-
proxy_http_version 1.1;
# When Keep clients request a list of Keep services from the API server, the
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
}
}
-
-server {
- listen <span class="userinput">[your public IP address]</span>:443 ssl;
- server_name ws.<span class="userinput">uuid_prefix.your.domain</span>;
-
- ssl on;
- ssl_certificate <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
- ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
-
- index index.html index.htm index.php;
-
- location / {
- proxy_pass http://websockets;
- proxy_redirect off;
- proxy_connect_timeout 90s;
- proxy_read_timeout 300s;
-
- proxy_set_header Upgrade $http_upgrade;
- proxy_set_header Connection "upgrade";
- proxy_set_header Host $host;
- proxy_set_header X-Real-IP $remote_addr;
- proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
- }
-}
</code></pre>
-</li>
+</notextile>
-<li><p>Restart Nginx:</p>
+Restart Nginx to apply the new configuration.
+<notextile>
<pre><code>~$ <span class="userinput">sudo nginx -s reload</span>
</code></pre>
-
-</li>
-
-</ol>
</notextile>
h2. Prepare the API server deployment
{% include 'notebox_begin' %}
You can safely ignore the following messages if they appear while this command runs:
-<pre>Don't run Bundler as root. Bundler can ask for sudo if it is needed, and installing your bundle as root will
-break this application for all non-root users on this machine.</pre>
-<pre>fatal: Not a git repository (or any of the parent directories): .git</pre>
+
+<notextile><pre>Don't run Bundler as root. Bundler can ask for sudo if it is needed, and installing your bundle as root will
+break this application for all non-root users on this machine.</pre></notextile>
+
+<notextile><pre>fatal: Not a git repository (or any of the parent directories): .git</pre></notextile>
{% include 'notebox_end' %}
title: Install the websocket server
...
-{% include 'notebox_begin_warning' %}
-
-This websocket server is an alternative to the puma server that comes with the API server. It is available as an *experimental pre-release* and is not recommended for production sites.
-
-{% include 'notebox_end' %}
-
-The arvados-ws server provides event notifications to websocket clients. It can be installed anywhere with access to Postgres database and the Arvados API server, typically behind a web proxy that provides SSL support. See the "godoc page":http://godoc.org/github.com/curoverse/arvados/services/keep-web for additional information.
+The arvados-ws server provides event notifications to websocket clients. It can be installed anywhere with access to the PostgreSQL database and the Arvados API server, typically behind a web proxy that provides SSL support. See the "godoc page":http://godoc.org/github.com/curoverse/arvados/services/ws for additional information.
By convention, we use the following hostname for the websocket service.
}
</pre></notextile>
-If Nginx is already configured to proxy @ws@ requests to puma, move that configuration out of the way or change its @server_name@ so it doesn't conflict.
+{% include 'notebox_begin' %}
+If you are upgrading a cluster where Nginx is configured to proxy @ws@ requests to puma, change the @server_name@ value in the old configuration block so it doesn't conflict. When the new configuration is working, delete the old Nginx configuration sections (i.e., the "upstream websockets" block, and the "server" block that references @http://websockets@), and disable/remove the runit or systemd files for the puma server.
+{% include 'notebox_end' %}
h3. Update API server configuration
[ -d $CGROUP ] || mkdir $CGROUP
if mountpoint -q $CGROUP ; then
- break
+ true
else
mount -n -t tmpfs -o uid=0,gid=0,mode=0755 cgroup $CGROUP
fi
# Systemd and OpenRC (and possibly others) both create such a
# cgroup. To avoid the aforementioned bug, we symlink "foo" to
# "name=foo". This shouldn't have any adverse effect.
- echo $SUBSYS | grep -q ^name= && {
- NAME=$(echo $SUBSYS | sed s/^name=//)
- ln -s $SUBSYS $CGROUP/$NAME
- }
+ #echo $SUBSYS | grep -q ^name= && {
+ # NAME=$(echo $SUBSYS | sed s/^name=//)
+ # ln -s $SUBSYS $CGROUP/$NAME
+ #}
# Likewise, on at least one system, it has been reported that
# systemd would mount the CPU and CPU accounting controllers
read pid cmd state ppid pgrp session tty_nr tpgid rest < /proc/self/stat
-if ! docker daemon --storage-driver=overlay $DOCKER_DAEMON_ARGS ; then
- docker daemon $DOCKER_DAEMON_ARGS
-fi
+exec docker daemon --storage-driver=vfs $DOCKER_DAEMON_ARGS
set -e
+function start_docker {
+ /root/dnd.sh &
+ for i in $(seq 1 10) ; do
+ if docker version >/dev/null 2>/dev/null ; then
+ return
+ fi
+ sleep 1
+ done
+ false
+}
+
+function kill_docker {
+ if test -f /var/run/docker.pid ; then
+ kill $(cat /var/run/docker.pid)
+ fi
+ for i in $(seq 1 10) ; do
+ if ! docker version >/dev/null 2>/dev/null ; then
+ return
+ fi
+ sleep 1
+ done
+ false
+}
+
function cleanup {
- kill $(cat /var/run/docker.pid)
- sleep 1
+ kill_docker
rm -rf /var/lib/docker/*
+ rm -rf /root/.cache/arvados/docker/*
}
trap cleanup EXIT
-/root/dnd.sh &
-sleep 2
+start_docker
image_tar_keepref=$1
image_id=$2
docker images -a
-kill $(cat /var/run/docker.pid)
-sleep 1
+kill_docker
cd /root/pkgs
-dpkg -i libltdl7_2.4.2-1.11+b1_amd64.deb docker-engine_1.13.1-0~debian-jessie_amd64.deb
+dpkg -i libltdl7_2.4.2-1.11+b1_amd64.deb docker-engine_1.13.1-0~debian-jessie_amd64.deb
-/root/dnd.sh &
-sleep 2
+start_docker
docker images -a
# satisfied by the buggy 0.9.pre*. https://dev.arvados.org/issues/9213
s.add_runtime_dependency 'google-api-client', '~> 0.6', '>= 0.6.3', '<0.8.9'
s.add_runtime_dependency 'activesupport', '>= 3.2.13', '< 5'
- s.add_runtime_dependency 'json', '~> 1.7', '>= 1.7.7'
+ s.add_runtime_dependency 'json', '>= 1.7.7', '<3'
s.add_runtime_dependency 'trollop', '~> 2.0'
s.add_runtime_dependency 'andand', '~> 1.3', '>= 1.3.3'
s.add_runtime_dependency 'oj', '~> 2.0', '>= 2.0.3'
import hashlib
import copy
import json
+import re
from functools import partial
import pkg_resources # part of setuptools
def add_arv_hints():
cache = {}
+ cwltool.draft2tool.ACCEPTLIST_RE = re.compile(r".*")
+ cwltool.draft2tool.ACCEPTLIST_EN_RELAXED_RE = re.compile(r".*")
res = pkg_resources.resource_stream(__name__, 'arv-cwl-schema.yml')
cache["http://arvados.org/cwl"] = res.read()
res.close()
def check_exists(self, url):
try:
+ if url.startswith("http://arvados.org/cwl"):
+ return True
if url.startswith("keep:"):
return self.fsaccess.exists(url)
if url.startswith("arvwf:"):
from functools import partial
import logging
import json
-import re
import subprocess
from StringIO import StringIO
logger = logging.getLogger('arvados.cwl-runner')
-cwltool.draft2tool.ACCEPTLIST_RE = re.compile(r".*")
-
def trim_listing(obj):
"""Remove 'listing' field from Directory objects that are keep references.
# Note that arvados/build/run-build-packages.sh looks at this
# file to determine what version of cwltool and schema-salad to build.
install_requires=[
- 'cwltool==1.0.20170213175853',
- 'schema-salad==2.2.20170208112505',
+ 'cwltool==1.0.20170224141733',
+ 'schema-salad==2.2.20170222151604',
+ 'typing==3.5.3.0',
'ruamel.yaml==0.13.7',
'arvados-python-client>=0.1.20170112173420',
'setuptools'
mock.call(body=JsonDiffMatcher({
'manifest_text':
'. 5bcc9fe8f8d5992e6cf418dc7ce4dbb3+16 0:16:blub.txt\n',
- 'owner_uuid': None,
+ 'replication_desired': None,
'name': 'submit_tool.cwl dependencies',
}), ensure_unique_name=True),
- mock.call().execute(),
+ mock.call().execute(num_retries=4),
mock.call(body=JsonDiffMatcher({
'manifest_text':
'. 979af1245a12a1fed634d4222473bfdc+16 0:16:blorp.txt\n',
- 'owner_uuid': None,
+ 'replication_desired': None,
'name': 'submit_wf.cwl input',
}), ensure_unique_name=True),
- mock.call().execute()])
+ mock.call().execute(num_retries=4)])
arvdock.assert_has_calls([
mock.call(stubs.api, {"class": "DockerRequirement", "dockerPull": "debian:8"}, True, None),
mock.call(body=JsonDiffMatcher({
'manifest_text':
'. 5bcc9fe8f8d5992e6cf418dc7ce4dbb3+16 0:16:blub.txt\n',
- 'owner_uuid': None,
+ 'replication_desired': None,
'name': 'submit_tool.cwl dependencies',
}), ensure_unique_name=True),
- mock.call().execute(),
+ mock.call().execute(num_retries=4),
mock.call(body=JsonDiffMatcher({
'manifest_text':
'. 979af1245a12a1fed634d4222473bfdc+16 0:16:blorp.txt\n',
- 'owner_uuid': None,
+ 'replication_desired': None,
'name': 'submit_wf.cwl input',
}), ensure_unique_name=True),
- mock.call().execute()])
+ mock.call().execute(num_retries=4)])
expect_container = copy.deepcopy(stubs.expect_container_spec)
stubs.api.container_requests().create.assert_called_with(
"git.curoverse.com/arvados.git/sdk/go/streamer"
"io"
"io/ioutil"
+ "log"
"math/rand"
"net"
"net/http"
+ "os"
+ "regexp"
"strings"
"time"
)
// log.Printf to DebugPrintf.
var DebugPrintf = func(string, ...interface{}) {}
+func init() {
+ var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")
+ if matchTrue.MatchString(os.Getenv("ARVADOS_DEBUG")) {
+ DebugPrintf = log.Printf
+ }
+}
+
type keepService struct {
Uuid string `json:"uuid"`
Hostname string `json:"service_host"`
import config
import errors
import util
+import cache
_logger = logging.getLogger('arvados.api')
try:
util.mkdir_dash_p(path)
except OSError:
- path = None
- return path
+ return None
+ return cache.SafeHTTPCache(path, max_age=60*60*24*2)
def api(version=None, cache=True, host=None, token=None, insecure=False, **kwargs):
"""Return an apiclient Resources object for an Arvados instance.
--- /dev/null
+import errno
+import md5
+import os
+import tempfile
+import time
+
+class SafeHTTPCache(object):
+ """Thread-safe replacement for httplib2.FileCache"""
+
+ def __init__(self, path=None, max_age=None):
+ self._dir = path
+ if max_age is not None:
+ try:
+ self._clean(threshold=time.time() - max_age)
+ except:
+ pass
+
+ def _clean(self, threshold=0):
+ for ent in os.listdir(self._dir):
+ fnm = os.path.join(self._dir, ent)
+ if os.path.isdir(fnm) or not fnm.endswith('.tmp'):
+ continue
+ stat = os.lstat(fnm)
+ if stat.st_mtime < threshold:
+ try:
+ os.unlink(fnm)
+ except OSError as err:
+ if err.errno != errno.ENOENT:
+ raise
+
+ def __str__(self):
+ return self._dir
+
+ def _filename(self, url):
+ return os.path.join(self._dir, md5.new(url).hexdigest()+'.tmp')
+
+ def get(self, url):
+ filename = self._filename(url)
+ try:
+ with open(filename, 'rb') as f:
+ return f.read()
+ except (IOError, OSError):
+ return None
+
+ def set(self, url, content):
+ try:
+ fd, tempname = tempfile.mkstemp(dir=self._dir)
+ except:
+ return None
+ try:
+ try:
+ f = os.fdopen(fd, 'w')
+ except:
+ os.close(fd)
+ raise
+ try:
+ f.write(content)
+ finally:
+ f.close()
+ os.rename(tempname, self._filename(url))
+ tempname = None
+ finally:
+ if tempname:
+ os.unlink(tempname)
+
+ def delete(self, url):
+ try:
+ os.unlink(self._filename(url))
+ except OSError as err:
+ if err.errno != errno.ENOENT:
+ raise
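+
+# Usage sketch (illustrative, not part of the original module): SafeHTTPCache
+# implements the get/set/delete interface that httplib2 expects of a cache
+# object, so it can stand in for httplib2.FileCache. The cache directory
+# below is hypothetical.
+#
+#   import httplib2
+#   cache = SafeHTTPCache('/tmp/http-cache', max_age=60*60*24*2)
+#   http = httplib2.Http(cache=cache)
+#   response, content = http.request('https://example.com/')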
'--print-unmigrated', action='store_true',
default=False, help="Print list of images needing migration.")
+ migrate19_parser.add_argument('--tempdir', help="Set temporary directory")
+
migrate19_parser.add_argument('infile', nargs='?', type=argparse.FileType('r'),
default=None, help="List of images to be migrated")
args = migrate19_parser.parse_args(arguments)
+ if args.tempdir:
+ tempfile.tempdir = args.tempdir
+
only_migrate = None
if args.infile:
only_migrate = set()
items = arvados.util.list_all(api_client.collections().list,
filters=[["uuid", "in", [img["collection"] for img in old_images]]],
- select=["uuid", "portable_data_hash"])
- uuid_to_pdh = {i["uuid"]: i["portable_data_hash"] for i in items}
+ select=["uuid", "portable_data_hash", "manifest_text", "owner_uuid"])
+ uuid_to_collection = {i["uuid"]: i for i in items}
need_migrate = {}
+ biggest = 0
for img in old_images:
- pdh = uuid_to_pdh[img["collection"]]
+ i = uuid_to_collection[img["collection"]]
+ pdh = i["portable_data_hash"]
if pdh not in already_migrated and (only_migrate is None or pdh in only_migrate):
need_migrate[pdh] = img
+ with CollectionReader(i["manifest_text"]) as c:
+ if c.values()[0].size() > biggest:
+ biggest = c.values()[0].size()
if args.print_unmigrated:
only_migrate = set()
logger.info("Already migrated %i images", len(already_migrated))
logger.info("Need to migrate %i images", len(need_migrate))
+ logger.info("Using tempdir %s", tempfile.gettempdir())
+ logger.info("Biggest image is about %i MiB, tempdir needs at least %i MiB free", biggest/(2**20), (biggest*2)/(2**20))
if args.dry_run:
return
failures = []
count = 1
for old_image in need_migrate.values():
- if uuid_to_pdh[old_image["collection"]] in already_migrated:
+ if uuid_to_collection[old_image["collection"]]["portable_data_hash"] in already_migrated:
continue
- logger.info("[%i/%i] Migrating %s:%s (%s)", count, len(need_migrate), old_image["repo"], old_image["tag"], old_image["collection"])
+ oldcol = CollectionReader(uuid_to_collection[old_image["collection"]]["manifest_text"])
+ tarfile = oldcol.keys()[0]
+
+ logger.info("[%i/%i] Migrating %s:%s (%s) (%i MiB)", count, len(need_migrate), old_image["repo"],
+ old_image["tag"], old_image["collection"], oldcol.values()[0].size()/(2**20))
count += 1
start = time.time()
- oldcol = CollectionReader(old_image["collection"])
- tarfile = oldcol.keys()[0]
-
varlibdocker = tempfile.mkdtemp()
+ dockercache = tempfile.mkdtemp()
try:
with tempfile.NamedTemporaryFile() as envfile:
envfile.write("ARVADOS_API_HOST=%s\n" % (os.environ["ARVADOS_API_HOST"]))
"--rm",
"--env-file", envfile.name,
"--volume", "%s:/var/lib/docker" % varlibdocker,
+ "--volume", "%s:/root/.cache/arvados/docker" % dockercache,
"arvados/migrate-docker19",
"/root/migrate.sh",
"%s/%s" % (old_image["collection"], tarfile),
tarfile[0:40],
old_image["repo"],
old_image["tag"],
- oldcol.api_response()["owner_uuid"]]
+ uuid_to_collection[old_image["collection"]]["owner_uuid"]]
proc = subprocess.Popen(dockercmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
out, err = proc.communicate()
failures.append(old_image["collection"])
finally:
shutil.rmtree(varlibdocker)
+ shutil.rmtree(dockercache)
logger.info("Successfully migrated %i images", len(success))
if failures:
import subprocess
import logging
import sys
+import errno
import arvados.commands._util as arv_cmd
+import arvados.collection
from arvados._version import __version__
# original parameter string).
def statfile(prefix, fn, fnPattern="$(file %s/%s)", dirPattern="$(dir %s/%s/)"):
absfn = os.path.abspath(fn)
- if os.path.exists(absfn):
+ try:
st = os.stat(absfn)
- if stat.S_ISREG(st.st_mode):
- sp = os.path.split(absfn)
- (pdh, branch) = is_in_collection(sp[0], sp[1])
- if pdh:
+ sp = os.path.split(absfn)
+ (pdh, branch) = is_in_collection(sp[0], sp[1])
+ if pdh:
+ if stat.S_ISREG(st.st_mode):
return ArvFile(prefix, fnPattern % (pdh, branch))
- else:
- # trim leading '/' for path prefix test later
- return UploadFile(prefix, absfn[1:])
- if stat.S_ISDIR(st.st_mode):
- sp = os.path.split(absfn)
- (pdh, branch) = is_in_collection(sp[0], sp[1])
- if pdh:
+ elif stat.S_ISDIR(st.st_mode):
return ArvFile(prefix, dirPattern % (pdh, branch))
+ else:
+ raise Exception("%s is not a regular file or directory" % absfn)
+ else:
+ # trim leading '/' for path prefix test later
+ return UploadFile(prefix, absfn[1:])
+ except OSError as e:
+ if e.errno == errno.ENOENT:
+ pass
+ else:
+ raise
return prefix+fn
-def uploadfiles(files, api, dry_run=False, num_retries=0, project=None, fnPattern="$(file %s/%s)", name=None):
+def write_file(collection, pathprefix, fn):
+ with open(os.path.join(pathprefix, fn)) as src:
+ dst = collection.open(fn, "w")
+ r = src.read(1024*128)
+ while r:
+ dst.write(r)
+ r = src.read(1024*128)
+ dst.close(flush=False)
+
+def uploadfiles(files, api, dry_run=False, num_retries=0,
+ project=None,
+ fnPattern="$(file %s/%s)",
+ name=None):
# Find the smallest path prefix that includes all the files that need to be uploaded.
# This starts at the root and iteratively removes common parent directory prefixes
# until all file paths no longer have a common parent.
for c in files:
c.fn = c.fn[len(pathstep):]
- orgdir = os.getcwd()
- os.chdir(pathprefix)
-
logger.info("Upload local files: \"%s\"", '" "'.join([c.fn for c in files]))
if dry_run:
pdh = "$(input)"
else:
files = sorted(files, key=lambda x: x.fn)
- collection = arvados.CollectionWriter(api, num_retries=num_retries)
- stream = None
+ collection = arvados.collection.Collection(api_client=api, num_retries=num_retries)
+ prev = ""
for f in files:
- sp = os.path.split(f.fn)
- if sp[0] != stream:
- stream = sp[0]
- collection.start_new_stream(stream)
- collection.write_file(f.fn, sp[1])
+ localpath = os.path.join(pathprefix, f.fn)
+ if prev and localpath.startswith(prev+"/"):
+ # If this path is inside an already uploaded subdirectory,
+ # don't redundantly re-upload it.
+ # e.g. we uploaded /tmp/foo and the next file is /tmp/foo/bar
+ # skip it because it starts with "/tmp/foo/"
+ continue
+ prev = localpath
+ if os.path.isfile(localpath):
+ write_file(collection, pathprefix, f.fn)
+ elif os.path.isdir(localpath):
+ for root, dirs, iterfiles in os.walk(localpath):
+ root = root[len(pathprefix):]
+ for src in iterfiles:
+ write_file(collection, pathprefix, os.path.join(root, src))
filters=[["portable_data_hash", "=", collection.portable_data_hash()],
["name", "like", name+"%"]]
if project:
filters.append(["owner_uuid", "=", project])
- exists = api.collections().list(filters=filters).execute(num_retries=num_retries)
+ exists = api.collections().list(filters=filters, limit=1).execute(num_retries=num_retries)
if exists["items"]:
item = exists["items"][0]
- logger.info("Using collection %s", item["uuid"])
+ pdh = item["portable_data_hash"]
+ logger.info("Using collection %s (%s)", pdh, item["uuid"])
else:
- body = {"owner_uuid": project, "manifest_text": collection.manifest_text()}
- if name is not None:
- body["name"] = name
- item = api.collections().create(body=body, ensure_unique_name=True).execute()
- logger.info("Uploaded to %s", item["uuid"])
-
- pdh = item["portable_data_hash"]
+ collection.save_new(name=name, owner_uuid=project, ensure_unique_name=True)
+ pdh = collection.portable_data_hash()
+ logger.info("Uploaded to %s (%s)", pdh, collection.manifest_locator())
for c in files:
c.keepref = "%s/%s" % (pdh, c.fn)
c.fn = fnPattern % (pdh, c.fn)
- os.chdir(orgdir)
-
def main(arguments=None):
args = arvrun_parser.parse_args(arguments)
'pycurl >=7.19.5.1, <7.21.5',
'python-gflags<3.0',
'setuptools',
- 'ws4py',
+ 'ws4py<0.4',
'ruamel.yaml==0.13.7'
],
test_suite='tests',
# This will clear cached docs that belong to other processes (like
# concurrent test suites) even if they're still running. They should
# be able to tolerate that.
- for fn in glob.glob(os.path.join(arvados.http_cache('discovery'),
- '*,arvados,v1,rest,*')):
+ for fn in glob.glob(os.path.join(
+ str(arvados.http_cache('discovery')),
+ '*,arvados,v1,rest,*')):
os.unlink(fn)
pid_file = _pidfile('api')
--- /dev/null
+from __future__ import print_function
+
+import md5
+import mock
+import os
+import random
+import shutil
+import sys
+import tempfile
+import threading
+import unittest
+
+import arvados.cache
+import arvados
+import run_test_server
+
+
+def _random(n):
+ return bytearray(random.getrandbits(8) for _ in xrange(n))
+
+
+class CacheTestThread(threading.Thread):
+ def __init__(self, dir):
+ super(CacheTestThread, self).__init__()
+ self._dir = dir
+
+ def run(self):
+ c = arvados.cache.SafeHTTPCache(self._dir)
+ url = 'http://example.com/foo'
+ self.ok = True
+ for x in range(16):
+ try:
+ data_in = _random(128)
+ data_in = md5.new(data_in).hexdigest() + "\n" + str(data_in)
+ c.set(url, data_in)
+ data_out = c.get(url)
+ digest, content = data_out.split("\n", 1)
+ if digest != md5.new(content).hexdigest():
+ self.ok = False
+ except Exception as err:
+ self.ok = False
+ print("cache failed: {}".format(err), file=sys.stderr)
+
+
+class CacheTest(unittest.TestCase):
+ def setUp(self):
+ self._dir = tempfile.mkdtemp()
+
+ def tearDown(self):
+ shutil.rmtree(self._dir)
+
+ def test_cache_create_error(self):
+ _, filename = tempfile.mkstemp()
+ home_was = os.environ['HOME']
+ os.environ['HOME'] = filename
+ try:
+ c = arvados.http_cache('test')
+ self.assertEqual(None, c)
+ finally:
+ os.environ['HOME'] = home_was
+ os.unlink(filename)
+
+ def test_cache_crud(self):
+ c = arvados.cache.SafeHTTPCache(self._dir, max_age=0)
+ url = 'https://example.com/foo?bar=baz'
+ data1 = _random(256)
+ data2 = _random(128)
+ self.assertEqual(None, c.get(url))
+ c.delete(url)
+ c.set(url, data1)
+ self.assertEqual(data1, c.get(url))
+ c.delete(url)
+ self.assertEqual(None, c.get(url))
+ c.set(url, data1)
+ c.set(url, data2)
+ self.assertEqual(data2, c.get(url))
+
+ def test_cache_threads(self):
+ threads = []
+ for _ in range(64):
+ t = CacheTestThread(dir=self._dir)
+ t.start()
+ threads.append(t)
+ for t in threads:
+ t.join()
+ self.assertTrue(t.ok)
+
+
+class CacheIntegrationTest(run_test_server.TestCaseWithServers):
+ MAIN_SERVER = {}
+
+ def test_cache_used_by_default_client(self):
+ with mock.patch('arvados.cache.SafeHTTPCache.get') as getter:
+ arvados.api('v1')._rootDesc.get('foobar')
+ getter.assert_called()
s.add_dependency('google-api-client', '>= 0.7', '< 0.8.9')
# work around undeclared dependency on i18n in some activesupport 3.x.x:
s.add_dependency('i18n', '~> 0')
- s.add_dependency('json', '~> 1.7', '>= 1.7.7')
+ s.add_dependency('json', '>= 1.7.7', '<3')
s.add_runtime_dependency('jwt', '<2', '>= 0.1.5')
s.homepage =
'https://arvados.org'
end
end
end
+
+# Work around Rails3+PostgreSQL9.5 incompatibility (pg_dump used to
+# accept -i as a no-op, but now it's not accepted at all).
+module Kernel
+ alias_method :orig_backtick, :`
+ def `(*args) #`#` sorry, parsers
+ args[0].sub!(/\Apg_dump -i /, 'pg_dump ') rescue nil
+ orig_backtick(*args)
+ end
+end
attr_writer :resource_attrs
- MAX_UNIQUE_NAME_ATTEMPTS = 10
-
begin
rescue_from(Exception,
ArvadosModel::PermissionDeniedError,
def create
@object = model_class.new resource_attrs
- if @object.respond_to? :name and params[:ensure_unique_name]
- # Record the original name. See below.
- name_stem = @object.name
- retries = MAX_UNIQUE_NAME_ATTEMPTS
+ if @object.respond_to?(:name) && params[:ensure_unique_name]
+ @object.save_with_unique_name!
else
- retries = 0
- end
-
- begin
@object.save!
- rescue ActiveRecord::RecordNotUnique => rn
- raise unless retries > 0
- retries -= 1
-
- # Dig into the error to determine if it is specifically calling out a
- # (owner_uuid, name) uniqueness violation. In this specific case, and
- # the client requested a unique name with ensure_unique_name==true,
- # update the name field and try to save again. Loop as necessary to
- # discover a unique name. It is necessary to handle name choosing at
- # this level (as opposed to the client) to ensure that record creation
- # never fails due to a race condition.
- raise unless rn.original_exception.is_a? PG::UniqueViolation
-
- # Unfortunately ActiveRecord doesn't abstract out any of the
- # necessary information to figure out if this the error is actually
- # the specific case where we want to apply the ensure_unique_name
- # behavior, so the following code is specialized to Postgres.
- err = rn.original_exception
- detail = err.result.error_field(PG::Result::PG_DIAG_MESSAGE_DETAIL)
- raise unless /^Key \(owner_uuid, name\)=\([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}, .*?\) already exists\./.match detail
-
- @object.uuid = nil
-
- new_name = "#{name_stem} (#{db_current_time.utc.iso8601(3)})"
- if new_name == @object.name
- # If the database is fast enough to do two attempts in the
- # same millisecond, we need to wait to ensure we try a
- # different timestamp on each attempt.
- sleep 0.002
- new_name = "#{name_stem} (#{db_current_time.utc.iso8601(3)})"
- end
- @object.name = new_name
- retry
end
+
show
end
@objects = model_class.where('last_ping_at >= ?', db_current_time - 1.hours)
end
super
- job_uuids = @objects.map { |n| n[:job_uuid] }.compact
- assoc_jobs = readable_job_uuids(job_uuids)
- @objects.each do |node|
- node.job_readable = assoc_jobs.include?(node[:job_uuid])
+ if @select.nil? or @select.include? 'job_uuid'
+ job_uuids = @objects.map { |n| n[:job_uuid] }.compact
+ assoc_jobs = readable_job_uuids(job_uuids)
+ @objects.each do |node|
+ node.job_readable = assoc_jobs.include?(node[:job_uuid])
+ end
end
end
permission_link_classes: ['permission', 'resources'])
end
+ def save_with_unique_name!
+ uuid_was = uuid
+ name_was = name
+ max_retries = 2
+ transaction do
+ conn = ActiveRecord::Base.connection
+ conn.exec_query 'SAVEPOINT save_with_unique_name'
+ begin
+ save!
+ rescue ActiveRecord::RecordNotUnique => rn
+ raise if max_retries == 0
+ max_retries -= 1
+
+ conn.exec_query 'ROLLBACK TO SAVEPOINT save_with_unique_name'
+
+ # Dig into the error to determine if it is specifically calling out a
+ # (owner_uuid, name) uniqueness violation. In this specific case, and
+ # the client requested a unique name with ensure_unique_name==true,
+ # update the name field and try to save again. Loop as necessary to
+ # discover a unique name. It is necessary to handle name choosing at
+ # this level (as opposed to the client) to ensure that record creation
+ # never fails due to a race condition.
+ err = rn.original_exception
+ raise unless err.is_a?(PG::UniqueViolation)
+
+ # Unfortunately ActiveRecord doesn't abstract out any of the
+ # necessary information to figure out if this the error is actually
+ # the specific case where we want to apply the ensure_unique_name
+ # behavior, so the following code is specialized to Postgres.
+ detail = err.result.error_field(PG::Result::PG_DIAG_MESSAGE_DETAIL)
+ raise unless /^Key \(owner_uuid, name\)=\([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}, .*?\) already exists\./.match detail
+
+ new_name = "#{name_was} (#{db_current_time.utc.iso8601(3)})"
+ if new_name == name
+ # If the database is fast enough to do two attempts in the
+ # same millisecond, we need to wait to ensure we try a
+ # different timestamp on each attempt.
+ sleep 0.002
+ new_name = "#{name_was} (#{db_current_time.utc.iso8601(3)})"
+ end
+
+ self[:name] = new_name
+ self[:uuid] = nil if uuid_was.nil? && !uuid.nil?
+ conn.exec_query 'SAVEPOINT save_with_unique_name'
+ retry
+ ensure
+ conn.exec_query 'RELEASE SAVEPOINT save_with_unique_name'
+ end
+ end
+ end
+
def logged_attributes
attributes.except(*Rails.configuration.unlogged_attributes)
end
raise PermissionDeniedError
end
- # Verify "write" permission on old owner
- # default fail unless one of:
- # owner_uuid did not change
- # previous owner_uuid is nil
- # current user is the old owner
- # current user is this object
- # current user can_write old owner
- unless !owner_uuid_changed? or
- owner_uuid_was.nil? or
- current_user.uuid == self.owner_uuid_was or
- current_user.uuid == self.uuid or
- current_user.can? write: self.owner_uuid_was
- logger.warn "User #{current_user.uuid} tried to modify #{self.class.to_s} #{uuid} but does not have permission to write old owner_uuid #{owner_uuid_was}"
- errors.add :owner_uuid, "cannot be changed without write permission on old owner"
- raise PermissionDeniedError
- end
-
- # Verify "write" permission on new owner
- # default fail unless one of:
- # current_user is this object
- # current user can_write new owner, or this object if owner unchanged
- if new_record? or owner_uuid_changed? or is_a?(ApiClientAuthorization)
- write_target = owner_uuid
+ if new_record? || owner_uuid_changed?
+ # Permission on owner_uuid_was is needed to move an existing
+ # object away from its previous owner (which implies permission
+ # to modify this object itself, so we don't need to check that
+ # separately). Permission on the new owner_uuid is also needed.
+ [['old', owner_uuid_was],
+ ['new', owner_uuid]
+ ].each do |which, check_uuid|
+ if check_uuid.nil?
+ # old_owner_uuid is nil? New record, no need to check.
+ elsif !current_user.can?(write: check_uuid)
+ logger.warn "User #{current_user.uuid} tried to set ownership of #{self.class.to_s} #{self.uuid} but does not have permission to write #{which} owner_uuid #{check_uuid}"
+ errors.add :owner_uuid, "cannot be set or changed without write permission on #{which} owner"
+ raise PermissionDeniedError
+ end
+ end
else
- write_target = uuid
- end
- unless current_user == self or current_user.can? write: write_target
- logger.warn "User #{current_user.uuid} tried to modify #{self.class.to_s} #{uuid} but does not have permission to write new owner_uuid #{owner_uuid}"
- errors.add :owner_uuid, "cannot be changed without write permission on new owner"
- raise PermissionDeniedError
+ # If the object already existed and we're not changing
+ # owner_uuid, we only need write permission on the object
+ # itself.
+ if !current_user.can?(write: self.uuid)
+ logger.warn "User #{current_user.uuid} tried to modify #{self.class.to_s} #{self.uuid} without write permission"
+ errors.add :uuid, "is not writable"
+ raise PermissionDeniedError
+ end
end
true
false
elsif portable_data_hash[0..31] != computed_pdh[0..31]
errors.add(:portable_data_hash,
- "does not match computed hash #{computed_pdh}")
+ "'#{portable_data_hash}' does not match computed hash '#{computed_pdh}'")
false
else
# Ignore the client-provided size part: always store
true
end
- # If trash_at is updated without touching delete_at, automatically
- # update delete_at to a sensible value.
def default_trash_interval
if trash_at_changed? && !delete_at_changed?
+ # If trash_at is updated without touching delete_at,
+ # automatically update delete_at to a sensible value.
if trash_at.nil?
self.delete_at = nil
else
self.delete_at = trash_at + Rails.configuration.default_trash_lifetime.seconds
end
+ elsif !trash_at || !delete_at || trash_at > delete_at
+ # Not trash, or bogus arguments? Just validate in
+ # validate_trash_and_delete_timing.
+ elsif delete_at_changed? && delete_at >= trash_at
+ # Fix delete_at if needed, so it's not earlier than the expiry
+ # time on any permission tokens that might have been given out.
+
+ # In any case there are no signatures expiring after now+TTL.
+ # Also, if the existing trash_at time has already passed, we
+ # know we haven't given out any signatures since then.
+ earliest_delete = [
+ @validation_timestamp,
+ trash_at_was,
+ ].compact.min + Rails.configuration.blob_signature_ttl.seconds
+
+ # The previous value of delete_at is also an upper bound on the
+ # longest-lived permission token. For example, if TTL=14,
+ # trash_at_was=now-7, delete_at_was=now+7, then it is safe to
+ # set trash_at=now+6, delete_at=now+8.
+ earliest_delete = [earliest_delete, delete_at_was].compact.min
+
+ # If delete_at is too soon, use the earliest possible time.
+ if delete_at < earliest_delete
+ self.delete_at = earliest_delete
+ end
end
end
def validate_trash_and_delete_timing
if trash_at.nil? != delete_at.nil?
errors.add :delete_at, "must be set if trash_at is set, and must be nil otherwise"
- end
-
- earliest_delete = ([@validation_timestamp, trash_at_was].compact.min +
- Rails.configuration.blob_signature_ttl.seconds)
- if delete_at && delete_at < earliest_delete
- errors.add :delete_at, "#{delete_at} is too soon: earliest allowed is #{earliest_delete}"
- end
-
- if delete_at && delete_at < trash_at
+ elsif delete_at && delete_at < trash_at
errors.add :delete_at, "must not be earlier than trash_at"
end
-
true
end
end
end
end
+ # Create a new container (or find an existing one) to satisfy the
+ # given container request.
+ def self.resolve(req)
+ c_attrs = {
+ command: req.command,
+ cwd: req.cwd,
+ environment: req.environment,
+ output_path: req.output_path,
+ container_image: resolve_container_image(req.container_image),
+ mounts: resolve_mounts(req.mounts),
+ runtime_constraints: resolve_runtime_constraints(req.runtime_constraints),
+ scheduling_parameters: req.scheduling_parameters,
+ }
+ act_as_system_user do
+ if req.use_existing && (reusable = find_reusable(c_attrs))
+ reusable
+ else
+ Container.create!(c_attrs)
+ end
+ end
+ end
+
+ # Return a runtime_constraints hash that complies with requested but
+ # is suitable for saving in a container record, i.e., has specific
+ # values instead of ranges.
+ #
+ # Doing this as a step separate from other resolutions, like "git
+ # revision range to commit hash", makes sense only when there is no
+ # opportunity to reuse an existing container (e.g., container reuse
+ # is not implemented yet, or we have already found that no existing
+ # containers are suitable).
+ def self.resolve_runtime_constraints(runtime_constraints)
+ rc = {}
+ defaults = {
+ 'keep_cache_ram' =>
+ Rails.configuration.container_default_keep_cache_ram,
+ }
+ defaults.merge(runtime_constraints).each do |k, v|
+ if v.is_a? Array
+ rc[k] = v[0]
+ else
+ rc[k] = v
+ end
+ end
+ rc
+ end
+
+ # Return a mounts hash suitable for a Container, i.e., with every
+ # readonly collection UUID resolved to a PDH.
+ def self.resolve_mounts(mounts)
+ c_mounts = {}
+ mounts.each do |k, mount|
+ mount = mount.dup
+ c_mounts[k] = mount
+ if mount['kind'] != 'collection'
+ next
+ end
+ if (uuid = mount.delete 'uuid')
+ c = Collection.
+ readable_by(current_user).
+ where(uuid: uuid).
+ select(:portable_data_hash).
+ first
+ if !c
+ raise ArvadosModel::UnresolvableContainerError.new "cannot mount collection #{uuid.inspect}: not found"
+ end
+ if mount['portable_data_hash'].nil?
+ # PDH not supplied by client
+ mount['portable_data_hash'] = c.portable_data_hash
+ elsif mount['portable_data_hash'] != c.portable_data_hash
+ # UUID and PDH supplied by client, but they don't agree
+ raise ArgumentError.new "cannot mount collection #{uuid.inspect}: current portable_data_hash #{c.portable_data_hash.inspect} does not match #{mount['portable_data_hash'].inspect} in request"
+ end
+ end
+ end
+ return c_mounts
+ end
+
+ # Return a container_image PDH suitable for a Container.
+ def self.resolve_container_image(container_image)
+ coll = Collection.for_latest_docker_image(container_image)
+ if !coll
+ raise ArvadosModel::UnresolvableContainerError.new "docker image #{container_image.inspect} not found"
+ end
+ coll.portable_data_hash
+ end
+
def self.find_reusable(attrs)
candidates = Container.
where_serialized(:command, attrs[:command]).
where('cwd = ?', attrs[:cwd]).
where_serialized(:environment, attrs[:environment]).
where('output_path = ?', attrs[:output_path]).
- where('container_image = ?', attrs[:container_image]).
- where_serialized(:mounts, attrs[:mounts]).
- where_serialized(:runtime_constraints, attrs[:runtime_constraints])
+ where('container_image = ?', resolve_container_image(attrs[:container_image])).
+ where_serialized(:mounts, resolve_mounts(attrs[:mounts])).
+ where_serialized(:runtime_constraints, resolve_runtime_constraints(attrs[:runtime_constraints]))
# Check for Completed candidates whose output and log are both readable.
select_readable_pdh = Collection.
before_validation :validate_scheduling_parameters
before_validation :set_container
validates :command, :container_image, :output_path, :cwd, :presence => true
+ validates :output_ttl, numericality: { only_integer: true, greater_than_or_equal_to: 0 }
validate :validate_state_change
- validate :validate_change
+ validate :check_update_whitelist
after_save :update_priority
after_save :finalize_if_needed
before_create :set_requesting_container_uuid
t.add :output_name
t.add :output_path
t.add :output_uuid
+ t.add :output_ttl
t.add :priority
t.add :properties
t.add :requesting_container_uuid
Committed => [Final]
}
+ AttrsPermittedAlways = [:owner_uuid, :state, :name, :description]
+ AttrsPermittedBeforeCommit = [:command, :container_count_max,
+ :container_image, :cwd, :environment, :filters, :mounts,
+ :output_path, :priority, :properties, :requesting_container_uuid,
+ :runtime_constraints, :state, :container_uuid, :use_existing,
+ :scheduling_parameters, :output_name, :output_ttl]
+
def state_transitions
State_transitions
end
['output', 'log'].each do |out_type|
pdh = c.send(out_type)
next if pdh.nil?
- if self.output_name and out_type == 'output'
- coll_name = self.output_name
- else
- coll_name = "Container #{out_type} for request #{uuid}"
+ coll_name = "Container #{out_type} for request #{uuid}"
+ trash_at = nil
+ if out_type == 'output'
+ if self.output_name
+ coll_name = self.output_name
+ end
+ if self.output_ttl > 0
+ trash_at = db_current_time + self.output_ttl
+ end
end
manifest = Collection.unscoped do
Collection.where(portable_data_hash: pdh).first.manifest_text
end
- begin
- coll = Collection.create!(owner_uuid: owner_uuid,
- manifest_text: manifest,
- portable_data_hash: pdh,
- name: coll_name,
- properties: {
- 'type' => out_type,
- 'container_request' => uuid,
- })
- rescue ActiveRecord::RecordNotUnique => rn
- # In case this is executed as part of a transaction: When a Postgres exception happens,
- # the following statements on the same transaction become invalid, so a rollback is
- # needed. One example are Unit Tests, every test is enclosed inside a transaction so
- # that the database can be reverted before every new test starts.
- # See: http://api.rubyonrails.org/classes/ActiveRecord/Transactions/ClassMethods.html#module-ActiveRecord::Transactions::ClassMethods-label-Exception+handling+and+rolling+back
- ActiveRecord::Base.connection.execute 'ROLLBACK'
- raise unless out_type == 'output' and self.output_name
- # Postgres specific unique name check. See ApplicationController#create for
- # a detailed explanation.
- raise unless rn.original_exception.is_a? PG::UniqueViolation
- err = rn.original_exception
- detail = err.result.error_field(PG::Result::PG_DIAG_MESSAGE_DETAIL)
- raise unless /^Key \(owner_uuid, name\)=\([a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}, .*?\) already exists\./.match detail
- # Output collection name collision detected: append a timestamp.
- coll_name = "#{self.output_name} #{Time.now.getgm.strftime('%FT%TZ')}"
- retry
- end
+
+ coll = Collection.new(owner_uuid: owner_uuid,
+ manifest_text: manifest,
+ portable_data_hash: pdh,
+ name: coll_name,
+ trash_at: trash_at,
+ delete_at: trash_at,
+ properties: {
+ 'type' => out_type,
+ 'container_request' => uuid,
+ })
+ coll.save_with_unique_name!
if out_type == 'output'
out_coll = coll.uuid
else
self.cwd ||= "."
self.container_count_max ||= Rails.configuration.container_count_max
self.scheduling_parameters ||= {}
- end
-
- # Create a new container (or find an existing one) to satisfy this
- # request.
- def resolve
- c_mounts = mounts_for_container
- c_runtime_constraints = runtime_constraints_for_container
- c_container_image = container_image_for_container
- c = act_as_system_user do
- c_attrs = {command: self.command,
- cwd: self.cwd,
- environment: self.environment,
- output_path: self.output_path,
- container_image: c_container_image,
- mounts: c_mounts,
- runtime_constraints: c_runtime_constraints}
-
- reusable = self.use_existing ? Container.find_reusable(c_attrs) : nil
- if not reusable.nil?
- reusable
- else
- c_attrs[:scheduling_parameters] = self.scheduling_parameters
- Container.create!(c_attrs)
- end
- end
- self.container_uuid = c.uuid
- end
-
- # Return a runtime_constraints hash that complies with
- # self.runtime_constraints but is suitable for saving in a container
- # record, i.e., has specific values instead of ranges.
- #
- # Doing this as a step separate from other resolutions, like "git
- # revision range to commit hash", makes sense only when there is no
- # opportunity to reuse an existing container (e.g., container reuse
- # is not implemented yet, or we have already found that no existing
- # containers are suitable).
- def runtime_constraints_for_container
- rc = {}
- runtime_constraints.each do |k, v|
- if v.is_a? Array
- rc[k] = v[0]
- else
- rc[k] = v
- end
- end
- rc
- end
-
- # Return a mounts hash suitable for a Container, i.e., with every
- # readonly collection UUID resolved to a PDH.
- def mounts_for_container
- c_mounts = {}
- mounts.each do |k, mount|
- mount = mount.dup
- c_mounts[k] = mount
- if mount['kind'] != 'collection'
- next
- end
- if (uuid = mount.delete 'uuid')
- c = Collection.
- readable_by(current_user).
- where(uuid: uuid).
- select(:portable_data_hash).
- first
- if !c
- raise ArvadosModel::UnresolvableContainerError.new "cannot mount collection #{uuid.inspect}: not found"
- end
- if mount['portable_data_hash'].nil?
- # PDH not supplied by client
- mount['portable_data_hash'] = c.portable_data_hash
- elsif mount['portable_data_hash'] != c.portable_data_hash
- # UUID and PDH supplied by client, but they don't agree
- raise ArgumentError.new "cannot mount collection #{uuid.inspect}: current portable_data_hash #{c.portable_data_hash.inspect} does not match #{c['portable_data_hash'].inspect} in request"
- end
- end
- end
- return c_mounts
- end
-
- # Return a container_image PDH suitable for a Container.
- def container_image_for_container
- coll = Collection.for_latest_docker_image(container_image)
- if !coll
- raise ArvadosModel::UnresolvableContainerError.new "docker image #{container_image.inspect} not found"
- end
- coll.portable_data_hash
+ self.output_ttl ||= 0
end
def set_container
return false
end
if state_changed? and state == Committed and container_uuid.nil?
- resolve
+ self.container_uuid = Container.resolve(self).uuid
end
if self.container_uuid != self.container_uuid_was
if self.container_count_changed?
def validate_runtime_constraints
case self.state
when Committed
- ['vcpus', 'ram'].each do |k|
- if not (runtime_constraints.include? k and
- runtime_constraints[k].is_a? Integer and
- runtime_constraints[k] > 0)
- errors.add :runtime_constraints, "#{k} must be a positive integer"
+ [['vcpus', true],
+ ['ram', true],
+ ['keep_cache_ram', false]].each do |k, required|
+ if !required && !runtime_constraints.include?(k)
+ next
+ end
+ v = runtime_constraints[k]
+ unless (v.is_a?(Integer) && v > 0)
+ errors.add(:runtime_constraints,
+ "[#{k}]=#{v.inspect} must be a positive integer")
end
- end
-
- if runtime_constraints.include? 'keep_cache_ram' and
- (!runtime_constraints['keep_cache_ram'].is_a?(Integer) or
- runtime_constraints['keep_cache_ram'] <= 0)
- errors.add :runtime_constraints, "keep_cache_ram must be a positive integer"
- elsif !runtime_constraints.include? 'keep_cache_ram'
- runtime_constraints['keep_cache_ram'] = Rails.configuration.container_default_keep_cache_ram
end
end
end
end
end
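Illustrative sketch, not part of the patch, assuming cr is a ContainerRequest in state Committed: vcpus and ram must be positive integers, while keep_cache_ram is optional at this stage and is defaulted later by Container.resolve_runtime_constraints.

    cr.runtime_constraints = {'vcpus' => 1, 'ram' => 123}              # valid
    cr.runtime_constraints = {'vcpus' => 1, 'ram' => 123,
                              'keep_cache_ram' => 256 << 20}           # valid
    cr.runtime_constraints = {'vcpus' => 1, 'ram' => 123,
                              'keep_cache_ram' => -1}                  # invalid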
- def validate_change
- permitted = [:owner_uuid]
+ def check_update_whitelist
+ permitted = AttrsPermittedAlways.dup
- case self.state
- when Uncommitted
- # Permit updating most fields
- permitted.push :command, :container_count_max,
- :container_image, :cwd, :description, :environment,
- :filters, :mounts, :name, :output_path, :priority,
- :properties, :requesting_container_uuid, :runtime_constraints,
- :state, :container_uuid, :use_existing, :scheduling_parameters,
- :output_name
+ if self.new_record? || self.state_was == Uncommitted
+ # Allow create-and-commit in a single operation.
+ permitted.push *AttrsPermittedBeforeCommit
+ end
+ case self.state
when Committed
- if container_uuid.nil?
- errors.add :container_uuid, "has not been resolved to a container."
- end
+ permitted.push :priority, :container_count_max, :container_uuid
- if priority.nil?
- errors.add :priority, "cannot be nil"
+ if self.container_uuid.nil?
+ self.errors.add :container_uuid, "has not been resolved to a container."
end
- # Can update priority, container count, name and description
- permitted.push :priority, :container_count, :container_count_max, :container_uuid,
- :name, :description
+ if self.priority.nil?
+ self.errors.add :priority, "cannot be nil"
+ end
- if self.state_changed?
- # Allow create-and-commit in a single operation.
- permitted.push :command, :container_image, :cwd, :description, :environment,
- :filters, :mounts, :name, :output_path, :properties,
- :requesting_container_uuid, :runtime_constraints,
- :state, :container_uuid, :use_existing, :scheduling_parameters,
- :output_name
+ # Allow container count to increment by 1
+ if (self.container_uuid &&
+ self.container_uuid != self.container_uuid_was &&
+ self.container_count == 1 + (self.container_count_was || 0))
+ permitted.push :container_count
end
when Final
- if not current_user.andand.is_admin and not (self.name_changed? || self.description_changed?)
- errors.add :state, "of container request can only be set to Final by system."
+ if self.state_changed? and not current_user.andand.is_admin
+ self.errors.add :state, "of container request can only be set to Final by system."
end
- if self.state_changed? || self.name_changed? || self.description_changed? || self.output_uuid_changed? || self.log_uuid_changed?
- permitted.push :state, :name, :description, :output_uuid, :log_uuid
- else
- errors.add :state, "does not allow updates"
+ if self.state_was == Committed
+ permitted.push :output_uuid, :log_uuid
end
- else
- errors.add :state, "invalid value"
end
- check_update_whitelist permitted
+ super(permitted)
end
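Illustrative sketch, not part of the patch, of the whitelist's effect on a ContainerRequest cr (the model tests later in this branch give the authoritative matrix):

    cr.update_attributes(name: 'alt name')  # permitted in any state
    cr.update_attributes(priority: 123)     # permitted while Committed
    cr.update_attributes(mounts: {})        # rejected once Committed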
def update_priority
+require 'audit_logs'
+
class Log < ArvadosModel
include HasUuid
include KindAndEtag
serialize :properties, Hash
before_validation :set_default_event_at
after_save :send_notify
+ after_commit { AuditLogs.tidy_in_background }
api_accessible :user, extend: :common do |t|
t.add :id
def send_notify
connection.execute "NOTIFY logs, '#{self.id}'"
end
-
end
# crunchstat logs from the logs table.
clean_container_log_rows_after: <%= 30.days %>
+ # Time to keep audit logs, in seconds. (An audit log is a row added
+ # to the "logs" table in the PostgreSQL database each time an
+ # Arvados object is created, modified, or deleted.)
+ #
+ # Currently, websocket event notifications rely on audit logs, so
+ # this should not be set lower than 300 (5 minutes).
+ max_audit_log_age: 1209600
+
+ # Maximum number of log rows to delete in a single SQL transaction.
+ #
+ # If max_audit_log_delete_batch is 0, log entries will never be
+ # deleted by Arvados. Cleanup can be done by an external process
+ # without affecting any Arvados system processes, as long as very
+ # recent (<5 minutes old) logs are not deleted.
+ #
+ # 100000 is a reasonable batch size for most sites.
+ max_audit_log_delete_batch: 0
+
# The maximum number of compute nodes that can be in use simultaneously
# If this limit is reduced, any existing nodes with slot number >= new limit
# will not be counted against the new limit. In other words, the new limit
--- /dev/null
+class AddPortableDataHashIndexToCollections < ActiveRecord::Migration
+ def change
+ add_index :collections, :portable_data_hash
+ end
+end
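Illustrative sketch, not part of the patch: this index backs the portable_data_hash lookups used elsewhere in the branch (container reuse, output collection naming). current_user and pdh are assumed to be in scope here.

    Collection.readable_by(current_user).
      where(portable_data_hash: pdh).
      select(:uuid, :portable_data_hash)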
--- /dev/null
+class AddOutputTtlToContainerRequests < ActiveRecord::Migration
+ def change
+ add_column :container_requests, :output_ttl, :integer, default: 0, null: false
+ end
+end
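Illustrative sketch, not part of the patch: a request committed with a nonzero output_ttl produces an output collection scheduled for trashing one TTL after finalization. The attribute values below are hypothetical.

    cr = ContainerRequest.create!(command: ['true'],
                                  container_image: 'arvados/jobs',
                                  output_path: '/out',
                                  output_ttl: 86400)
    # After the container finishes, the output collection's trash_at is
    # approximately db_current_time + 86400 seconds.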
scheduling_parameters text,
output_uuid character varying(255),
log_uuid character varying(255),
- output_name character varying(255) DEFAULT NULL::character varying
+ output_name character varying(255) DEFAULT NULL::character varying,
+ output_ttl integer DEFAULT 0 NOT NULL
);
CREATE UNIQUE INDEX index_collections_on_owner_uuid_and_name ON collections USING btree (owner_uuid, name) WHERE (is_trashed = false);
+--
+-- Name: index_collections_on_portable_data_hash; Type: INDEX; Schema: public; Owner: -; Tablespace:
+--
+
+CREATE INDEX index_collections_on_portable_data_hash ON collections USING btree (portable_data_hash);
+
+
--
-- Name: index_collections_on_trash_at; Type: INDEX; Schema: public; Owner: -; Tablespace:
--
INSERT INTO schema_migrations (version) VALUES ('20170216170823');
-INSERT INTO schema_migrations (version) VALUES ('20170301225558');
\ No newline at end of file
+INSERT INTO schema_migrations (version) VALUES ('20170301225558');
+
+INSERT INTO schema_migrations (version) VALUES ('20170328215436');
+
+INSERT INTO schema_migrations (version) VALUES ('20170330012505');
\ No newline at end of file
--- /dev/null
+require 'current_api_client'
+require 'db_current_time'
+
+module AuditLogs
+ extend CurrentApiClient
+ extend DbCurrentTime
+
+ def self.delete_old(max_age:, max_batch:)
+ act_as_system_user do
+ if !File.owned?(Rails.root.join('tmp'))
+ Rails.logger.warn("AuditLogs: not owner of #{Rails.root}/tmp, skipping")
+ return
+ end
+ lockfile = Rails.root.join('tmp', 'audit_logs.lock')
+ File.open(lockfile, File::RDWR|File::CREAT, 0600) do |f|
+ return unless f.flock(File::LOCK_NB|File::LOCK_EX)
+
+ sql = "select clock_timestamp() - interval '#{'%.9f' % max_age} seconds'"
+ threshold = ActiveRecord::Base.connection.select_value(sql).to_time.utc
+ Rails.logger.info "AuditLogs: deleting logs older than #{threshold}"
+
+ did_total = 0
+ loop do
+ sql = Log.unscoped.
+ select(:id).
+ order(:created_at).
+ where('event_type in (?)', ['create', 'update', 'destroy', 'delete']).
+ where('created_at < ?', threshold).
+ limit(max_batch).
+ to_sql
+ did = Log.unscoped.where("id in (#{sql})").delete_all
+ did_total += did
+
+ Rails.logger.info "AuditLogs: deleted batch of #{did}"
+ break if did == 0
+ end
+ Rails.logger.info "AuditLogs: deleted total #{did_total}"
+ end
+ end
+ end
+
+ def self.tidy_in_background
+ max_age = Rails.configuration.max_audit_log_age
+ max_batch = Rails.configuration.max_audit_log_delete_batch
+ return if max_age <= 0 || max_batch <= 0
+
+ exp = (max_age/14).seconds
+ need = false
+ Rails.cache.fetch('AuditLogs', expires_in: exp) do
+ need = true
+ end
+ return if !need
+
+ Thread.new do
+ Thread.current.abort_on_exception = false
+ begin
+ delete_old(max_age: max_age, max_batch: max_batch)
+ rescue => e
+ Rails.logger.error "#{e.class}: #{e}\n#{e.backtrace.join("\n\t")}"
+ ensure
+ ActiveRecord::Base.connection.close
+ end
+ end
+ end
+end
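Illustrative sketch, not part of the patch: delete_old can also be invoked directly with the configured limits; tidy_in_background runs the same call in a new thread, throttled to at most once per max_age/14 seconds via the 'AuditLogs' Rails cache entry.

    AuditLogs.delete_old(
      max_age: Rails.configuration.max_audit_log_age,
      max_batch: Rails.configuration.max_audit_log_delete_batch)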
jobrecord = Job.find_by_uuid(job_done.uuid)
if exit_status == EXIT_RETRY_UNLOCKED or (exit_tempfail and @job_retry_counts.include? jobrecord.uuid)
+ $stderr.puts("dispatch: job #{jobrecord.uuid} was interrupted by node failure")
# Only this crunch-dispatch process can retry the job:
# it's already locked, and there's no way to put it back in the
# Queued state. Put it in our internal todo list unless the job
# has failed this way excessively.
@job_retry_counts[jobrecord.uuid] += 1
exit_tempfail = @job_retry_counts[jobrecord.uuid] <= RETRY_UNLOCKED_LIMIT
+ do_what_next = "give up now"
if exit_tempfail
@todo_job_retries[jobrecord.uuid] = jobrecord
- else
- $stderr.puts("dispatch: job #{jobrecord.uuid} exceeded node failure retry limit -- giving up")
+ do_what_next = "re-attempt"
end
+ $stderr.puts("dispatch: job #{jobrecord.uuid} has been interrupted " +
+ "#{@job_retry_counts[jobrecord.uuid]}x, will #{do_what_next}")
end
if !exit_tempfail
# The following four collections are used to test combining collections with repeated filenames
collection_with_repeated_filenames_and_contents_in_two_dirs_1:
uuid: zzzzz-4zz18-duplicatenames1
- portable_data_hash: fa7aeb5140e2848d39b416daeef4ffc5+45
+ portable_data_hash: f3a67fad3a19c31c658982fb8158fa58+144
owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
created_at: 2014-02-03T17:22:54Z
modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
collection_with_repeated_filenames_and_contents_in_two_dirs_2:
uuid: zzzzz-4zz18-duplicatenames2
- portable_data_hash: fa7aeb5140e2848d39b416daeef4ffc5+45
+ portable_data_hash: f3a67fad3a19c31c658982fb8158fa58+144
owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
created_at: 2014-02-03T17:22:54Z
modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n"
name: collection_not_readable_by_active
+collection_to_remove_and_rename_files:
+ uuid: zzzzz-4zz18-a21ux3541sxa8sf
+ portable_data_hash: 80cf6dd2cf079dd13f272ec4245cb4a8+48
+ owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ created_at: 2014-02-03T17:22:54Z
+ modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+ modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+ modified_at: 2014-02-03T17:22:54Z
+ updated_at: 2014-02-03T17:22:54Z
+ manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1 0:0:file2\n"
+ name: collection to remove and rename files
+
# Test Helper trims the rest of the file
object_uuid: zzzzz-2x53u-382brsig8rp3667 # repository foo
object_owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz # active user
event_at: <%= 2.minute.ago.to_s(:db) %>
+ event_type: update
admin_changes_specimen: # admin changes specimen owned_by_spectator
id: 3
object_uuid: zzzzz-2x53u-3b0xxwzlbzxq5yr # specimen owned_by_spectator
object_owner_uuid: zzzzz-tpzed-l1s2piq4t4mps8r # spectator user
event_at: <%= 3.minute.ago.to_s(:db) %>
+ event_type: update
system_adds_foo_file: # foo collection added, readable by active through link
id: 4
object_uuid: zzzzz-4zz18-znfnqtbbv4spc3w # foo file
object_owner_uuid: zzzzz-tpzed-000000000000000 # system user
event_at: <%= 4.minute.ago.to_s(:db) %>
+ event_type: create
system_adds_baz: # baz collection added, readable by active and spectator through group 'all users' group membership
id: 5
object_uuid: zzzzz-4zz18-y9vne9npefyxh8g # baz file
object_owner_uuid: zzzzz-tpzed-000000000000000 # system user
event_at: <%= 5.minute.ago.to_s(:db) %>
+ event_type: create
log_owned_by_active:
id: 6
authorize_with :inactive
get :index
assert_response :success
- node_items = JSON.parse(@response.body)['items']
- assert_equal 0, node_items.size
+ assert_equal 0, json_response['items'].size
+ assert_equal 0, json_response['items_available']
end
# active user sees non-secret attributes of up and recently-up nodes
authorize_with :active
get :index
assert_response :success
- node_items = JSON.parse(@response.body)['items']
- assert_not_equal 0, node_items.size
+ assert_operator 0, :<, json_response['items_available']
+ node_items = json_response['items']
+ assert_operator 0, :<, node_items.size
found_busy_node = false
node_items.each do |node|
assert_nil node['info'].andand['ping_secret']
authorize_with user
get :index, {select: ['domain']}
assert_response :success
+ assert_operator 0, :<, json_response['items_available']
end
end
include UsersTestHelper
setup do
- @all_links_at_start = Link.all
+ @initial_link_count = Link.count
@vm_uuid = virtual_machines(:testvm).uuid
end
assert_nil created['identity_url'], 'expected no identity_url'
# arvados#user, repo link and link add user to 'All users' group
- verify_num_links @all_links_at_start, 4
+ verify_links_added 4
verify_link response_items, 'arvados#user', true, 'permission', 'can_login',
created['uuid'], created['email'], 'arvados#user', false, 'User'
assert_equal response_object['email'], 'foo@example.com', 'expected given email'
# four extra links; system_group, login, group and repo perms
- verify_num_links @all_links_at_start, 4
+ verify_links_added 4
end
test "setup user with fake vm and expect error" do
assert_equal response_object['email'], 'foo@example.com', 'expected given email'
# five extra links; system_group, login, group, vm, repo
- verify_num_links @all_links_at_start, 5
+ verify_links_added 5
end
test "setup user with valid email, no vm and no repo as input" do
assert_equal response_object['email'], 'foo@example.com', 'expected given email'
# three extra links; system_group, login, and group
- verify_num_links @all_links_at_start, 3
+ verify_links_added 3
verify_link response_items, 'arvados#user', true, 'permission', 'can_login',
response_object['uuid'], response_object['email'], 'arvados#user', false, 'User'
'expecting first name'
# five extra links; system_group, login, group, repo and vm
- verify_num_links @all_links_at_start, 5
+ verify_links_added 5
end
test "setup user with an existing user email and check different object is created" do
'expected different uuid after create operation'
assert_equal inactive_user['email'], response_object['email'], 'expected given email'
# system_group, openid, group, and repo. No vm link.
- verify_num_links @all_links_at_start, 4
+ verify_links_added 4
end
test "setup user with openid prefix" do
# verify links
# four new links: system_group, arvados#user, repo, and 'All users' group.
- verify_num_links @all_links_at_start, 4
+ verify_links_added 4
verify_link response_items, 'arvados#user', true, 'permission', 'can_login',
created['uuid'], created['email'], 'arvados#user', false, 'User'
# five new links: system_group, arvados#user, repo, vm and 'All
# users' group link
- verify_num_links @all_links_at_start, 5
+ verify_links_added 5
verify_link response_items, 'arvados#user', true, 'permission', 'can_login',
created['uuid'], created['email'], 'arvados#user', false, 'User'
"admin's filtered index did not return inactive user")
end
- def verify_num_links (original_links, expected_additional_links)
- assert_equal expected_additional_links, Link.all.size-original_links.size,
- "Expected #{expected_additional_links.inspect} more links"
+ def verify_links_added more
+ assert_equal @initial_link_count+more, Link.count,
+ "Started with #{@initial_link_count} links, expected #{more} more"
end
def find_obj_in_resp (response_items, object_type, head_kind=nil)
end
end
+ now = Time.now
[['trash-to-delete interval negative',
:collection_owned_by_active,
- {trash_at: Time.now+2.weeks, delete_at: Time.now},
+ {trash_at: now+2.weeks, delete_at: now},
{state: :invalid}],
- ['trash-to-delete interval too short',
+ ['now-to-delete interval short',
:collection_owned_by_active,
- {trash_at: Time.now+3.days, delete_at: Time.now+7.days},
- {state: :invalid}],
+ {trash_at: now+3.days, delete_at: now+7.days},
+ {state: :trash_future}],
+ ['now-to-delete interval short, trash=delete',
+ :collection_owned_by_active,
+ {trash_at: now+3.days, delete_at: now+3.days},
+ {state: :trash_future}],
['trash-to-delete interval ok',
:collection_owned_by_active,
- {trash_at: Time.now, delete_at: Time.now+15.days},
+ {trash_at: now, delete_at: now+15.days},
{state: :trash_now}],
['trash-to-delete interval short, but far enough in future',
:collection_owned_by_active,
- {trash_at: Time.now+13.days, delete_at: Time.now+15.days},
+ {trash_at: now+13.days, delete_at: now+15.days},
{state: :trash_future}],
['trash by setting is_trashed bool',
:collection_owned_by_active,
{state: :trash_now}],
['trash in future by setting just trash_at',
:collection_owned_by_active,
- {trash_at: Time.now+1.week},
+ {trash_at: now+1.week},
{state: :trash_future}],
['trash in future by setting trash_at and delete_at',
:collection_owned_by_active,
- {trash_at: Time.now+1.week, delete_at: Time.now+4.weeks},
+ {trash_at: now+1.week, delete_at: now+4.weeks},
{state: :trash_future}],
['untrash by clearing is_trashed bool',
:expired_collection,
end
updates_ok = c.update_attributes(updates)
expect_valid = expect[:state] != :invalid
- assert_equal updates_ok, expect_valid, c.errors.full_messages.to_s
+ assert_equal expect_valid, updates_ok, c.errors.full_messages.to_s
case expect[:state]
when :invalid
refute c.valid?
class ContainerRequestTest < ActiveSupport::TestCase
include DockerMigrationHelper
+ include DbCurrentTime
def create_minimal_req! attrs={}
defaults = {
cr.reload
+ assert_equal({"vcpus" => 2, "ram" => 30}, cr.runtime_constraints)
+
assert_not_nil cr.container_uuid
c = Container.find_by_uuid cr.container_uuid
assert_not_nil c
lambda { |resolved| resolved["ram"] == 1234234234 }],
].each do |rc, okfunc|
test "resolve runtime constraint range #{rc} to values" do
- cr = ContainerRequest.new(runtime_constraints: rc)
- resolved = cr.send :runtime_constraints_for_container
+ resolved = Container.resolve_runtime_constraints(rc)
assert(okfunc.call(resolved),
"container runtime_constraints was #{resolved.inspect}")
end
].each do |mounts, okfunc|
test "resolve mounts #{mounts.inspect} to values" do
set_user_from_auth :active
- cr = ContainerRequest.new(mounts: mounts)
- resolved = cr.send :mounts_for_container
+ resolved = Container.resolve_mounts(mounts)
assert(okfunc.call(resolved),
- "mounts_for_container returned #{resolved.inspect}")
+ "Container.resolve_mounts returned #{resolved.inspect}")
end
end
"path" => "/foo",
},
}
- cr = ContainerRequest.new(mounts: m)
assert_raises(ArvadosModel::UnresolvableContainerError) do
- cr.send :mounts_for_container
+ Container.resolve_mounts(m)
end
end
"path" => "/foo",
},
}
- cr = ContainerRequest.new(mounts: m)
assert_raises(ArgumentError) do
- cr.send :mounts_for_container
+ Container.resolve_mounts(m)
end
end
'arvados/apitestfixture',
'd8309758b8fe2c81034ffc8a10c36460b77db7bc5e7b448c4e5b684f9d95a678',
].each do |tag|
- test "container_image_for_container(#{tag.inspect})" do
+ test "Container.resolve_container_image(#{tag.inspect})" do
set_user_from_auth :active
- cr = ContainerRequest.new(container_image: tag)
- resolved = cr.send :container_image_for_container
+ resolved = Container.resolve_container_image(tag)
assert_equal resolved, collections(:docker_image).portable_data_hash
end
end
- test "container_image_for_container(pdh)" do
+ test "Container.resolve_container_image(pdh)" do
set_user_from_auth :active
[[:docker_image, 'v1'], [:docker_image_1_12, 'v2']].each do |coll, ver|
Rails.configuration.docker_image_formats = [ver]
pdh = collections(coll).portable_data_hash
- cr = ContainerRequest.new(container_image: pdh)
- resolved = cr.send :container_image_for_container
+ resolved = Container.resolve_container_image(pdh)
assert_equal resolved, pdh
end
end
].each do |img|
test "container_image_for_container(#{img.inspect}) => 422" do
set_user_from_auth :active
- cr = ContainerRequest.new(container_image: img)
assert_raises(ArvadosModel::UnresolvableContainerError) do
- cr.send :container_image_for_container
+ Container.resolve_container_image(img)
end
end
end
set_user_from_auth :active
cr = create_minimal_req!(command: ["true", "1"],
container_image: collections(:docker_image).portable_data_hash)
- assert_equal(cr.send(:container_image_for_container),
+ assert_equal(Container.resolve_container_image(cr.container_image),
collections(:docker_image_1_12).portable_data_hash)
cr = create_minimal_req!(command: ["true", "2"],
container_image: links(:docker_image_collection_tag).name)
- assert_equal(cr.send(:container_image_for_container),
+ assert_equal(Container.resolve_container_image(cr.container_image),
collections(:docker_image_1_12).portable_data_hash)
end
set_user_from_auth :active
cr = create_minimal_req!(command: ["true", "1"],
container_image: collections(:docker_image).portable_data_hash)
- assert_equal(cr.send(:container_image_for_container),
+ assert_equal(Container.resolve_container_image(cr.container_image),
collections(:docker_image).portable_data_hash)
cr = create_minimal_req!(command: ["true", "2"],
container_image: links(:docker_image_collection_tag).name)
- assert_equal(cr.send(:container_image_for_container),
+ assert_equal(Container.resolve_container_image(cr.container_image),
collections(:docker_image).portable_data_hash)
end
cr = create_minimal_req!(command: ["true", "1"],
container_image: collections(:docker_image_1_12).portable_data_hash)
assert_raises(ArvadosModel::UnresolvableContainerError) do
- cr.send(:container_image_for_container)
+ Container.resolve_container_image(cr.container_image)
end
end
cr = create_minimal_req!(command: ["true", "1"],
container_image: collections(:docker_image).portable_data_hash)
assert_raises(ArvadosModel::UnresolvableContainerError) do
- cr.send(:container_image_for_container)
+ Container.resolve_container_image(cr.container_image)
end
cr = create_minimal_req!(command: ["true", "2"],
container_image: links(:docker_image_collection_tag).name)
assert_raises(ArvadosModel::UnresolvableContainerError) do
- cr.send(:container_image_for_container)
+ Container.resolve_container_image(cr.container_image)
end
end
command: ["echo", "hello"],
output_path: "test",
runtime_constraints: {"vcpus" => 4,
- "ram" => 12000000000,
- "keep_cache_ram" => 268435456},
+ "ram" => 12000000000},
mounts: {"test" => {"kind" => "json"}}}
set_user_from_auth :active
cr1 = create_minimal_req!(common_attrs.merge({state: ContainerRequest::Committed,
test "Output collection name setting using output_name with name collision resolution" do
set_user_from_auth :active
- output_name = collections(:foo_file).name
+ output_name = 'unimaginative name'
+ Collection.create!(name: output_name)
cr = create_minimal_req!(priority: 1,
state: ContainerRequest::Committed,
output_name: output_name)
- act_as_system_user do
- c = Container.find_by_uuid(cr.container_uuid)
- c.update_attributes!(state: Container::Locked)
- c.update_attributes!(state: Container::Running)
- c.update_attributes!(state: Container::Complete,
- exit_code: 0,
- output: '1f4b0bc7583c2a7f9102c395f4ffc5e3+45',
- log: 'fa7aeb5140e2848d39b416daeef4ffc5+45')
- end
- cr.save
+ run_container(cr)
+ cr.reload
assert_equal ContainerRequest::Final, cr.state
output_coll = Collection.find_by_uuid(cr.output_uuid)
# Make sure the resulting output collection name includes the original
# name plus the date
assert_not_equal output_name, output_coll.name,
- "It shouldn't exist more than one collection with the same owner and name '${output_name}'"
+ "more than one collection with the same owner and name"
assert output_coll.name.include?(output_name),
"New name should include original name"
- assert_match /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}Z/, output_coll.name,
+ assert_match /\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z/, output_coll.name,
"New name should include ISO8601 date"
end
- test "Finalize committed request when reusing a finished container" do
- set_user_from_auth :active
- cr = create_minimal_req!(priority: 1, state: ContainerRequest::Committed)
- cr.reload
- assert_equal ContainerRequest::Committed, cr.state
+ [[0, :check_output_ttl_0],
+ [1, :check_output_ttl_1s],
+ [365*86400, :check_output_ttl_1y],
+ ].each do |ttl, checker|
+ test "output_ttl=#{ttl}" do
+ act_as_user users(:active) do
+ cr = create_minimal_req!(priority: 1,
+ state: ContainerRequest::Committed,
+ output_name: 'foo',
+ output_ttl: ttl)
+ run_container(cr)
+ cr.reload
+ output = Collection.find_by_uuid(cr.output_uuid)
+ send(checker, db_current_time, output.trash_at, output.delete_at)
+ end
+ end
+ end
+
+ def check_output_ttl_0(now, trash, delete)
+ assert_nil(trash)
+ assert_nil(delete)
+ end
+
+ def check_output_ttl_1s(now, trash, delete)
+ assert_not_nil(trash)
+ assert_not_nil(delete)
+ assert_in_delta(trash, now + 1.second, 10)
+ assert_in_delta(delete, now + Rails.configuration.blob_signature_ttl.second, 10)
+ end
+
+ def check_output_ttl_1y(now, trash, delete)
+ year = (86400*365).second
+ assert_not_nil(trash)
+ assert_not_nil(delete)
+ assert_in_delta(trash, now + year, 10)
+ assert_in_delta(delete, now + year, 10)
+ end
+
+ def run_container(cr)
act_as_system_user do
c = Container.find_by_uuid(cr.container_uuid)
c.update_attributes!(state: Container::Locked)
exit_code: 0,
output: '1f4b0bc7583c2a7f9102c395f4ffc5e3+45',
log: 'fa7aeb5140e2848d39b416daeef4ffc5+45')
+ c
end
+ end
+
+ test "Finalize committed request when reusing a finished container" do
+ set_user_from_auth :active
+ cr = create_minimal_req!(priority: 1, state: ContainerRequest::Committed)
+ cr.reload
+ assert_equal ContainerRequest::Committed, cr.state
+ run_container(cr)
cr.reload
assert_equal ContainerRequest::Final, cr.state
assert_equal ContainerRequest::Final, cr3.state
end
- [
- [{"vcpus" => 1, "ram" => 123, "keep_cache_ram" => 100}, ContainerRequest::Committed, 100],
- [{"vcpus" => 1, "ram" => 123}, ContainerRequest::Uncommitted],
- [{"vcpus" => 1, "ram" => 123}, ContainerRequest::Committed],
- [{"vcpus" => 1, "ram" => 123, "keep_cache_ram" => -1}, ContainerRequest::Committed, ActiveRecord::RecordInvalid],
- [{"vcpus" => 1, "ram" => 123, "keep_cache_ram" => '123'}, ContainerRequest::Committed, ActiveRecord::RecordInvalid],
- ].each do |rc, state, expected|
- test "create container request with #{rc} in state #{state} and verify keep_cache_ram #{expected}" do
- common_attrs = {cwd: "test",
- priority: 1,
- command: ["echo", "hello"],
- output_path: "test",
- runtime_constraints: rc,
- mounts: {"test" => {"kind" => "json"}}}
- set_user_from_auth :active
-
- if expected == ActiveRecord::RecordInvalid
- assert_raises(ActiveRecord::RecordInvalid) do
- create_minimal_req!(common_attrs.merge({state: state}))
- end
- else
- cr = create_minimal_req!(common_attrs.merge({state: state}))
- expected = Rails.configuration.container_default_keep_cache_ram if state == ContainerRequest::Committed and expected.nil?
- assert_equal expected, cr.runtime_constraints['keep_cache_ram']
- end
- end
- end
-
[
[{"partitions" => ["fastcpu","vfastcpu", 100]}, ContainerRequest::Committed, ActiveRecord::RecordInvalid],
[{"partitions" => ["fastcpu","vfastcpu", 100]}, ContainerRequest::Uncommitted],
end
end
end
+
+ [['Committed', true, {name: "foobar", priority: 123}],
+ ['Committed', false, {container_count: 2}],
+ ['Committed', false, {container_count: 0}],
+ ['Committed', false, {container_count: nil}],
+ ['Final', false, {state: ContainerRequest::Committed, name: "foobar"}],
+ ['Final', false, {name: "foobar", priority: 123}],
+ ['Final', false, {name: "foobar", output_uuid: "zzzzz-4zz18-znfnqtbbv4spc3w"}],
+ ['Final', false, {name: "foobar", log_uuid: "zzzzz-4zz18-znfnqtbbv4spc3w"}],
+ ['Final', false, {log_uuid: "zzzzz-4zz18-znfnqtbbv4spc3w"}],
+ ['Final', false, {priority: 123}],
+ ['Final', false, {mounts: {}}],
+ ['Final', false, {container_count: 2}],
+ ['Final', true, {name: "foobar"}],
+ ['Final', true, {name: "foobar", description: "baz"}],
+ ].each do |state, permitted, updates|
+ test "state=#{state} can#{'not' if !permitted} update #{updates.inspect}" do
+ act_as_user users(:active) do
+ cr = create_minimal_req!(priority: 1,
+ state: "Committed",
+ container_count_max: 1)
+ case state
+ when 'Committed'
+ # already done
+ when 'Final'
+ act_as_system_user do
+ Container.find_by_uuid(cr.container_uuid).
+ update_attributes!(state: Container::Cancelled)
+ end
+ cr.reload
+ else
+ raise 'broken test case'
+ end
+ assert_equal state, cr.state
+ if permitted
+ assert cr.update_attributes!(updates)
+ else
+ assert_raises(ActiveRecord::RecordInvalid) do
+ cr.update_attributes!(updates)
+ end
+ end
+ end
+ end
+ end
end
runtime_constraints: {"vcpus" => 1, "ram" => 1},
}
- REUSABLE_COMMON_ATTRS = {container_image: "9ae44d5792468c58bcf85ce7353c7027+124",
- cwd: "test",
- command: ["echo", "hello"],
- output_path: "test",
- runtime_constraints: {"vcpus" => 4,
- "ram" => 12000000000},
- mounts: {"test" => {"kind" => "json"}},
- environment: {"var" => 'val'}}
+ REUSABLE_COMMON_ATTRS = {
+ container_image: "9ae44d5792468c58bcf85ce7353c7027+124",
+ cwd: "test",
+ command: ["echo", "hello"],
+ output_path: "test",
+ runtime_constraints: {
+ "ram" => 12000000000,
+ "vcpus" => 4,
+ },
+ mounts: {
+ "test" => {"kind" => "json"},
+ },
+ environment: {
+ "var" => "val",
+ },
+ }
def minimal_new attrs={}
cr = ContainerRequest.new DEFAULT_ATTRS.merge(attrs)
test "Container serialized hash attributes sorted before save" do
env = {"C" => 3, "B" => 2, "A" => 1}
m = {"F" => {"kind" => 3}, "E" => {"kind" => 2}, "D" => {"kind" => 1}}
- rc = {"vcpus" => 1, "ram" => 1}
+ rc = {"vcpus" => 1, "ram" => 1, "keep_cache_ram" => 1}
c, _ = minimal_new(environment: env, mounts: m, runtime_constraints: rc)
assert_equal c.environment.to_json, Container.deep_sort_hash(env).to_json
assert_equal c.mounts.to_json, Container.deep_sort_hash(m).to_json
log: 'ea10d51bcf88862dbcc36eb292017dfd+45',
}
- set_user_from_auth :dispatch1
-
- c_output1 = Container.create common_attrs
- c_output2 = Container.create common_attrs
- assert_not_equal c_output1.uuid, c_output2.uuid
-
cr = ContainerRequest.new common_attrs
+ cr.use_existing = false
cr.state = ContainerRequest::Committed
- cr.container_uuid = c_output1.uuid
cr.save!
+ c_output1 = Container.where(uuid: cr.container_uuid).first
cr = ContainerRequest.new common_attrs
+ cr.use_existing = false
cr.state = ContainerRequest::Committed
- cr.container_uuid = c_output2.uuid
cr.save!
+ c_output2 = Container.where(uuid: cr.container_uuid).first
+
+ assert_not_equal c_output1.uuid, c_output2.uuid
+
+ set_user_from_auth :dispatch1
out1 = '1f4b0bc7583c2a7f9102c395f4ffc5e3+45'
log1 = collections(:real_log_collection).portable_data_hash
c_output2.update_attributes!({state: Container::Running})
c_output2.update_attributes!(completed_attrs.merge({log: log1, output: out2}))
- reused = Container.find_reusable(common_attrs)
- assert_not_nil reused
- assert_equal reused.uuid, c_output1.uuid
+ reused = Container.resolve(ContainerRequest.new(common_attrs))
+ assert_equal c_output1.uuid, reused.uuid
end
test "find_reusable method should select running container by start date" do
require 'test_helper'
+require 'audit_logs'
class LogTest < ActiveSupport::TestCase
include CurrentApiClient
end
end
end
+
+ def assert_no_logs_deleted
+ logs_before = Log.unscoped.all.count
+ yield
+ assert_equal logs_before, Log.unscoped.all.count
+ end
+
+ def remaining_audit_logs
+ Log.unscoped.where('event_type in (?)', %w(create update destroy delete))
+ end
+
+ # Default settings should not delete anything -- some sites rely on
+ # the original "keep everything forever" behavior.
+ test 'retain old audit logs with default settings' do
+ assert_no_logs_deleted do
+ AuditLogs.delete_old(
+ max_age: Rails.configuration.max_audit_log_age,
+ max_batch: Rails.configuration.max_audit_log_delete_batch)
+ end
+ end
+
+ # Batch size 0 should retain all logs -- even if max_age is very
+ # short, and even if the default settings (and associated test) have
+ # changed.
+ test 'retain old audit logs with max_audit_log_delete_batch=0' do
+ assert_no_logs_deleted do
+ AuditLogs.delete_old(max_age: 1, max_batch: 0)
+ end
+ end
+
+ # We recommend a more conservative age of 5 minutes for production,
+ # but 3 minutes suits our test data better (and is still a value that
+ # would be expected to work correctly in production).
+ test 'delete old audit logs with production settings' do
+ initial_log_count = Log.unscoped.all.count
+ AuditLogs.delete_old(max_age: 180, max_batch: 100000)
+ assert_operator remaining_audit_logs.count, :<, initial_log_count
+ end
+
+ test 'delete all audit logs in multiple batches' do
+ AuditLogs.delete_old(max_age: 0.00001, max_batch: 2)
+ assert_equal [], remaining_audit_logs.collect(&:uuid)
+ end
+
+ test 'delete old audit logs in thread' do
+ begin
+ Rails.configuration.max_audit_log_age = 20
+ Rails.configuration.max_audit_log_delete_batch = 100000
+ Rails.cache.delete 'AuditLogs'
+ initial_log_count = Log.unscoped.all.count + 1
+ act_as_system_user do
+ Log.create!()
+ initial_log_count += 1
+ end
+ deadline = Time.now + 10
+ while remaining_audit_logs.count == initial_log_count
+ if Time.now > deadline
+ raise "timed out"
+ end
+ sleep 0.1
+ end
+ assert_operator remaining_audit_logs.count, :<, initial_log_count
+ ensure
+ # The test framework rolls back our transactions, but that
+ # doesn't undo the deletes we did from separate threads.
+ ActiveRecord::Base.connection.exec_query 'ROLLBACK'
+ Thread.new do
+ begin
+ dc = DatabaseController.new
+ dc.define_singleton_method :render do |*args| end
+ dc.reset
+ ensure
+ ActiveRecord::Base.connection.close
+ end
+ end.join
+ end
+ end
end
Documentation=https://doc.arvados.org/
After=network.target
AssertPathExists=/etc/arvados/git-httpd/git-httpd.yml
+# systemd<230
+StartLimitInterval=0
+# systemd>=230
+StartLimitIntervalSec=0
[Service]
Type=notify
ExecStart=/usr/bin/arvados-git-httpd
Restart=always
+RestartSec=1
[Install]
WantedBy=multi-user.target
Documentation=https://doc.arvados.org/
After=network.target
AssertPathExists=/etc/arvados/crunch-dispatch-slurm/crunch-dispatch-slurm.yml
+# systemd<230
+StartLimitInterval=0
+# systemd>=230
+StartLimitIntervalSec=0
[Service]
Type=notify
ExecStart=/usr/bin/crunch-dispatch-slurm
Restart=always
+RestartSec=1
[Install]
WantedBy=multi-user.target
package main
import (
+ "context"
"encoding/json"
"errors"
"flag"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
"git.curoverse.com/arvados.git/sdk/go/keepclient"
"git.curoverse.com/arvados.git/sdk/go/manifest"
- "github.com/curoverse/dockerclient"
+
+ dockertypes "github.com/docker/docker/api/types"
+ dockercontainer "github.com/docker/docker/api/types/container"
+ dockernetwork "github.com/docker/docker/api/types/network"
+ dockerclient "github.com/docker/docker/client"
)
// IArvadosClient is the minimal Arvados API methods used by crunch-run.
// ThinDockerClient is the minimal Docker client interface used by crunch-run.
type ThinDockerClient interface {
- StopContainer(id string, timeout int) error
- InspectImage(id string) (*dockerclient.ImageInfo, error)
- LoadImage(reader io.Reader) error
- CreateContainer(config *dockerclient.ContainerConfig, name string, authConfig *dockerclient.AuthConfig) (string, error)
- StartContainer(id string, config *dockerclient.HostConfig) error
- AttachContainer(id string, options *dockerclient.AttachOptions) (io.ReadCloser, error)
- Wait(id string) <-chan dockerclient.WaitResult
- RemoveImage(name string, force bool) ([]*dockerclient.ImageDelete, error)
+ ContainerAttach(ctx context.Context, container string, options dockertypes.ContainerAttachOptions) (dockertypes.HijackedResponse, error)
+ ContainerCreate(ctx context.Context, config *dockercontainer.Config, hostConfig *dockercontainer.HostConfig,
+ networkingConfig *dockernetwork.NetworkingConfig, containerName string) (dockercontainer.ContainerCreateCreatedBody, error)
+ ContainerStart(ctx context.Context, container string, options dockertypes.ContainerStartOptions) error
+ ContainerStop(ctx context.Context, container string, timeout *time.Duration) error
+ ContainerWait(ctx context.Context, container string) (int64, error)
+ ImageInspectWithRaw(ctx context.Context, image string) (dockertypes.ImageInspect, []byte, error)
+ ImageLoad(ctx context.Context, input io.Reader, quiet bool) (dockertypes.ImageLoadResponse, error)
+ ImageRemove(ctx context.Context, image string, options dockertypes.ImageRemoveOptions) ([]dockertypes.ImageDeleteResponseItem, error)
+}
+
+// ThinDockerClientProxy is a proxy implementation of ThinDockerClient
+// that forwards each request to a real dockerclient.Client.
+type ThinDockerClientProxy struct {
+ Docker *dockerclient.Client
+}
+
+// ContainerAttach invokes dockerclient.Client.ContainerAttach
+func (proxy ThinDockerClientProxy) ContainerAttach(ctx context.Context, container string, options dockertypes.ContainerAttachOptions) (dockertypes.HijackedResponse, error) {
+ return proxy.Docker.ContainerAttach(ctx, container, options)
+}
+
+// ContainerCreate invokes dockerclient.Client.ContainerCreate
+func (proxy ThinDockerClientProxy) ContainerCreate(ctx context.Context, config *dockercontainer.Config, hostConfig *dockercontainer.HostConfig,
+ networkingConfig *dockernetwork.NetworkingConfig, containerName string) (dockercontainer.ContainerCreateCreatedBody, error) {
+ return proxy.Docker.ContainerCreate(ctx, config, hostConfig, networkingConfig, containerName)
+}
+
+// ContainerStart invokes dockerclient.Client.ContainerStart
+func (proxy ThinDockerClientProxy) ContainerStart(ctx context.Context, container string, options dockertypes.ContainerStartOptions) error {
+ return proxy.Docker.ContainerStart(ctx, container, options)
+}
+
+// ContainerStop invokes dockerclient.Client.ContainerStop
+func (proxy ThinDockerClientProxy) ContainerStop(ctx context.Context, container string, timeout *time.Duration) error {
+ return proxy.Docker.ContainerStop(ctx, container, timeout)
+}
+
+// ContainerWait invokes dockerclient.Client.ContainerWait
+func (proxy ThinDockerClientProxy) ContainerWait(ctx context.Context, container string) (int64, error) {
+ return proxy.Docker.ContainerWait(ctx, container)
+}
+
+// ImageInspectWithRaw invokes dockerclient.Client.ImageInspectWithRaw
+func (proxy ThinDockerClientProxy) ImageInspectWithRaw(ctx context.Context, image string) (dockertypes.ImageInspect, []byte, error) {
+ return proxy.Docker.ImageInspectWithRaw(ctx, image)
+}
+
+// ImageLoad invokes dockerclient.Client.ImageLoad
+func (proxy ThinDockerClientProxy) ImageLoad(ctx context.Context, input io.Reader, quiet bool) (dockertypes.ImageLoadResponse, error) {
+ return proxy.Docker.ImageLoad(ctx, input, quiet)
+}
+
+// ImageRemove invokes dockerclient.Client.ImageRemove
+func (proxy ThinDockerClientProxy) ImageRemove(ctx context.Context, image string, options dockertypes.ImageRemoveOptions) ([]dockertypes.ImageDeleteResponseItem, error) {
+ return proxy.Docker.ImageRemove(ctx, image, options)
}
// ContainerRunner is the main stateful struct used for a single execution of a
ArvClient IArvadosClient
Kc IKeepClient
arvados.Container
- dockerclient.ContainerConfig
- dockerclient.HostConfig
+ ContainerConfig dockercontainer.Config
+ dockercontainer.HostConfig
token string
ContainerID string
ExitCode *int
cStateLock sync.Mutex
cStarted bool // StartContainer() succeeded
cCancelled bool // StopContainer() invoked
+
+ enableNetwork string // one of "default" or "always"
+ networkMode string // passed through to HostConfig.NetworkMode
}
// SetupSignals sets up signal handling to gracefully terminate the underlying
}
runner.cCancelled = true
if runner.cStarted {
- err := runner.Docker.StopContainer(runner.ContainerID, 10)
+ // The old API's bare "10" meant seconds; ContainerStop takes a
+ // Duration, so time.Duration(10) would be 10ns. Use 10 seconds.
+ timeout := 10 * time.Second
+ err := runner.Docker.ContainerStop(context.TODO(), runner.ContainerID, &(timeout))
if err != nil {
log.Printf("StopContainer failed: %s", err)
}
runner.CrunchLog.Printf("Using Docker image id '%s'", imageID)
- _, err = runner.Docker.InspectImage(imageID)
+ _, _, err = runner.Docker.ImageInspectWithRaw(context.TODO(), imageID)
if err != nil {
runner.CrunchLog.Print("Loading Docker image from keep")
return fmt.Errorf("While creating ManifestFileReader for container image: %v", err)
}
- err = runner.Docker.LoadImage(readCloser)
+ response, err := runner.Docker.ImageLoad(context.TODO(), readCloser, false)
if err != nil {
return fmt.Errorf("While loading container image into Docker: %v", err)
}
+ response.Body.Close()
} else {
runner.CrunchLog.Print("Docker image is available")
}
runner.CrunchLog.Print("Attaching container streams")
- var containerReader io.Reader
- containerReader, err = runner.Docker.AttachContainer(runner.ContainerID,
- &dockerclient.AttachOptions{Stream: true, Stdout: true, Stderr: true})
+ response, err := runner.Docker.ContainerAttach(context.TODO(), runner.ContainerID,
+ dockertypes.ContainerAttachOptions{Stream: true, Stdout: true, Stderr: true})
if err != nil {
return fmt.Errorf("While attaching container stdout/stderr streams: %v", err)
}
}
runner.Stderr = NewThrottledLogger(runner.NewLogWriter("stderr"))
- go runner.ProcessDockerAttach(containerReader)
+ go runner.ProcessDockerAttach(response.Reader)
return nil
}
for k, v := range runner.Container.Environment {
runner.ContainerConfig.Env = append(runner.ContainerConfig.Env, k+"="+v)
}
+
+ runner.HostConfig = dockercontainer.HostConfig{
+ Binds: runner.Binds,
+ Cgroup: dockercontainer.CgroupSpec(runner.setCgroupParent),
+ LogConfig: dockercontainer.LogConfig{
+ Type: "none",
+ },
+ }
+
if wantAPI := runner.Container.RuntimeConstraints.API; wantAPI != nil && *wantAPI {
tok, err := runner.ContainerToken()
if err != nil {
"ARVADOS_API_HOST="+os.Getenv("ARVADOS_API_HOST"),
"ARVADOS_API_HOST_INSECURE="+os.Getenv("ARVADOS_API_HOST_INSECURE"),
)
- runner.ContainerConfig.NetworkDisabled = false
+ runner.HostConfig.NetworkMode = dockercontainer.NetworkMode(runner.networkMode)
} else {
- runner.ContainerConfig.NetworkDisabled = true
+ if runner.enableNetwork == "always" {
+ runner.HostConfig.NetworkMode = dockercontainer.NetworkMode(runner.networkMode)
+ } else {
+ runner.HostConfig.NetworkMode = dockercontainer.NetworkMode("none")
+ }
}
- var err error
- runner.ContainerID, err = runner.Docker.CreateContainer(&runner.ContainerConfig, "", nil)
+ createdBody, err := runner.Docker.ContainerCreate(context.TODO(), &runner.ContainerConfig, &runner.HostConfig, nil, runner.Container.UUID)
if err != nil {
return fmt.Errorf("While creating container: %v", err)
}
- runner.HostConfig = dockerclient.HostConfig{
- Binds: runner.Binds,
- CgroupParent: runner.setCgroupParent,
- LogConfig: dockerclient.LogConfig{
- Type: "none",
- },
- }
+ runner.ContainerID = createdBody.ID
return runner.AttachStreams()
}
if runner.cCancelled {
return ErrCancelled
}
- err := runner.Docker.StartContainer(runner.ContainerID, &runner.HostConfig)
+ err := runner.Docker.ContainerStart(context.TODO(), runner.ContainerID,
+ dockertypes.ContainerStartOptions{})
if err != nil {
return fmt.Errorf("could not start container: %v", err)
}
func (runner *ContainerRunner) WaitFinish() error {
runner.CrunchLog.Print("Waiting for container to finish")
- waitDocker := runner.Docker.Wait(runner.ContainerID)
+ waitDocker, err := runner.Docker.ContainerWait(context.TODO(), runner.ContainerID)
+ if err != nil {
+ return fmt.Errorf("container wait: %v", err)
+ }
+
+ runner.CrunchLog.Printf("Container exited with code: %v", waitDocker)
+ code := int(waitDocker)
+ runner.ExitCode = &code
+
waitMount := runner.ArvMountExit
- for waitDocker != nil {
- select {
- case err := <-waitMount:
- runner.CrunchLog.Printf("arv-mount exited before container finished: %v", err)
- waitMount = nil
- runner.stop()
- case wr := <-waitDocker:
- if wr.Error != nil {
- return fmt.Errorf("While waiting for container to finish: %v", wr.Error)
- }
- runner.ExitCode = &wr.ExitCode
- waitDocker = nil
- }
+ select {
+ case err := <-waitMount:
+ runner.CrunchLog.Printf("arv-mount exited before container finished: %v", err)
+ waitMount = nil
+ runner.stop()
+ default:
}
// wait for stdout/stderr to complete
cgroupParent := flag.String("cgroup-parent", "docker", "name of container's parent cgroup (ignored if -cgroup-parent-subsystem is used)")
cgroupParentSubsystem := flag.String("cgroup-parent-subsystem", "", "use current cgroup for given subsystem as parent cgroup for container")
caCertsPath := flag.String("ca-certs", "", "Path to TLS root certificates")
+ enableNetwork := flag.String("container-enable-networking", "default",
+ `Specify whether networking should be enabled for the container. One of 'default' or 'always':
+ default: only enable networking if the container requests it.
+ always: containers always have networking enabled
+ `)
+ networkMode := flag.String("container-network-mode", "default",
+ `Set networking mode for the container. Corresponds to Docker network mode (--net).
+ `)
flag.Parse()
containerId := flag.Arg(0)
}
kc.Retries = 4
- var docker *dockerclient.DockerClient
- docker, err = dockerclient.NewDockerClient("unix:///var/run/docker.sock", nil)
+ var docker *dockerclient.Client
+ // API version 1.21 corresponds to Docker 1.9, which is currently the
+ // minimum version we want to support.
+ docker, err = dockerclient.NewClient(dockerclient.DefaultDockerHost, "1.21", nil, nil)
if err != nil {
log.Fatalf("%s: %v", containerId, err)
}
- cr := NewContainerRunner(api, kc, docker, containerId)
+ dockerClientProxy := ThinDockerClientProxy{Docker: docker}
+
+ cr := NewContainerRunner(api, kc, dockerClientProxy, containerId)
cr.statInterval = *statInterval
cr.cgroupRoot = *cgroupRoot
cr.expectCgroupParent = *cgroupParent
+ cr.enableNetwork = *enableNetwork
+ cr.networkMode = *networkMode
if *cgroupParentSubsystem != "" {
p := findCgroup(*cgroupParentSubsystem)
cr.setCgroupParent = p
package main
import (
+ "bufio"
"bytes"
+ "context"
"crypto/md5"
"encoding/json"
"errors"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
"git.curoverse.com/arvados.git/sdk/go/keepclient"
"git.curoverse.com/arvados.git/sdk/go/manifest"
- "github.com/curoverse/dockerclient"
+
+ dockertypes "github.com/docker/docker/api/types"
+ dockercontainer "github.com/docker/docker/api/types/container"
+ dockernetwork "github.com/docker/docker/api/types/network"
. "gopkg.in/check.v1"
)
logReader io.ReadCloser
logWriter io.WriteCloser
fn func(t *TestDockerClient)
- finish chan dockerclient.WaitResult
+ finish int
stop chan bool
cwd string
env []string
api *ArvTestClient
}
-func NewTestDockerClient() *TestDockerClient {
+func NewTestDockerClient(exitCode int) *TestDockerClient {
t := &TestDockerClient{}
t.logReader, t.logWriter = io.Pipe()
- t.finish = make(chan dockerclient.WaitResult)
+ t.finish = exitCode
t.stop = make(chan bool)
t.cwd = "/"
return t
}
-func (t *TestDockerClient) StopContainer(id string, timeout int) error {
- t.stop <- true
- return nil
+func (t *TestDockerClient) ContainerAttach(ctx context.Context, container string, options dockertypes.ContainerAttachOptions) (dockertypes.HijackedResponse, error) {
+ return dockertypes.HijackedResponse{Reader: bufio.NewReader(t.logReader)}, nil
}
-func (t *TestDockerClient) InspectImage(id string) (*dockerclient.ImageInfo, error) {
- if t.imageLoaded == id {
- return &dockerclient.ImageInfo{}, nil
- } else {
- return nil, errors.New("")
- }
-}
-
-func (t *TestDockerClient) LoadImage(reader io.Reader) error {
- _, err := io.Copy(ioutil.Discard, reader)
- if err != nil {
- return err
- } else {
- t.imageLoaded = hwImageId
- return nil
- }
-}
-
-func (t *TestDockerClient) CreateContainer(config *dockerclient.ContainerConfig, name string, authConfig *dockerclient.AuthConfig) (string, error) {
+func (t *TestDockerClient) ContainerCreate(ctx context.Context, config *dockercontainer.Config, hostConfig *dockercontainer.HostConfig, networkingConfig *dockernetwork.NetworkingConfig, containerName string) (dockercontainer.ContainerCreateCreatedBody, error) {
if config.WorkingDir != "" {
t.cwd = config.WorkingDir
}
t.env = config.Env
- return "abcde", nil
+ return dockercontainer.ContainerCreateCreatedBody{ID: "abcde"}, nil
}
-func (t *TestDockerClient) StartContainer(id string, config *dockerclient.HostConfig) error {
- if id == "abcde" {
+func (t *TestDockerClient) ContainerStart(ctx context.Context, container string, options dockertypes.ContainerStartOptions) error {
+ if container == "abcde" {
go t.fn(t)
return nil
} else {
}
}
-func (t *TestDockerClient) AttachContainer(id string, options *dockerclient.AttachOptions) (io.ReadCloser, error) {
- return t.logReader, nil
+func (t *TestDockerClient) ContainerStop(ctx context.Context, container string, timeout *time.Duration) error {
+ t.stop <- true
+ return nil
+}
+
+func (t *TestDockerClient) ContainerWait(ctx context.Context, container string) (int64, error) {
+ return int64(t.finish), nil
+}
+
+func (t *TestDockerClient) ImageInspectWithRaw(ctx context.Context, image string) (dockertypes.ImageInspect, []byte, error) {
+ if t.imageLoaded == image {
+ return dockertypes.ImageInspect{}, nil, nil
+ } else {
+ return dockertypes.ImageInspect{}, nil, errors.New("")
+ }
}
-func (t *TestDockerClient) Wait(id string) <-chan dockerclient.WaitResult {
- return t.finish
+func (t *TestDockerClient) ImageLoad(ctx context.Context, input io.Reader, quiet bool) (dockertypes.ImageLoadResponse, error) {
+ _, err := io.Copy(ioutil.Discard, input)
+ if err != nil {
+ return dockertypes.ImageLoadResponse{}, err
+ } else {
+ t.imageLoaded = hwImageId
+ return dockertypes.ImageLoadResponse{Body: ioutil.NopCloser(input)}, nil
+ }
}
-func (*TestDockerClient) RemoveImage(name string, force bool) ([]*dockerclient.ImageDelete, error) {
+func (*TestDockerClient) ImageRemove(ctx context.Context, image string, options dockertypes.ImageRemoveOptions) ([]dockertypes.ImageDeleteResponseItem, error) {
return nil, nil
}
func (s *TestSuite) TestLoadImage(c *C) {
kc := &KeepTestClient{}
- docker := NewTestDockerClient()
+ docker := NewTestDockerClient(0)
cr := NewContainerRunner(&ArvTestClient{}, kc, docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
- _, err := cr.Docker.RemoveImage(hwImageId, true)
+ _, err := cr.Docker.ImageRemove(nil, hwImageId, dockertypes.ImageRemoveOptions{})
- _, err = cr.Docker.InspectImage(hwImageId)
+ _, _, err = cr.Docker.ImageInspectWithRaw(nil, hwImageId)
c.Check(err, NotNil)
cr.Container.ContainerImage = hwPDH
c.Check(err, IsNil)
defer func() {
- cr.Docker.RemoveImage(hwImageId, true)
+ cr.Docker.ImageRemove(nil, hwImageId, dockertypes.ImageRemoveOptions{})
}()
c.Check(kc.Called, Equals, true)
c.Check(cr.ContainerConfig.Image, Equals, hwImageId)
- _, err = cr.Docker.InspectImage(hwImageId)
+ _, _, err = cr.Docker.ImageInspectWithRaw(nil, hwImageId)
c.Check(err, IsNil)
// (2) Test using image that's already loaded
func (s *TestSuite) TestLoadImageKeepError(c *C) {
// (2) Keep error
- docker := NewTestDockerClient()
+ docker := NewTestDockerClient(0)
cr := NewContainerRunner(&ArvTestClient{}, KeepErrorTestClient{}, docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
cr.Container.ContainerImage = hwPDH
func (s *TestSuite) TestLoadImageKeepReadError(c *C) {
	// (4) Keep read error
- docker := NewTestDockerClient()
+ docker := NewTestDockerClient(0)
cr := NewContainerRunner(&ArvTestClient{}, KeepReadErrorTestClient{}, docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
cr.Container.ContainerImage = hwPDH
}
func (s *TestSuite) TestRunContainer(c *C) {
- docker := NewTestDockerClient()
+ docker := NewTestDockerClient(0)
docker.fn = func(t *TestDockerClient) {
t.logWriter.Write(dockerLog(1, "Hello world\n"))
t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{}
}
cr := NewContainerRunner(&ArvTestClient{}, &KeepTestClient{}, docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
// Used by the TestFullRun*() tests below to DRY up boilerplate setup for a full
// dress rehearsal of the Run() function, starting from a JSON container record.
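// The exitCode argument is the wait status the stub docker client will report
// when the test's fn finishes.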
-func FullRunHelper(c *C, record string, extraMounts []string, fn func(t *TestDockerClient)) (api *ArvTestClient, cr *ContainerRunner, realTemp string) {
+func FullRunHelper(c *C, record string, extraMounts []string, exitCode int, fn func(t *TestDockerClient)) (api *ArvTestClient, cr *ContainerRunner, realTemp string) {
rec := arvados.Container{}
err := json.Unmarshal([]byte(record), &rec)
c.Check(err, IsNil)
- docker := NewTestDockerClient()
+ docker := NewTestDockerClient(exitCode)
docker.fn = fn
- docker.RemoveImage(hwImageId, true)
+ docker.ImageRemove(nil, hwImageId, dockertypes.ImageRemoveOptions{})
api = &ArvTestClient{Container: rec}
docker.api = api
"output_path": "/tmp",
"priority": 1,
"runtime_constraints": {}
-}`, nil, func(t *TestDockerClient) {
+}`, nil, 0, func(t *TestDockerClient) {
t.logWriter.Write(dockerLog(1, "hello world\n"))
t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{}
})
c.Check(api.CalledWith("container.exit_code", 0), NotNil)
"output_path": "/tmp",
"priority": 1,
"runtime_constraints": {}
- }`, nil, func(t *TestDockerClient) {
+ }`, nil, 0, func(t *TestDockerClient) {
time.Sleep(time.Second)
t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{}
})
c.Check(api.CalledWith("container.exit_code", 0), NotNil)
"output_path": "/tmp",
"priority": 1,
"runtime_constraints": {}
- }`, nil, func(t *TestDockerClient) {
- time.Sleep(time.Second)
- t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{}
- })
+ }`, nil, 0,
+ func(t *TestDockerClient) {
+ time.Sleep(time.Second)
+ t.logWriter.Close()
+ })
c.Check(api.CalledWith("container.exit_code", 0), NotNil)
c.Check(api.CalledWith("container.state", "Complete"), NotNil)
"output_path": "/tmp",
"priority": 1,
"runtime_constraints": {}
- }`, nil, func(t *TestDockerClient) {
- time.Sleep(time.Second)
- t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{}
- })
+ }`, nil, 0,
+ func(t *TestDockerClient) {
+ time.Sleep(time.Second)
+ t.logWriter.Close()
+ })
c.Check(api.CalledWith("container.exit_code", 0), NotNil)
c.Check(api.CalledWith("container.state", "Complete"), NotNil)
"output_path": "/tmp",
"priority": 1,
"runtime_constraints": {}
-}`, nil, func(t *TestDockerClient) {
+}`, nil, 1, func(t *TestDockerClient) {
t.logWriter.Write(dockerLog(1, "hello\n"))
t.logWriter.Write(dockerLog(2, "world\n"))
t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{ExitCode: 1}
})
final := api.CalledWith("container.state", "Complete")
"output_path": "/tmp",
"priority": 1,
"runtime_constraints": {}
-}`, nil, func(t *TestDockerClient) {
+}`, nil, 0, func(t *TestDockerClient) {
t.logWriter.Write(dockerLog(1, t.cwd+"\n"))
t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{ExitCode: 0}
})
c.Check(api.CalledWith("container.exit_code", 0), NotNil)
"output_path": "/tmp",
"priority": 1,
"runtime_constraints": {}
-}`, nil, func(t *TestDockerClient) {
+}`, nil, 0, func(t *TestDockerClient) {
t.logWriter.Write(dockerLog(1, t.cwd+"\n"))
t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{ExitCode: 0}
})
c.Check(api.CalledWith("container.exit_code", 0), NotNil)
err := json.Unmarshal([]byte(record), &rec)
c.Check(err, IsNil)
- docker := NewTestDockerClient()
+ docker := NewTestDockerClient(0)
docker.fn = func(t *TestDockerClient) {
<-t.stop
t.logWriter.Write(dockerLog(1, "foo\n"))
t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{ExitCode: 0}
}
- docker.RemoveImage(hwImageId, true)
+ docker.ImageRemove(nil, hwImageId, dockertypes.ImageRemoveOptions{})
api := &ArvTestClient{Container: rec}
cr := NewContainerRunner(api, &KeepTestClient{}, docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
"output_path": "/tmp",
"priority": 1,
"runtime_constraints": {}
-}`, nil, func(t *TestDockerClient) {
+}`, nil, 0, func(t *TestDockerClient) {
t.logWriter.Write(dockerLog(1, t.env[0][7:]+"\n"))
t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{ExitCode: 0}
})
c.Check(api.CalledWith("container.exit_code", 0), NotNil)
"runtime_constraints": {}
}`
- api, _, _ := FullRunHelper(c, helperRecord, nil, func(t *TestDockerClient) {
+ api, _, _ := FullRunHelper(c, helperRecord, nil, 0, func(t *TestDockerClient) {
t.logWriter.Write(dockerLog(1, t.env[0][7:]+"\n"))
t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{ExitCode: 0}
})
c.Check(api.CalledWith("container.exit_code", 0), NotNil)
err = json.Unmarshal([]byte(record), &rec)
c.Check(err, IsNil)
- docker := NewTestDockerClient()
+ docker := NewTestDockerClient(0)
docker.fn = fn
- docker.RemoveImage(hwImageId, true)
+ docker.ImageRemove(nil, hwImageId, dockertypes.ImageRemoveOptions{})
api = &ArvTestClient{Container: rec}
cr = NewContainerRunner(api, &KeepTestClient{}, docker, "zzzzz-zzzzz-zzzzzzzzzzzzzzz")
"output_path": "/tmp",
"priority": 1,
"runtime_constraints": {"API": true}
-}`, nil, func(t *TestDockerClient) {
+}`, nil, 0, func(t *TestDockerClient) {
t.logWriter.Write(dockerLog(1, t.env[1][17:]+"\n"))
t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{ExitCode: 0}
})
c.Check(api.CalledWith("container.exit_code", 0), NotNil)
"output_path": "/tmp",
"priority": 1,
"runtime_constraints": {"API": true}
-}`, nil, func(t *TestDockerClient) {
+}`, nil, 0, func(t *TestDockerClient) {
t.api.Container.Output = "d4ab34d3d4f8a72f5c4973051ae69fab+122"
t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{ExitCode: 0}
})
c.Check(api.CalledWith("container.exit_code", 0), NotNil)
extraMounts := []string{"a3e8f74c6f101eae01fa08bfb4e49b3a+54"}
- api, _, _ := FullRunHelper(c, helperRecord, extraMounts, func(t *TestDockerClient) {
+ api, _, _ := FullRunHelper(c, helperRecord, extraMounts, 0, func(t *TestDockerClient) {
t.logWriter.Write(dockerLog(1, t.env[0][7:]+"\n"))
t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{ExitCode: 0}
})
c.Check(api.CalledWith("container.exit_code", 0), NotNil)
"a0def87f80dd594d4675809e83bd4f15+367/subdir1/subdir2/file2_in_subdir2.txt",
}
- api, runner, realtemp := FullRunHelper(c, helperRecord, extraMounts, func(t *TestDockerClient) {
+ api, runner, realtemp := FullRunHelper(c, helperRecord, extraMounts, 0, func(t *TestDockerClient) {
t.logWriter.Write(dockerLog(1, t.env[0][7:]+"\n"))
t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{ExitCode: 0}
})
c.Check(runner.Binds, DeepEquals, []string{realtemp + "/2:/tmp",
"b0def87f80dd594d4675809e83bd4f15+367/subdir1/file2_in_subdir1.txt",
}
- api, _, _ := FullRunHelper(c, helperRecord, extraMounts, func(t *TestDockerClient) {
+ api, _, _ := FullRunHelper(c, helperRecord, extraMounts, 0, func(t *TestDockerClient) {
t.logWriter.Write(dockerLog(1, t.env[0][7:]+"\n"))
t.logWriter.Close()
- t.finish <- dockerclient.WaitResult{ExitCode: 0}
})
c.Check(api.CalledWith("container.exit_code", 0), NotNil)
Documentation=https://doc.arvados.org/
After=network.target
AssertPathExists=/etc/arvados/docker-cleaner/docker-cleaner.json
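+# A start limit interval of 0 disables restart rate-limiting; the directive
+# was renamed in systemd 230, so both spellings are given.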
+# systemd<230
+StartLimitInterval=0
+# systemd>=230
+StartLimitIntervalSec=0
[Service]
Type=simple
import arvados.commands._util as arv_cmd
from arvados_fuse import crunchstat
from arvados_fuse import *
+from arvados_fuse.unmount import unmount
from arvados_fuse._version import __version__
class ArgumentParser(argparse.ArgumentParser):
self.add_argument('--crunchstat-interval', type=float, help="Write stats to stderr every N seconds (default disabled)", default=0)
+ unmount = self.add_mutually_exclusive_group()
+ unmount.add_argument('--unmount', action='store_true', default=False,
+ help="Forcefully unmount the specified mountpoint (if it's a fuse mount) and exit.")
+ unmount.add_argument('--unmount-all', action='store_true', default=False,
+ help="Forcefully unmount every fuse mount at or below the specified mountpoint and exit.")
+ unmount.add_argument('--replace', action='store_true', default=False,
+ help="If a fuse mount is already present at mountpoint, forcefully unmount it before mounting")
self.add_argument('--unmount-timeout',
type=float, default=2.0,
help="Time to wait for graceful shutdown after --exec program exits and filesystem is unmounted")
class Mount(object):
def __init__(self, args, logger=logging.getLogger('arvados.arv-mount')):
+ self.daemon = False
self.logger = logger
self.args = args
self.listen_for_events = False
exit(1)
def __enter__(self):
+ if self.args.replace:
+ unmount(path=self.args.mountpoint,
+ timeout=self.args.unmount_timeout)
llfuse.init(self.operations, self.args.mountpoint, self._fuse_options())
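+        # Daemonize only after llfuse.init() has attached the mount
+        # (self.daemon is set by _run_standalone when --foreground is not
+        # given); files_preserve keeps every fd from 3 up open across the
+        # fork.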
+ if self.daemon:
+ daemon.DaemonContext(
+ working_directory=os.path.dirname(self.args.mountpoint),
+ files_preserve=range(
+ 3, resource.getrlimit(resource.RLIMIT_NOFILE)[1])
+ ).open()
if self.listen_for_events and not self.args.disable_event_listening:
self.operations.listen_for_events()
self.llfuse_thread = threading.Thread(None, lambda: self._llfuse_main())
self.args.unmount_timeout)
def run(self):
- if self.args.exec_args:
+ if self.args.unmount or self.args.unmount_all:
+ unmount(path=self.args.mountpoint,
+ timeout=self.args.unmount_timeout,
+ recursive=self.args.unmount_all)
+ elif self.args.exec_args:
self._run_exec()
else:
self._run_standalone()
def _run_standalone(self):
try:
- llfuse.init(self.operations, self.args.mountpoint, self._fuse_options())
-
- if not self.args.foreground:
- self.daemon_ctx = daemon.DaemonContext(
- working_directory=os.path.dirname(self.args.mountpoint),
- files_preserve=range(
- 3, resource.getrlimit(resource.RLIMIT_NOFILE)[1]))
- self.daemon_ctx.open()
-
- # Subscribe to change events from API server
- if self.listen_for_events and not self.args.disable_event_listening:
- self.operations.listen_for_events()
-
- self._llfuse_main()
+ self.daemon = not self.args.foreground
+ with self:
+ self.llfuse_thread.join(timeout=None)
except Exception as e:
self.logger.exception('arv-mount: exception during mount: %s', e)
exit(getattr(e, 'errno', 1))
--- /dev/null
+import collections
+import errno
+import os
+import subprocess
+import time
+
+
+MountInfo = collections.namedtuple(
+ 'MountInfo', ['is_fuse', 'major', 'minor', 'mnttype', 'path'])
+
+
+def mountinfo():
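+    """Parse /proc/self/mountinfo into a list of MountInfo tuples.
+
+    Each tuple records whether the mount is a fuse mount, the device
+    major/minor numbers, the filesystem type, and the mount path.
+    """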
+ mi = []
+ with open('/proc/self/mountinfo') as f:
+ for m in f.readlines():
+ mntid, pmntid, dev, root, path, extra = m.split(" ", 5)
+ mnttype = extra.split(" - ")[1].split(" ", 1)[0]
+ major, minor = dev.split(":")
+ mi.append(MountInfo(
+ is_fuse=(mnttype == "fuse" or mnttype.startswith("fuse.")),
+ major=major,
+ minor=minor,
+ mnttype=mnttype,
+ path=path,
+ ))
+ return mi
+
+
+def unmount(path, timeout=10, recursive=False):
+ """Unmount the fuse mount at path.
+
+ Unmounting is done by writing 1 to the "abort" control file in
+ sysfs to kill the fuse driver process, then executing "fusermount
+ -u -z" to detach the mount point, and repeating these steps until
+ the mount is no longer listed in /proc/self/mountinfo.
+
+ This procedure should enable a non-root user to reliably unmount
+ their own fuse filesystem without risk of deadlock.
+
+ Returns True if unmounting was successful, False if it wasn't a
+ fuse mount at all. Raises an exception if it cannot be unmounted.
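+
+    A brief usage sketch (paths are illustrative):
+
+        unmount('/home/me/keep', timeout=10)     # detach one fuse mount
+        unmount('/home/me/mnt', recursive=True)  # detach a whole subtree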
+ """
+
+ path = os.path.realpath(path)
+
+ if recursive:
+ paths = []
+ for m in mountinfo():
+ if m.path == path or m.path.startswith(path+"/"):
+ paths.append(m.path)
+ if not m.is_fuse:
+ raise Exception(
+ "cannot unmount {}: non-fuse mountpoint {}".format(
+ path, m))
+ for path in sorted(paths, key=len, reverse=True):
+ unmount(path, timeout=timeout, recursive=False)
+ return len(paths) > 0
+
+ was_mounted = False
+ attempted = False
+ if timeout is None:
+ deadline = None
+ else:
+ deadline = time.time() + timeout
+
+ while True:
+ mounted = False
+ for m in mountinfo():
+ if m.is_fuse:
+ try:
+ if os.path.realpath(m.path) == path:
+ was_mounted = True
+ mounted = True
+ break
+ except OSError:
+ continue
+ if not mounted:
+ return was_mounted
+
+ if attempted:
+ delay = 1
+ if deadline:
+ delay = min(delay, deadline - time.time())
+ if delay <= 0:
+ raise Exception("timed out")
+ time.sleep(delay)
+
+ try:
+ with open('/sys/fs/fuse/connections/{}/abort'.format(m.minor),
+ 'w') as f:
+ f.write("1")
+ except OSError as e:
+ if e.errno != errno.ENOENT:
+ raise
+
+ attempted = True
+ try:
+ subprocess.check_call(["fusermount", "-u", "-z", path])
+ except subprocess.CalledProcessError:
+ pass
--- /dev/null
+import subprocess
+
+from integration_test import IntegrationTest
+
+
+class CrunchstatTest(IntegrationTest):
+ def test_crunchstat(self):
+ output = subprocess.check_output(
+ ['./bin/arv-mount',
+ '--crunchstat-interval', '1',
+ self.mnt,
+ '--exec', 'echo', 'ok'])
+ self.assertEqual("ok\n", output)
-import arvados
-import arvados.safeapi
-import arvados_fuse as fuse
-import glob
import json
import llfuse
+import logging
+import mock
import os
-import shutil
import subprocess
-import sys
-import tempfile
-import threading
import time
import unittest
-import logging
-import multiprocessing
+
+import arvados
+import arvados_fuse as fuse
import run_test_server
-import mock
-import re
from mount_test_base import MountTestBase
--- /dev/null
+import os
+import subprocess
+import time
+
+from integration_test import IntegrationTest
+
+class UnmountTest(IntegrationTest):
+ def setUp(self):
+ super(UnmountTest, self).setUp()
+ self.tmp = self.mnt
+ self.to_delete = []
+
+ def tearDown(self):
+ for d in self.to_delete:
+ os.rmdir(d)
+ super(UnmountTest, self).tearDown()
+
+ def test_replace(self):
+ subprocess.check_call(
+ ['./bin/arv-mount', '--subtype', 'test', '--replace',
+ self.mnt])
+ subprocess.check_call(
+ ['./bin/arv-mount', '--subtype', 'test', '--replace',
+ '--unmount-timeout', '10',
+ self.mnt])
+ subprocess.check_call(
+ ['./bin/arv-mount', '--subtype', 'test', '--replace',
+ '--unmount-timeout', '10',
+ self.mnt,
+ '--exec', 'true'])
+ for m in subprocess.check_output(['mount']).splitlines():
+ self.assertNotIn(' '+self.mnt+' ', m)
+
+ def _mounted(self, mounts):
+ all_mounts = subprocess.check_output(['mount', '-t', 'fuse.test'])
+ return [m for m in mounts
+ if ' '+m+' ' in all_mounts]
+
+ def test_unmount_children(self):
+ for d in ['foo', 'foo/bar', 'bar']:
+ mnt = self.tmp+'/'+d
+ os.mkdir(mnt)
+ self.to_delete.insert(0, mnt)
+ mounts = []
+ for d in ['bar', 'foo/bar']:
+ mnt = self.tmp+'/'+d
+ mounts.append(mnt)
+ subprocess.check_call(
+ ['./bin/arv-mount', '--subtype', 'test', mnt])
+
+ # Wait for mounts to attach
+ deadline = time.time() + 10
+ while self._mounted(mounts) != mounts:
+ time.sleep(0.1)
+ self.assertLess(time.time(), deadline)
+
+ self.assertEqual(mounts, self._mounted(mounts))
+ subprocess.check_call(['./bin/arv-mount', '--unmount', self.tmp])
+ self.assertEqual(mounts, self._mounted(mounts))
+ subprocess.check_call(['./bin/arv-mount', '--unmount-all', self.tmp])
+ self.assertEqual([], self._mounted(mounts))
Documentation=https://doc.arvados.org/
After=network.target
AssertPathExists=/etc/arvados/keep-balance/keep-balance.yml
+# systemd<230
+StartLimitInterval=0
+# systemd>=230
+StartLimitIntervalSec=0
[Service]
Type=simple
Documentation=https://doc.arvados.org/
After=network.target
AssertPathExists=/etc/arvados/keep-web/keep-web.yml
+# systemd<230
+StartLimitInterval=0
+# systemd>=230
+StartLimitIntervalSec=0
[Service]
Type=notify
ExecStart=/usr/bin/keep-web
Restart=always
+RestartSec=1
[Install]
WantedBy=multi-user.target
Documentation=https://doc.arvados.org/
After=network.target
AssertPathExists=/etc/arvados/keepproxy/keepproxy.yml
+# systemd<230
+StartLimitInterval=0
+# systemd>=230
+StartLimitIntervalSec=0
[Service]
Type=notify
ExecStart=/usr/bin/keepproxy
Restart=always
+RestartSec=1
[Install]
WantedBy=multi-user.target
Documentation=https://doc.arvados.org/
After=network.target
AssertPathExists=/etc/arvados/keepstore/keepstore.yml
+# systemd<230
+StartLimitInterval=0
+# systemd>=230
+StartLimitIntervalSec=0
[Service]
Type=notify
ExecStart=/usr/bin/keepstore
Restart=always
+RestartSec=1
[Install]
WantedBy=multi-user.target
self.cloud_size.name)
self.cloud_node = self._cloud.create_node(self.cloud_size,
self.arvados_node)
- if not self.cloud_node.size:
- self.cloud_node.size = self.cloud_size
+
+        # The information included in the node size object we get from
+        # libcloud is inconsistent between cloud providers. Replace the
+        # libcloud NodeSize object with a compatible CloudSizeWrapper
+        # object, which merges the size info reported by the cloud with
+        # the size information from the configuration file.
+ self.cloud_node.size = self.cloud_size
+
self._logger.info("Cloud node %s created.", self.cloud_node.id)
self._later.update_arvados_node_properties()
if self.arvados_node is None:
return 'unpaired'
- # This node is indicated as non-functioning by the cloud
- if self._cloud.broken(self.cloud_node):
- return 'down'
-
state = self.arvados_node['crunch_worker_state']
# If state information is not available because it is missing or the
return (isinstance(exception, cls.CLOUD_ERRORS) or
type(exception) is Exception)
+ def destroy_node(self, cloud_node):
+ try:
+ return self.real.destroy_node(cloud_node)
+ except self.CLOUD_ERRORS as destroy_error:
+            # Sometimes the destroy node request succeeds on the cloud side,
+            # but the API call times out and raises an exception instead of
+            # reporting success. When that happens we get a noisy stack
+            # trace. Check whether the node is still on the node list: if it
+            # is gone, we can declare victory.
+ try:
+ self.search_for_now(cloud_node.id, 'list_nodes')
+ except ValueError:
+ # If we catch ValueError, that means search_for_now didn't find
+ # it, which means destroy_node actually succeeded.
+ return True
+ # The node is still on the list. Re-raise.
+ raise
+
# Now that we've defined all our own methods, delegate generic, public
# attributes of libcloud drivers that we haven't defined ourselves.
def _delegate_to_real(attr_name):
raise
def sync_node(self, cloud_node, arvados_node):
+ # Update the cloud node record to ensure we have the correct metadata
+ # fingerprint.
+ cloud_node = self.real.ex_get_node(cloud_node.name, cloud_node.extra['zone'])
+
# We can't store the FQDN on the name attribute or anything like it,
# because (a) names are static throughout the node's life (so FQDN
# isn't available because we don't know it at node creation time) and
self._find_metadata(metadata_items, 'hostname')['value'] = hostname
except KeyError:
metadata_items.append({'key': 'hostname', 'value': hostname})
- response = self.real.connection.async_request(
- '/zones/{}/instances/{}/setMetadata'.format(
- cloud_node.extra['zone'].name, cloud_node.name),
- method='POST', data=metadata_req)
- if not response.success():
- raise Exception("setMetadata error: {}".format(response.error))
+
+ self.real.ex_set_node_metadata(cloud_node, metadata_items)
@classmethod
def node_fqdn(cls, node):
def try_pairing(self):
for record in self.cloud_nodes.unpaired():
for arv_rec in self.arvados_nodes.unpaired():
- if record.actor.offer_arvados_pair(arv_rec.arvados_node).get():
+ if record.actor is not None and record.actor.offer_arvados_pair(arv_rec.arvados_node).get():
self._pair_nodes(record, arv_rec.arvados_node)
break
elif (nodes_wanted < 0) and self.booting:
self._later.stop_booting_node(size)
except Exception as e:
- self._logger.exception("while calculating nodes wanted for size %s", size)
+ self._logger.exception("while calculating nodes wanted for size %s", getattr(size, "id", "(id not available)"))
def _check_poll_freshness(orig_func):
"""Decorator to inhibit a method when poll information is stale.
@_check_poll_freshness
def node_can_shutdown(self, node_actor):
- if self._nodes_excess(node_actor.cloud_node.get().size) > 0:
- self._begin_node_shutdown(node_actor, cancellable=True)
- elif self.cloud_nodes.nodes.get(node_actor.cloud_node.get().id).arvados_node is None:
- # Node is unpaired, which means it probably exceeded its booting
- # grace period without a ping, so shut it down so we can boot a new
- # node in its place.
- self._begin_node_shutdown(node_actor, cancellable=False)
- elif node_actor.in_state('down').get():
- # Node is down and unlikely to come back.
- self._begin_node_shutdown(node_actor, cancellable=False)
+ try:
+ if self._nodes_excess(node_actor.cloud_node.get().size) > 0:
+ self._begin_node_shutdown(node_actor, cancellable=True)
+ elif self.cloud_nodes.nodes.get(node_actor.cloud_node.get().id).arvados_node is None:
+ # Node is unpaired, which means it probably exceeded its booting
+ # grace period without a ping, so shut it down so we can boot a new
+ # node in its place.
+ self._begin_node_shutdown(node_actor, cancellable=False)
+ elif node_actor.in_state('down').get():
+ # Node is down and unlikely to come back.
+ self._begin_node_shutdown(node_actor, cancellable=False)
+ except pykka.ActorDeadError as e:
+ # The monitor actor sends shutdown suggestions every time the
+ # node's state is updated, and these go into the daemon actor's
+ # message queue. It's possible that the node has already been shut
+ # down (which shuts down the node monitor actor). In that case,
+ # this message is stale and we'll get ActorDeadError when we try to
+ # access node_actor. Log the error.
+ self._logger.debug("ActorDeadError in node_can_shutdown: %s", e)
def node_finished_shutdown(self, shutdown_actor):
try:
cloud_node = testutil.cloud_node_mock(
2, metadata=start_metadata.copy(),
zone=testutil.cloud_object_mock('testzone'))
+ self.driver_mock().ex_get_node.return_value = cloud_node
driver = self.new_driver()
driver.sync_node(cloud_node, arv_node)
- args, kwargs = self.driver_mock().connection.async_request.call_args
- self.assertEqual('/zones/testzone/instances/2/setMetadata', args[0])
- for key in ['kind', 'fingerprint']:
- self.assertEqual(start_metadata[key], kwargs['data'][key])
+ args, kwargs = self.driver_mock().ex_set_node_metadata.call_args
+ self.assertEqual(cloud_node, args[0])
plain_metadata['hostname'] = 'compute1.zzzzz.arvadosapi.com'
self.assertEqual(
plain_metadata,
- {item['key']: item['value'] for item in kwargs['data']['items']})
+ {item['key']: item['value'] for item in args[1]})
def test_sync_node_updates_hostname_tag(self):
self.check_sync_node_updates_hostname_tag(
arv_node = testutil.arvados_node_mock(8)
cloud_node = testutil.cloud_node_mock(
9, metadata={}, zone=testutil.cloud_object_mock('failzone'))
- mock_response = self.driver_mock().connection.async_request()
- mock_response.success.return_value = False
- mock_response.error = 'sync error test'
+        self.driver_mock().ex_set_node_metadata.side_effect = (Exception('sync error test'),)
driver = self.new_driver()
with self.assertRaises(Exception) as err_check:
driver.sync_node(cloud_node, arv_node)
self.daemon.node_finished_shutdown(self.last_shutdown).get(self.TIMEOUT)
self.assertEqual(0, self.alive_monitor_count())
- def test_broken_node_not_counted(self):
- size = testutil.MockSize(8)
- cloud_node = testutil.cloud_node_mock(8, size=size)
- wishlist = [size]
- self.make_daemon([cloud_node], [testutil.arvados_node_mock(8)],
- wishlist, avail_sizes=[(size, {"cores":1})])
- self.assertEqual(1, self.alive_monitor_count())
- self.assertFalse(self.node_setup.start.called)
- monitor = self.monitor_list()[0].proxy()
- shutdown_proxy = self.node_shutdown.start().proxy
- shutdown_proxy().cloud_node.get.return_value = cloud_node
- shutdown_proxy().success.get.return_value = False
- self.cloud_factory().broken.return_value = True
- self.daemon.update_server_wishlist([]).get(self.TIMEOUT)
- self.daemon.node_can_shutdown(monitor).get(self.TIMEOUT)
- self.daemon.node_finished_shutdown(shutdown_proxy()).get(self.TIMEOUT)
- self.daemon.update_cloud_nodes([cloud_node]).get(self.TIMEOUT)
- self.daemon.update_server_wishlist(wishlist).get(self.TIMEOUT)
- self.stop_proxy(self.daemon)
- self.assertEqual(1, self.node_setup.start.call_count)
-
def test_nodes_shutting_down_replaced_below_max_nodes(self):
size = testutil.MockSize(6)
cloud_node = testutil.cloud_node_mock(6, size=size)
Documentation=https://doc.arvados.org/
After=network.target
AssertPathExists=/etc/arvados/ws/ws.yml
+# systemd<230
+StartLimitInterval=0
+# systemd>=230
+StartLimitIntervalSec=0
[Service]
Type=notify
ExecStart=/usr/bin/arvados-ws
Restart=always
+RestartSec=1
[Install]
WantedBy=multi-user.target